Introduce a new file to hold the ACPI-based parsing of NUMA information from the SRAT and SLIT tables.
SRAT contains the CPU ACPI ID to Proximity Domain mappings and the memory range to Proximity Domain mappings; SLIT describes the distances between NUMA nodes. Parse these two tables to obtain the mapping of NUMA nodes to CPUs and memory.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org --- arch/arm64/include/asm/acpi.h | 7 +- arch/arm64/include/asm/numa.h | 27 +++++-- arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/acpi_numa.c | 176 ++++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 2 + arch/arm64/mm/numa.c | 69 +++++++++++++---- drivers/acpi/processor_core.c | 5 ++ include/acpi/processor.h | 1 + 8 files changed, 266 insertions(+), 22 deletions(-) create mode 100644 arch/arm64/kernel/acpi_numa.c
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 483ff45..a0e6f74 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -88,7 +88,6 @@ static inline bool acpi_has_cpu_in_madt(void)
static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_smp_init_cpus(void); - #else static inline void disable_acpi(void) { } static inline bool acpi_psci_present(void) { return false; } @@ -96,4 +95,10 @@ static inline bool acpi_psci_use_hvc(void) { return false; } static inline void acpi_smp_init_cpus(void) { } #endif /* CONFIG_ACPI */
+#ifdef CONFIG_ACPI_NUMA +int arm64_acpi_numa_init(void); +#else +static inline int arm64_acpi_numa_init(void) { return -ENODEV; } +#endif /* CONFIG_ACPI_NUMA */ + #endif /*_ASM_ACPI_H*/ diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index e4c2ed0..8165bc9 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -1,5 +1,5 @@ -#ifndef _ASM_ARM64_NUMA_H -#define _ASM_ARM64_NUMA_H +#ifndef _ASM_NUMA_H +#define _ASM_NUMA_H
#include <linux/nodemask.h> #include <asm/topology.h> @@ -16,6 +16,14 @@ #define pcibus_to_node(node) 0 #define cpumask_of_pcibus(bus) 0
+struct node_cpu_hwid {
+	u32 node_id;	/* logical node containing this CPU */
+	u64 cpu_hwid;	/* MPIDR for this CPU */
+};
+
+extern struct node_cpu_hwid node_cpuid[NR_CPUS];
+extern nodemask_t numa_nodes_parsed __initdata;
+
 const struct cpumask *cpumask_of_node(int node);
 /* Mappings between node number and cpus on that node. */
 extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
@@ -23,13 +31,17 @@ extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 void __init arm64_numa_init(void);
 int __init numa_add_memblk(u32 nodeid, u64 start, u64 end);
 void numa_store_cpu_info(int cpu);
-void numa_set_node(int cpu, int node);
 void numa_clear_node(int cpu);
-void numa_add_cpu(int cpu);
-void numa_remove_cpu(int cpu);
+void __init build_cpu_to_node_map(void);
+void __init numa_set_distance(int from, int to, int distance);
 #else /* CONFIG_NUMA */
-static inline void arm64_numa_init(void);
+/* !CONFIG_NUMA: empty stubs so that callers need no #ifdef of their own. */
+static inline void arm64_numa_init(void) { }
 static inline void numa_store_cpu_info(int cpu) { }
-static inline void arm64_numa_init() { }
+static inline void __init build_cpu_to_node_map(void) { }
+static inline void __init numa_set_distance(int from, int to, int distance)
+{
+	return;
+}
 #endif /* CONFIG_NUMA */
-#endif /* _ASM_ARM64_NUMA_H */
+#endif /* _ASM_NUMA_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index ec576e6..c23c872 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -32,6 +32,7 @@ arm64-obj-$(CONFIG_KGDB) += kgdb.o
 arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
 arm64-obj-$(CONFIG_PCI) += pci.o
 arm64-obj-$(CONFIG_ACPI) += acpi.o
+arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
 arm64-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
obj-y += $(arm64-obj-y) vdso/
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
new file mode 100644
index 0000000..44de133
--- /dev/null
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -0,0 +1,217 @@
+/*
+ * ACPI 5.1 based NUMA setup for ARM64
+ * Lots of code was borrowed from arch/x86/mm/srat.c
+ *
+ * Copyright 2004 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2013-2014, Linaro Ltd.
+ *	Author: Hanjun Guo hanjun.guo@linaro.org
+ *
+ * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
+ *
+ * Called from acpi_numa_init while reading the SRAT and SLIT tables.
+ * Assumes all memory regions belonging to a single proximity domain
+ * are in one chunk. Holes between them will be included in the node.
+ */
+
+#define pr_fmt(fmt) "ACPI: SRAT: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitmap.h>
+#include <linux/bootmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include <acpi/processor.h>
+#include <asm/numa.h>
+
+/*
+ * SRAT parsing state: set to 1 when a GICC affinity entry is accepted and to
+ * -1 by bad_srat(); srat_disabled() treats any negative value as "ignore
+ * the SRAT".
+ */
+int acpi_numa __initdata;
+
+/*
+ * Map a proximity domain to a logical node id.  Callers treat a negative
+ * result as failure.
+ */
+static __init int setup_node(int pxm)
+{
+	return acpi_map_pxm_to_node(pxm);
+}
+
+/* Report that the SRAT is being ignored and latch that decision. */
+static __init void bad_srat(void)
+{
+	pr_err("SRAT not used.\n");
+	acpi_numa = -1;
+}
+
+/* Non-zero when acpi_numa is negative, i.e. the SRAT must not be used. */
+static __init inline int srat_disabled(void)
+{
+	return acpi_numa < 0;
+}
+
+/*
+ * Callback for SLIT parsing.
+ * It will get the distance information presented by SLIT
+ * and init the distance matrix of numa nodes
+ *
+ * slit->entry[] is read as a locality_count x locality_count row-major
+ * matrix; a locality whose pxm_to_node() is NUMA_NO_NODE is skipped.
+ */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+	int i, j;
+
+	for (i = 0; i < slit->locality_count; i++) {
+		const int from_node = pxm_to_node(i);
+
+		if (from_node == NUMA_NO_NODE)
+			continue;
+
+		for (j = 0; j < slit->locality_count; j++) {
+			const int to_node = pxm_to_node(j);
+
+			if (to_node == NUMA_NO_NODE)
+				continue;
+
+			numa_set_distance(from_node, to_node,
+				slit->entry[slit->locality_count * i + j]);
+		}
+	}
+}
+
+/*
+ * Callback for Proximity Domain -> ACPI processor UID mapping
+ *
+ * Records one (node, MPIDR) pair in node_cpuid[] for each enabled GICC
+ * affinity entry whose ACPI processor UID can be resolved through the MADT.
+ * A short entry or an unmappable proximity domain disables SRAT use.
+ */
+void __init
+acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
+{
+	int pxm, node;
+	int acpi_id;
+	int acpi_phys_id;
+	u64 mpidr;
+	static int cpus_in_srat;
+
+	if (srat_disabled())
+		return;
+	if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) {
+		bad_srat();
+		return;
+	}
+	if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
+		return;
+
+	if (cpus_in_srat >= ARRAY_SIZE(node_cpuid)) {
+		/* ARRAY_SIZE() is a size_t expression, hence %zu */
+		printk_once(KERN_WARNING
+			"node_cpuid[%zu] is too small, may not be able to use all cpus\n",
+			ARRAY_SIZE(node_cpuid));
+		return;
+	}
+
+	pxm = pa->proximity_domain;
+	node = setup_node(pxm);
+	if (node < 0) {
+		pr_err("Too many proximity domains %x\n", pxm);
+		bad_srat();
+		return;
+	}
+
+	acpi_id = pa->acpi_processor_uid;
+	acpi_phys_id = acpi_get_phys_id_in_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+			acpi_id);
+	if (acpi_phys_id < 0) {
+		pr_info("PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", pxm, acpi_id);
+		return;
+	}
+
+	/*
+	 * MPIDR was packed into 32 bits: bits [31:24] belong at [39:32] and
+	 * bits [23:0] stay in place.  Widen to 64 bits before shifting, or the
+	 * upper byte is shifted out of a 32-bit value and lost.
+	 */
+	mpidr = ((u64)(acpi_phys_id & 0xff000000) << 8) |
+		(acpi_phys_id & 0x00ffffff);
+	node_cpuid[cpus_in_srat].node_id = node;
+	node_cpuid[cpus_in_srat].cpu_hwid = mpidr;
+	node_set(node, numa_nodes_parsed);
+	acpi_numa = 1;
+	cpus_in_srat++;
+	pr_info("PXM %u -> MPIDR 0x%Lx -> Node %u\n", pxm, mpidr, node);
+}
+
+/*
+ * Callback for parsing of the Proximity Domain <-> Memory Area mappings
+ *
+ * Registers [base_address, base_address + length) with the node of the
+ * entry's proximity domain.  Returns 0 on success and -EINVAL when the entry
+ * is skipped or rejected; a malformed entry also disables SRAT use.
+ */
+int __init
+acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
+{
+	u64 start, end;
+	int node, pxm;
+
+	if (srat_disabled())
+		goto out_err;
+	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
+		goto out_err_bad_srat;
+	/* A memory affinity entry without the Enabled flag must be ignored */
+	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
+		goto out_err;
+
+	start = ma->base_address;
+	end = start + ma->length;
+	pxm = ma->proximity_domain;
+
+	node = setup_node(pxm);
+	if (node < 0) {
+		/* pr_fmt() already supplies the "ACPI: SRAT: " prefix */
+		pr_err("Too many proximity domains.\n");
+		goto out_err_bad_srat;
+	}
+
+	if (numa_add_memblk(node, start, end) < 0)
+		goto out_err_bad_srat;
+
+	node_set(node, numa_nodes_parsed);
+
+	pr_info("Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
+		node, pxm,
+		(unsigned long long) start, (unsigned long long) end - 1);
+
+	return 0;
+out_err_bad_srat:
+	bad_srat();
+out_err:
+	return -EINVAL;
+}
+
+/* Intentionally empty: no arch-specific fixup is performed here. */
+void __init acpi_numa_arch_fixup(void) {}
+
+/*
+ * Run acpi_numa_init() and report whether its result may be used: returns
+ * its negative error code, -EINVAL if the SRAT was rejected, else 0.
+ */
+int __init arm64_acpi_numa_init(void)
+{
+	int ret;
+
+	ret = acpi_numa_init();
+	if (ret < 0)
+		return ret;
+	return srat_disabled() ? -EINVAL : 0;
+}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 2ba96b2..d5bd782 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -414,6 +414,8 @@ void __init setup_arch(char **cmdline_p)
 		acpi_smp_init_cpus();
 	}
+ build_cpu_to_node_map(); + #ifdef CONFIG_SMP smp_build_mpidr_hash(); #endif diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index dbe76a3..7bead6a 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -17,6 +17,7 @@ * along with this program. If not, see http://www.gnu.org/licenses/. */
+#include <linux/acpi.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> @@ -31,7 +32,9 @@ #include <linux/topology.h> #include <linux/of.h> #include <linux/of_fdt.h> + #include <asm/smp_plat.h> +#include <asm/acpi.h>
int __initdata numa_off; nodemask_t numa_nodes_parsed __initdata; @@ -39,14 +42,10 @@ static int numa_distance_cnt; static u8 *numa_distance;
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; - -static struct { - u32 node_id; - u64 cpu_hwid; -}cpu_info[NR_CPUS]; - EXPORT_SYMBOL(node_data);
+struct node_cpu_hwid node_cpuid[NR_CPUS]; + static struct numa_meminfo numa_meminfo;
static __init int numa_setup(char *opt) @@ -85,7 +84,6 @@ const struct cpumask *cpumask_of_node(int node) } EXPORT_SYMBOL(cpumask_of_node);
- int cpu_to_node_map[NR_CPUS]; EXPORT_SYMBOL(cpu_to_node_map);
@@ -94,6 +92,62 @@ void numa_clear_node(int cpu)
 	cpu_to_node_map[cpu] = NUMA_NO_NODE;
 }
 
+/**
+ * map_cpu_to_node - record which NUMA node a logical CPU belongs to
+ * @cpu: logical CPU number
+ * @nid: node id for @cpu, or a negative value when none is known
+ *
+ * A negative @nid only resets cpu_to_node_map[@cpu] to 0.  Otherwise an
+ * offline @nid is replaced by first_online_node and the result is stored in
+ * cpu_to_node_map[], node_to_cpumask_map[] and the numa_node of @cpu.
+ */
+void map_cpu_to_node(int cpu, int nid)
+{
+	if (nid < 0) { /* just initialize by zero */
+		cpu_to_node_map[cpu] = 0;
+		return;
+	}
+
+	if (!node_online(nid))
+		nid = first_online_node; /* FIXME: find nearest node instead */
+
+	cpu_to_node_map[cpu] = nid;
+	cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+	/*
+	 * set_numa_node() would update the CPU running this code instead of
+	 * @cpu, and build_cpu_to_node_map() calls us for every possible CPU.
+	 */
+	set_cpu_numa_node(cpu, nid);
+}
+
+/**
+ * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
+ *
+ * Build cpu to node mapping and initialize the per node cpu masks using
+ * info from the node_cpuid array handed to us by ACPI or DT.
+ *
+ * Each possible CPU is looked up in node_cpuid[] by hardware id; a CPU
+ * without a matching entry is handed to map_cpu_to_node() as NUMA_NO_NODE.
+ */
+void __init build_cpu_to_node_map(void)
+{
+	int cpu, i, node;
+
+	for (node = 0; node < MAX_NUMNODES; node++)
+		cpumask_clear(node_to_cpumask_map[node]);
+
+	for_each_possible_cpu(cpu) {
+		node = NUMA_NO_NODE;
+		for (i = 0; i < NR_CPUS; i++) {
+			if (cpu_logical_map(cpu) == node_cpuid[i].cpu_hwid) {
+				node = node_cpuid[i].node_id;
+				break;
+			}
+		}
+		map_cpu_to_node(cpu, node);
+	}
+}
+
 /*
  * Allocate node_to_cpumask_map based on number of available nodes
  * Requires node_possible_map to be valid.
@@ -122,9 +160,11 @@ void __init setup_node_to_cpumask_map(void) */ void numa_store_cpu_info(cpu) { - cpu_to_node_map[cpu] = cpu_info[cpu].node_id; + if (!acpi_disabled) /* TODO: should be updated with new patches */ + return; + cpu_to_node_map[cpu] = node_cpuid[cpu].node_id; /* mapping of MPIDR/hwid, node and logical id */ - cpu_info[cpu].cpu_hwid = cpu_logical_map(cpu); + node_cpuid[cpu].cpu_hwid = cpu_logical_map(cpu); cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node_map[cpu]]); set_numa_node(cpu_to_node_map[cpu]); set_numa_mem(local_memory_node(cpu_to_node_map[cpu])); @@ -576,7 +616,7 @@ int __init early_init_dt_scan_numa_map(unsigned long node, const char *uname, cpue = dt_mem_next_cell(nr_size_cells, &numa_prop); node_id = dt_mem_next_cell(nr_size_cells, &numa_prop); for (j = cpus; j <= cpue; j++) - cpu_info[j].node_id = node_id; + node_cpuid[j].node_id = node_id; pr_debug("NUMA-DT: start cpu = %d end cpu = %d node-id %d\n", cpus, cpue, node_id); } @@ -608,6 +648,9 @@ int __init early_init_dt_scan_numa_map(unsigned long node, const char *uname, /* DT node mapping is done already early_init_dt_scan_memory */ static inline int __init arm64_dt_numa_init(void) { + if (!IS_ENABLED(CONFIG_ARM64_DT_NUMA)) + return -ENXIO; + return of_scan_flat_dt(early_init_dt_scan_numa_map, NULL); }
@@ -621,10 +664,10 @@ static inline int __init arm64_dt_numa_init(void) void __init arm64_numa_init(void) { if (!numa_off) { -#ifdef CONFIG_ARM64_DT_NUMA - if (!numa_init(arm64_dt_numa_init)) + if (acpi_disabled && !numa_init(arm64_dt_numa_init)) + return; + else if (!numa_init(arm64_acpi_numa_init)) return; -#endif }
numa_init(dummy_numa_init); diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 5c84e0d..b12fc7a 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -175,6 +175,11 @@ exit: return apic_id; }
+int acpi_get_phys_id_in_madt(int type, u32 acpi_id) +{ + return map_madt_entry(type, acpi_id); +} + int acpi_get_apicid(acpi_handle handle, int type, u32 acpi_id) { int apic_id; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 9b9b6f2..d88c985 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -317,6 +317,7 @@ void acpi_processor_set_pdc(acpi_handle handle); int acpi_get_apicid(acpi_handle, int type, u32 acpi_id); int acpi_map_cpuid(int apic_id, u32 acpi_id); int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id); +int acpi_get_phys_id_in_madt(int type, u32 acpi_id);
/* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr);