Introduce a new file to hold ACPI-based NUMA information parsing from the SRAT and SLIT tables.
SRAT provides the mappings from CPU ACPI IDs to proximity domains and from memory ranges to proximity domains; SLIT provides the distance information between NUMA nodes. Parse these two tables to obtain the mapping of NUMA nodes to CPUs and memory.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org --- arch/arm64/include/asm/acpi.h | 6 ++ arch/arm64/include/asm/numa.h | 13 ++- arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/acpi_numa.c | 208 ++++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 2 + arch/arm64/mm/numa.c | 55 ++++++++++- 6 files changed, 281 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/kernel/acpi_numa.c
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index a720a61..86567b1 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -93,4 +93,10 @@ static inline bool acpi_psci_use_hvc(void) { return false; } static inline void acpi_init_cpus(void) { } #endif /* CONFIG_ACPI */
+#ifdef CONFIG_ACPI_NUMA +int arm64_acpi_numa_init(void); +#else +static inline int arm64_acpi_numa_init(void) { return -ENODEV; } +#endif /* CONFIG_ACPI_NUMA */ + #endif /*_ASM_ACPI_H*/ diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 0962075..09697aa 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -1,5 +1,5 @@ -#ifndef _ASM_ARM64_NUMA_H -#define _ASM_ARM64_NUMA_H +#ifndef _ASM_NUMA_H +#define _ASM_NUMA_H
#include <linux/nodemask.h> #include <asm/topology.h> @@ -21,6 +21,9 @@ struct __node_cpu_hwid { u64 cpu_hwid; /* MPIDR for this CPU */ };
+extern struct __node_cpu_hwid node_cpu_hwid[NR_CPUS]; +extern nodemask_t numa_nodes_parsed __initdata; + const struct cpumask *cpumask_of_node(int node); /* Mappings between node number and cpus on that node. */ extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; @@ -32,11 +35,17 @@ void numa_set_node(int cpu, int node); void numa_clear_node(int cpu); void numa_add_cpu(int cpu); void numa_remove_cpu(int cpu); +void __init build_cpu_to_node_map(void); void __init numa_set_distance(int from, int to, int distance); int dt_get_cpu_node_id(int cpu); int __init arm64_dt_numa_init(void); #else /* CONFIG_NUMA */ static inline void numa_store_cpu_info(int cpu) { } static inline void arm64_numa_init(void) { } +static inline void build_cpu_to_node_map(void) { } +static inline void numa_set_distance(int from, int to, int distance) +{ + return; +} #endif /* CONFIG_NUMA */ -#endif /* _ASM_ARM64_NUMA_H */ +#endif /* _ASM_NUMA_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2d98872..6530dc0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -36,6 +36,7 @@ arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o acpi_sleep.o arm64-obj-$(CONFIG_ARM64_DT_NUMA) += dt_numa.o +arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c new file mode 100644 index 0000000..aba86dc --- /dev/null +++ b/arch/arm64/kernel/acpi_numa.c @@ -0,0 +1,208 @@ +/* + * ACPI 5.1 based NUMA setup for ARM64 + * Lots of code was borrowed from arch/x86/mm/srat.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2014, Linaro Ltd. + * Author: Hanjun Guo hanjun.guo@linaro.org + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: SRAT: " fmt + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/bootmem.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/topology.h> + +#include <acpi/processor.h> +#include <asm/numa.h> + +int acpi_numa __initdata; + +static __init int setup_node(int pxm) +{ + return acpi_map_pxm_to_node(pxm); +} + +static __init void bad_srat(void) +{ + pr_err("SRAT not used.\n"); + acpi_numa = -1; +} + +static __init inline int srat_disabled(void) +{ + return acpi_numa < 0; +} + +/* + * Callback for SLIT parsing. 
+ * It will get the distance information presented by SLIT + * and init the distance matrix of numa nodes + */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ + int i, j; + + for (i = 0; i < slit->locality_count; i++) { + const int from_node = pxm_to_node(i); + + if (from_node == NUMA_NO_NODE) + continue; + + for (j = 0; j < slit->locality_count; j++) { + const int to_node = pxm_to_node(j); + + if (to_node == NUMA_NO_NODE) + continue; + + numa_set_distance(from_node, to_node, + slit->entry[slit->locality_count * i + j]); + } + } +} + +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr) +{ + unsigned long madt_end, entry; + struct acpi_table_madt *madt; + acpi_size tbl_size; + + if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0, + (struct acpi_table_header **)&madt, &tbl_size))) + return -ENODEV; + + entry = (unsigned long)madt; + madt_end = entry + madt->header.length; + + /* Parse all entries looking for a match. */ + entry += sizeof(struct acpi_table_madt); + while (entry + sizeof(struct acpi_subtable_header) < madt_end) { + struct acpi_subtable_header *header = + (struct acpi_subtable_header *)entry; + + if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) { + struct acpi_madt_generic_interrupt *gicc = + container_of(header, + struct acpi_madt_generic_interrupt, header); + + if ((gicc->flags & ACPI_MADT_ENABLED) && + (gicc->uid == acpi_id)) { + *mpidr = gicc->arm_mpidr; + early_acpi_os_unmap_memory(madt, tbl_size); + return 0; + } + } + entry += header->length; + } + + early_acpi_os_unmap_memory(madt, tbl_size); + return -ENODEV; +} + +/* Callback for Proximity Domain -> ACPI processor UID mapping */ +void __init +acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) +{ + int pxm, node; + u64 mpidr; + static int cpus_in_srat; + + if (srat_disabled()) + return; + if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) { + bad_srat(); + return; + } + if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) + return; + + if 
(cpus_in_srat >= ARRAY_SIZE(node_cpu_hwid)) { + pr_warn_once("node_cpu_hwid[%ld] is too small, may not be able to use all cpus\n", + ARRAY_SIZE(node_cpu_hwid)); + return; + } + + pxm = pa->proximity_domain; + node = setup_node(pxm); + if (node < 0) { + pr_err("Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) { + pr_warn("PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", + pxm, pa->acpi_processor_uid); + bad_srat(); + return; + } + + node_cpu_hwid[cpus_in_srat].node_id = node; + node_cpu_hwid[cpus_in_srat].cpu_hwid = mpidr; + node_set(node, numa_nodes_parsed); + acpi_numa = 1; + cpus_in_srat++; + pr_info("PXM %d -> MPIDR 0x%Lx -> Node %d\n", pxm, mpidr, node); +} + +/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ +int __init +acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) +{ + u64 start, end; + int node, pxm; + + if (srat_disabled()) + goto out_err; + if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) + goto out_err_bad_srat; + + start = ma->base_address; + end = start + ma->length; + pxm = ma->proximity_domain; + + node = setup_node(pxm); + if (node < 0) { + printk(KERN_ERR "SRAT: Too many proximity domains.\n"); + goto out_err_bad_srat; + } + + if (numa_add_memblk(node, start, (end - start)) < 0) + goto out_err_bad_srat; + + node_set(node, numa_nodes_parsed); + + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", + node, pxm, + (unsigned long long) start, (unsigned long long) end - 1); + + return 0; +out_err_bad_srat: + bad_srat(); +out_err: + return -EINVAL; +} + +void __init acpi_numa_arch_fixup(void) {} + +int __init arm64_acpi_numa_init(void) +{ + int ret; + + ret = acpi_numa_init(); + if (ret < 0) + return ret; + return srat_disabled() ? 
-EINVAL : 0; +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index e69b532..f291f90 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -402,6 +402,8 @@ void __init setup_arch(char **cmdline_p) acpi_init_cpus(); }
+ build_cpu_to_node_map(); + #ifdef CONFIG_SMP smp_build_mpidr_hash(); #endif diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index da1d301..071b175 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -17,6 +17,7 @@ * along with this program. If not, see http://www.gnu.org/licenses/. */
+#include <linux/acpi.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> @@ -30,7 +31,9 @@ #include <linux/sched.h> #include <linux/topology.h> #include <linux/of.h> + #include <asm/smp_plat.h> +#include <asm/acpi.h>
int __initdata numa_off; nodemask_t numa_nodes_parsed __initdata; @@ -40,7 +43,7 @@ static u8 *numa_distance; struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data);
-static struct __node_cpu_hwid node_cpu_hwid[NR_CPUS]; +struct __node_cpu_hwid node_cpu_hwid[NR_CPUS]; static struct numa_meminfo numa_meminfo;
static __init int numa_setup(char *opt) @@ -58,6 +61,9 @@ early_param("numa", numa_setup); cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map);
+int cpu_to_node_map[NR_CPUS]; +EXPORT_SYMBOL(cpu_to_node_map); + /* * Returns a pointer to the bitmask of CPUs on Node 'node'. */ @@ -84,6 +90,46 @@ void numa_clear_node(int cpu) node_cpu_hwid[cpu].node_id = NUMA_NO_NODE; }
+void map_cpu_to_node(int cpu, int nid) +{ + if (nid < 0) { /* just initialize by zero */ + cpu_to_node_map[cpu] = 0; + return; + } + + if (!node_online(nid)) + nid = first_online_node; /* FIXME: find nearest node instead */ + + cpu_to_node_map[cpu] = nid; + cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); + set_numa_node(nid); +} + +/** + * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays + * + * Build cpu to node mapping and initialize the per node cpu masks using + * info from the node_cpuid array handed to us by ACPI or DT. + */ +void __init build_cpu_to_node_map(void) +{ + int cpu, i, node; + + for (node = 0; node < MAX_NUMNODES; node++) + cpumask_clear(node_to_cpumask_map[node]); + + for_each_possible_cpu(cpu) { + node = NUMA_NO_NODE; + for (i = 0; i < NR_CPUS; i++) { + if (cpu_logical_map(cpu) == node_cpu_hwid[i].cpu_hwid) { + node = node_cpu_hwid[i].node_id; + break; + } + } + map_cpu_to_node(cpu, node); + } +} + /* * Allocate node_to_cpumask_map based on number of available nodes * Requires node_possible_map to be valid. @@ -112,6 +158,9 @@ void __init setup_node_to_cpumask_map(void) */ void numa_store_cpu_info(int cpu) { + if (!acpi_disabled) /* TODO: should be updated with new patches */ + return; + if (IS_ENABLED(CONFIG_ARM64_DT_NUMA)) node_cpu_hwid[cpu].node_id = dt_get_cpu_node_id(cpu); else @@ -510,8 +559,10 @@ void __init arm64_numa_init(void) { int (*init_func)(void) = NULL;
- if (IS_ENABLED(CONFIG_ARM64_DT_NUMA)) + if (acpi_disabled && IS_ENABLED(CONFIG_ARM64_DT_NUMA)) init_func = arm64_dt_numa_init; + else + init_func = arm64_acpi_numa_init;
if (!numa_off && init_func) { if (!numa_init(init_func))