Power aware scheduling needs the cpu topology information to improve the cpu scheduler decision making. sched_mc and sched_smt will use the topology information.
For ARM64, we can get the topology from the MPIDR register, which defines the affinity of processors.
This patch is mainly based on arch/arm/kernel/topology.c written by Vincent Guittot, and replaces the topology array with a per-cpu variable.
v2: Make the changelog easier to understand. Use 'u64' for MPIDR value on arm64. Some typo fix.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org --- arch/arm64/Kconfig | 25 ++++++++ arch/arm64/include/asm/cputype.h | 11 ++++ arch/arm64/include/asm/topology.h | 35 +++++++++++ arch/arm64/kernel/Makefile | 5 +- arch/arm64/kernel/smp.c | 6 ++ arch/arm64/kernel/topology.c | 120 +++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c967b11..67c1c53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,31 @@ config SMP
If you don't know what to do here, say N.
+config ARM64_CPU_TOPOLOGY + bool "Support cpu topology definition" + depends on SMP && ARM64 + default y + help + Support ARM64 cpu topology definition. The MPIDR register defines + affinity between processors which is then used to describe the cpu + topology of an ARM64 System. + +config SCHED_MC + bool "Multi-core scheduler support" + depends on ARM64_CPU_TOPOLOGY + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +config SCHED_SMT + bool "SMT scheduler support" + depends on ARM64_CPU_TOPOLOGY + help + Improves the CPU scheduler's decision making when dealing with + MultiThreading at a cost of slightly increased overhead in some + places. If unsure say N here. + config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf27494..ef7e682 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -30,6 +30,17 @@
#define MPIDR_HWID_BITMASK 0xff00ffffff
+#define MPIDR_SMP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24) + +#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) + +#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK) + #define read_cpuid(reg) ({ \ u64 __val; \ asm("mrs %0, " reg : "=r" (__val)); \ diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 22c9421..06440d4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,43 @@ #ifndef _ASM_ARM64_TOPOLOGY_H #define _ASM_ARM64_TOPOLOGY_H
+#ifdef CONFIG_ARM64_CPU_TOPOLOGY + +#include <linux/cpumask.h> + +struct cputopo_arm64 { + int thread_id; + int core_id; + int socket_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; +}; + +DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology); + +#define cpu_topo(cpu) per_cpu(cpu_topology, cpu) + +#define topology_physical_package_id(cpu) (cpu_topo(cpu).socket_id) +#define topology_core_id(cpu) (cpu_topo(cpu).core_id) +#define topology_core_cpumask(cpu) (&cpu_topo(cpu).core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topo(cpu).thread_sibling) + +#define mc_capable() (cpu_topo(0).socket_id != -1) +#define smt_capable() (cpu_topo(0).thread_id != -1) + +void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); void arch_fix_phys_package_id(int num, u32 slot);
+#else + +static inline void arch_fix_phys_package_id(int num, u32 slot) {} +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { } + +#endif + #include <asm-generic/topology.h>
#endif /* _ASM_ARM64_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c5e29d5..a47c359 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY) += topology.o
obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) @@ -27,7 +28,3 @@ extra-y := $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(obj)/vdso/vdso-offsets.h: $(obj)/vdso - -# Hack for now -obj-y += topology.o - diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
+ store_cpu_topology(cpu); + /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online @@ -407,6 +410,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus();
+ init_cpu_topology(); + store_cpu_topology(smp_processor_id()); + /* * are we trying to boot more cores than exist? */ diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5c8e69c..947fcc2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -1,3 +1,16 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2013 Linaro Limited. + * Written by: Hanjun Guo + * + * based on arch/arm/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/export.h> @@ -13,6 +26,113 @@ #include <asm/topology.h> #include <asm/cpu.h>
+DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topo(cpu).core_sibling; +} + +void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid); + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + topo = &cpu_topo(cpu); + + if (cpuid_topo->socket_id != topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} + +/* + * store_cpu_topology is called at boot when only one cpu is running + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, + * which prevents simultaneous write access to cpu_topology array + */ +void store_cpu_topology(unsigned int cpuid) +{ + struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid); + u64 mpidr; + + /* If the cpu topology has been already set, just return */ + if (cpuid_topo->core_id != -1) + return; + + mpidr = read_cpuid_mpidr(); + + /* create cpu topology mapping */ + if (!(mpidr & MPIDR_SMP_BITMASK)) { + /* + * This is a multiprocessor system + * multiprocessor format & multiprocessor mode field are set + */ + + if (mpidr & MPIDR_MT_BITMASK) { + /* core performance interdependency */ + cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr); + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr); + } else { + /* largely independent cores */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr); + } + } else { + /* + * This is an uniprocessor system + * we are in multiprocessor format but uniprocessor system + * or in the old 
uniprocessor format + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = 0; + cpuid_topo->socket_id = -1; + } + + update_siblings_masks(cpuid); + + pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n", + cpuid, cpu_topo(cpuid).thread_id, + cpu_topo(cpuid).core_id, + cpu_topo(cpuid).socket_id, mpidr); +} + +/* + * init_cpu_topology is called at boot when only one cpu is running + * which prevent simultaneous write access to cpu_topology array + */ +void __init init_cpu_topology(void) +{ + unsigned int cpu; + + /* init core mask */ + for_each_possible_cpu(cpu) { + struct cputopo_arm64 *topo = &cpu_topo(cpu); + + topo->thread_id = -1; + topo->core_id = -1; + topo->socket_id = -1; + cpumask_clear(&topo->core_sibling); + cpumask_clear(&topo->thread_sibling); + } + smp_wmb(); +} + void arch_fix_phys_package_id(int num, u32 slot) { }
Hi Graeme,
I modified my patch for cpu topology, and tested the patch on armv8 foundation model,
It works fine, here is the boot log for cpu topology: [...] CPU0: thread -1, cpu 0, socket 0, mpidr 0x80000000 hw perfevents: enabled with arm/armv8-pmuv3 PMU driver, 9 counters available CPU1: Booted secondary processor CPU1: thread -1, cpu 1, socket 0, mpidr 0x80000001 CPU2: Booted secondary processor CPU2: thread -1, cpu 2, socket 0, mpidr 0x80000002 CPU3: Booted secondary processor CPU3: thread -1, cpu 3, socket 0, mpidr 0x80000003 Brought up 4 CPUs [...]
and the cpu topology shown in sysfs is correct too:
root@genericarmv8:/sys/devices/system/cpu/cpu2/topology# ls core_id core_siblings_list thread_siblings core_siblings physical_package_id thread_siblings_list root@genericarmv8:/sys/devices/system/cpu/cpu2/topology# cat * 2 f 0-3 0 4 2
Until there is a better solution for cpu topology (ACPI or FDT), could you accept this patch first?
Thanks Hanjun
On 2013-7-31 12:11, Hanjun Guo wrote:
Power aware scheduling needs the cpu topology information to improve the cpu scheduler decision making. sched_mc and sched_smt will use the topology information.
For ARM64, we can get the topology from the MPIDR register, which defines the affinity of processors.
This patch is mainly based on arch/arm/kernel/topology.c written by Vincent Guittot, and replaces the topology array with a per-cpu variable.
v2: Make the changelog easier to understand. Use 'u64' for MPIDR value on arm64. Some typo fix.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org
arch/arm64/Kconfig | 25 ++++++++ arch/arm64/include/asm/cputype.h | 11 ++++ arch/arm64/include/asm/topology.h | 35 +++++++++++ arch/arm64/kernel/Makefile | 5 +- arch/arm64/kernel/smp.c | 6 ++ arch/arm64/kernel/topology.c | 120 +++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c967b11..67c1c53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,31 @@ config SMP If you don't know what to do here, say N. +config ARM64_CPU_TOPOLOGY
- bool "Support cpu topology definition"
- depends on SMP && ARM64
- default y
- help
Support ARM64 cpu topology definition. The MPIDR register defines
affinity between processors which is then used to describe the cpu
topology of an ARM64 System.
+config SCHED_MC
- bool "Multi-core scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_SMT
- bool "SMT scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf27494..ef7e682 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -30,6 +30,17 @@ #define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_SMP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24)
+#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
#define read_cpuid(reg) ({ \ u64 __val; \ asm("mrs %0, " reg : "=r" (__val)); \ diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 22c9421..06440d4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,43 @@ #ifndef _ASM_ARM64_TOPOLOGY_H #define _ASM_ARM64_TOPOLOGY_H +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+#include <linux/cpumask.h>
+struct cputopo_arm64 {
- int thread_id;
- int core_id;
- int socket_id;
- cpumask_t thread_sibling;
- cpumask_t core_sibling;
+};
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+#define topology_physical_package_id(cpu) (cpu_topo(cpu).socket_id) +#define topology_core_id(cpu) (cpu_topo(cpu).core_id) +#define topology_core_cpumask(cpu) (&cpu_topo(cpu).core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topo(cpu).thread_sibling)
+#define mc_capable() (cpu_topo(0).socket_id != -1) +#define smt_capable() (cpu_topo(0).thread_id != -1)
+void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); void arch_fix_phys_package_id(int num, u32 slot); +#else
+static inline void arch_fix_phys_package_id(int num, u32 slot) {} +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { }
+#endif
#include <asm-generic/topology.h> #endif /* _ASM_ARM64_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c5e29d5..a47c359 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY) += topology.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) @@ -27,7 +28,3 @@ extra-y := $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(obj)/vdso/vdso-offsets.h: $(obj)/vdso
-# Hack for now -obj-y += topology.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
- store_cpu_topology(cpu);
- /*
- OK, now it's safe to let the boot CPU continue. Wait for
- the CPU migration code to notice that the CPU is online
@@ -407,6 +410,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus();
- init_cpu_topology();
- store_cpu_topology(smp_processor_id());
- /*
*/
- are we trying to boot more cores than exist?
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5c8e69c..947fcc2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -1,3 +1,16 @@ +/*
- arch/arm64/kernel/topology.c
- Copyright (C) 2013 Linaro Limited.
- Written by: Hanjun Guo
- based on arch/arm/kernel/topology.c
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
- */
#include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/export.h> @@ -13,6 +26,113 @@ #include <asm/topology.h> #include <asm/cpu.h> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+const struct cpumask *cpu_coregroup_mask(int cpu) +{
- return &cpu_topo(cpu).core_sibling;
+}
+void update_siblings_masks(unsigned int cpuid) +{
- struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
- int cpu;
- /* update core and thread sibling masks */
- for_each_possible_cpu(cpu) {
topo = &cpu_topo(cpu);
if (cpuid_topo->socket_id != topo->socket_id)
continue;
cpumask_set_cpu(cpuid, &topo->core_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != topo->core_id)
continue;
cpumask_set_cpu(cpuid, &topo->thread_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
- }
- smp_wmb();
+}
+/*
- store_cpu_topology is called at boot when only one cpu is running
- and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
- which prevents simultaneous write access to cpu_topology array
- */
+void store_cpu_topology(unsigned int cpuid) +{
- struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
- u64 mpidr;
- /* If the cpu topology has been already set, just return */
- if (cpuid_topo->core_id != -1)
return;
- mpidr = read_cpuid_mpidr();
- /* create cpu topology mapping */
- if (!(mpidr & MPIDR_SMP_BITMASK)) {
/*
* This is a multiprocessor system
* multiprocessor format & multiprocessor mode field are set
*/
if (mpidr & MPIDR_MT_BITMASK) {
/* core performance interdependency */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
} else {
/* largely independent cores */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
}
- } else {
/*
* This is an uniprocessor system
* we are in multiprocessor format but uniprocessor system
* or in the old uniprocessor format
*/
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = 0;
cpuid_topo->socket_id = -1;
- }
- update_siblings_masks(cpuid);
- pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
cpuid, cpu_topo(cpuid).thread_id,
cpu_topo(cpuid).core_id,
cpu_topo(cpuid).socket_id, mpidr);
+}
+/*
- init_cpu_topology is called at boot when only one cpu is running
- which prevent simultaneous write access to cpu_topology array
- */
+void __init init_cpu_topology(void) +{
- unsigned int cpu;
- /* init core mask */
- for_each_possible_cpu(cpu) {
struct cputopo_arm64 *topo = &cpu_topo(cpu);
topo->thread_id = -1;
topo->core_id = -1;
topo->socket_id = -1;
cpumask_clear(&topo->core_sibling);
cpumask_clear(&topo->thread_sibling);
- }
- smp_wmb();
+}
void arch_fix_phys_package_id(int num, u32 slot) { }
On 31/07/13 10:14, Hanjun Guo wrote:
Hi Graeme,
I modified my patch for cpu topology, and tested the patch on armv8 foundation model,
It works fine, here is the boot log for cpu topology: [...] CPU0: thread -1, cpu 0, socket 0, mpidr 0x80000000 hw perfevents: enabled with arm/armv8-pmuv3 PMU driver, 9 counters available CPU1: Booted secondary processor CPU1: thread -1, cpu 1, socket 0, mpidr 0x80000001 CPU2: Booted secondary processor CPU2: thread -1, cpu 2, socket 0, mpidr 0x80000002 CPU3: Booted secondary processor CPU3: thread -1, cpu 3, socket 0, mpidr 0x80000003 Brought up 4 CPUs [...]
and the cpu topology shown in sysfs is correct too:
root@genericarmv8:/sys/devices/system/cpu/cpu2/topology# ls core_id core_siblings_list thread_siblings core_siblings physical_package_id thread_siblings_list root@genericarmv8:/sys/devices/system/cpu/cpu2/topology# cat * 2 f 0-3 0 4 2
Until there is a better solution for cpu topology (ACPI or FDT), could you accept this patch first?
Looks good to me. I have a few other things to do first today, but I'll pull it in later or tomorrow.
Thanks for the patch.
Graeme
Thanks Hanjun
On 2013-7-31 12:11, Hanjun Guo wrote:
Power aware scheduling needs the cpu topology information to improve the cpu scheduler decision making. sched_mc and sched_smt will use the topology information.
For ARM64, we can get the topology from the MPIDR register, which defines the affinity of processors.
This patch is mainly based on arch/arm/kernel/topology.c written by Vincent Guittot, and replaces the topology array with a per-cpu variable.
v2: Make the changelog easier to understand. Use 'u64' for MPIDR value on arm64. Some typo fix.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org
arch/arm64/Kconfig | 25 ++++++++ arch/arm64/include/asm/cputype.h | 11 ++++ arch/arm64/include/asm/topology.h | 35 +++++++++++ arch/arm64/kernel/Makefile | 5 +- arch/arm64/kernel/smp.c | 6 ++ arch/arm64/kernel/topology.c | 120 +++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c967b11..67c1c53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,31 @@ config SMP If you don't know what to do here, say N. +config ARM64_CPU_TOPOLOGY
- bool "Support cpu topology definition"
- depends on SMP && ARM64
- default y
- help
Support ARM64 cpu topology definition. The MPIDR register defines
affinity between processors which is then used to describe the cpu
topology of an ARM64 System.
+config SCHED_MC
- bool "Multi-core scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_SMT
- bool "SMT scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
- config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf27494..ef7e682 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -30,6 +30,17 @@ #define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_SMP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24)
+#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
- #define read_cpuid(reg) ({ \ u64 __val; \ asm("mrs %0, " reg : "=r" (__val)); \
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 22c9421..06440d4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,43 @@ #ifndef _ASM_ARM64_TOPOLOGY_H #define _ASM_ARM64_TOPOLOGY_H +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+#include <linux/cpumask.h>
+struct cputopo_arm64 {
- int thread_id;
- int core_id;
- int socket_id;
- cpumask_t thread_sibling;
- cpumask_t core_sibling;
+};
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+#define topology_physical_package_id(cpu) (cpu_topo(cpu).socket_id) +#define topology_core_id(cpu) (cpu_topo(cpu).core_id) +#define topology_core_cpumask(cpu) (&cpu_topo(cpu).core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topo(cpu).thread_sibling)
+#define mc_capable() (cpu_topo(0).socket_id != -1) +#define smt_capable() (cpu_topo(0).thread_id != -1)
+void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); void arch_fix_phys_package_id(int num, u32 slot); +#else
+static inline void arch_fix_phys_package_id(int num, u32 slot) {} +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { }
+#endif
- #include <asm-generic/topology.h>
#endif /* _ASM_ARM64_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c5e29d5..a47c359 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY) += topology.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) @@ -27,7 +28,3 @@ extra-y := $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(obj)/vdso/vdso-offsets.h: $(obj)/vdso
-# Hack for now -obj-y += topology.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
- store_cpu_topology(cpu);
- /*
- OK, now it's safe to let the boot CPU continue. Wait for
- the CPU migration code to notice that the CPU is online
@@ -407,6 +410,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus();
- init_cpu_topology();
- store_cpu_topology(smp_processor_id());
- /*
*/
- are we trying to boot more cores than exist?
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5c8e69c..947fcc2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -1,3 +1,16 @@ +/*
- arch/arm64/kernel/topology.c
- Copyright (C) 2013 Linaro Limited.
- Written by: Hanjun Guo
- based on arch/arm/kernel/topology.c
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
- */
- #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/export.h>
@@ -13,6 +26,113 @@ #include <asm/topology.h> #include <asm/cpu.h> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+const struct cpumask *cpu_coregroup_mask(int cpu) +{
- return &cpu_topo(cpu).core_sibling;
+}
+void update_siblings_masks(unsigned int cpuid) +{
- struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
- int cpu;
- /* update core and thread sibling masks */
- for_each_possible_cpu(cpu) {
topo = &cpu_topo(cpu);
if (cpuid_topo->socket_id != topo->socket_id)
continue;
cpumask_set_cpu(cpuid, &topo->core_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != topo->core_id)
continue;
cpumask_set_cpu(cpuid, &topo->thread_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
- }
- smp_wmb();
+}
+/*
- store_cpu_topology is called at boot when only one cpu is running
- and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
- which prevents simultaneous write access to cpu_topology array
- */
+void store_cpu_topology(unsigned int cpuid) +{
- struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
- u64 mpidr;
- /* If the cpu topology has been already set, just return */
- if (cpuid_topo->core_id != -1)
return;
- mpidr = read_cpuid_mpidr();
- /* create cpu topology mapping */
- if (!(mpidr & MPIDR_SMP_BITMASK)) {
/*
* This is a multiprocessor system
* multiprocessor format & multiprocessor mode field are set
*/
if (mpidr & MPIDR_MT_BITMASK) {
/* core performance interdependency */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
} else {
/* largely independent cores */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
}
- } else {
/*
* This is an uniprocessor system
* we are in multiprocessor format but uniprocessor system
* or in the old uniprocessor format
*/
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = 0;
cpuid_topo->socket_id = -1;
- }
- update_siblings_masks(cpuid);
- pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
cpuid, cpu_topo(cpuid).thread_id,
cpu_topo(cpuid).core_id,
cpu_topo(cpuid).socket_id, mpidr);
+}
+/*
- init_cpu_topology is called at boot when only one cpu is running
- which prevent simultaneous write access to cpu_topology array
- */
+void __init init_cpu_topology(void) +{
- unsigned int cpu;
- /* init core mask */
- for_each_possible_cpu(cpu) {
struct cputopo_arm64 *topo = &cpu_topo(cpu);
topo->thread_id = -1;
topo->core_id = -1;
topo->socket_id = -1;
cpumask_clear(&topo->core_sibling);
cpumask_clear(&topo->thread_sibling);
- }
- smp_wmb();
+}
- void arch_fix_phys_package_id(int num, u32 slot) { }
Hi Graeme,
I modified my patch for cpu topology, and tested the patch on armv8 foundation model,
It works fine, here is the boot log for cpu topology: [...] CPU0: thread -1, cpu 0, socket 0, mpidr 0x80000000 hw perfevents: enabled with arm/armv8-pmuv3 PMU driver, 9 counters available CPU1: Booted secondary processor CPU1: thread -1, cpu 1, socket 0, mpidr 0x80000001 CPU2: Booted secondary processor CPU2: thread -1, cpu 2, socket 0, mpidr 0x80000002 CPU3: Booted secondary processor CPU3: thread -1, cpu 3, socket 0, mpidr 0x80000003 Brought up 4 CPUs [...]
and the cpu topology shown in sysfs is correct too:
root@genericarmv8:/sys/devices/system/cpu/cpu2/topology# ls core_id core_siblings_list thread_siblings core_siblings physical_package_id thread_siblings_list root@genericarmv8:/sys/devices/system/cpu/cpu2/topology# cat * 2 f 0-3 0 4 2
Until there is a better solution for cpu topology (ACPI or FDT), could you accept this patch first?
Thanks Hanjun
On 2013-7-31 12:11, Hanjun Guo wrote:
Power aware scheduling needs the cpu topology information to improve the cpu scheduler decision making. sched_mc and sched_smt will use the topology information.
For ARM64, we can get the topology from the MPIDR register, which defines the affinity of processors.
This patch is mainly based on arch/arm/kernel/topology.c written by Vincent Guittot, and replaces the topology array with a per-cpu variable.
v2: Make the changelog easier to understand. Use 'u64' for MPIDR value on arm64. Some typo fix.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org
arch/arm64/Kconfig | 25 ++++++++ arch/arm64/include/asm/cputype.h | 11 ++++ arch/arm64/include/asm/topology.h | 35 +++++++++++ arch/arm64/kernel/Makefile | 5 +- arch/arm64/kernel/smp.c | 6 ++ arch/arm64/kernel/topology.c | 120 +++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c967b11..67c1c53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,31 @@ config SMP If you don't know what to do here, say N. +config ARM64_CPU_TOPOLOGY
- bool "Support cpu topology definition"
- depends on SMP && ARM64
- default y
- help
Support ARM64 cpu topology definition. The MPIDR register defines
affinity between processors which is then used to describe the cpu
topology of an ARM64 System.
+config SCHED_MC
- bool "Multi-core scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_SMT
- bool "SMT scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf27494..ef7e682 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -30,6 +30,17 @@ #define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_SMP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24)
+#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
#define read_cpuid(reg) ({ \ u64 __val; \ asm("mrs %0, " reg : "=r" (__val)); \ diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 22c9421..06440d4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,43 @@ #ifndef _ASM_ARM64_TOPOLOGY_H #define _ASM_ARM64_TOPOLOGY_H +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+#include <linux/cpumask.h>
+struct cputopo_arm64 {
- int thread_id;
- int core_id;
- int socket_id;
- cpumask_t thread_sibling;
- cpumask_t core_sibling;
+};
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+#define topology_physical_package_id(cpu) (cpu_topo(cpu).socket_id) +#define topology_core_id(cpu) (cpu_topo(cpu).core_id) +#define topology_core_cpumask(cpu) (&cpu_topo(cpu).core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topo(cpu).thread_sibling)
+#define mc_capable() (cpu_topo(0).socket_id != -1) +#define smt_capable() (cpu_topo(0).thread_id != -1)
+void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); void arch_fix_phys_package_id(int num, u32 slot); +#else
+static inline void arch_fix_phys_package_id(int num, u32 slot) {} +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { }
+#endif
#include <asm-generic/topology.h> #endif /* _ASM_ARM64_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c5e29d5..a47c359 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY) += topology.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) @@ -27,7 +28,3 @@ extra-y := $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(obj)/vdso/vdso-offsets.h: $(obj)/vdso
-# Hack for now -obj-y += topology.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
- store_cpu_topology(cpu);
- /*
- OK, now it's safe to let the boot CPU continue. Wait for
- the CPU migration code to notice that the CPU is online
@@ -407,6 +410,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus();
- init_cpu_topology();
- store_cpu_topology(smp_processor_id());
- /*
*/
- are we trying to boot more cores than exist?
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5c8e69c..947fcc2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -1,3 +1,16 @@ +/*
- arch/arm64/kernel/topology.c
- Copyright (C) 2013 Linaro Limited.
- Written by: Hanjun Guo
- based on arch/arm/kernel/topology.c
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
- */
#include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/export.h> @@ -13,6 +26,113 @@ #include <asm/topology.h> #include <asm/cpu.h> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+const struct cpumask *cpu_coregroup_mask(int cpu) +{
- return &cpu_topo(cpu).core_sibling;
+}
+void update_siblings_masks(unsigned int cpuid) +{
- struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
- int cpu;
- /* update core and thread sibling masks */
- for_each_possible_cpu(cpu) {
topo = &cpu_topo(cpu);
if (cpuid_topo->socket_id != topo->socket_id)
continue;
cpumask_set_cpu(cpuid, &topo->core_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != topo->core_id)
continue;
cpumask_set_cpu(cpuid, &topo->thread_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
- }
- smp_wmb();
+}
+/*
- store_cpu_topology is called at boot when only one cpu is running
- and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
- which prevents simultaneous write access to cpu_topology array
- */
+void store_cpu_topology(unsigned int cpuid) +{
- struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
- u64 mpidr;
- /* If the cpu topology has been already set, just return */
- if (cpuid_topo->core_id != -1)
return;
- mpidr = read_cpuid_mpidr();
- /* create cpu topology mapping */
- if (!(mpidr & MPIDR_SMP_BITMASK)) {
/*
* This is a multiprocessor system
* multiprocessor format & multiprocessor mode field are set
*/
if (mpidr & MPIDR_MT_BITMASK) {
/* core performance interdependency */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
} else {
/* largely independent cores */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
}
- } else {
/*
* This is an uniprocessor system
* we are in multiprocessor format but uniprocessor system
* or in the old uniprocessor format
*/
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = 0;
cpuid_topo->socket_id = -1;
- }
- update_siblings_masks(cpuid);
- pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
cpuid, cpu_topo(cpuid).thread_id,
cpu_topo(cpuid).core_id,
cpu_topo(cpuid).socket_id, mpidr);
+}
+/*
- init_cpu_topology is called at boot when only one cpu is running
- which prevent simultaneous write access to cpu_topology array
- */
+void __init init_cpu_topology(void) +{
- unsigned int cpu;
- /* init core mask */
- for_each_possible_cpu(cpu) {
struct cputopo_arm64 *topo = &cpu_topo(cpu);
topo->thread_id = -1;
topo->core_id = -1;
topo->socket_id = -1;
cpumask_clear(&topo->core_sibling);
cpumask_clear(&topo->thread_sibling);
- }
- smp_wmb();
+}
void arch_fix_phys_package_id(int num, u32 slot) { }
Hi Hanjun,
This patch does not apply to the acpi branch, I think it is probably missing some other patches you have created.
Could you put it with any other patches required as a series I can apply?
Thanks
Graeme
On 31/07/13 05:11, Hanjun Guo wrote:
Power aware scheduling needs the cpu topology information to improve the cpu scheduler decision making. sched_mc and sched_smt will use the topology information.
For ARM64, we can get the topology from the MPIDR register which defines the affinity of processors.
This patch is mainly based on arch/arm/kernel/topology.c written by Vincent Guittot, and replaced the topology array with per cpu variable.
v2: Make the changelog easier to understand. Use 'u64' for MPIDR value on arm64. Some typo fix.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org
arch/arm64/Kconfig | 25 ++++++++ arch/arm64/include/asm/cputype.h | 11 ++++ arch/arm64/include/asm/topology.h | 35 +++++++++++ arch/arm64/kernel/Makefile | 5 +- arch/arm64/kernel/smp.c | 6 ++ arch/arm64/kernel/topology.c | 120 +++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c967b11..67c1c53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,31 @@ config SMP If you don't know what to do here, say N. +config ARM64_CPU_TOPOLOGY
- bool "Support cpu topology definition"
- depends on SMP && ARM64
- default y
- help
Support ARM64 cpu topology definition. The MPIDR register defines
affinity between processors which is then used to describe the cpu
topology of an ARM64 System.
+config SCHED_MC
- bool "Multi-core scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_SMT
- bool "SMT scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
- config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf27494..ef7e682 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -30,6 +30,17 @@ #define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_SMP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24)
+#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
- #define read_cpuid(reg) ({ \ u64 __val; \ asm("mrs %0, " reg : "=r" (__val)); \
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 22c9421..06440d4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,43 @@ #ifndef _ASM_ARM64_TOPOLOGY_H #define _ASM_ARM64_TOPOLOGY_H +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+#include <linux/cpumask.h>
+struct cputopo_arm64 {
- int thread_id;
- int core_id;
- int socket_id;
- cpumask_t thread_sibling;
- cpumask_t core_sibling;
+};
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+#define topology_physical_package_id(cpu) (cpu_topo(cpu).socket_id) +#define topology_core_id(cpu) (cpu_topo(cpu).core_id) +#define topology_core_cpumask(cpu) (&cpu_topo(cpu).core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topo(cpu).thread_sibling)
+#define mc_capable() (cpu_topo(0).socket_id != -1) +#define smt_capable() (cpu_topo(0).thread_id != -1)
+void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); void arch_fix_phys_package_id(int num, u32 slot); +#else
+static inline void arch_fix_phys_package_id(int num, u32 slot) {} +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { }
+#endif
- #include <asm-generic/topology.h>
#endif /* _ASM_ARM64_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c5e29d5..a47c359 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY) += topology.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) @@ -27,7 +28,3 @@ extra-y := $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(obj)/vdso/vdso-offsets.h: $(obj)/vdso
-# Hack for now -obj-y += topology.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
- store_cpu_topology(cpu);
- /*
- OK, now it's safe to let the boot CPU continue. Wait for
- the CPU migration code to notice that the CPU is online
@@ -407,6 +410,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus();
- init_cpu_topology();
- store_cpu_topology(smp_processor_id());
- /*
*/
- are we trying to boot more cores than exist?
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5c8e69c..947fcc2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -1,3 +1,16 @@ +/*
- arch/arm64/kernel/topology.c
- Copyright (C) 2013 Linaro Limited.
- Written by: Hanjun Guo
- based on arch/arm/kernel/topology.c
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
- */
- #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/export.h>
@@ -13,6 +26,113 @@ #include <asm/topology.h> #include <asm/cpu.h> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+const struct cpumask *cpu_coregroup_mask(int cpu) +{
- return &cpu_topo(cpu).core_sibling;
+}
+void update_siblings_masks(unsigned int cpuid) +{
- struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
- int cpu;
- /* update core and thread sibling masks */
- for_each_possible_cpu(cpu) {
topo = &cpu_topo(cpu);
if (cpuid_topo->socket_id != topo->socket_id)
continue;
cpumask_set_cpu(cpuid, &topo->core_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != topo->core_id)
continue;
cpumask_set_cpu(cpuid, &topo->thread_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
- }
- smp_wmb();
+}
+/*
- store_cpu_topology is called at boot when only one cpu is running
- and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
- which prevents simultaneous write access to cpu_topology array
- */
+void store_cpu_topology(unsigned int cpuid) +{
- struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
- u64 mpidr;
- /* If the cpu topology has been already set, just return */
- if (cpuid_topo->core_id != -1)
return;
- mpidr = read_cpuid_mpidr();
- /* create cpu topology mapping */
- if (!(mpidr & MPIDR_SMP_BITMASK)) {
/*
* This is a multiprocessor system
* multiprocessor format & multiprocessor mode field are set
*/
if (mpidr & MPIDR_MT_BITMASK) {
/* core performance interdependency */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
} else {
/* largely independent cores */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
}
- } else {
/*
* This is an uniprocessor system
* we are in multiprocessor format but uniprocessor system
* or in the old uniprocessor format
*/
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = 0;
cpuid_topo->socket_id = -1;
- }
- update_siblings_masks(cpuid);
- pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
cpuid, cpu_topo(cpuid).thread_id,
cpu_topo(cpuid).core_id,
cpu_topo(cpuid).socket_id, mpidr);
+}
+/*
- init_cpu_topology is called at boot when only one cpu is running
- which prevent simultaneous write access to cpu_topology array
- */
+void __init init_cpu_topology(void) +{
- unsigned int cpu;
- /* init core mask */
- for_each_possible_cpu(cpu) {
struct cputopo_arm64 *topo = &cpu_topo(cpu);
topo->thread_id = -1;
topo->core_id = -1;
topo->socket_id = -1;
cpumask_clear(&topo->core_sibling);
cpumask_clear(&topo->thread_sibling);
- }
- smp_wmb();
+}
- void arch_fix_phys_package_id(int num, u32 slot) { }
On 2013-8-2 1:14, Graeme Gregory wrote:
Hi Hanjun,
This patch does not apply to the acpi branch, I think it is probably missing some other patches you have created.
Could you put it with any other patches required as a series I can apply?
This patch is based on the acpi branch, but not on the updated (3.10-based) one, my bad. I will rebase my patch and send it out.
Thanks
Graeme
On 31/07/13 05:11, Hanjun Guo wrote:
Power aware scheduling needs the cpu topology information to improve the cpu scheduler decision making. sched_mc and sched_smt will use the topology information.
For ARM64, we can get the topology from the MPIDR register which defines the affinity of processors.
This patch is mainly based on arch/arm/kernel/topology.c written by Vincent Guittot, and replaced the topology array with per cpu variable.
v2: Make the changelog easier to understand. Use 'u64' for MPIDR value on arm64. Some typo fix.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org
arch/arm64/Kconfig | 25 ++++++++ arch/arm64/include/asm/cputype.h | 11 ++++ arch/arm64/include/asm/topology.h | 35 +++++++++++ arch/arm64/kernel/Makefile | 5 +- arch/arm64/kernel/smp.c | 6 ++ arch/arm64/kernel/topology.c | 120 +++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c967b11..67c1c53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,31 @@ config SMP If you don't know what to do here, say N. +config ARM64_CPU_TOPOLOGY
- bool "Support cpu topology definition"
- depends on SMP && ARM64
- default y
- help
Support ARM64 cpu topology definition. The MPIDR register defines
affinity between processors which is then used to describe the cpu
topology of an ARM64 System.
+config SCHED_MC
- bool "Multi-core scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_SMT
- bool "SMT scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
- config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf27494..ef7e682 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -30,6 +30,17 @@ #define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_SMP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24)
+#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
- #define read_cpuid(reg) ({ \ u64 __val; \ asm("mrs %0, " reg : "=r" (__val)); \
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 22c9421..06440d4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,43 @@ #ifndef _ASM_ARM64_TOPOLOGY_H #define _ASM_ARM64_TOPOLOGY_H +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+#include <linux/cpumask.h>
+struct cputopo_arm64 {
- int thread_id;
- int core_id;
- int socket_id;
- cpumask_t thread_sibling;
- cpumask_t core_sibling;
+};
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+#define topology_physical_package_id(cpu) (cpu_topo(cpu).socket_id) +#define topology_core_id(cpu) (cpu_topo(cpu).core_id) +#define topology_core_cpumask(cpu) (&cpu_topo(cpu).core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topo(cpu).thread_sibling)
+#define mc_capable() (cpu_topo(0).socket_id != -1) +#define smt_capable() (cpu_topo(0).thread_id != -1)
+void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); void arch_fix_phys_package_id(int num, u32 slot); +#else
+static inline void arch_fix_phys_package_id(int num, u32 slot) {} +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { }
+#endif
- #include <asm-generic/topology.h> #endif /* _ASM_ARM64_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c5e29d5..a47c359 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY) += topology.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) @@ -27,7 +28,3 @@ extra-y := $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(obj)/vdso/vdso-offsets.h: $(obj)/vdso
-# Hack for now -obj-y += topology.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
- store_cpu_topology(cpu);
/* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online
@@ -407,6 +410,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus();
- init_cpu_topology();
- store_cpu_topology(smp_processor_id());
/* * are we trying to boot more cores than exist? */
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5c8e69c..947fcc2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -1,3 +1,16 @@ +/*
- arch/arm64/kernel/topology.c
- Copyright (C) 2013 Linaro Limited.
- Written by: Hanjun Guo
- based on arch/arm/kernel/topology.c
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
- */
- #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/export.h>
@@ -13,6 +26,113 @@ #include <asm/topology.h> #include <asm/cpu.h> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+const struct cpumask *cpu_coregroup_mask(int cpu) +{
- return &cpu_topo(cpu).core_sibling;
+}
+void update_siblings_masks(unsigned int cpuid) +{
- struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
- int cpu;
- /* update core and thread sibling masks */
- for_each_possible_cpu(cpu) {
topo = &cpu_topo(cpu);
if (cpuid_topo->socket_id != topo->socket_id)
continue;
cpumask_set_cpu(cpuid, &topo->core_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != topo->core_id)
continue;
cpumask_set_cpu(cpuid, &topo->thread_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
- }
- smp_wmb();
+}
+/*
- store_cpu_topology is called at boot when only one cpu is running
- and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
- which prevents simultaneous write access to cpu_topology array
- */
+void store_cpu_topology(unsigned int cpuid) +{
- struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
- u64 mpidr;
- /* If the cpu topology has been already set, just return */
- if (cpuid_topo->core_id != -1)
return;
- mpidr = read_cpuid_mpidr();
- /* create cpu topology mapping */
- if (!(mpidr & MPIDR_SMP_BITMASK)) {
/*
* This is a multiprocessor system
* multiprocessor format & multiprocessor mode field are set
*/
if (mpidr & MPIDR_MT_BITMASK) {
/* core performance interdependency */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
} else {
/* largely independent cores */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
}
- } else {
/*
* This is an uniprocessor system
* we are in multiprocessor format but uniprocessor system
* or in the old uniprocessor format
*/
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = 0;
cpuid_topo->socket_id = -1;
- }
- update_siblings_masks(cpuid);
- pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
cpuid, cpu_topo(cpuid).thread_id,
cpu_topo(cpuid).core_id,
cpu_topo(cpuid).socket_id, mpidr);
+}
+/*
- init_cpu_topology is called at boot when only one cpu is running
- which prevent simultaneous write access to cpu_topology array
- */
+void __init init_cpu_topology(void) +{
- unsigned int cpu;
- /* init core mask */
- for_each_possible_cpu(cpu) {
struct cputopo_arm64 *topo = &cpu_topo(cpu);
topo->thread_id = -1;
topo->core_id = -1;
topo->socket_id = -1;
cpumask_clear(&topo->core_sibling);
cpumask_clear(&topo->thread_sibling);
- }
- smp_wmb();
+}
- void arch_fix_phys_package_id(int num, u32 slot) { }
Linaro-acpi mailing list Linaro-acpi@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-acpi
Hi Hanjun,
I managed to apply this patch by hand, but foundation model does not boot with it.
Thanks
Graeme
Initializing cgroup subsys cpu Linux version 3.11.0-rc3+ (graeme@xora-build) (gcc version 4.7.3 20121001 (prerelease) [gcc-linaro/4.7 revision 115029] (crosstool-NG linaro-1.13.1+bzr2506 - ARM aarch64 via Linaro) ) #234 SMP Fri Aug 2 09:52:56 BST 2013 CPU: AArch64 Processor [410fd000] revision 0 Machine: Foundation-v8A bootconsole [earlycon0] enabled ACPI: RSDP 0000000088100008 00024 (v02 LINARO) ACPI: XSDT 000000008810002c 000C4 (v01 LINARO FOUNDATI 00000014 INTL 20130517) ACPI: FACP 00000000881000f0 0010C (v05 LINARO FOUNDATI 00000000 INTL 20130517) ACPI: DSDT 00000000881001fc 003CB (v01 LINARO FOUNDATI 00000002 INTL 20130517) ACPI: MSCT 0000000088100607 00090 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: MCHI 0000000088100697 00045 (v01 LINARO FOUNDATI 02000715 INTL 20130517) ACPI: FPDT 00000000881006dc 00064 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: ERST 0000000088100740 00230 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: EINJ 0000000088100970 00130 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: CPEP 0000000088100aa0 00034 (v01 LINARO FOUNDATI 00000000 INTL 20130517) ACPI: UEFI 0000000088100ad4 00036 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: SRAT 0000000088100b0a 00080 (v03 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: SPMI 0000000088100b8a 00041 (v04 LINARO FOUNDATI 00000000 INTL 20130517) ACPI: SLIT 0000000088100bcb 001BC (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: OEM0 0000000088100d87 00024 (v01 LINARO FOUNDATI 0000000A INTL 20130517) ACPI: MPST 0000000088100dab 000B6 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: MCFG 0000000088100e61 0003C (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: APIC 0000000088100e9d 00184 (v03 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: HEST 0000000088101021 001D4 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: GTDT 00000000881011f5 00050 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: ECDT 0000000088101245 00042 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: BERT 
0000000088101287 00030 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: SSDT 00000000881012b7 00024 (v02 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: Local APIC address 0x2c002000 ACPI: Local APIC address 0x2c002000 ACPI: GIC (acpi_id[0x0000] gic_id[0x0000] enabled) ACPI: GIC (acpi_id[0x0001] gic_id[0x0001] enabled) ACPI: GIC (acpi_id[0x0002] gic_id[0x0002] enabled) ACPI: GIC (acpi_id[0x0003] gic_id[0x0003] enabled) ACPI: GIC (acpi_id[0x0004] gic_id[0x0004] disabled) ACPI: GIC (acpi_id[0x0005] gic_id[0x0005] disabled) ACPI: GIC (acpi_id[0x0006] gic_id[0x0006] disabled) ACPI: GIC (acpi_id[0x0007] gic_id[0x0007] disabled) 4 CPUs available, 8 CPUs total ACPI: GIC Distributor (id[0x0000] address[0x2c001000] gsi_base[0]) Using ACPI (MADT) for SMP configuration information SMP: the system is limited to 4 CPUs SMP: Allowing 4 CPUs, 0 hotplug CPUs PERCPU: Embedded 10 pages/cpu @ffffffc87ffaa000 s11520 r8192 d21248 u40960 Built 1 zonelists in Zone order, mobility grouping on. Total pages: 1034240 Kernel command line: root=/dev/vda2 consolelog=9 rw console=ttyAMA0 earlyprintk=pl011,0x1c090000 debug loglevel=7 PID hash table entries: 4096 (order: 3, 32768 bytes) Dentry cache hash table entries: 524288 (order: 10, 4194304 bytes) Inode-cache hash table entries: 262144 (order: 9, 2097152 bytes) software IO TLB [mem 0x8f8000000-0x8fc000000] (64MB) mapped at [ffffffc878000000-ffffffc87bffffff] Memory: 4059132K/4194304K available (3495K kernel code, 209K rwdata, 1248K rodata, 187K init, 153K bss, 135172K reserved) Virtual kernel memory layout: vmalloc : 0xffffff8000000000 - 0xffffffbbffff0000 (245759 MB) vmemmap : 0xffffffbc01c00000 - 0xffffffbc1f800000 ( 476 MB) modules : 0xffffffbffc000000 - 0xffffffc000000000 ( 64 MB) memory : 0xffffffc000000000 - 0xffffffc880000000 ( 34816 MB) .init : 0xffffffc000523000 - 0xffffffc000551d00 ( 188 kB) .text : 0xffffffc000080000 - 0xffffffc000522d00 ( 4748 kB) .data : 0xffffffc000554000 - 0xffffffc0005887a0 ( 210 kB) SLUB: HWalign=64, 
Order=0-3, MinObjects=0, CPUs=4, Nodes=1 Hierarchical RCU implementation. NR_IRQS:64 nr_irqs:64 0 Architected local timer running at 100.00MHz (phys). Console: colour dummy device 80x25 Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=1000000) pid_max: default: 32768 minimum: 301 Mount-cache hash table entries: 256 ACPI: Core revision 20130517 ACPI: All ACPI Tables successfully acquired CPU0: thread -1, cpu 0, socket 0, mpidr 0x80000000 hw perfevents: enabled with arm/armv8-pmuv3 PMU driver, 9 counters available CPU1: Booted secondary processor CPU1: thread -1, cpu 1, socket 0, mpidr 0x80000001 BUG: failure at kernel/smpboot.c:134/smpboot_thread_fn()! Kernel panic - not syncing: BUG! CPU: 0 PID: 11 Comm: ksoftirqd/1 Not tainted 3.11.0-rc3+ #234 Call trace: [<ffffffc000087064>] dump_backtrace+0x0/0x12c [<ffffffc0000871a4>] show_stack+0x14/0x1c [<ffffffc0003de54c>] dump_stack+0x70/0x8c [<ffffffc0003db7b4>] panic+0xe8/0x214 [<ffffffc0000ba96c>] smpboot_thread_fn+0x1b0/0x1c8 [<ffffffc0000b2f74>] kthread+0xb4/0xbc CPU1: stopping CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.11.0-rc3+ #234 Call trace: [<ffffffc000087064>] dump_backtrace+0x0/0x12c [<ffffffc0000871a4>] show_stack+0x14/0x1c [<ffffffc0003de54c>] dump_stack+0x70/0x8c [<ffffffc00008dcf4>] handle_IPI+0x120/0x130 [<ffffffc00008131c>] gic_handle_irq+0x7c/0x80 Exception stack(0xffffffc877c97e90 to 0xffffffc877c97fb0) 7e80: 00000001 00000000 0058b160 ffffffc0 7ea0: 77c97fd0 ffffffc8 0008da08 ffffffc0 00000000 00000000 0000000a 00000000 7ec0: 00000001 00000000 000bb17c ffffffc0 fffffffa 00000000 00000000 00000000 7ee0: 00000000 00000000 3b9aca00 00000000 0000001d 00000000 00562000 ffffffc0 7f00: 00000001 00000000 00000058 00000000 00002758 00000000 00000000 00000000 7f20: 000000c6 00000000 00000006 00000000 0058f000 ffffffc0 00000000 00000000 7f40: 00000000 00000000 00000001 00000000 0058b160 ffffffc0 0058b158 ffffffc0 7f60: 410fd000 00000000 80545068 00000000 80000000 
00000000 8007b000 00000000 7f80: 8007d000 00000000 00080140 ffffffc0 80080128 00000000 77c97fd0 ffffffc8 7fa0: 0008da04 ffffffc0 77c97fd0 ffffffc8 [<ffffffc0000835ac>] el1_irq+0x6c/0xc0
On 31/07/13 05:11, Hanjun Guo wrote:
Power aware scheduling needs the cpu topology information to improve the cpu scheduler decision making. sched_mc and sched_smt will use the topology information.
For ARM64, we can get the topology from the MPIDR register which defines the affinity of processors.
This patch is mainly based on arch/arm/kernel/topology.c written by Vincent Guittot, and replaced the topology array with per cpu variable.
v2: Make the changelog easier to understand. Use 'u64' for MPIDR value on arm64. Some typo fix.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org
arch/arm64/Kconfig | 25 ++++++++ arch/arm64/include/asm/cputype.h | 11 ++++ arch/arm64/include/asm/topology.h | 35 +++++++++++ arch/arm64/kernel/Makefile | 5 +- arch/arm64/kernel/smp.c | 6 ++ arch/arm64/kernel/topology.c | 120 +++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c967b11..67c1c53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,31 @@ config SMP If you don't know what to do here, say N. +config ARM64_CPU_TOPOLOGY
- bool "Support cpu topology definition"
- depends on SMP && ARM64
- default y
- help
Support ARM64 cpu topology definition. The MPIDR register defines
affinity between processors which is then used to describe the cpu
topology of an ARM64 System.
+config SCHED_MC
- bool "Multi-core scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_SMT
- bool "SMT scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
- config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf27494..ef7e682 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -30,6 +30,17 @@ #define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_SMP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24)
+#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
- #define read_cpuid(reg) ({ \ u64 __val; \ asm("mrs %0, " reg : "=r" (__val)); \
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 22c9421..06440d4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,43 @@ #ifndef _ASM_ARM64_TOPOLOGY_H #define _ASM_ARM64_TOPOLOGY_H +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+#include <linux/cpumask.h>
+struct cputopo_arm64 {
- int thread_id;
- int core_id;
- int socket_id;
- cpumask_t thread_sibling;
- cpumask_t core_sibling;
+};
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+#define topology_physical_package_id(cpu) (cpu_topo(cpu).socket_id) +#define topology_core_id(cpu) (cpu_topo(cpu).core_id) +#define topology_core_cpumask(cpu) (&cpu_topo(cpu).core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topo(cpu).thread_sibling)
+#define mc_capable() (cpu_topo(0).socket_id != -1) +#define smt_capable() (cpu_topo(0).thread_id != -1)
+void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); void arch_fix_phys_package_id(int num, u32 slot); +#else
+static inline void arch_fix_phys_package_id(int num, u32 slot) {} +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { }
+#endif
- #include <asm-generic/topology.h>
#endif /* _ASM_ARM64_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c5e29d5..a47c359 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY) += topology.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) @@ -27,7 +28,3 @@ extra-y := $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(obj)/vdso/vdso-offsets.h: $(obj)/vdso
-# Hack for now -obj-y += topology.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
- store_cpu_topology(cpu);
- /*
- OK, now it's safe to let the boot CPU continue. Wait for
- the CPU migration code to notice that the CPU is online
@@ -407,6 +410,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus();
- init_cpu_topology();
- store_cpu_topology(smp_processor_id());
- /*
*/
- are we trying to boot more cores than exist?
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5c8e69c..947fcc2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -1,3 +1,16 @@ +/*
- arch/arm64/kernel/topology.c
- Copyright (C) 2013 Linaro Limited.
- Written by: Hanjun Guo
- based on arch/arm/kernel/topology.c
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
- */
- #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/export.h>
@@ -13,6 +26,113 @@ #include <asm/topology.h> #include <asm/cpu.h> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+const struct cpumask *cpu_coregroup_mask(int cpu) +{
- return &cpu_topo(cpu).core_sibling;
+}
+void update_siblings_masks(unsigned int cpuid) +{
- struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
- int cpu;
- /* update core and thread sibling masks */
- for_each_possible_cpu(cpu) {
topo = &cpu_topo(cpu);
if (cpuid_topo->socket_id != topo->socket_id)
continue;
cpumask_set_cpu(cpuid, &topo->core_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != topo->core_id)
continue;
cpumask_set_cpu(cpuid, &topo->thread_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
- }
- smp_wmb();
+}
+/*
- store_cpu_topology is called at boot when only one cpu is running
- and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
- which prevents simultaneous write access to cpu_topology array
- */
+void store_cpu_topology(unsigned int cpuid) +{
- struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
- u64 mpidr;
- /* If the cpu topology has been already set, just return */
- if (cpuid_topo->core_id != -1)
return;
- mpidr = read_cpuid_mpidr();
- /* create cpu topology mapping */
- if (!(mpidr & MPIDR_SMP_BITMASK)) {
/*
* This is a multiprocessor system
* multiprocessor format & multiprocessor mode field are set
*/
if (mpidr & MPIDR_MT_BITMASK) {
/* core performance interdependency */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
} else {
/* largely independent cores */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
}
- } else {
/*
* This is an uniprocessor system
* we are in multiprocessor format but uniprocessor system
* or in the old uniprocessor format
*/
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = 0;
cpuid_topo->socket_id = -1;
- }
- update_siblings_masks(cpuid);
- pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
cpuid, cpu_topo(cpuid).thread_id,
cpu_topo(cpuid).core_id,
cpu_topo(cpuid).socket_id, mpidr);
+}
+/*
- init_cpu_topology is called at boot when only one cpu is running
- which prevents simultaneous write access to cpu_topology array
- */
+void __init init_cpu_topology(void) +{
- unsigned int cpu;
- /* init core mask */
- for_each_possible_cpu(cpu) {
struct cputopo_arm64 *topo = &cpu_topo(cpu);
topo->thread_id = -1;
topo->core_id = -1;
topo->socket_id = -1;
cpumask_clear(&topo->core_sibling);
cpumask_clear(&topo->thread_sibling);
- }
- smp_wmb();
+}
- void arch_fix_phys_package_id(int num, u32 slot) { }
On 2013-8-2 16:57, Graeme Gregory wrote:
Hi Hanjun,
I managed to apply this patch by hand, but foundation model does not boot with it.
Thanks for the testing. I tested the patch on 3.10 based acpi branch and it worked as expected, I will check what's happening here.
Thanks
Graeme
Initializing cgroup subsys cpu Linux version 3.11.0-rc3+ (graeme@xora-build) (gcc version 4.7.3 20121001 (prerelease) [gcc-linaro/4.7 revision 115029] (crosstool-NG linaro-1.13.1+bzr2506 - ARM aarch64 via Linaro) ) #234 SMP Fri Aug 2 09:52:56 BST 2013 CPU: AArch64 Processor [410fd000] revision 0 Machine: Foundation-v8A bootconsole [earlycon0] enabled ACPI: RSDP 0000000088100008 00024 (v02 LINARO) ACPI: XSDT 000000008810002c 000C4 (v01 LINARO FOUNDATI 00000014 INTL 20130517) ACPI: FACP 00000000881000f0 0010C (v05 LINARO FOUNDATI 00000000 INTL 20130517) ACPI: DSDT 00000000881001fc 003CB (v01 LINARO FOUNDATI 00000002 INTL 20130517) ACPI: MSCT 0000000088100607 00090 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: MCHI 0000000088100697 00045 (v01 LINARO FOUNDATI 02000715 INTL 20130517) ACPI: FPDT 00000000881006dc 00064 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: ERST 0000000088100740 00230 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: EINJ 0000000088100970 00130 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: CPEP 0000000088100aa0 00034 (v01 LINARO FOUNDATI 00000000 INTL 20130517) ACPI: UEFI 0000000088100ad4 00036 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: SRAT 0000000088100b0a 00080 (v03 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: SPMI 0000000088100b8a 00041 (v04 LINARO FOUNDATI 00000000 INTL 20130517) ACPI: SLIT 0000000088100bcb 001BC (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: OEM0 0000000088100d87 00024 (v01 LINARO FOUNDATI 0000000A INTL 20130517) ACPI: MPST 0000000088100dab 000B6 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: MCFG 0000000088100e61 0003C (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: APIC 0000000088100e9d 00184 (v03 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: HEST 0000000088101021 001D4 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: GTDT 00000000881011f5 00050 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: ECDT 0000000088101245 00042 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: BERT 
0000000088101287 00030 (v01 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: SSDT 00000000881012b7 00024 (v02 LINARO FOUNDATI 00000001 INTL 20130517) ACPI: Local APIC address 0x2c002000 ACPI: Local APIC address 0x2c002000 ACPI: GIC (acpi_id[0x0000] gic_id[0x0000] enabled) ACPI: GIC (acpi_id[0x0001] gic_id[0x0001] enabled) ACPI: GIC (acpi_id[0x0002] gic_id[0x0002] enabled) ACPI: GIC (acpi_id[0x0003] gic_id[0x0003] enabled) ACPI: GIC (acpi_id[0x0004] gic_id[0x0004] disabled) ACPI: GIC (acpi_id[0x0005] gic_id[0x0005] disabled) ACPI: GIC (acpi_id[0x0006] gic_id[0x0006] disabled) ACPI: GIC (acpi_id[0x0007] gic_id[0x0007] disabled) 4 CPUs available, 8 CPUs total ACPI: GIC Distributor (id[0x0000] address[0x2c001000] gsi_base[0]) Using ACPI (MADT) for SMP configuration information SMP: the system is limited to 4 CPUs SMP: Allowing 4 CPUs, 0 hotplug CPUs PERCPU: Embedded 10 pages/cpu @ffffffc87ffaa000 s11520 r8192 d21248 u40960 Built 1 zonelists in Zone order, mobility grouping on. Total pages: 1034240 Kernel command line: root=/dev/vda2 consolelog=9 rw console=ttyAMA0 earlyprintk=pl011,0x1c090000 debug loglevel=7 PID hash table entries: 4096 (order: 3, 32768 bytes) Dentry cache hash table entries: 524288 (order: 10, 4194304 bytes) Inode-cache hash table entries: 262144 (order: 9, 2097152 bytes) software IO TLB [mem 0x8f8000000-0x8fc000000] (64MB) mapped at [ffffffc878000000-ffffffc87bffffff] Memory: 4059132K/4194304K available (3495K kernel code, 209K rwdata, 1248K rodata, 187K init, 153K bss, 135172K reserved) Virtual kernel memory layout: vmalloc : 0xffffff8000000000 - 0xffffffbbffff0000 (245759 MB) vmemmap : 0xffffffbc01c00000 - 0xffffffbc1f800000 ( 476 MB) modules : 0xffffffbffc000000 - 0xffffffc000000000 ( 64 MB) memory : 0xffffffc000000000 - 0xffffffc880000000 ( 34816 MB) .init : 0xffffffc000523000 - 0xffffffc000551d00 ( 188 kB) .text : 0xffffffc000080000 - 0xffffffc000522d00 ( 4748 kB) .data : 0xffffffc000554000 - 0xffffffc0005887a0 ( 210 kB) SLUB: HWalign=64, 
Order=0-3, MinObjects=0, CPUs=4, Nodes=1 Hierarchical RCU implementation. NR_IRQS:64 nr_irqs:64 0 Architected local timer running at 100.00MHz (phys). Console: colour dummy device 80x25 Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=1000000) pid_max: default: 32768 minimum: 301 Mount-cache hash table entries: 256 ACPI: Core revision 20130517 ACPI: All ACPI Tables successfully acquired CPU0: thread -1, cpu 0, socket 0, mpidr 0x80000000 hw perfevents: enabled with arm/armv8-pmuv3 PMU driver, 9 counters available CPU1: Booted secondary processor CPU1: thread -1, cpu 1, socket 0, mpidr 0x80000001 BUG: failure at kernel/smpboot.c:134/smpboot_thread_fn()! Kernel panic - not syncing: BUG! CPU: 0 PID: 11 Comm: ksoftirqd/1 Not tainted 3.11.0-rc3+ #234 Call trace: [<ffffffc000087064>] dump_backtrace+0x0/0x12c [<ffffffc0000871a4>] show_stack+0x14/0x1c [<ffffffc0003de54c>] dump_stack+0x70/0x8c [<ffffffc0003db7b4>] panic+0xe8/0x214 [<ffffffc0000ba96c>] smpboot_thread_fn+0x1b0/0x1c8 [<ffffffc0000b2f74>] kthread+0xb4/0xbc CPU1: stopping CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.11.0-rc3+ #234 Call trace: [<ffffffc000087064>] dump_backtrace+0x0/0x12c [<ffffffc0000871a4>] show_stack+0x14/0x1c [<ffffffc0003de54c>] dump_stack+0x70/0x8c [<ffffffc00008dcf4>] handle_IPI+0x120/0x130 [<ffffffc00008131c>] gic_handle_irq+0x7c/0x80 Exception stack(0xffffffc877c97e90 to 0xffffffc877c97fb0) 7e80: 00000001 00000000 0058b160 ffffffc0 7ea0: 77c97fd0 ffffffc8 0008da08 ffffffc0 00000000 00000000 0000000a 00000000 7ec0: 00000001 00000000 000bb17c ffffffc0 fffffffa 00000000 00000000 00000000 7ee0: 00000000 00000000 3b9aca00 00000000 0000001d 00000000 00562000 ffffffc0 7f00: 00000001 00000000 00000058 00000000 00002758 00000000 00000000 00000000 7f20: 000000c6 00000000 00000006 00000000 0058f000 ffffffc0 00000000 00000000 7f40: 00000000 00000000 00000001 00000000 0058b160 ffffffc0 0058b158 ffffffc0 7f60: 410fd000 00000000 80545068 00000000 80000000 
00000000 8007b000 00000000 7f80: 8007d000 00000000 00080140 ffffffc0 80080128 00000000 77c97fd0 ffffffc8 7fa0: 0008da04 ffffffc0 77c97fd0 ffffffc8 [<ffffffc0000835ac>] el1_irq+0x6c/0xc0
On 31/07/13 05:11, Hanjun Guo wrote:
Power aware scheduling needs the cpu topology information to improve the cpu scheduler decision making. sched_mc and sched_smt will use the topology information.
For ARM64, we can get the topology from the MPIDR register which defines the affinity of processors.
This patch is mainly based on arch/arm/kernel/topology.c written by Vincent Guittot, and replaced the topology array with per cpu variable.
v2: Make the changelog easier to understand. Use 'u64' for MPIDR value on arm64. Some typo fix.
Signed-off-by: Hanjun Guo hanjun.guo@linaro.org
arch/arm64/Kconfig | 25 ++++++++ arch/arm64/include/asm/cputype.h | 11 ++++ arch/arm64/include/asm/topology.h | 35 +++++++++++ arch/arm64/kernel/Makefile | 5 +- arch/arm64/kernel/smp.c | 6 ++ arch/arm64/kernel/topology.c | 120 +++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c967b11..67c1c53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,31 @@ config SMP If you don't know what to do here, say N. +config ARM64_CPU_TOPOLOGY
- bool "Support cpu topology definition"
- depends on SMP && ARM64
- default y
- help
Support ARM64 cpu topology definition. The MPIDR register defines
affinity between processors which is then used to describe the cpu
topology of an ARM64 System.
+config SCHED_MC
- bool "Multi-core scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_SMT
- bool "SMT scheduler support"
- depends on ARM64_CPU_TOPOLOGY
- help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
- config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf27494..ef7e682 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -30,6 +30,17 @@ #define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_SMP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24)
+#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK) +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
- #define read_cpuid(reg) ({ \ u64 __val; \ asm("mrs %0, " reg : "=r" (__val)); \
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 22c9421..06440d4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,43 @@ #ifndef _ASM_ARM64_TOPOLOGY_H #define _ASM_ARM64_TOPOLOGY_H +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+#include <linux/cpumask.h>
+struct cputopo_arm64 {
- int thread_id;
- int core_id;
- int socket_id;
- cpumask_t thread_sibling;
- cpumask_t core_sibling;
+};
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+#define topology_physical_package_id(cpu) (cpu_topo(cpu).socket_id) +#define topology_core_id(cpu) (cpu_topo(cpu).core_id) +#define topology_core_cpumask(cpu) (&cpu_topo(cpu).core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topo(cpu).thread_sibling)
+#define mc_capable() (cpu_topo(0).socket_id != -1) +#define smt_capable() (cpu_topo(0).thread_id != -1)
+void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); void arch_fix_phys_package_id(int num, u32 slot); +#else
+static inline void arch_fix_phys_package_id(int num, u32 slot) {} +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { }
+#endif
- #include <asm-generic/topology.h> #endif /* _ASM_ARM64_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c5e29d5..a47c359 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY) += topology.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) @@ -27,7 +28,3 @@ extra-y := $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(obj)/vdso/vdso-offsets.h: $(obj)/vdso
-# Hack for now -obj-y += topology.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
- store_cpu_topology(cpu);
/* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online
@@ -407,6 +410,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus();
- init_cpu_topology();
- store_cpu_topology(smp_processor_id());
/* * are we trying to boot more cores than exist? */
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5c8e69c..947fcc2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -1,3 +1,16 @@ +/*
- arch/arm64/kernel/topology.c
- Copyright (C) 2013 Linaro Limited.
- Written by: Hanjun Guo
- based on arch/arm/kernel/topology.c
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
- */
- #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/export.h>
@@ -13,6 +26,113 @@ #include <asm/topology.h> #include <asm/cpu.h> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+const struct cpumask *cpu_coregroup_mask(int cpu) +{
- return &cpu_topo(cpu).core_sibling;
+}
+void update_siblings_masks(unsigned int cpuid) +{
- struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
- int cpu;
- /* update core and thread sibling masks */
- for_each_possible_cpu(cpu) {
topo = &cpu_topo(cpu);
if (cpuid_topo->socket_id != topo->socket_id)
continue;
cpumask_set_cpu(cpuid, &topo->core_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != topo->core_id)
continue;
cpumask_set_cpu(cpuid, &topo->thread_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
- }
- smp_wmb();
+}
+/*
- store_cpu_topology is called at boot when only one cpu is running
- and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
- which prevents simultaneous write access to cpu_topology array
- */
+void store_cpu_topology(unsigned int cpuid) +{
- struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
- u64 mpidr;
- /* If the cpu topology has been already set, just return */
- if (cpuid_topo->core_id != -1)
return;
- mpidr = read_cpuid_mpidr();
- /* create cpu topology mapping */
- if (!(mpidr & MPIDR_SMP_BITMASK)) {
/*
* This is a multiprocessor system
* multiprocessor format & multiprocessor mode field are set
*/
if (mpidr & MPIDR_MT_BITMASK) {
/* core performance interdependency */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
} else {
/* largely independent cores */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
}
- } else {
/*
* This is an uniprocessor system
* we are in multiprocessor format but uniprocessor system
* or in the old uniprocessor format
*/
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = 0;
cpuid_topo->socket_id = -1;
- }
- update_siblings_masks(cpuid);
- pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
cpuid, cpu_topo(cpuid).thread_id,
cpu_topo(cpuid).core_id,
cpu_topo(cpuid).socket_id, mpidr);
+}
+/*
- init_cpu_topology is called at boot when only one cpu is running
- which prevents simultaneous write access to cpu_topology array
- */
+void __init init_cpu_topology(void) +{
- unsigned int cpu;
- /* init core mask */
- for_each_possible_cpu(cpu) {
struct cputopo_arm64 *topo = &cpu_topo(cpu);
topo->thread_id = -1;
topo->core_id = -1;
topo->socket_id = -1;
cpumask_clear(&topo->core_sibling);
cpumask_clear(&topo->thread_sibling);
- }
- smp_wmb();
+}
- void arch_fix_phys_package_id(int num, u32 slot) { }
Linaro-acpi mailing list Linaro-acpi@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-acpi
Hi Graeme,
Please have a look at the comments below.
On 2013-8-2 16:57, Graeme Gregory wrote:
Hi Hanjun,
I managed to apply this patch by hand, but the foundation model does not boot with it.
[...]
CPU0: thread -1, cpu 0, socket 0, mpidr 0x80000000 hw perfevents: enabled with arm/armv8-pmuv3 PMU driver, 9 counters available CPU1: Booted secondary processor CPU1: thread -1, cpu 1, socket 0, mpidr 0x80000001 BUG: failure at kernel/smpboot.c:134/smpboot_thread_fn()! Kernel panic - not syncing: BUG! CPU: 0 PID: 11 Comm: ksoftirqd/1 Not tainted 3.11.0-rc3+ #234 Call trace: [<ffffffc000087064>] dump_backtrace+0x0/0x12c [<ffffffc0000871a4>] show_stack+0x14/0x1c [<ffffffc0003de54c>] dump_stack+0x70/0x8c [<ffffffc0003db7b4>] panic+0xe8/0x214 [<ffffffc0000ba96c>] smpboot_thread_fn+0x1b0/0x1c8 [<ffffffc0000b2f74>] kthread+0xb4/0xbc
[...]
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5ec1723..36d09fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -206,6 +207,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable();
- store_cpu_topology(cpu);
I tested the patch on the ARMv8 foundation model; it boots fine and works as expected if I add store_cpu_topology(cpu) in either of the following two places (see function [1]):
a) put it after local_fiq_enable(); b) put it before set_cpu_online(cpu, true);
The boot fails if you apply the patch using "patch -p1 < 0001-xxx.patch", and store_cpu_topology(cpu) then ends up placed just after complete(&cpu_running);
I booted with the config you provided (config-armv8-3.11), and with MADT in the former acpi git tree.
It is weird, and I haven't figured out why; I would be grateful if anybody could give me some hints.
Thanks Hanjun
[1]: asmlinkage void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id();
printk("CPU%u: Booted secondary processor\n", cpu);
/* * All kernel threads share the same mm context; grab a * reference and switch to it. */ atomic_inc(&mm->mm_count); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm));
/* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); flush_tlb_all();
preempt_disable(); trace_hardirqs_off();
/* * Let the primary processor know we're out of the * pen, then head off into the C entry point */ write_pen_release(INVALID_HWID);
/* * Synchronise with the boot thread. */ raw_spin_lock(&boot_lock); raw_spin_unlock(&boot_lock);
/* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online * before we continue. */ set_cpu_online(cpu, true); complete(&cpu_running);
/* * Enable GIC and timers. */ notify_cpu_starting(cpu);
local_irq_enable(); local_fiq_enable();
/* * OK, it's off to the idle thread for us */ cpu_startup_entry(CPUHP_ONLINE); }