On 5 November 2013 23:27, Peter Zijlstra <peterz@infradead.org> wrote:
On Tue, Nov 05, 2013 at 03:57:23PM +0100, Vincent Guittot wrote:
Your proposal looks fine to me. It's clearly better to move the configuration of the sched_domain fields into one place. Have you already got an idea about how to let the architecture override the topology?
Maybe something like the below -- completely untested (my s390 compiler is on a machine that's currently powered off).
My primary need comes from the fact that the topology configuration is not the same for all cores
Do expand.. the various cpu masks used in the topology list are per cpu, is that sufficient room to wriggle or do you need more?
My current implementation sets a flag in each level (SMT, MC and CPU) to describe the power gating capabilities for the groups of cpus, but the capabilities can be different within the same level; I mean that we can have a group of cpus that can power gate at MC level in the system whereas another group of CPUs can only power gate at CPU level. With the current implementation I can't tell the difference, so I have added the cpu parameter when setting the flags. The other solution is to add new topology levels with cpu masks that can give the power dependency with other cpus (currently power gating, but we could have more levels, for frequency dependency as an example). In this case the current implementation is enough, and the main difficulty will be where to insert these new levels relative to the current ones.
A typical example with one cluster that can power gate at core level whereas the other cluster can power gate at cluster level, will give the following domain topology:
If we set a flag in the current topology levels we should have something like below
CPU0: domain 0: span 0-1 level: SMT flags: SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN groups: 0 1 domain 1: span 0-7 level: MC flags: SD_SHARE_PKG_RESOURCES groups: 0-1 2-3 4-5 6-7 domain 2: span 0-15 level: CPU flags: groups: 0-7 8-15
CPU8 domain 0: span 8-9 level: SMT flags: SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN groups: 8 9 domain 1: span 8-15 level: MC flags: SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN groups: 8-9 10-11 12-13 14-15 domain 2: span 0-15 level CPU flags: groups: 8-15 0-7
If we create new levels, we could have something like below
CPU0 domain 0: span 0-1 level: SMT flags: SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES groups: 0 1 domain 1: span 0-7 level: MC flags: SD_SHARE_PKG_RESOURCES groups: 0-1 2-3 4-5 6-7 domain 2: span 0-15 level PWR flags SD_NOT_SHARE_POWERDOMAIN groups: 0-1 2-3 4-5 6-7 8-15 domain 3: span 0-15 level: CPU flags: groups: 0-7 8-15
CPU8 domain 0: span 8-9 level: SMT flags: SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES groups: 8 9 domain 1: span 8-15 level: MC flags: SD_SHARE_PKG_RESOURCES groups: 8-9 10-11 12-13 14-15 domain 2: span 0-15 level PWR flags SD_NOT_SHARE_POWERDOMAIN groups: 0-1 2-3 4-5 6-7 8-15 domain 3: span 0-15 level CPU flags: groups: 8-15 0-7
Vincent
--- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1070,3 +1070,23 @@ static int __init s390_smp_init(void) return 0; } subsys_initcall(s390_smp_init);
+/*
+ * s390 scheduling-domain hierarchy, listed bottom-up:
+ * SMT siblings -> core group -> book -> whole machine.
+ * NULL ->mask terminates the list (see for_each_sd_topology()).
+ */
+static struct sched_domain_topology_level s390_topology[] = { +#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES }, /* HW threads sharing a core */
+#endif +#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, SD_SHARE_PKG_RESOURCES }, /* cores sharing package resources */
+#endif +#ifdef CONFIG_SCHED_BOOK
{ cpu_book_mask, }, /* s390-specific "book" level, no extra SD flags */
+#endif
{ cpu_cpu_mask, }, /* all CPUs */
{ NULL, }, /* terminator */
+};
+/*
+ * Install the s390-specific topology table before the scheduler builds
+ * its domains.  Must return 0: initcall return values are checked, and
+ * falling off the end of a non-void function is undefined behaviour.
+ */
+static int __init s390_sched_topology(void) +{
sched_domain_topology = s390_topology;
return 0;
+} +early_initcall(s390_sched_topology); --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -889,6 +889,20 @@ void free_sched_domains(cpumask_var_t do
bool cpus_share_cache(int this_cpu, int that_cpu);
+typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+#define SDTL_OVERLAP 0x01
+/*
+ * One level of the scheduling-domain hierarchy.  An architecture may
+ * provide its own array of these (terminated by a NULL ->mask) and
+ * point sched_domain_topology at it to override the default topology.
+ */
+struct sched_domain_topology_level {
sched_domain_mask_f mask; /* cpumask spanned by this level for a given cpu */
int sd_flags; /* SD_* flags applied to domains built from this level */
int flags; /* SDTL_* flags (e.g. SDTL_OVERLAP) */
int numa_level; /* NOTE(review): presumably the NUMA distance index -- not set in this patch */
struct sd_data data; /* per-level allocation state used by domain construction */
+};
+extern struct sched_domain_topology_level *sched_domain_topology;
#else /* CONFIG_SMP */
struct sched_domain_attr; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5377,20 +5377,6 @@ enum s_alloc { sa_none, };
-struct sched_domain_topology_level;
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
-#define SDTL_OVERLAP 0x01
-struct sched_domain_topology_level {
sched_domain_mask_f mask;
int sd_flags;
int flags;
int numa_level;
struct sd_data data;
-};
/*
- Build an iteration mask that can exclude certain CPUs from the upwards
- domain traversal.
@@ -5841,6 +5827,7 @@ sd_init(struct sched_domain_topology_lev
return sd;
}
/*
- Topology list, bottom-up.
*/ @@ -5851,14 +5838,11 @@ static struct sched_domain_topology_leve #ifdef CONFIG_SCHED_MC { cpu_coregroup_mask, SD_SHARE_PKG_RESOURCES }, #endif -#ifdef CONFIG_SCHED_BOOK
{ cpu_book_mask, },
-#endif { cpu_cpu_mask, }, { NULL, }, };
-static struct sched_domain_topology_level *sched_domain_topology = default_topology; +struct sched_domain_topology_level *sched_domain_topology = default_topology;
#define for_each_sd_topology(tl) \ for (tl = sched_domain_topology; tl->mask; tl++)