To provide nohz_full tick support, there is a set of tick dependency masks that need to be evaluated on every IRQ and context switch. Switching on nohz_full tick support at runtime is problematic because some of the tick dependency masks may not be properly set, causing problems down the road.
Allow the nohz_full boot option to be specified without any parameter to force-enable nohz_full tick support without any CPU in the tick_nohz_full_mask yet. The context_tracking_key and the tick_nohz_full_running flag will be enabled in this case to make tick_nohz_full_enabled() return true.
There is still a small performance overhead in force-enabling nohz_full this way, so it should only be used if there is a chance that some CPUs may become isolated later via the cpuset isolated partition functionality and CPU isolation close to that of nohz_full is desired.
Signed-off-by: Waiman Long longman@redhat.com --- .../admin-guide/kernel-parameters.txt | 19 ++++++++++++------- include/linux/context_tracking.h | 7 ++++++- kernel/context_tracking.c | 4 +++- kernel/sched/isolation.c | 13 ++++++++++++- kernel/time/tick-sched.c | 11 +++++++++-- 5 files changed, 42 insertions(+), 12 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 747a55abf494..89a8161475b5 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4260,15 +4260,20 @@ Valid arguments: on, off Default: on
- nohz_full= [KNL,BOOT,SMP,ISOL] - The argument is a cpu list, as described above. + nohz_full[=cpu-list] + [KNL,BOOT,SMP,ISOL] In kernels built with CONFIG_NO_HZ_FULL=y, set - the specified list of CPUs whose tick will be stopped - whenever possible. The boot CPU will be forced outside - the range to maintain the timekeeping. Any CPUs - in this list will have their RCU callbacks offloaded, + the specified list of CPUs whose tick will be + stopped whenever possible. If the argument is + not specified, nohz_full will be forced enabled + without any CPU in the nohz_full list yet. + The boot CPU will be forced outside the range + to maintain the timekeeping. Any CPUs in this + list will have their RCU callbacks offloaded, just as if they had also been called out in the - rcu_nocbs= boot parameter. + rcu_nocbs= boot parameter. There is no need + to use rcu_nocbs= boot parameter if nohz_full + has been set which will override rcu_nocbs.
Note that this argument takes precedence over the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option. diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index af9fe87a0922..a3fea7f9fef6 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -9,8 +9,13 @@
#include <asm/ptrace.h>
- #ifdef CONFIG_CONTEXT_TRACKING_USER +/* + * Pass CONTEXT_TRACKING_FORCE_ENABLE to ct_cpu_track_user() to force enable + * user context tracking. + */ +#define CONTEXT_TRACKING_FORCE_ENABLE (-1) + extern void ct_cpu_track_user(int cpu);
/* Called with interrupts disabled. */ diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index fb5be6e9b423..734354bbfdbb 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -698,7 +698,9 @@ void __init ct_cpu_track_user(int cpu) { static __initdata bool initialized = false;
- if (!per_cpu(context_tracking.active, cpu)) { + if (cpu == CONTEXT_TRACKING_FORCE_ENABLE) { + static_branch_inc(&context_tracking_key); + } else if (!per_cpu(context_tracking.active, cpu)) { per_cpu(context_tracking.active, cpu) = true; static_branch_inc(&context_tracking_key); } diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index f26708667754..2bed4b2f9ec5 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -146,6 +146,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags) }
alloc_bootmem_cpumask_var(&non_housekeeping_mask); + if (cpulist_parse(str, non_housekeeping_mask) < 0) { pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n"); goto free_non_housekeeping_mask; @@ -155,6 +156,13 @@ static int __init housekeeping_setup(char *str, unsigned long flags) cpumask_andnot(housekeeping_staging, cpu_possible_mask, non_housekeeping_mask);
+ /* + * Allow "nohz_full" without parameter to force enable nohz_full + * at boot time without any CPUs in the nohz_full list yet. + */ + if ((flags & HK_FLAG_KERNEL_NOISE) && !*str) + goto setup_housekeeping_staging; + first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging); if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) { __cpumask_set_cpu(smp_processor_id(), housekeeping_staging); @@ -168,6 +176,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags) if (cpumask_empty(non_housekeeping_mask)) goto free_housekeeping_staging;
+setup_housekeeping_staging: if (!housekeeping.flags) { /* First setup call ("nohz_full=" or "isolcpus=") */ enum hk_type type; @@ -212,10 +221,12 @@ static int __init housekeeping_nohz_full_setup(char *str) unsigned long flags;
flags = HK_FLAG_KERNEL_NOISE; + if (*str == '=') + str++;
return housekeeping_setup(str, flags); } -__setup("nohz_full=", housekeeping_nohz_full_setup); +__setup("nohz_full", housekeeping_nohz_full_setup);
static int __init housekeeping_isolcpus_setup(char *str) { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c527b421c865..87b26a4471e7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -651,8 +651,15 @@ void __init tick_nohz_init(void) } }
- for_each_cpu(cpu, tick_nohz_full_mask) - ct_cpu_track_user(cpu); + /* + * Force enable context_tracking_key if tick_nohz_full_mask empty + */ + if (cpumask_empty(tick_nohz_full_mask)) { + ct_cpu_track_user(CONTEXT_TRACKING_FORCE_ENABLE); + } else { + for_each_cpu(cpu, tick_nohz_full_mask) + ct_cpu_track_user(cpu); + }
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "kernel/nohz:predown", NULL,