If slow-path frequency changes are carried out in a SCHED_OTHER context, they may be delayed for some amount of time, possibly indefinitely, while real-time or deadline activity is taking place.
Move the slow path to a real-time kernel thread. In the future the thread should be made SCHED_DEADLINE. The RT priority is arbitrarily set to 50 for now.
Hackbench results on ARM Exynos, dual core A15 platform for 10 iterations:
$ hackbench -s 100 -l 100 -g 10 -f 20
Before          After
---------------------------------
1.808           1.603
1.847           1.251
2.229           1.590
1.952           1.600
1.947           1.257
1.925           1.627
2.694           1.620
1.258           1.621
1.919           1.632
1.250           1.240
Average:
1.8829          1.5041
Based on initial work by Steve Muckle.
Signed-off-by: Steve Muckle <smuckle.linux@gmail.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 kernel/sched/cpufreq_schedutil.c | 85 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 68f21bb6bd44..f165ba0f0766 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -12,11 +12,14 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpufreq.h> +#include <linux/kthread.h> #include <linux/slab.h> #include <trace/events/power.h>
#include "sched.h"
+#define SUGOV_KTHREAD_PRIORITY 50 + struct sugov_tunables { struct gov_attr_set attr_set; unsigned int rate_limit_us; @@ -35,8 +38,10 @@ struct sugov_policy {
/* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; - struct work_struct work; + struct kthread_work work; struct mutex work_lock; + struct kthread_worker worker; + struct task_struct *thread; bool work_in_progress;
bool need_freq_update; @@ -291,7 +296,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, raw_spin_unlock(&sg_policy->update_lock); }
-static void sugov_work(struct work_struct *work) +static void sugov_work(struct kthread_work *work) { struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
@@ -308,7 +313,21 @@ static void sugov_irq_work(struct irq_work *irq_work) struct sugov_policy *sg_policy;
sg_policy = container_of(irq_work, struct sugov_policy, irq_work); - schedule_work_on(smp_processor_id(), &sg_policy->work); + + /* + * For Real Time and Deadline tasks, schedutil governor shoots the + * frequency to maximum. And special care must be taken to ensure that + * this kthread doesn't result in that. + * + * This is (mostly) guaranteed by the work_in_progress flag. The flag is + * updated only at the end of the sugov_work() and before that schedutil + * rejects all other frequency scaling requests. + * + * Though there is a very rare case where the RT thread yields right + * after the work_in_progress flag is cleared. The effects of that are + * neglected for now. + */ + kthread_queue_work(&sg_policy->worker, &sg_policy->work); }
/************************** sysfs interface ************************/ @@ -372,7 +391,6 @@ static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
sg_policy->policy = policy; init_irq_work(&sg_policy->irq_work, sugov_irq_work); - INIT_WORK(&sg_policy->work, sugov_work); mutex_init(&sg_policy->work_lock); raw_spin_lock_init(&sg_policy->update_lock); return sg_policy; @@ -384,6 +402,51 @@ static void sugov_policy_free(struct sugov_policy *sg_policy) kfree(sg_policy); }
+static int sugov_kthread_create(struct sugov_policy *sg_policy) +{ + struct task_struct *thread; + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 }; + struct cpufreq_policy *policy = sg_policy->policy; + int ret; + + /* kthread only required for slow path */ + if (policy->fast_switch_enabled) + return 0; + + kthread_init_work(&sg_policy->work, sugov_work); + kthread_init_worker(&sg_policy->worker); + thread = kthread_create(kthread_worker_fn, &sg_policy->worker, + "sugov:%d", + cpumask_first(policy->related_cpus)); + if (IS_ERR(thread)) { + pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread)); + return PTR_ERR(thread); + } + + ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param); + if (ret) { + kthread_stop(thread); + pr_warn("%s: failed to set SCHED_FIFO\n", __func__); + return ret; + } + + sg_policy->thread = thread; + kthread_bind_mask(thread, policy->related_cpus); + wake_up_process(thread); + + return 0; +} + +static void sugov_kthread_stop(struct sugov_policy *sg_policy) +{ + /* kthread only required for slow path */ + if (sg_policy->policy->fast_switch_enabled) + return; + + kthread_flush_worker(&sg_policy->worker); + kthread_stop(sg_policy->thread); +} + static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) { struct sugov_tunables *tunables; @@ -424,12 +487,16 @@ static int sugov_init(struct cpufreq_policy *policy) goto disable_fast_switch; }
+ ret = sugov_kthread_create(sg_policy); + if (ret) + goto free_sg_policy; + mutex_lock(&global_tunables_lock);
if (global_tunables) { if (WARN_ON(have_governor_per_policy())) { ret = -EINVAL; - goto free_sg_policy; + goto stop_kthread; } policy->governor_data = sg_policy; sg_policy->tunables = global_tunables; @@ -441,7 +508,7 @@ static int sugov_init(struct cpufreq_policy *policy) tunables = sugov_tunables_alloc(sg_policy); if (!tunables) { ret = -ENOMEM; - goto free_sg_policy; + goto stop_kthread; }
tunables->rate_limit_us = LATENCY_MULTIPLIER; @@ -466,6 +533,9 @@ static int sugov_init(struct cpufreq_policy *policy) policy->governor_data = NULL; sugov_tunables_free(tunables);
+stop_kthread: + sugov_kthread_stop(sg_policy); + free_sg_policy: mutex_unlock(&global_tunables_lock);
@@ -493,6 +563,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
mutex_unlock(&global_tunables_lock);
+ sugov_kthread_stop(sg_policy); sugov_policy_free(sg_policy); cpufreq_disable_fast_switch(policy); } @@ -541,7 +612,7 @@ static void sugov_stop(struct cpufreq_policy *policy) synchronize_sched();
irq_work_sync(&sg_policy->irq_work); - cancel_work_sync(&sg_policy->work); + kthread_cancel_work_sync(&sg_policy->work); }
static void sugov_limits(struct cpufreq_policy *policy)