This patch demonstrates how to queue up irq_work from within schedule() context; the irq_work callback is then used to queue up a normal work_struct, which selects a frequency and programs it.
Call graph looks like this:
{en,de}queue_task_fair or task_tick_fair
	-> cap_gov_update_cpu (same as before)
	-> cap_gov_irq_work (new irq_work callback)
	-> cap_gov_work (new work_struct callback)
I've removed cap_gov_thread and the corresponding per-policy kernel thread that implemented its own loop. This also gets rid of the atomic need_wake_task variable and the need to call schedule() manually in the kthread loop.
The work_struct runs to completion, no loop necessary. We queue it on the same cpu that ran the *_task_fair function that kicked off the whole chain.
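In case the mechanics aren't obvious from the diff, the handoff boils down to the sketch below. This is only an illustration with made-up demo_* names, not the patch itself; the real code additionally takes policy->rwsem, throttles, and looks gov_data up through the cpufreq policy.

#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/printk.h>

struct demo_data {
	struct irq_work irq_work;
	struct work_struct work;
};

/* process context: free to sleep, take policy->rwsem and call
 * __cpufreq_driver_target() */
static void demo_work(struct work_struct *work)
{
	pr_info("%s: select and program a frequency here\n", __func__);
}

/* hard irq context, no rq locks held: just bounce to a workqueue on the
 * CPU we are already running on */
static void demo_irq_work(struct irq_work *irq_work)
{
	struct demo_data *dd = container_of(irq_work, struct demo_data, irq_work);

	schedule_work_on(raw_smp_processor_id(), &dd->work);
}

/* one-time setup */
static void demo_init(struct demo_data *dd)
{
	init_irq_work(&dd->irq_work, demo_irq_work);
	INIT_WORK(&dd->work, demo_work);
}

/* safe to call with rq locks held and interrupts off: irq_work_queue_on()
 * only marks the work pending and raises an IPI */
static void demo_kick(struct demo_data *dd, int cpu)
{
	irq_work_queue_on(&dd->irq_work, cpu);
}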
I'm not sure this is better and I don't understand the implications of queueing up this work all the time. I assume it is Not Good for scheduler stats? Maybe we should filter that out?
Note that this avoids the reentrancy issues faced by Morten back in October 2013, since we're not queueing up a work_struct with rq locks held.
This patch is only smoke tested. The GOV_STOP path doesn't work, so switching away from cap_gov to ondemand will fail.
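I haven't dug into the GOV_STOP failure yet; my guess is that cap_gov_stop() still tries to tear down the kthread that no longer exists. Whatever the fix ends up being, it presumably needs to make sure nothing is in flight before gov_data goes away, along the lines of this untested sketch (not part of this patch):

static void cap_gov_stop(struct cpufreq_policy *policy)
{
	struct gov_data *gd = policy->gov_data;

	/* wait for a pending irq_work, then for any queued or running work */
	irq_work_sync(&gd->irq_work);
	cancel_work_sync(&gd->work);

	/* existing gov_data teardown continues from here */
}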
Thoughts?
Signed-off-by: Michael Turquette <mturquette@linaro.org>
---
 kernel/sched/cap_gov.c | 119 ++++++++++++++++---------------------------------
 kernel/sched/fair.c    |   8 ----
 2 files changed, 39 insertions(+), 88 deletions(-)
diff --git a/kernel/sched/cap_gov.c b/kernel/sched/cap_gov.c
index 72873ab..8380564 100644
--- a/kernel/sched/cap_gov.c
+++ b/kernel/sched/cap_gov.c
@@ -10,6 +10,8 @@
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/percpu.h>
+#include <linux/irq_work.h>
+#include <linux/workqueue.h>
 
 #include "sched.h"
 
@@ -42,6 +44,9 @@ struct gov_data {
 	unsigned int throttle_nsec;
 	struct task_struct *task;
 	atomic_t need_wake_task;
+	struct irq_work irq_work;
+	struct work_struct work;
+	struct cpufreq_policy *policy;
 };
 
 /**
@@ -117,101 +122,56 @@ out:
 	return freq;
 }
 
-/*
- * we pass in struct cpufreq_policy. This is safe because changing out the
- * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
- * which tears down all of the data structures and __cpufreq_governor(policy,
- * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
- * new policy pointer
- */
-static int cap_gov_thread(void *data)
+static void cap_gov_work(struct work_struct *work)
 {
-	struct sched_param param;
 	struct cpufreq_policy *policy;
 	struct gov_data *gd;
 	unsigned long freq;
 	int ret;
 
-	policy = (struct cpufreq_policy *) data;
+	gd = container_of(work, struct gov_data, work);
+	if (!gd) {
+		pr_err("%s: here0.5: no gd!\n", __func__);
+		return;
+	}
+
+	policy = gd->policy;
 	if (!policy) {
+		pr_err("%s: here1.5!\n", __func__);
 		pr_warn("%s: missing policy\n", __func__);
-		do_exit(-EINVAL);
+		return;
 	}
 
-	gd = policy->gov_data;
-	if (!gd) {
-		pr_warn("%s: missing governor data\n", __func__);
-		do_exit(-EINVAL);
-	}
+	down_write(&policy->rwsem);
 
-	param.sched_priority = 0;
-	sched_setscheduler(current, SCHED_FIFO, &param);
-	set_cpus_allowed_ptr(current, policy->related_cpus);
-
-	/* main loop of the per-policy kthread */
-	do {
-		down_write(&policy->rwsem);
-		if (!atomic_read(&gd->need_wake_task)) {
-			if (kthread_should_stop())
-				break;
-			trace_printk("NOT waking up kthread (%d)", gd->task->pid);
-			up_write(&policy->rwsem);
-			set_current_state(TASK_INTERRUPTIBLE);
-			schedule();
-			continue;
-		}
-
-		trace_printk("kthread %d requested freq switch", gd->task->pid);
-
-		freq = cap_gov_select_freq(policy);
-
-		ret = __cpufreq_driver_target(policy, freq,
-				CPUFREQ_RELATION_H);
-		if (ret)
-			pr_debug("%s: __cpufreq_driver_target returned %d\n",
-					__func__, ret);
-
-		trace_printk("kthread %d requested freq switch", gd->task->pid);
-
-		gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
-		atomic_set(&gd->need_wake_task, 0);
-		up_write(&policy->rwsem);
-	} while (!kthread_should_stop());
-
-	do_exit(0);
-}
+	trace_printk("kthread %d requested freq switch", gd->task->pid);
 
-static void cap_gov_wake_up_process(struct task_struct *task)
-{
-	/* this is null during early boot */
-	if (IS_ERR_OR_NULL(task)) {
-		return;
-	}
+	freq = cap_gov_select_freq(policy);
+
+	ret = __cpufreq_driver_target(policy, freq,
+			CPUFREQ_RELATION_H);
+	if (ret)
+		pr_err("%s: __cpufreq_driver_target returned %d\n",
+				__func__, ret);
 
-	wake_up_process(task);
+	trace_printk("kthread %d requested freq switch", gd->task->pid);
+
+	gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
+	up_write(&policy->rwsem);
 }
 
-void cap_gov_kick_thread(int cpu)
+static void cap_gov_irq_work(struct irq_work *irq_work)
 {
 	struct cpufreq_policy *policy;
-	struct gov_data *gd = NULL;
+	struct gov_data *gd;
 
-	policy = cpufreq_cpu_get(cpu);
-	if (IS_ERR_OR_NULL(policy))
+	gd = container_of(irq_work, struct gov_data, irq_work);
+	if (!gd) {
+		pr_err("%s: here0.5: no gd!\n", __func__);
 		return;
-
-	gd = policy->gov_data;
-	if (!gd)
-		goto out;
-
-	/* per-cpu access not needed here since we have gd */
-	if (atomic_read(&gd->need_wake_task)) {
-		trace_printk("waking up kthread (%d)", gd->task->pid);
-		cap_gov_wake_up_process(gd->task);
 	}
 
-out:
-	cpufreq_cpu_put(policy);
+	schedule_work_on(raw_smp_processor_id(), &gd->work);
 }
 
 /**
@@ -262,7 +222,8 @@ void cap_gov_update_cpu(int cpu)
 		goto out;
 	}
 
-	atomic_set(per_cpu(cap_gov_wake_task, cpu), 1);
+	if (!irq_work_queue_on(&gd->irq_work, cpu))
+		trace_printk("could not queue irq_work");
 
 out:
 	cpufreq_cpu_put(policy);
@@ -295,12 +256,10 @@ static void cap_gov_start(struct cpufreq_policy *policy)
 	for_each_cpu(cpu, policy->related_cpus)
 		per_cpu(cap_gov_wake_task, cpu) = &gd->need_wake_task;
 
-	/* init per-policy kthread */
-	gd->task = kthread_create(cap_gov_thread, policy, "kcap_gov_task");
-	if (IS_ERR_OR_NULL(gd->task))
-		pr_err("%s: failed to create kcap_gov_task thread\n", __func__);
-
+	init_irq_work(&gd->irq_work, cap_gov_irq_work);
+	INIT_WORK(&gd->work, cap_gov_work);
 	policy->gov_data = gd;
+	gd->policy = policy;
 }
 
 static void cap_gov_stop(struct cpufreq_policy *policy)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ec2dc7..16d73a9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7776,14 +7776,6 @@ static void run_rebalance_domains(struct softirq_action *h)
 	 */
 	nohz_idle_balance(this_rq, idle);
 	rebalance_domains(this_rq, idle);
-
-	/*
-	 * FIXME some hardware does not require this, but current CPUfreq
-	 * locking prevents us from changing cpu frequency with rq locks held
-	 * and interrupts disabled
-	 */
-	if (sched_energy_freq())
-		cap_gov_kick_thread(cpu_of(this_rq));
 }
 
 /*
-- 
1.9.1