Hi Mike,
On 22/04/15 06:16, Michael Turquette wrote:
> This patch demonstrates how to queue up irq_work from within schedule()
> context, which we then use to queue up a normal work_struct to select
> frequency and program it.
>
> Call graph looks like this:
>
> {en,de}queue_task_fair or task_tick_fair ->
> 	cap_gov_update_cpu (same as before) ->
> 	cap_gov_irq_work (new irq_work callback) ->
> 	cap_gov_work (new work_struct callback)
>
> I've removed cap_gov_thread and the corresponding kernel thread that
> implemented its own loop. This also gets rid of the atomic
> need_wake_task variable and the need to call schedule() manually in
> the kthread loop.
Can you expand a bit more on why you think completely moving away from
the kthread is better/more desirable?
> The work_struct runs to completion, no loop necessary. We queue it on
> the same cpu that ran the *_task_fair function that kicked off the
> whole chain.
I'm not sure this is better and I don't understand the implications of queueing up this work all the time. I assume it is Not Good for scheduler stats? Maybe we should filter that out?
So, the implication I see in removing the kthread is that the work we
queue ends up in the same bucket as everyone else using the same
workqueue. It also seems we still have some reentrancy issues, as we
usually don't want to trigger anything when the thing being scheduled
is the one responsible for changing frequency (with the kthread we can
temporarily work around this by setting the kthread to some RT prio,
then maybe associating some special flag with it). The filtering seems
simpler if we just know that we have to filter out cap_gov's kthreads.
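To make the filtering point concrete, this is roughly what I have in
mind. Just a sketch on top of the old kthread design; note that
cap_gov_is_own_task() is a hypothetical helper, not something in the
patch:

	/*
	 * Hypothetical sketch: with a dedicated per-policy kthread we
	 * know exactly which task performs the frequency changes, so
	 * the scheduler-side hooks can filter it out and avoid
	 * re-triggering the governor from its own activity.
	 */
	static bool cap_gov_is_own_task(struct gov_data *gd,
					struct task_struct *p)
	{
		return !IS_ERR_OR_NULL(gd->task) && p == gd->task;
	}

	/* then, early in cap_gov_update_cpu(): */
	if (cap_gov_is_own_task(gd, current))
		return;

With a bare work_struct the request instead runs from a shared kworker
that also serves unrelated work items, so there is no stable task to
compare against.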
> Note that this avoids the reentrancy issues faced by Morten back in
> October 2013 since we're not queueing up work_struct with rq locks
> held.
IMHO, we still have some issues, as said above.
> This patch is only smoke tested. The GOV_STOP path doesn't work, so
> switching away from cap_gov to ondemand will fail.
>
> Thoughts?
Comments on the code follow.
> Signed-off-by: Michael Turquette <mturquette@linaro.org>
> ---
>  kernel/sched/cap_gov.c | 119 ++++++++++++++++--------------------------------
>  kernel/sched/fair.c    |   8 ----
>  2 files changed, 39 insertions(+), 88 deletions(-)
>
> diff --git a/kernel/sched/cap_gov.c b/kernel/sched/cap_gov.c
> index 72873ab..8380564 100644
> --- a/kernel/sched/cap_gov.c
> +++ b/kernel/sched/cap_gov.c
> @@ -10,6 +10,8 @@
>  #include <linux/module.h>
>  #include <linux/kthread.h>
>  #include <linux/percpu.h>
> +#include <linux/irq_work.h>
> +#include <linux/workqueue.h>
>
>  #include "sched.h"
>
> @@ -42,6 +44,9 @@ struct gov_data {
>  	unsigned int throttle_nsec;
>  	struct task_struct *task;
>  	atomic_t need_wake_task;
We can remove these two fields above.
> +	struct irq_work irq_work;
> +	struct work_struct work;
> +	struct cpufreq_policy *policy;
>  };
>
>  /**
> @@ -117,101 +122,56 @@ out:
>  	return freq;
>  }
>
> -/*
> - * we pass in struct cpufreq_policy. This is safe because changing out the
> - * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
> - * which tears down all of the data structures and __cpufreq_governor(policy,
> - * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
> - * new policy pointer
> - */
> -static int cap_gov_thread(void *data)
> +static void cap_gov_work(struct work_struct *work)
>  {
> -	struct sched_param param;
>  	struct cpufreq_policy *policy;
>  	struct gov_data *gd;
>  	unsigned long freq;
>  	int ret;
>
> -	policy = (struct cpufreq_policy *) data;
> +	gd = container_of(work, struct gov_data, work);
> +	if (!gd) {
> +		pr_err("%s: here0.5: no gd!\n", __func__);
> +		return;
> +	}
> +
> +	policy = gd->policy;
>  	if (!policy) {
> -		pr_warn("%s: missing policy\n", __func__);
> -		do_exit(-EINVAL);
> +		pr_err("%s: here1.5!\n", __func__);
> +		return;
>  	}
>
> -	gd = policy->gov_data;
> -	if (!gd) {
> -		pr_warn("%s: missing governor data\n", __func__);
> -		do_exit(-EINVAL);
> -	}
> +	down_write(&policy->rwsem);
>
> -	param.sched_priority = 0;
> -	sched_setscheduler(current, SCHED_FIFO, &param);
> -	set_cpus_allowed_ptr(current, policy->related_cpus);
> -
> -	/* main loop of the per-policy kthread */
> -	do {
> -		down_write(&policy->rwsem);
> -		if (!atomic_read(&gd->need_wake_task)) {
> -			if (kthread_should_stop())
> -				break;
> -			trace_printk("NOT waking up kthread (%d)", gd->task->pid);
> -			up_write(&policy->rwsem);
> -			set_current_state(TASK_INTERRUPTIBLE);
> -			schedule();
> -			continue;
> -		}
> -
> -		trace_printk("kthread %d requested freq switch", gd->task->pid);
> -
> -		freq = cap_gov_select_freq(policy);
> -
> -		ret = __cpufreq_driver_target(policy, freq,
> -				CPUFREQ_RELATION_H);
> -		if (ret)
> -			pr_debug("%s: __cpufreq_driver_target returned %d\n",
> -					__func__, ret);
> -
> -		trace_printk("kthread %d requested freq switch", gd->task->pid);
> -
> -		gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
> -		atomic_set(&gd->need_wake_task, 0);
> -		up_write(&policy->rwsem);
> -	} while (!kthread_should_stop());
> -
> -	do_exit(0);
> -}
> +	trace_printk("kthread %d requested freq switch", gd->task->pid);
This has to go away too, since gd->task doesn't exist anymore (apart
from it being a trace_printk()).
> -static void cap_gov_wake_up_process(struct task_struct *task)
> -{
> -	/* this is null during early boot */
> -	if (IS_ERR_OR_NULL(task)) {
> -		return;
> -	}
> +	freq = cap_gov_select_freq(policy);
>
> +	ret = __cpufreq_driver_target(policy, freq,
> +			CPUFREQ_RELATION_H);
> +	if (ret)
> +		pr_err("%s: __cpufreq_driver_target returned %d\n",
> +				__func__, ret);
>
> -	wake_up_process(task);
> +	trace_printk("kthread %d requested freq switch", gd->task->pid);
Ditto.
> +	gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
> +	up_write(&policy->rwsem);
>  }
>
> -void cap_gov_kick_thread(int cpu)
> +static void cap_gov_irq_work(struct irq_work *irq_work)
>  {
> -	struct cpufreq_policy *policy;
> -	struct gov_data *gd = NULL;
> +	struct gov_data *gd;
>
> -	policy = cpufreq_cpu_get(cpu);
> -	if (IS_ERR_OR_NULL(policy))
> -		return;
> +	gd = container_of(irq_work, struct gov_data, irq_work);
> +	if (!gd) {
> +		pr_err("%s: here0.5: no gd!\n", __func__);
> +		return;
> +	}
>
> -	gd = policy->gov_data;
> -	if (!gd)
> -		goto out;
> -
> -	/* per-cpu access not needed here since we have gd */
> -	if (atomic_read(&gd->need_wake_task)) {
> -		trace_printk("waking up kthread (%d)", gd->task->pid);
> -		cap_gov_wake_up_process(gd->task);
> -	}
> -
> -out:
> -	cpufreq_cpu_put(policy);
> +	schedule_work_on(raw_smp_processor_id(), &gd->work)
Missing semicolon :).
-	schedule_work_on(raw_smp_processor_id(), &gd->work)
+	schedule_work_on(raw_smp_processor_id(), &gd->work);
>  }
>
>  /**
> @@ -262,7 +222,8 @@ void cap_gov_update_cpu(int cpu)
>  		goto out;
>  	}
>
> -	atomic_set(per_cpu(cap_gov_wake_task, cpu), 1);
> +	if (irq_work_queue_on(&gd->irq_work, cpu))
> +		trace_printk("could not queue irq_work");
Maybe some pr_err thing?
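E.g. (just a sketch; also, if I read irq_work_queue_on() right, it
returns true when the work has actually been queued, so the condition
probably wants flipping as well):

	if (!irq_work_queue_on(&gd->irq_work, cpu))
		pr_err("%s: irq_work already pending on cpu %d\n",
		       __func__, cpu);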
>  out:
>  	cpufreq_cpu_put(policy);
>
> @@ -295,12 +256,10 @@ static void cap_gov_start(struct cpufreq_policy *policy)
>  	for_each_cpu(cpu, policy->related_cpus)
>  		per_cpu(cap_gov_wake_task, cpu) = &gd->need_wake_task;
>
> -	/* init per-policy kthread */
> -	gd->task = kthread_create(cap_gov_thread, policy, "kcap_gov_task");
> -	if (IS_ERR_OR_NULL(gd->task))
> -		pr_err("%s: failed to create kcap_gov_task thread\n", __func__);
> +	init_irq_work(&gd->irq_work, cap_gov_irq_work);
> +	INIT_WORK(&gd->work, cap_gov_work);
>
>  	policy->gov_data = gd;
> +	gd->policy = policy;
>  }
You should also fix up cap_gov_stop(), something like:
@@ -269,7 +270,7 @@ static void cap_gov_stop(struct cpufreq_policy *policy)
 	gd = policy->gov_data;
 	policy->gov_data = NULL;

-	kthread_stop(gd->task);

 	/* FIXME replace with devm counterparts? */
 	kfree(gd);
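Probably also something to make sure nothing is still in flight before
gd gets freed. A sketch, assuming the irq_work/work fields this patch
adds to struct gov_data:

	/* make sure no irq_work or work_struct still references gd */
	irq_work_sync(&gd->irq_work);
	cancel_work_sync(&gd->work);

Otherwise a late callback can end up dereferencing freed memory.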
Thanks,
- Juri
>  static void cap_gov_stop(struct cpufreq_policy *policy)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2ec2dc7..16d73a9 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7776,14 +7776,6 @@ static void run_rebalance_domains(struct softirq_action *h)
>  	 */
>  	nohz_idle_balance(this_rq, idle);
>  	rebalance_domains(this_rq, idle);
> -
> -	/*
> -	 * FIXME some hardware does not require this, but current CPUfreq
> -	 * locking prevents us from changing cpu frequency with rq locks held
> -	 * and interrupts disabled
> -	 */
> -	if (sched_energy_freq())
> -		cap_gov_kick_thread(cpu_of(this_rq));
>  }
>
>  /*