On 01/05/15 00:49, Michael Turquette wrote:
Quoting Daniel Lezcano (2015-04-30 08:05:52)
On 04/27/2015 09:46 AM, Michael Turquette wrote:
[snip]
+	/*
+	 * get_cpu_usage is called without locking the runqueues. This is the
+	 * same behavior used by find_busiest_cpu in load_balance. We are
+	 * willing to accept occasionally stale data here in exchange for
+	 * lockless behavior.
+	 */
+	for_each_cpu(cpu, policy->cpus) {
+		usage = get_cpu_usage(cpu);
+		if (usage > max_usage)
+			max_usage = usage;
+	}
+
+	/* add margin to max_usage based on imbalance_pct */
+	max_usage = max_usage * MARGIN_PCT / 100;
+
+	cpu = cpumask_first(policy->cpus);
+
+	/* freq is current utilization + 25% */
+	freq = max_usage * policy->max / capacity_orig_of(cpu);

Couldn't this be slightly simplified by using
cpu_rq(cpu)->cfs.utilization_load_avg directly instead of calling
get_cpu_usage?
The big.LITTLE case here is confusing. Is cfs.utilization_load_avg already
normalized against cpu capacity differences? If so then you are right, I
could use the value directly. But if not, then get_cpu_usage buys us that
normalization by doing:

	cfs.utilization_load_avg * capacity_orig >> SCHED_LOAD_SHIFT;

where capacity_orig may be different across the various CPUs.

Nope. In this patchset utilization_load_avg is only freq-invariant.

Thanks,
- Juri
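A minimal sketch of the normalization in question, built from the helpers
quoted above (the wrapper function itself is illustrative, not part of the
series):

	static unsigned long usage_normalized(int cpu)
	{
		/* raw PELT utilization: freq-invariant, not capacity-invariant */
		unsigned long raw = cpu_rq(cpu)->cfs.utilization_load_avg;

		/* scale by original capacity so big and LITTLE cpus compare */
		return (raw * capacity_orig_of(cpu)) >> SCHED_LOAD_SHIFT;
	}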
+out:
+	return freq;
+}
+/*
+ * we pass in struct cpufreq_policy. This is safe because changing out the
+ * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
+ * which tears down all of the data structures and __cpufreq_governor(policy,
+ * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
+ * new policy pointer
+ */
+static int gov_cfs_thread(void *data)
+{
+	struct sched_param param;
+	struct cpufreq_policy *policy;
+	struct gov_data *gd;
+	unsigned long freq;
+	int ret;
+
+	policy = (struct cpufreq_policy *) data;
+	if (!policy) {
+		pr_warn("%s: missing policy\n", __func__);
+		do_exit(-EINVAL);
+	}
+
+	gd = policy->governor_data;
+	if (!gd) {
+		pr_warn("%s: missing governor data\n", __func__);
+		do_exit(-EINVAL);
+	}
+
+	param.sched_priority = 50;
+	ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
+	if (ret) {
+		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
+		do_exit(-EINVAL);
+	} else {
+		pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
+				__func__, gd->task->pid);
+	}
+
+	ret = set_cpus_allowed_ptr(gd->task, policy->related_cpus);
+	if (ret) {
+		pr_warn("%s: failed to set allowed ptr\n", __func__);
+		do_exit(-EINVAL);
+	}
+
+	/* main loop of the per-policy kthread */
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+
+		if (kthread_should_stop())
+			break;
+
+		/* avoid race with gov_cfs_stop */
+		if (!down_write_trylock(&policy->rwsem))
+			continue;
+
+		freq = gov_cfs_select_freq(policy);
+
+		ret = __cpufreq_driver_target(policy, freq,
+				CPUFREQ_RELATION_H);
+		if (ret)
+			pr_debug("%s: __cpufreq_driver_target returned %d\n",
+					__func__, ret);

Shouldn't the relation be H or L depending on whether we are increasing or
decreasing the freq?
Yes, this was pointed out in another reply. I'll fix it up.
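A possible shape for that fix-up (illustrative, not from the posted series;
CPUFREQ_RELATION_L selects the lowest driver frequency at or above the
target, CPUFREQ_RELATION_H the highest at or below it):

	/*
	 * Illustrative only: round up (RELATION_L) when raising the
	 * frequency so we don't land below the requested capacity, and
	 * round down (RELATION_H) when lowering it so we don't overshoot.
	 */
	unsigned int relation = (freq > policy->cur) ? CPUFREQ_RELATION_L
						     : CPUFREQ_RELATION_H;

	ret = __cpufreq_driver_target(policy, freq, relation);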
+		gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
+
+		up_write(&policy->rwsem);
+	} while (!kthread_should_stop());
+
+	do_exit(0);
+}
+static void gov_cfs_irq_work(struct irq_work *irq_work)
+{
+	struct gov_data *gd;
+
+	gd = container_of(irq_work, struct gov_data, irq_work);
+	if (!gd) {
+		return;
+	}
+
+	wake_up_process(gd->task);
+}
+/**
+ * gov_cfs_update_cpu - interface to scheduler for changing capacity values
+ * @cpu: cpu whose capacity utilization has recently changed
+ *
+ * gov_cfs_update_cpu is an interface exposed to the scheduler so that the
+ * scheduler may inform the governor of updates to capacity utilization and
+ * make changes to cpu frequency. Currently this interface is designed around
+ * PELT values in CFS. It can be expanded to other scheduling classes in the
+ * future if needed.
+ *
+ * gov_cfs_update_cpu raises an IPI. The irq_work handler for that IPI wakes up
+ * the thread that does the actual work, gov_cfs_thread.
+ */
+void gov_cfs_update_cpu(int cpu)
+{
+	struct cpufreq_policy *policy;
+	struct gov_data *gd;
+
+	/* XXX put policy pointer in per-cpu data? */
+	policy = cpufreq_cpu_get(cpu);
+	if (IS_ERR_OR_NULL(policy)) {
+		return;
+	}
+
+	if (!policy->governor_data) {
+		goto out;
+	}
+
+	gd = policy->governor_data;
+
+	/* bail early if we are throttled */
+	if (ktime_before(ktime_get(), gd->throttle)) {
+		goto out;
+	}
+
+	irq_work_queue_on(&gd->irq_work, cpu);
+
+out:
+	cpufreq_cpu_put(policy);
+	return;
+}
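To make the throttling concrete (illustrative numbers, not from the
series): if the driver reports transition_latency = 500000 ns, each
completed frequency change arms a 500 us window during which scheduler
updates are simply dropped:

	/* in gov_cfs_thread(), after a frequency transition */
	gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);

	/* in gov_cfs_update_cpu(), for the next 500 us */
	if (ktime_before(ktime_get(), gd->throttle))
		goto out;	/* throttled: no irq_work is queued */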
+static void gov_cfs_start(struct cpufreq_policy *policy)
+{
+	struct gov_data *gd;
+
+	/* prepare per-policy private data */
+	gd = kzalloc(sizeof(*gd), GFP_KERNEL);
+	if (!gd) {
+		pr_debug("%s: failed to allocate private data\n", __func__);
+		return;
+	}
+
+	/*
+	 * Don't ask for freq changes at a higher rate than what
+	 * the driver advertises as transition latency.
+	 */
+	gd->throttle_nsec = policy->cpuinfo.transition_latency ?
+			    policy->cpuinfo.transition_latency :
+			    THROTTLE_NSEC;
+	pr_debug("%s: throttle threshold = %u [ns]\n",
+		 __func__, gd->throttle_nsec);
+
+	/* init per-policy kthread */
+	gd->task = kthread_run(gov_cfs_thread, policy, "kgov_cfs_task");
+	if (IS_ERR_OR_NULL(gd->task))
+		pr_err("%s: failed to create kgov_cfs_task thread\n", __func__);
+
+	init_irq_work(&gd->irq_work, gov_cfs_irq_work);

It does not make sense to have a workqueue and a kthread; this duplicates
what the workqueue already does.
There is no traditional wq here. Just irq_work handler + kthread.
I also saw the irq_work + kthread mail you sent, and I believe it is the
way to go. Did you think about creating a workqueue per clock line instead
of using irq_work?
I am confused by how you phrased the above text. This patch uses irq_work
+ kthread, which you say is the way to go. But I don't use a traditional
workqueue instead of irq_work.

I did implement a method that uses irq_work + workqueue, and it had some
bugs, and it also clearly inflated the CFS load-tracking stats (the
kworker runs as a CFS task, so it feeds back into the very signal being
tracked).
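For reference, the update path in this series is roughly (condensed from
the code above):

	scheduler hook (enqueue/dequeue/task_tick, rq->lock held)
	    -> gov_cfs_update_cpu()
	        -> irq_work_queue_on()          /* raises an IPI */
	            -> gov_cfs_irq_work()       /* hard-irq context */
	                -> wake_up_process(gd->task)
	                    -> gov_cfs_thread() /* process context, may
	                                           sleep, calls the driver */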
Thanks a lot for the review,
Mike
+	policy->governor_data = gd;
+	gd->policy = policy;
+}
+static void gov_cfs_stop(struct cpufreq_policy *policy)
+{
+	struct gov_data *gd;
+
+	gd = policy->governor_data;
+
+	kthread_stop(gd->task);
+
+	policy->governor_data = NULL;
+
+	/* FIXME replace with devm counterparts? */
+	kfree(gd);
+}
+static int gov_cfs_setup(struct cpufreq_policy *policy, unsigned int event)
+{
+	switch (event) {
+	case CPUFREQ_GOV_START:
+		/* Start managing the frequency */
+		gov_cfs_start(policy);
+		return 0;
+
+	case CPUFREQ_GOV_STOP:
+		gov_cfs_stop(policy);
+		return 0;
+
+	case CPUFREQ_GOV_LIMITS:	/* unused */
+	case CPUFREQ_GOV_POLICY_INIT:	/* unused */
+	case CPUFREQ_GOV_POLICY_EXIT:	/* unused */
+		break;
+	}
+
+	return 0;
+}
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED_CFS
+static
+#endif
+struct cpufreq_governor cpufreq_gov_cfs = {
+	.name		= "gov_cfs",
+	.governor	= gov_cfs_setup,
+	.owner		= THIS_MODULE,
+};
+static int __init gov_cfs_init(void)
+{
+	return cpufreq_register_governor(&cpufreq_gov_cfs);
+}
+
+static void __exit gov_cfs_exit(void)
+{
+	cpufreq_unregister_governor(&cpufreq_gov_cfs);
+}
+/* Try to make this the default governor */
+fs_initcall(gov_cfs_init);
+MODULE_LICENSE("GPL");

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 393fc36..a7b97f9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4257,6 +4257,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		update_rq_runnable_avg(rq, rq->nr_running);
 		add_nr_running(rq, 1);
 	}
+
+	if (sched_energy_freq())
+		gov_cfs_update_cpu(cpu_of(rq));
+
 	hrtick_update(rq);
@@ -4318,6 +4322,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		sub_nr_running(rq, 1);
 		update_rq_runnable_avg(rq, 1);
 	}
+
+	if (sched_energy_freq())
+		gov_cfs_update_cpu(cpu_of(rq));
+
 	hrtick_update(rq);
@@ -7821,6 +7829,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		task_tick_numa(rq, curr);
 
 	update_rq_runnable_avg(rq, 1);
+
+	if (sched_energy_freq())
+		gov_cfs_update_cpu(cpu_of(rq));
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 63a8be9..ec23523 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1399,6 +1399,12 @@ unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 int get_cpu_usage(int cpu);
 unsigned long capacity_orig_of(int cpu);
+#ifdef CONFIG_CPU_FREQ_GOV_SCHED_CFS
+void gov_cfs_update_cpu(int cpu);
+#else
+static inline void gov_cfs_update_cpu(int cpu) {}
+#endif
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
 {
 	rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));