On 01/05/15 00:49, Michael Turquette wrote:
Quoting Daniel Lezcano (2015-04-30 08:05:52)
On 04/27/2015 09:46 AM, Michael Turquette wrote:
[snip]
/*
* get_cpu_usage is called without locking the runqueues. This is the
* same behavior used by find_busiest_cpu in load_balance. We are
* willing to accept occasionally stale data here in exchange for
* lockless behavior.
*/
for_each_cpu(cpu, policy->cpus) {
usage = get_cpu_usage(cpu);
if (usage > max_usage)
max_usage = usage;
}
/* add margin to max_usage based on imbalance_pct */
max_usage = max_usage * MARGIN_PCT / 100;
cpu = cpumask_first(policy->cpus);
/* freq is current utilization + 25% */
freq = max_usage * policy->max / capacity_orig_of(cpu);
Couldn't this be slightly simplified by using directly cpu_rq(cpu)->cfs.utilization_load_avg instead of calling get_cpu_usage ?
The big.LITTLE case here is confusing. Is cfs.utilization_load_avg already normalized against cpu capacity differences? If so then you are right, I
Nope. In this patchset utilization_load_avg is only freq invariant.
Thanks,
- Juri
could use the value directly. But the if not then get_cpu_usage buys us that normalization by doing:
cfs.utilization_load_avg * capacity_orig >> SCHED_LOAD_SHIFT;
Where capacity_orig may be different across various CPUs.
+out:
return freq;
+}
+/*
+ * We pass in struct cpufreq_policy. This is safe because changing out the
+ * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
+ * which tears down all of the data structures and __cpufreq_governor(policy,
+ * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
+ * new policy pointer.
+ */
/*
 * Per-policy worker kthread: sleeps until woken by the irq_work handler,
 * then selects a target frequency and asks the cpufreq driver for it.
 * Runs as SCHED_FIFO so frequency transitions are not starved by CFS load.
 */
+static int gov_cfs_thread(void *data) +{
struct sched_param param;
struct cpufreq_policy *policy;
struct gov_data *gd;
unsigned long freq;
int ret;
/* data is the policy pointer handed to kthread_run() by gov_cfs_start() */
policy = (struct cpufreq_policy *) data;
if (!policy) {
pr_warn("%s: missing policy\n", __func__);
do_exit(-EINVAL);
}
gd = policy->governor_data;
if (!gd) {
pr_warn("%s: missing governor data\n", __func__);
do_exit(-EINVAL);
}
/* mid-range RT priority; the value 50 looks arbitrary - TODO justify */
param.sched_priority = 50;
/* NOTE(review): "¶m" below is an email-mangled "&param" */
ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m);
if (ret) {
pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
do_exit(-EINVAL);
} else {
pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
__func__, gd->task->pid);
}
/* keep the worker on the CPUs belonging to this policy */
ret = set_cpus_allowed_ptr(gd->task, policy->related_cpus);
if (ret) {
pr_warn("%s: failed to set allowed ptr\n", __func__);
do_exit(-EINVAL);
}
/* main loop of the per-policy kthread */
do {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
if (kthread_should_stop())
break;
/* avoid race with gov_cfs_stop */
if (!down_write_trylock(&policy->rwsem))
continue;
freq = gov_cfs_select_freq(policy);
/*
 * NOTE(review): RELATION_H is used unconditionally; the reviewers
 * below point out it should be H or L depending on whether the
 * frequency is being raised or lowered.
 */
ret = __cpufreq_driver_target(policy, freq,
CPUFREQ_RELATION_H);
if (ret)
pr_debug("%s: __cpufreq_driver_target returned %d\n",
__func__, ret);
Shouldn't the relation be H or L depending we are increasing or decreasing the freq ?
Yes, this was pointed out in another reply. I'll fix it up.
/* throttle: ignore further updates until throttle_nsec has elapsed */
gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
up_write(&policy->rwsem);
} while (!kthread_should_stop());
do_exit(0);
+}
+static void gov_cfs_irq_work(struct irq_work *irq_work) +{
struct gov_data *gd;
gd = container_of(irq_work, struct gov_data, irq_work);
if (!gd) {
return;
}
wake_up_process(gd->task);
+}
+/**
+ * gov_cfs_update_cpu - interface to scheduler for changing capacity values
+ * @cpu: cpu whose capacity utilization has recently changed
+ *
+ * gov_cfs_update_cpu is an interface exposed to the scheduler so that the
+ * scheduler may inform the governor of updates to capacity utilization and
+ * make changes to cpu frequency. Currently this interface is designed around
+ * PELT values in CFS. It can be expanded to other scheduling classes in the
+ * future if needed.
+ *
+ * gov_cfs_update_cpu raises an IPI. The irq_work handler for that IPI wakes up
+ * the thread that does the actual work, gov_cfs_thread.
+ */
+void gov_cfs_update_cpu(int cpu) +{
struct cpufreq_policy *policy;
struct gov_data *gd;
/* XXX put policy pointer in per-cpu data? */
policy = cpufreq_cpu_get(cpu);
if (IS_ERR_OR_NULL(policy)) {
return;
}
if (!policy->governor_data) {
goto out;
}
gd = policy->governor_data;
/* bail early if we are throttled */
if (ktime_before(ktime_get(), gd->throttle)) {
goto out;
}
irq_work_queue_on(&gd->irq_work, cpu);
+out:
cpufreq_cpu_put(policy);
return;
+}
/*
 * Governor START callback: allocate per-policy private data, spawn the
 * worker kthread and hook up the irq_work, then publish the data on the
 * policy so gov_cfs_update_cpu() can find it.
 */
+static void gov_cfs_start(struct cpufreq_policy *policy) +{
struct gov_data *gd;
/* prepare per-policy private data */
gd = kzalloc(sizeof(*gd), GFP_KERNEL);
if (!gd) {
pr_debug("%s: failed to allocate private data\n", __func__);
return;
}
/*
 * Don't ask for freq changes at a higher rate than what
 * the driver advertises as transition latency.
 */
gd->throttle_nsec = policy->cpuinfo.transition_latency ?
policy->cpuinfo.transition_latency :
THROTTLE_NSEC;
pr_debug("%s: throttle threshold = %u [ns]\n",
__func__, gd->throttle_nsec);
/* init per-policy kthread */
gd->task = kthread_run(gov_cfs_thread, policy, "kgov_cfs_task");
if (IS_ERR_OR_NULL(gd->task))
pr_err("%s: failed to create kgov_cfs_task thread\n", __func__);
/*
 * NOTE(review): on kthread_run() failure we only log, yet still
 * publish gd below with an invalid task pointer; a later
 * wake_up_process(gd->task) would dereference an ERR_PTR.
 * Should free gd and bail out here - TODO confirm with author.
 */
init_irq_work(&gd->irq_work, gov_cfs_irq_work);
It does not make sense to have a workqueue and a kthread, this is duplicating what the workqueue already does.
There is no traditional wq here. Just irq_work handler + kthread.
I saw also the irqwork + kthread mail you sent and I believe it is the way to go. Did you think about creating a workqueue per clock line instead of using the irqworkq ?
I am confused by how you phrased the above text. This patch uses irqwork
- kthread, which you say is the way to go. But I don't use a traditional
workqueue instead of irqwork.
I did implement a method that uses irqwork + wq, and it had some bugs, as well as the fact that it clearly increased cfs load tracking stats.
Thanks a lot for the review, Mike
/* publish: makes gd visible to gov_cfs_update_cpu() */
policy->governor_data = gd;
gd->policy = policy;
+}
+static void gov_cfs_stop(struct cpufreq_policy *policy) +{
struct gov_data *gd;
gd = policy->governor_data;
kthread_stop(gd->task);
policy->governor_data = NULL;
/* FIXME replace with devm counterparts? */
kfree(gd);
+}
+static int gov_cfs_setup(struct cpufreq_policy *policy, unsigned int event) +{
switch (event) {
case CPUFREQ_GOV_START:
/* Start managing the frequency */
gov_cfs_start(policy);
return 0;
case CPUFREQ_GOV_STOP:
gov_cfs_stop(policy);
return 0;
case CPUFREQ_GOV_LIMITS: /* unused */
case CPUFREQ_GOV_POLICY_INIT: /* unused */
case CPUFREQ_GOV_POLICY_EXIT: /* unused */
break;
}
return 0;
+}
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED_CFS +static +#endif +struct cpufreq_governor cpufreq_gov_cfs = {
.name = "gov_cfs",
.governor = gov_cfs_setup,
.owner = THIS_MODULE,
+};
+static int __init gov_cfs_init(void) +{
return cpufreq_register_governor(&cpufreq_gov_cfs);
+}
+static void __exit gov_cfs_exit(void) +{
cpufreq_unregister_governor(&cpufreq_gov_cfs);
+}
+/* Try to make this the default governor */
+fs_initcall(gov_cfs_init);
+
+MODULE_LICENSE("GPL");

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 393fc36..a7b97f9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4257,6 +4257,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		update_rq_runnable_avg(rq, rq->nr_running);
 		add_nr_running(rq, 1);
 	}
+	if (sched_energy_freq())
+		gov_cfs_update_cpu(cpu_of(rq));
 	}
 	hrtick_update(rq);
@@ -4318,6 +4322,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		sub_nr_running(rq, 1);
 		update_rq_runnable_avg(rq, 1);
 	}
+	if (sched_energy_freq())
+		gov_cfs_update_cpu(cpu_of(rq));
 	}
 	hrtick_update(rq);
@@ -7821,6 +7829,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		task_tick_numa(rq, curr);

 	update_rq_runnable_avg(rq, 1);
+
+	if (sched_energy_freq())
+		gov_cfs_update_cpu(cpu_of(rq));
 }
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 63a8be9..ec23523 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1399,6 +1399,12 @@ unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 int get_cpu_usage(int cpu);
 unsigned long capacity_orig_of(int cpu);

+#ifdef CONFIG_CPU_FREQ_GOV_SCHED_CFS
+void gov_cfs_update_cpu(int cpu);
+#else
+static inline void gov_cfs_update_cpu(int cpu) {}
+#endif
+
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
 {
 	rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
-- http://www.linaro.org/ Linaro.org │ Open source software for ARM SoCs
Follow Linaro: http://www.facebook.com/pages/Linaro Facebook | http://twitter.com/#!/linaroorg Twitter | http://www.linaro.org/linaro-blog/ Blog
eas-dev mailing list eas-dev@lists.linaro.org https://lists.linaro.org/mailman/listinfo/eas-dev