This patch adds a new feature to the scheduler: a connection with the
thermal subsystem, through which power is granted to cooling devices
(in DVFS, the CPU devices). The scheduler publishes a per-CPU weight;
weight updates made from scheduler context are recorded in a per-CPU
request and forwarded to the thermal framework by a SCHED_DEADLINE
kthread kicked via irq_work, so no sleeping call is made from
scheduler context.
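For context, a sketch of the intended use from the thermal side. The
governor hook below is hypothetical and not part of this patch; it only
illustrates the exported sched_power_cpu_reinit_weight() API and the
0..1024 weight scale used throughout:

  #include <linux/cpumask.h>
  #include <linux/printk.h>
  #include <linux/sched/power.h>

  /* hypothetical governor hook -- illustrative only */
  static void example_governor_throttle(const struct cpumask *cpus,
  					int weight)
  {
  	int cpu;

  	for_each_cpu(cpu, cpus) {
  		/* returns -EAGAIN until sched_power is operating */
  		if (sched_power_cpu_reinit_weight(cpu, weight))
  			pr_debug("sched_power not ready: cpu%d\n", cpu);
  	}
  }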
Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
---
 include/linux/sched/power.h |  15 ++
 kernel/sched/Makefile       |   2 +-
 kernel/sched/fair.c         |   6 +
 kernel/sched/power.c        | 268 ++++++++++++++++++++++++++++++++++++
 kernel/sched/power.h        |  56 ++++++++
 kernel/sched/sched.h        |  22 +++
 6 files changed, 368 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/sched/power.h
 create mode 100644 kernel/sched/power.c
 create mode 100644 kernel/sched/power.h
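Note for reviewers: thermal_cpu_cdev_set_weight(), called from the
kthread worker in kernel/sched/power.c below, is not defined by this
patch; it is assumed to come from the cpu_cooling changes elsewhere in
this series. The prototype assumed here (stated for context only; the
worker ignores any return value):

  /* assumed to be provided by the thermal/cpu_cooling side */
  int thermal_cpu_cdev_set_weight(int cpu, unsigned int weight);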
diff --git a/include/linux/sched/power.h b/include/linux/sched/power.h
new file mode 100644
index 000000000000..7827ba02a65c
--- /dev/null
+++ b/include/linux/sched/power.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Scheduler CPU power
+ *
+ * Copyright (C) 2018 Samsung
+ */
+
+#ifndef __INC_SCHED_POWER_H__
+#define __INC_SCHED_POWER_H__
+
+
+
+int sched_power_cpu_reinit_weight(int cpu, int weight);
+
+#endif
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7fe183404c38..c1ccc0a9dc9b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -20,7 +20,7 @@
 obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle.o fair.o rt.o deadline.o
 obj-y += wait.o wait_bit.o swait.o completion.o
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o power.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 908c9cdae2f0..c03c709ccc68 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4172,6 +4172,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
	 */
	update_curr(cfs_rq);
 
+
	/*
	 * Ensure that runnable average is periodically updated.
	 */
@@ -6357,6 +6358,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
	}
	rcu_read_unlock();
 
+	if (prev_cpu != new_cpu)
+		sched_power_change_cpu_weight(new_cpu, 512, 0);
+
	return new_cpu;
 }
 
@@ -9658,6 +9662,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
	if (static_branch_unlikely(&sched_numa_balancing))
		task_tick_numa(rq, curr);
+
+	/* sched_power_change_cpu_weight(cpu_of(rq), 768, 0); */
 }
 
 /*
diff --git a/kernel/sched/power.c b/kernel/sched/power.c
new file mode 100644
index 000000000000..c2fc0811bf37
--- /dev/null
+++ b/kernel/sched/power.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Scheduler CPU power
+ *
+ * Copyright (C) 2018 Samsung
+ */
+
+
+#include <linux/sched.h>
+#include <linux/thermal.h>
+
+#include "power.h"
+
+#define THERMAL_REQUEST_KFIFO_SIZE	(64 * sizeof(struct power_request))
+#define DEFAULT_CPU_WEIGHT		1024
+
+static DEFINE_PER_CPU(struct cpu_power, cpu_power);
+DEFINE_PER_CPU(struct update_sched_power *, update_cpu_power);
+
+static struct sched_power sched_power;
+
+void sched_power_set_update_func(int cpu, struct update_sched_power *update,
+		void (*fn)(struct update_sched_power *, int, unsigned int, int,
+			   int))
+{
+
+	if (WARN_ON(!update || !fn))
+		return;
+
+	if (WARN_ON(per_cpu(update_cpu_power, cpu)))
+		return;
+
+	update->func = fn;
+	rcu_assign_pointer(per_cpu(update_cpu_power, cpu), update);
+}
+
+void sched_power_clean_update_func(int cpu)
+{
+	rcu_assign_pointer(per_cpu(update_cpu_power, cpu), NULL);
+}
+
+
+/////////////////////////////////////////////////////////////////////////
+
+
+unsigned int cpu_power_calc_group_weight(int cpu)
+{
+	cpumask_t *span_cpus = NULL;
+	struct cpu_power *power;
+	unsigned int w = 0;
+	int i;
+	int num_cpus;
+
+
+	num_cpus = cpumask_weight(span_cpus);
+
+	for_each_cpu(i, span_cpus) {
+		power = (&per_cpu(cpu_power, i));
+		w += power->weight;
+	}
+
+	if (num_cpus)
+		w /= num_cpus;
+
+	return w;
+}
+
+int get_state_for_power(int cpu, unsigned long power)
+{
+	/* unsigned long gr_load; */
+
+
+	return 0;
+}
+
+int cpu_power_calc_group_capacity(unsigned long gr_power,
+				  unsigned int gr_weight, int cpu)
+{
+	cpumask_t *span_cpus = NULL;
+	int num_cpus;
+	struct cpu_power *power;
+	unsigned long p;
+	int i, state;
+	/* int size = 0; */
+	unsigned long max_power = 0;
+
+	num_cpus = cpumask_weight(span_cpus);
+
+	for_each_cpu(i, span_cpus) {
+		power = (&per_cpu(cpu_power, i));
+		p = gr_power * (power->weight << 10) / gr_weight;
+		p >>= 10;
+
+		if (max_power < p)
+			max_power = p;
+	}
+
+
+	state = get_state_for_power(cpu, max_power);
+
+
+	return 0;
+}
+
+int sched_power_cpu_reinit_weight(int cpu, int weight)
+{
+	struct cpu_power *cpower = &per_cpu(cpu_power, cpu);
+
+	if (!cpower->operating)
+		return -EAGAIN;
+
+	raw_spin_lock(&cpower->update_lock);
+	cpower->weight = weight;
+	raw_spin_unlock(&cpower->update_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sched_power_cpu_reinit_weight);
+
+//////////////////////////////////////////////////////////////
+
+
+static bool should_update_next_weight(int time)
+{
+	return true;
+}
+
+static void sched_power_work(struct kthread_work *work)
+{
+	struct sched_power *sp = container_of(work, struct sched_power, work);
+	int i;
+	struct cpu_power *cpower = NULL;
+	struct power_request req;
+
+	for_each_online_cpu(i) {
+		cpower = (&per_cpu(cpu_power, i));
+		raw_spin_lock(&cpower->update_lock);
+		req = cpower->req;
+		cpower->req.time = 0;
+		raw_spin_unlock(&cpower->update_lock);
+
+		if (should_update_next_weight(req.time)) {
+			pr_info("cpower req popped\n");
+			thermal_cpu_cdev_set_weight(req.cpu, req.weight);
+		}
+	}
+
+	sp->work_in_progress = false;
+}
+
+static void sched_power_irq_work(struct irq_work *irq_work)
+{
+	struct sched_power *power;
+
+	power = container_of(irq_work, struct sched_power, irq_work);
+
+	kthread_queue_work(&power->worker, &power->work);
+}
+
+static void sched_power_update(struct update_sched_power *update, int cpu,
+			       unsigned int weight, int flags, int time)
+{
+	struct cpu_power *cpower = container_of(update, struct cpu_power,
+						update_power);
+	struct sched_power *sp;
+
+	if (!cpower->operating)
+		return;
+
+	sp = cpower->sched_power;
+
+	/* Filter out too frequent changes */
+	if (!should_update_next_weight(time))
+		return;
+
+	raw_spin_lock(&cpower->update_lock);
+	cpower->req.weight = weight;
+	cpower->req.cpu = cpu;
+	cpower->req.time = time;
+	raw_spin_unlock(&cpower->update_lock);
+
+	if (!sp->work_in_progress) {
+		sp->work_in_progress = true;
+		irq_work_queue(&sp->irq_work);
+	}
+}
+
+
+static int sched_power_create_thread(struct sched_power *power)
+{
+	int ret;
+	struct task_struct *thread;
+	struct sched_attr attr = {
+		.sched_policy = SCHED_DEADLINE,
+		.sched_nice = 0,
+		.sched_priority = 0,
+		.sched_flags = 0,
+		.sched_runtime = 1000000,
+		.sched_deadline = 10000000,
+		.sched_period = 10000000,
+	};
+
+	kthread_init_work(&power->work, sched_power_work);
+	kthread_init_worker(&power->worker);
+	thread = kthread_create(kthread_worker_fn, &power->worker,
+				"sched_power/a");
+
+	if (IS_ERR(thread)) {
+		pr_err("failed to create sched_power thread %ld\n",
+		       PTR_ERR(thread));
+		return PTR_ERR(thread);
+	}
+
+	ret = sched_setattr_nocheck(thread, &attr);
+	if (ret) {
+		kthread_stop(thread);
+		pr_warn("failed to set SCHED_DEADLINE for sched_power %d\n",
+			ret);
+		return ret;
+	}
+
+	power->thread = thread;
+	mutex_init(&power->work_lock);
+	init_irq_work(&power->irq_work, sched_power_irq_work);
+	wake_up_process(thread);
+
+	return 0;
+}
+
+static void sched_power_disable_thread(struct sched_power *sp)
+{
+	kthread_flush_worker(&sp->worker);
+	kthread_stop(sp->thread);
+	mutex_destroy(&sp->work_lock);
+}
+
+static int sched_power_setup(struct sched_power *sp)
+{
+	int i;
+	struct cpu_power *cpower;
+
+	for_each_possible_cpu(i) {
+		cpower = (&per_cpu(cpu_power, i));
+		cpower->weight = DEFAULT_CPU_WEIGHT;
+		cpower->sched_power = sp;
+		sched_power_set_update_func(i, &cpower->update_power,
+					    sched_power_update);
+		raw_spin_lock_init(&cpower->update_lock);
+		cpower->operating = true;
+	}
+
+	return 0;
+}
+
+
+static int __init sched_power_init(void)
+{
+	int ret = 0;
+
+	ret = sched_power_create_thread(&sched_power);
+	if (ret)
+		return ret;
+
+	sched_power_setup(&sched_power);
+
+	return ret;
+}
+fs_initcall(sched_power_init);
diff --git a/kernel/sched/power.h b/kernel/sched/power.h
new file mode 100644
index 000000000000..f08277efd50d
--- /dev/null
+++ b/kernel/sched/power.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Scheduler CPU power
+ *
+ * Copyright (C) 2018 Samsung
+ */
+
+#ifndef __SCHED_POWER_H__
+#define __SCHED_POWER_H__
+
+#include "sched.h"
+
+// struct update_sched_power {
+//	void (*func)(struct update_sched_power *, int, unsigned int, int);
+// };
+
+struct power_budget {
+	s64 temp;
+	s64 temp_limit;
+	s64 avail_power;
+};
+
+struct sched_power {
+	struct task_struct *thread;
+	struct irq_work irq_work;
+	struct kthread_work work;
+	struct kthread_worker worker;
+	bool work_in_progress;
+	struct mutex work_lock;
+};
+
+struct power_request {
+	unsigned int weight;
+	int cpu;
+	int time;
+};
+
+struct cpu_power {
+	struct update_sched_power update_power;
+	unsigned int max_capacity;
+	unsigned int capacity;
+	unsigned int vcapacity;
+	int opp_state;
+	u64 opp_power_cost;
+	unsigned long vidle;
+	unsigned int vrun; /* from 0..1024 (100%) */
+	unsigned int weight; /* 0..1024 (100%) */
+	struct sched_power *sched_power;
+	struct power_request req;
+	bool operating;
+	/* lock shared with thermal framework and/or cpufreq */
+	raw_spinlock_t update_lock;
+};
+
+
+#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9683f458aec7..c1714ef73669 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2244,3 +2244,25 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
	return util;
 }
 #endif
+
+#ifdef CONFIG_THERMAL
+struct update_sched_power {
+	void (*func)(struct update_sched_power *, int, unsigned int, int, int);
+};
+DECLARE_PER_CPU(struct update_sched_power *, update_cpu_power);
+
+static inline void sched_power_change_cpu_weight(int cpu, unsigned int weight,
+						 int flags)
+{
+	struct update_sched_power *update;
+	int time = 0;
+
+
+	update = rcu_dereference_sched(*per_cpu_ptr(&update_cpu_power, cpu));
+	if (update)
+		update->func(update, cpu, weight, flags, time);
+}
+#else
+static inline void sched_power_change_cpu_weight(int cpu, unsigned int weight,
+						 int flags) {}
+#endif /* CONFIG_THERMAL */
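
For reference, a minimal, self-contained sketch of the deferral pattern
the patch relies on: scheduler context cannot sleep, so the update hook
only records a request and queues irq_work; the irq_work handler kicks
a kthread_worker, whose work function may then sleep when talking to
the thermal framework. This demo module is hypothetical (all names made
up) and uses the kthread_create_worker() helper for brevity, where the
patch itself sets up a SCHED_DEADLINE worker by hand:

  #include <linux/irq_work.h>
  #include <linux/kthread.h>
  #include <linux/module.h>

  static struct kthread_worker *demo_worker;
  static struct kthread_work demo_work;
  static struct irq_work demo_irq_work;

  static void demo_work_fn(struct kthread_work *work)
  {
  	/* process context: sleeping calls are allowed here */
  	pr_info("demo: request processed\n");
  }

  static void demo_irq_work_fn(struct irq_work *iw)
  {
  	/* hardirq context: only wake the worker */
  	kthread_queue_work(demo_worker, &demo_work);
  }

  /* safe to call from scheduler/hardirq context */
  static void demo_kick(void)
  {
  	irq_work_queue(&demo_irq_work);
  }

  static int __init demo_init(void)
  {
  	demo_worker = kthread_create_worker(0, "demo_worker");
  	if (IS_ERR(demo_worker))
  		return PTR_ERR(demo_worker);

  	kthread_init_work(&demo_work, demo_work_fn);
  	init_irq_work(&demo_irq_work, demo_irq_work_fn);
  	demo_kick();
  	return 0;
  }

  static void __exit demo_exit(void)
  {
  	kthread_destroy_worker(demo_worker);
  }

  module_init(demo_init);
  module_exit(demo_exit);
  MODULE_LICENSE("GPL");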