This patch adds a new feature to the scheduler: a connection with the
thermal subsystem, through which power is granted to cooling devices
(in DVFS, the CPU devices). The scheduler publishes a per-CPU weight;
weight updates made from scheduler context are recorded in a per-CPU
request and forwarded to the thermal framework by a SCHED_DEADLINE
kthread kicked via irq_work, so no sleeping call is made from
scheduler context.
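For context, a sketch of the intended use from the thermal side. The
governor hook below is hypothetical and not part of this patch; it only
illustrates the exported sched_power_cpu_reinit_weight() API and the
0..1024 weight scale used throughout:

  #include <linux/cpumask.h>
  #include <linux/printk.h>
  #include <linux/sched/power.h>

  /* hypothetical governor hook -- illustrative only */
  static void example_governor_throttle(const struct cpumask *cpus,
  					int weight)
  {
  	int cpu;

  	for_each_cpu(cpu, cpus) {
  		/* returns -EAGAIN until sched_power is operating */
  		if (sched_power_cpu_reinit_weight(cpu, weight))
  			pr_debug("sched_power not ready: cpu%d\n", cpu);
  	}
  }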
Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
---
 include/linux/sched/power.h |  15 ++
 kernel/sched/Makefile       |   2 +-
 kernel/sched/fair.c         |   6 +
 kernel/sched/power.c        | 268 ++++++++++++++++++++++++++++++++++++
 kernel/sched/power.h        |  56 ++++++++
 kernel/sched/sched.h        |  22 +++
 6 files changed, 368 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/sched/power.h
 create mode 100644 kernel/sched/power.c
 create mode 100644 kernel/sched/power.h
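Note for reviewers: thermal_cpu_cdev_set_weight(), called from the
kthread worker in kernel/sched/power.c below, is not defined by this
patch; it is assumed to come from the cpu_cooling changes elsewhere in
this series. The prototype assumed here (stated for context only; the
worker ignores any return value):

  /* assumed to be provided by the thermal/cpu_cooling side */
  int thermal_cpu_cdev_set_weight(int cpu, unsigned int weight);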
diff --git a/include/linux/sched/power.h b/include/linux/sched/power.h
new file mode 100644
index 000000000000..7827ba02a65c
--- /dev/null
+++ b/include/linux/sched/power.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Scheduler CPU power
+ *
+ * Copyright (C) 2018 Samsung
+ */
+
+#ifndef __INC_SCHED_POWER_H__
+#define __INC_SCHED_POWER_H__
+
+
+
+int sched_power_cpu_reinit_weight(int cpu, int weight);
+
+#endif
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7fe183404c38..c1ccc0a9dc9b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -20,7 +20,7 @@
 obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle.o fair.o rt.o deadline.o
 obj-y += wait.o wait_bit.o swait.o completion.o
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o power.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 908c9cdae2f0..c03c709ccc68 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4172,6 +4172,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
	 */
	update_curr(cfs_rq);
 
+
	/*
	 * Ensure that runnable average is periodically updated.
	 */
@@ -6357,6 +6358,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
	}
	rcu_read_unlock();
 
+	if (prev_cpu != new_cpu)
+		sched_power_change_cpu_weight(new_cpu, 512, 0);
+
	return new_cpu;
 }
 
@@ -9658,6 +9662,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
	if (static_branch_unlikely(&sched_numa_balancing))
		task_tick_numa(rq, curr);
+
+	/* sched_power_change_cpu_weight(cpu_of(rq), 768, 0); */
 }
 
 /*
diff --git a/kernel/sched/power.c b/kernel/sched/power.c
new file mode 100644
index 000000000000..c2fc0811bf37
--- /dev/null
+++ b/kernel/sched/power.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Scheduler CPU power
+ *
+ * Copyright (C) 2018 Samsung
+ */
+
+
+#include <linux/sched.h>
+#include <linux/thermal.h>
+
+#include "power.h"
+
+#define THERMAL_REQUEST_KFIFO_SIZE	(64 * sizeof(struct power_request))
+#define DEFAULT_CPU_WEIGHT		1024
+
+static DEFINE_PER_CPU(struct cpu_power, cpu_power);
+DEFINE_PER_CPU(struct update_sched_power *, update_cpu_power);
+
+static struct sched_power sched_power;
+
+void sched_power_set_update_func(int cpu, struct update_sched_power *update,
+		void (*fn)(struct update_sched_power *, int, unsigned int, int,
+			   int))
+{
+
+	if (WARN_ON(!update || !fn))
+		return;
+
+	if (WARN_ON(per_cpu(update_cpu_power, cpu)))
+		return;
+
+	update->func = fn;
+	rcu_assign_pointer(per_cpu(update_cpu_power, cpu), update);
+}
+
+void sched_power_clean_update_func(int cpu)
+{
+	rcu_assign_pointer(per_cpu(update_cpu_power, cpu), NULL);
+}
+
+
+/////////////////////////////////////////////////////////////////////////
+
+
+unsigned int cpu_power_calc_group_weight(int cpu)
+{
+	cpumask_t *span_cpus = NULL;
+	struct cpu_power *power;
+	unsigned int w = 0;
+	int i;
+	int num_cpus;
+
+
+	num_cpus = cpumask_weight(span_cpus);
+
+	for_each_cpu(i, span_cpus) {
+		power = (&per_cpu(cpu_power, i));
+		w += power->weight;
+	}
+
+	if (num_cpus)
+		w /= num_cpus;
+
+	return w;
+}
+
+int get_state_for_power(int cpu, unsigned long power)
+{
+	/* unsigned long gr_load; */
+
+
+	return 0;
+}
+
+int cpu_power_calc_group_capacity(unsigned long gr_power,
+				  unsigned int gr_weight, int cpu)
+{
+	cpumask_t *span_cpus = NULL;
+	int num_cpus;
+	struct cpu_power *power;
+	unsigned long p;
+	int i, state;
+	/* int size = 0; */
+	unsigned long max_power = 0;
+
+	num_cpus = cpumask_weight(span_cpus);
+
+	for_each_cpu(i, span_cpus) {
+		power = (&per_cpu(cpu_power, i));
+		p = gr_power * (power->weight << 10) / gr_weight;
+		p >>= 10;
+
+		if (max_power < p)
+			max_power = p;
+	}
+
+
+	state = get_state_for_power(cpu, max_power);
+
+
+	return 0;
+}
+
+int sched_power_cpu_reinit_weight(int cpu, int weight)
+{
+	struct cpu_power *cpower = &per_cpu(cpu_power, cpu);
+
+	if (!cpower->operating)
+		return -EAGAIN;
+
+	raw_spin_lock(&cpower->update_lock);
+	cpower->weight = weight;
+	raw_spin_unlock(&cpower->update_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sched_power_cpu_reinit_weight);
+
+//////////////////////////////////////////////////////////////
+
+
+static bool should_update_next_weight(int time)
+{
+	return true;
+}
+
+static void sched_power_work(struct kthread_work *work)
+{
+	struct sched_power *sp = container_of(work, struct sched_power, work);
+	int i;
+	struct cpu_power *cpower = NULL;
+	struct power_request req;
+
+	for_each_online_cpu(i) {
+		cpower = (&per_cpu(cpu_power, i));
+		raw_spin_lock(&cpower->update_lock);
+		req = cpower->req;
+		cpower->req.time = 0;
+		raw_spin_unlock(&cpower->update_lock);
+
+		if (should_update_next_weight(req.time)) {
+			pr_info("cpower req popped\n");
+			thermal_cpu_cdev_set_weight(req.cpu, req.weight);
+		}
+	}
+
+	sp->work_in_progress = false;
+}
+
+static void sched_power_irq_work(struct irq_work *irq_work)
+{
+	struct sched_power *power;
+
+	power = container_of(irq_work, struct sched_power, irq_work);
+
+	kthread_queue_work(&power->worker, &power->work);
+}
+
+static void sched_power_update(struct update_sched_power *update, int cpu,
+			       unsigned int weight, int flags, int time)
+{
+	struct cpu_power *cpower = container_of(update, struct cpu_power,
+						update_power);
+	struct sched_power *sp;
+
+	if (!cpower->operating)
+		return;
+
+	sp = cpower->sched_power;
+
+	/* Filter out too frequent changes */
+	if (!should_update_next_weight(time))
+		return;
+
+	raw_spin_lock(&cpower->update_lock);
+	cpower->req.weight = weight;
+	cpower->req.cpu = cpu;
+	cpower->req.time = time;
+	raw_spin_unlock(&cpower->update_lock);
+
+	if (!sp->work_in_progress) {
+		sp->work_in_progress = true;
+		irq_work_queue(&sp->irq_work);
+	}
+}
+
+
+static int sched_power_create_thread(struct sched_power *power)
+{
+	int ret;
+	struct task_struct *thread;
+	struct sched_attr attr = {
+		.sched_policy = SCHED_DEADLINE,
+		.sched_nice = 0,
+		.sched_priority = 0,
+		.sched_flags = 0,
+		.sched_runtime = 1000000,
+		.sched_deadline = 10000000,
+		.sched_period = 10000000,
+	};
+
+	kthread_init_work(&power->work, sched_power_work);
+	kthread_init_worker(&power->worker);
+	thread = kthread_create(kthread_worker_fn, &power->worker,
+				"sched_power/a");
+
+	if (IS_ERR(thread)) {
+		pr_err("failed to create sched_power thread %ld\n",
+		       PTR_ERR(thread));
+		return PTR_ERR(thread);
+	}
+
+	ret = sched_setattr_nocheck(thread, &attr);
+	if (ret) {
+		kthread_stop(thread);
+		pr_warn("failed to set SCHED_DEADLINE for sched_power %d\n",
+			ret);
+		return ret;
+	}
+
+	power->thread = thread;
+	mutex_init(&power->work_lock);
+	init_irq_work(&power->irq_work, sched_power_irq_work);
+	wake_up_process(thread);
+
+	return 0;
+}
+
+static void sched_power_disable_thread(struct sched_power *sp)
+{
+	kthread_flush_worker(&sp->worker);
+	kthread_stop(sp->thread);
+	mutex_destroy(&sp->work_lock);
+}
+
+static int sched_power_setup(struct sched_power *sp)
+{
+	int i;
+	struct cpu_power *cpower;
+
+	for_each_possible_cpu(i) {
+		cpower = (&per_cpu(cpu_power, i));
+		cpower->weight = DEFAULT_CPU_WEIGHT;
+		cpower->sched_power = sp;
+		sched_power_set_update_func(i, &cpower->update_power,
+					    sched_power_update);
+		raw_spin_lock_init(&cpower->update_lock);
+		cpower->operating = true;
+	}
+
+	return 0;
+}
+
+
+static int __init sched_power_init(void)
+{
+	int ret = 0;
+
+	ret = sched_power_create_thread(&sched_power);
+	if (ret)
+		return ret;
+
+	sched_power_setup(&sched_power);
+
+	return ret;
+}
+fs_initcall(sched_power_init);
diff --git a/kernel/sched/power.h b/kernel/sched/power.h
new file mode 100644
index 000000000000..f08277efd50d
--- /dev/null
+++ b/kernel/sched/power.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Scheduler CPU power
+ *
+ * Copyright (C) 2018 Samsung
+ */
+
+#ifndef __SCHED_POWER_H__
+#define __SCHED_POWER_H__
+
+#include "sched.h"
+
+// struct update_sched_power {
+//	void (*func)(struct update_sched_power *, int, unsigned int, int);
+// };
+
+struct power_budget {
+	s64 temp;
+	s64 temp_limit;
+	s64 avail_power;
+};
+
+struct sched_power {
+	struct task_struct *thread;
+	struct irq_work irq_work;
+	struct kthread_work work;
+	struct kthread_worker worker;
+	bool work_in_progress;
+	struct mutex work_lock;
+};
+
+struct power_request {
+	unsigned int weight;
+	int cpu;
+	int time;
+};
+
+struct cpu_power {
+	struct update_sched_power update_power;
+	unsigned int max_capacity;
+	unsigned int capacity;
+	unsigned int vcapacity;
+	int opp_state;
+	u64 opp_power_cost;
+	unsigned long vidle;
+	unsigned int vrun; /* from 0..1024 (100%) */
+	unsigned int weight; /* 0..1024 (100%) */
+	struct sched_power *sched_power;
+	struct power_request req;
+	bool operating;
+	/* lock shared with thermal framework and/or cpufreq */
+	raw_spinlock_t update_lock;
+};
+
+
+#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9683f458aec7..c1714ef73669 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2244,3 +2244,25 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
	return util;
 }
 #endif
+
+#ifdef CONFIG_THERMAL
+struct update_sched_power {
+	void (*func)(struct update_sched_power *, int, unsigned int, int, int);
+};
+DECLARE_PER_CPU(struct update_sched_power *, update_cpu_power);
+
+static inline void sched_power_change_cpu_weight(int cpu, unsigned int weight,
+						 int flags)
+{
+	struct update_sched_power *update;
+	int time = 0;
+
+
+	update = rcu_dereference_sched(*per_cpu_ptr(&update_cpu_power, cpu));
+	if (update)
+		update->func(update, cpu, weight, flags, time);
+}
+#else
+static inline void sched_power_change_cpu_weight(int cpu, unsigned int weight,
+						 int flags) {}
+#endif /* CONFIG_THERMAL */
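
For reference, a minimal, self-contained sketch of the deferral pattern
the patch relies on: scheduler context cannot sleep, so the update hook
only records a request and queues irq_work; the irq_work handler kicks
a kthread_worker, whose work function may then sleep when talking to
the thermal framework. This demo module is hypothetical (all names made
up) and uses the kthread_create_worker() helper for brevity, where the
patch itself sets up a SCHED_DEADLINE worker by hand:

  #include <linux/irq_work.h>
  #include <linux/kthread.h>
  #include <linux/module.h>

  static struct kthread_worker *demo_worker;
  static struct kthread_work demo_work;
  static struct irq_work demo_irq_work;

  static void demo_work_fn(struct kthread_work *work)
  {
  	/* process context: sleeping calls are allowed here */
  	pr_info("demo: request processed\n");
  }

  static void demo_irq_work_fn(struct irq_work *iw)
  {
  	/* hardirq context: only wake the worker */
  	kthread_queue_work(demo_worker, &demo_work);
  }

  /* safe to call from scheduler/hardirq context */
  static void demo_kick(void)
  {
  	irq_work_queue(&demo_irq_work);
  }

  static int __init demo_init(void)
  {
  	demo_worker = kthread_create_worker(0, "demo_worker");
  	if (IS_ERR(demo_worker))
  		return PTR_ERR(demo_worker);

  	kthread_init_work(&demo_work, demo_work_fn);
  	init_irq_work(&demo_irq_work, demo_irq_work_fn);
  	demo_kick();
  	return 0;
  }

  static void __exit demo_exit(void)
  {
  	kthread_destroy_worker(demo_worker);
  }

  module_init(demo_init);
  module_exit(demo_exit);
  MODULE_LICENSE("GPL");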