On 8/29/25 11:11, Lin Yikai wrote:
Add test to verify cpuidle governor ext's load, attach, and kfuncs.
This patch also provides a simple demonstration of `cpuidle_gov_ext_ops` usage:
- In `ops.init()`, we set the "rating" value to 60, well above the ratings of the in-tree governors (e.g. menu is 20, teo 19), so that `cpuidle_gov_ext` takes over.
- For specific scenarios (e.g., screen-off music playback on mobile devices), we can enable "expect_deeper" to bias selection toward deeper idle states (see the sketch below).
This implementation serves as a foundation, not a final solution; cpuidle strategies optimized for various usage scenarios remain to be explored.
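For illustration, user space could drive the "expect_deeper" knob through the skeleton's .bss when the screen state changes; a minimal sketch, assuming the generated skeleton from the selftest below (the on_screen_state() hook is hypothetical):

	#include "cpuidle_gov_ext.skel.h"

	/* Hypothetical callback wired up to a screen on/off notification */
	static void on_screen_state(struct cpuidle_gov_ext *skel, bool screen_off)
	{
		/* Bias the governor toward deeper idle states while the screen is off */
		skel->bss->expect_deeper = screen_off ? 1 : 0;
	}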
This is the example cpuidle BPF governor I guess, just some very first comments.
patch title should mention "cpuidle governor" somehow.
Signed-off-by: Lin Yikai <yikai.lin@vivo.com>
 .../bpf/prog_tests/test_cpuidle_gov_ext.c |  28 +++
 .../selftests/bpf/progs/cpuidle_gov_ext.c | 208 ++++++++++++++++++
 2 files changed, 236 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c
 create mode 100644 tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c
diff --git a/tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c b/tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c
new file mode 100644
index 000000000000..8b35771ada44
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * test_cpuidle_gov_ext.c - test cpuidle governor ext's load, attach and kfuncs
+ * Copyright (C) Yikai Lin <yikai.lin@vivo.com>
+ */
+#include <test_progs.h>
+#include "cpuidle_gov_ext.skel.h"
+
+void test_test_cpuidle_gov_ext(void)
+{
+	struct cpuidle_gov_ext *skel;
+	int err;
+
+	skel = cpuidle_gov_ext__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "cpuidle_gov_ext__open_and_load"))
+		return;
+
+	skel->bss->expect_deeper = 1;
+
+	err = cpuidle_gov_ext__attach(skel);
+	if (!ASSERT_OK(err, "cpuidle_gov_ext__attach"))
+		goto cleanup;
+
+cleanup:
+	cpuidle_gov_ext__destroy(skel);
+}
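(Presumably this runs via the usual selftests flow, i.e. ./test_progs -t test_cpuidle_gov_ext, assuming a kernel with the ext governor built in.)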
diff --git a/tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c b/tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c
new file mode 100644
index 000000000000..62d5a9bc8cb3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cpuidle_gov_ext.c - test to use cpuidle governor ext by bpf
+ * Copyright (C) Yikai Lin <yikai.lin@vivo.com>
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#define ALPHA		10
+#define ALPHA_SCALE	100
+#define FIT_FACTOR	90
+/*
+ * For some low-power scenarios, such as the screen-off scenario on
+ * mobile devices (which will be determined by the user-space BPF
+ * program), we aim to choose a deeper idle state, largely
+ * disregarding the impact on CPU performance.
+ */
+int expect_deeper = 0;
+int bpf_cpuidle_ext_gov_update_rating(unsigned int rating) __ksym __weak;
+s64 bpf_cpuidle_ext_gov_latency_req(unsigned int cpu) __ksym __weak;
+s64 bpf_tick_nohz_get_sleep_length(void) __ksym __weak;
These would need proper headers, every BPF cpuidle governor would need them, no?
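E.g. a small shared header that every BPF governor includes; just a sketch, name and location hypothetical:

	/* cpuidle_gov_ext_common.h (hypothetical) */
	#ifndef __CPUIDLE_GOV_EXT_COMMON_H
	#define __CPUIDLE_GOV_EXT_COMMON_H

	/* kfuncs exported by the cpuidle ext governor, declared once here */
	int bpf_cpuidle_ext_gov_update_rating(unsigned int rating) __ksym __weak;
	s64 bpf_cpuidle_ext_gov_latency_req(unsigned int cpu) __ksym __weak;
	s64 bpf_tick_nohz_get_sleep_length(void) __ksym __weak;

	#endif /* __CPUIDLE_GOV_EXT_COMMON_H */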
+struct cpuidle_gov_data {
+	int cpu;
+	int last_idx;
+	u64 last_pred;
+	u64 last_duration;
+	u64 next_pred;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, u32);
+	__type(value, struct cpuidle_gov_data);
+} cpuidle_gov_data_map SEC(".maps");
+static u64 calculate_ewma(u64 last, u64 new, u32 alpha, u32 alpha_scale)
+{
+	return (alpha * new + (alpha_scale - alpha) * last) / alpha_scale;
+}
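(With ALPHA = 10 and ALPHA_SCALE = 100 as defined above, this is (10 * new + 90 * last) / 100, i.e. the prediction moves about 10% of the way toward each newly observed sleep duration.)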
+static void update_predict_duration(struct cpuidle_gov_data *data,
+				    struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+	int idx;
+	struct cpuidle_state target;
+
+	if (!data || !drv || !dev)
+		return;
+
+	idx = data->last_idx;
+	data->last_duration = dev->last_residency_ns;
+	if (idx > 0) {
+		bpf_core_read(&target, sizeof(target), &drv->states[idx]);
+		/* The measured residency includes the exit latency; strip it
+		 * to approximate the time actually spent asleep.
+		 */
+		if (data->last_duration > target.exit_latency_ns)
+			data->last_duration -= target.exit_latency_ns;
+	}
+	data->last_pred = data->next_pred;
+	data->next_pred = calculate_ewma(data->next_pred,
+					 data->last_duration, ALPHA, ALPHA_SCALE);
+}
+/* Enable the cpuidle governor */
+SEC("struct_ops.s/enable")
+int BPF_PROG(bpf_cpuidle_enable, struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+	u32 key = 0;
+	struct cpuidle_gov_data *data;
+
+	bpf_printk("cpuidle_gov_ext: enabled");
+	data = bpf_map_lookup_percpu_elem(&cpuidle_gov_data_map, &key, dev->cpu);
+	if (!data)
+		return 0;
+	__builtin_memset(data, 0, sizeof(struct cpuidle_gov_data));
+	data->cpu = dev->cpu;
+	return 0;
+}
+/* Disable the cpuidle governor */
+SEC("struct_ops.s/disable")
+void BPF_PROG(bpf_cpuidle_disable, struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+	bpf_printk("cpuidle_gov_ext: disabled");
+}
+/* Select the next idle state */
+SEC("struct_ops.s/select")
+int BPF_PROG(bpf_cpuidle_select, struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+	u32 key = 0;
+	s64 delta, latency_req, residency_ns;
+	int i, selected;
+	unsigned long long disable = 0;
+	struct cpuidle_gov_data *data;
+	struct cpuidle_state cs;
+
+	data = bpf_map_lookup_percpu_elem(&cpuidle_gov_data_map, &key, dev->cpu);
+	if (!data) {
+		bpf_printk("cpuidle_gov_ext: [%s] cpuidle_gov_data_map is NULL\n", __func__);
+		return 0;
+	}
+	latency_req = bpf_cpuidle_ext_gov_latency_req(dev->cpu);
+	delta = bpf_tick_nohz_get_sleep_length();
+	update_predict_duration(data, drv, dev);
+
+	/* Walk from the deepest state to the shallowest, picking the deepest
+	 * one that fits the latency constraint and the predicted sleep length.
+	 */
+	for (i = ARRAY_SIZE(drv->states) - 1; i > 0; i--) {
+		if (i > drv->state_count - 1)
+			continue;
When is this true?
+		bpf_core_read(&cs, sizeof(cs), &drv->states[i]);
+		bpf_core_read(&disable, sizeof(disable), &dev->states_usage[i]);
Is there no nicer way of doing this than reading it every select() call?
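E.g. reading just the one field instead of copying the whole usage struct each iteration; a sketch, untested:

	disable = BPF_CORE_READ(dev, states_usage[i].disable);

Caching it once in enable() would avoid even that, but the flag can be flipped at runtime via sysfs, so a cache would go stale.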
+		if (disable)
+			continue;
+		if (latency_req < cs.exit_latency_ns)
+			continue;
+		if (delta < cs.target_residency_ns)
+			continue;
+		if (data->next_pred / FIT_FACTOR * ALPHA_SCALE < cs.target_residency_ns)
+			continue;
+		break;
+	}
+
+	residency_ns = drv->states[i].target_residency_ns;
+	if (expect_deeper &&
+	    i < drv->state_count - 1 &&
+	    data->last_pred >= residency_ns &&
+	    data->next_pred < residency_ns &&
+	    data->next_pred / FIT_FACTOR * ALPHA_SCALE >= residency_ns &&
+	    data->next_pred / FIT_FACTOR * ALPHA_SCALE >= data->last_duration &&
+	    delta > residency_ns) {
+		i++;
i++ might be disabled though.
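I.e. the promotion would need to re-check the disable flag before taking i + 1; roughly (untested):

	bpf_core_read(&disable, sizeof(disable), &dev->states_usage[i + 1]);
	if (!disable)
		i++;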
+	}
+
+	selected = i;
Just return i?
+	return selected;
+}
+/* Enable or disable the scheduling tick after selecting a cpuidle state */
+SEC("struct_ops.s/set_stop_tick")
+bool BPF_PROG(bpf_cpuidle_set_stop_tick)
+{
+	return false;
+}
+/* Reflect function called after the CPU comes out of an idle state */
+SEC("struct_ops.s/reflect")
+void BPF_PROG(bpf_cpuidle_reflect, struct cpuidle_device *dev, int index)
+{
+	u32 key = 0;
+	struct cpuidle_gov_data *data;
+
+	data = bpf_map_lookup_percpu_elem(&cpuidle_gov_data_map, &key, dev->cpu);
+	if (!data) {
+		bpf_printk("cpuidle_gov_ext: [%s] cpuidle_gov_data_map is NULL\n", __func__);
+		return;
+	}
+	data->last_idx = index;
+}
+/* Initialize the BPF cpuidle governor */
+SEC("struct_ops.s/init")
+int BPF_PROG(bpf_cpuidle_init)
+{
+	int ret = bpf_cpuidle_ext_gov_update_rating(60);
+
+	return ret;
unnecessary ret.
+}
+/* Cleanup after the BPF cpuidle governor */
+SEC("struct_ops.s/exit")
+void BPF_PROG(bpf_cpuidle_exit) { }
+/* Struct_ops linkage for the cpuidle governor */
+SEC(".struct_ops.link")
+struct cpuidle_gov_ext_ops ops = {
+	.enable		= (void *)bpf_cpuidle_enable,
+	.disable	= (void *)bpf_cpuidle_disable,
+	.select		= (void *)bpf_cpuidle_select,
+	.set_stop_tick	= (void *)bpf_cpuidle_set_stop_tick,
+	.reflect	= (void *)bpf_cpuidle_reflect,
+	.init		= (void *)bpf_cpuidle_init,
+	.exit		= (void *)bpf_cpuidle_exit,
+	.name		= "BPF_cpuidle_gov",
+};