From: Vikram Mulukutla <markivx@codeaurora.org>
Translating utilization to frequency is not always a simple operation: on some architectures, certain entries in the frequency table are "boost" frequencies that allow the hardware to raise its frequency beyond what is represented in software. For example, on Intel x86 machines the maximum frequency in the cpufreq table is only 1MHz greater than the next-highest frequency, yet it can provide 200MHz more capacity, depending on the number of non-idle CPUs.
This is a temporary/hack patch that adds a translation table to cpufreq_schedutil. When the sched_use_cap_table sysctl is set, get_next_freq() uses the table to translate scheduler utilization into the next_freq value. The capacity values in the table were obtained by running appropriate workloads (such as sysbench) at each P-state.
Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org> --- include/linux/sched/sysctl.h | 1 + kernel/sched/cpufreq_schedutil.c | 37 ++++++++++++++++++++++++++++++++++++- kernel/sysctl.c | 9 +++++++++ 3 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 7007815..3b2dac1 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -32,6 +32,7 @@ extern unsigned int sysctl_numa_balancing_scan_period_min; extern unsigned int sysctl_numa_balancing_scan_period_max; extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_sched_use_walt_metrics; +extern unsigned int sysctl_sched_use_cap_table;
#ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_migration_cost; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 2eef34d..ef688216 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -107,6 +107,27 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, } }
+struct util_freq { + unsigned int util; + unsigned long freq; +}; + +static struct util_freq cap_table[] = { + {589, 3401000}, + {526, 3400000}, + {494, 3200000}, + {463, 3000000}, + {433, 2800000}, + {401, 2600000}, + {362, 2400000}, + {339, 2200000}, + {308, 2000000}, + {276, 1800000}, + {245, 1600000}, +}; + +unsigned int sysctl_sched_use_cap_table; + /** * get_next_freq - Compute a new frequency for a given cpufreq policy. * @policy: cpufreq policy object to compute the new frequency for. @@ -132,8 +153,21 @@ static unsigned int get_next_freq(struct cpufreq_policy *policy, arch_scale_freq_invariant()); unsigned int freq = invariant ? policy->cpuinfo.max_freq : policy->cur; + unsigned int next_freq; + int j = 1; + + if (sysctl_sched_use_cap_table) { + if (!invariant) + util = (util * policy->cur) / policy->cpuinfo.max_freq; + util += util >> 2; + while ((j < ARRAY_SIZE(cap_table)) && (util < cap_table[j].util)) + j++; + next_freq = cap_table[j-1].freq; + } else { + next_freq = (freq + (freq >> 2)) * util / max; + }
- return (freq + (freq >> 2)) * util / max; + return next_freq; }
static void sugov_update_single(struct update_util_data *hook, u64 time, @@ -150,6 +184,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : get_next_freq(policy, util, max); sugov_update_commit(sg_policy, time, next_f); + }
static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4669a34..2605758 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -448,6 +448,15 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "sched_use_cap_table", + .data = &sysctl_sched_use_cap_table, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, #ifdef CONFIG_CFS_BANDWIDTH { .procname = "sched_cfs_bandwidth_slice_us", -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project