Hi Rafael,
As suggested, I have kept only what you suggested and fixed them based on your review comments.
@Juri and Shilpa: I have also added your Tested-by's, please let me know if you don't prefer to add them here.
V3->V4: - Keep only relevant patches for lockdep, will send cleanups patches separately. - kobj_name is dropped - Comment over update_sampling_rate is kept intact, with minor additions. - s/global/common for common tunables
Viresh Kumar (6): cpufreq: governor: Create generic macro for global tuners cpufreq: governor: Move common tunables to 'struct dbs_data' cpufreq: governor: New sysfs show/store callbacks for governor tunables cpufreq: governor: Drop unused macros for creating governor tunable attributes Revert "cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT" cpufreq: governor: Create and traverse list of policy_dbs to fix lockdep
drivers/cpufreq/cpufreq.c | 5 - drivers/cpufreq/cpufreq_conservative.c | 102 +++++++---------- drivers/cpufreq/cpufreq_governor.c | 127 ++++++++++++++------- drivers/cpufreq/cpufreq_governor.h | 143 ++++++++---------------- drivers/cpufreq/cpufreq_ondemand.c | 194 ++++++++++++--------------------- include/linux/cpufreq.h | 4 - 6 files changed, 239 insertions(+), 336 deletions(-)
Some tunables are present in governor specific structures, whereas one (min_sampling_rate) is present in global 'struct dbs_data'.
The macro for that is pretty much specific to sampling_rate_min for now. Next patch would be moving few more tunables to the 'struct dbs_data', then it would be useful to create a generic macro for such cases.
Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Tested-by: Juri Lelli juri.lelli@arm.com Tested-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com --- drivers/cpufreq/cpufreq_conservative.c | 8 ++++---- drivers/cpufreq/cpufreq_governor.h | 36 +++++++++++++++++++--------------- drivers/cpufreq/cpufreq_ondemand.c | 8 ++++---- 3 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 1a899bb7d1a4..a69eb7eae7ec 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -245,7 +245,7 @@ show_store_one(cs, up_threshold); show_store_one(cs, down_threshold); show_store_one(cs, ignore_nice_load); show_store_one(cs, freq_step); -declare_show_sampling_rate_min(cs); +show_one_common(cs, min_sampling_rate);
gov_sys_pol_attr_rw(sampling_rate); gov_sys_pol_attr_rw(sampling_down_factor); @@ -253,10 +253,10 @@ gov_sys_pol_attr_rw(up_threshold); gov_sys_pol_attr_rw(down_threshold); gov_sys_pol_attr_rw(ignore_nice_load); gov_sys_pol_attr_rw(freq_step); -gov_sys_pol_attr_ro(sampling_rate_min); +gov_sys_pol_attr_ro(min_sampling_rate);
static struct attribute *dbs_attributes_gov_sys[] = { - &sampling_rate_min_gov_sys.attr, + &min_sampling_rate_gov_sys.attr, &sampling_rate_gov_sys.attr, &sampling_down_factor_gov_sys.attr, &up_threshold_gov_sys.attr, @@ -272,7 +272,7 @@ static struct attribute_group cs_attr_group_gov_sys = { };
static struct attribute *dbs_attributes_gov_pol[] = { - &sampling_rate_min_gov_pol.attr, + &min_sampling_rate_gov_pol.attr, &sampling_rate_gov_pol.attr, &sampling_down_factor_gov_pol.attr, &up_threshold_gov_pol.attr, diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 95e6834d36a8..62bafac5798b 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -110,6 +110,26 @@ static ssize_t store_##file_name##_gov_pol \ show_one(_gov, file_name); \ store_one(_gov, file_name)
+#define show_one_common(_gov, file_name) \ +static ssize_t show_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + struct dbs_data *dbs_data = policy_dbs->dbs_data; \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} + +#define show_store_one_common(_gov, file_name) \ +show_one_common(_gov, file_name); \ +store_one(_gov, file_name) + /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ @@ -264,22 +284,6 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) return delay; }
-#define declare_show_sampling_rate_min(_gov) \ -static ssize_t show_sampling_rate_min_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ -} \ - \ -static ssize_t show_sampling_rate_min_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ -} - extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct cpufreq_policy *policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index b7ef2e7f4d4a..8c44bc3fffc5 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -443,7 +443,7 @@ show_store_one(od, up_threshold); show_store_one(od, sampling_down_factor); show_store_one(od, ignore_nice_load); show_store_one(od, powersave_bias); -declare_show_sampling_rate_min(od); +show_one_common(od, min_sampling_rate);
gov_sys_pol_attr_rw(sampling_rate); gov_sys_pol_attr_rw(io_is_busy); @@ -451,10 +451,10 @@ gov_sys_pol_attr_rw(up_threshold); gov_sys_pol_attr_rw(sampling_down_factor); gov_sys_pol_attr_rw(ignore_nice_load); gov_sys_pol_attr_rw(powersave_bias); -gov_sys_pol_attr_ro(sampling_rate_min); +gov_sys_pol_attr_ro(min_sampling_rate);
static struct attribute *dbs_attributes_gov_sys[] = { - &sampling_rate_min_gov_sys.attr, + &min_sampling_rate_gov_sys.attr, &sampling_rate_gov_sys.attr, &up_threshold_gov_sys.attr, &sampling_down_factor_gov_sys.attr, @@ -470,7 +470,7 @@ static struct attribute_group od_attr_group_gov_sys = { };
static struct attribute *dbs_attributes_gov_pol[] = { - &sampling_rate_min_gov_pol.attr, + &min_sampling_rate_gov_pol.attr, &sampling_rate_gov_pol.attr, &up_threshold_gov_pol.attr, &sampling_down_factor_gov_pol.attr,
There are few more common tunables shared across ondemand and conservative governors. Move them to 'struct dbs_data' to simplify code.
Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Tested-by: Juri Lelli juri.lelli@arm.com Tested-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com --- drivers/cpufreq/cpufreq_conservative.c | 38 ++++++++++++++----------------- drivers/cpufreq/cpufreq_governor.c | 37 ++++++------------------------ drivers/cpufreq/cpufreq_governor.h | 14 +++++------- drivers/cpufreq/cpufreq_ondemand.c | 41 +++++++++++++++------------------- 4 files changed, 47 insertions(+), 83 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index a69eb7eae7ec..4f640b028c94 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -60,7 +60,7 @@ static void cs_check_cpu(int cpu, unsigned int load) return;
/* Check for frequency increase */ - if (load > cs_tuners->up_threshold) { + if (load > dbs_data->up_threshold) { dbs_info->down_skip = 0;
/* if we are already at full speed then break out early */ @@ -78,7 +78,7 @@ static void cs_check_cpu(int cpu, unsigned int load) }
/* if sampling_down_factor is active break out early */ - if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) + if (++dbs_info->down_skip < dbs_data->sampling_down_factor) return; dbs_info->down_skip = 0;
@@ -107,10 +107,9 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
dbs_check_cpu(policy); - return delay_for_sampling_rate(cs_tuners->sampling_rate); + return delay_for_sampling_rate(dbs_data->sampling_rate); }
static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -126,7 +125,6 @@ static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -134,14 +132,13 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL;
- cs_tuners->sampling_down_factor = input; + dbs_data->sampling_down_factor = input; return count; }
static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -149,7 +146,7 @@ static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, if (ret != 1) return -EINVAL;
- cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); + dbs_data->sampling_rate = max(input, dbs_data->min_sampling_rate); return count; }
@@ -164,7 +161,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) return -EINVAL;
- cs_tuners->up_threshold = input; + dbs_data->up_threshold = input; return count; }
@@ -178,7 +175,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
/* cannot be lower than 11 otherwise freq will not fall */ if (ret != 1 || input < 11 || input > 100 || - input >= cs_tuners->up_threshold) + input >= dbs_data->up_threshold) return -EINVAL;
cs_tuners->down_threshold = input; @@ -188,7 +185,6 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input, j; int ret;
@@ -199,10 +195,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, if (input > 1) input = 1;
- if (input == cs_tuners->ignore_nice_load) /* nothing to do */ + if (input == dbs_data->ignore_nice_load) /* nothing to do */ return count;
- cs_tuners->ignore_nice_load = input; + dbs_data->ignore_nice_load = input;
/* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -210,7 +206,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_info = &per_cpu(cs_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall, 0); - if (cs_tuners->ignore_nice_load) + if (dbs_data->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } @@ -239,12 +235,12 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, return count; }
-show_store_one(cs, sampling_rate); -show_store_one(cs, sampling_down_factor); -show_store_one(cs, up_threshold); show_store_one(cs, down_threshold); -show_store_one(cs, ignore_nice_load); show_store_one(cs, freq_step); +show_store_one_common(cs, sampling_rate); +show_store_one_common(cs, sampling_down_factor); +show_store_one_common(cs, up_threshold); +show_store_one_common(cs, ignore_nice_load); show_one_common(cs, min_sampling_rate);
gov_sys_pol_attr_rw(sampling_rate); @@ -299,11 +295,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) return -ENOMEM; }
- tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice_load = 0; tuners->freq_step = DEF_FREQUENCY_STEP; + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + dbs_data->ignore_nice_load = 0;
dbs_data->tuners = tuners; dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 9c1dfcee0d57..b168a32cc8f0 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -38,10 +38,9 @@ void dbs_check_cpu(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - unsigned int sampling_rate; + unsigned int sampling_rate = dbs_data->sampling_rate; + unsigned int ignore_nice = dbs_data->ignore_nice_load; unsigned int max_load = 0; - unsigned int ignore_nice; unsigned int j;
if (gov->governor == GOV_ONDEMAND) { @@ -54,13 +53,8 @@ void dbs_check_cpu(struct cpufreq_policy *policy) * the 'sampling_rate', so as to keep the wake-up-from-idle * detection logic a bit conservative. */ - sampling_rate = od_tuners->sampling_rate; sampling_rate *= od_dbs_info->rate_mult;
- ignore_nice = od_tuners->ignore_nice_load; - } else { - sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; }
/* Get Absolute Load */ @@ -279,19 +273,6 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, atomic_dec(&policy_dbs->skip_work); }
-static void set_sampling_rate(struct dbs_data *dbs_data, - struct dbs_governor *gov, - unsigned int sampling_rate) -{ - if (gov->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - cs_tuners->sampling_rate = sampling_rate; - } else { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - od_tuners->sampling_rate = sampling_rate; - } -} - static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { @@ -383,8 +364,8 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) /* Bring kernel and HW constraints together */ dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, MIN_LATENCY_MULTIPLIER * latency); - set_sampling_rate(dbs_data, gov, max(dbs_data->min_sampling_rate, - latency * LATENCY_MULTIPLIER)); + dbs_data->sampling_rate = max(dbs_data->min_sampling_rate, + LATENCY_MULTIPLIER * latency);
if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; @@ -456,16 +437,12 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (policy_dbs->policy) return -EBUSY;
- if (gov->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + sampling_rate = dbs_data->sampling_rate; + ignore_nice = dbs_data->ignore_nice_load;
- sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; - } else { + if (gov->governor == GOV_ONDEMAND) { struct od_dbs_tuners *od_tuners = dbs_data->tuners;
- sampling_rate = od_tuners->sampling_rate; - ignore_nice = od_tuners->ignore_nice_load; io_busy = od_tuners->io_is_busy; }
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 62bafac5798b..17d2c4282dcd 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -153,9 +153,13 @@ static void *get_cpu_dbs_info_s(int cpu) \
/* Governor demand based switching data (per-policy or global). */ struct dbs_data { - unsigned int min_sampling_rate; int usage_count; void *tuners; + unsigned int min_sampling_rate; + unsigned int ignore_nice_load; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; };
/* Common to all CPUs of a policy */ @@ -216,19 +220,11 @@ struct cs_cpu_dbs_info_s {
/* Per policy Governors sysfs tunables */ struct od_dbs_tuners { - unsigned int ignore_nice_load; - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; unsigned int powersave_bias; unsigned int io_is_busy; };
struct cs_dbs_tuners { - unsigned int ignore_nice_load; - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; unsigned int down_threshold; unsigned int freq_step; }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 8c44bc3fffc5..13c64b662fa1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -110,7 +110,7 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, dbs_info->freq_lo_jiffies = 0; return freq_lo; } - jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); + jiffies_total = usecs_to_jiffies(dbs_data->sampling_rate); jiffies_hi = (freq_avg - freq_lo) * jiffies_total; jiffies_hi += ((freq_hi - freq_lo) / 2); jiffies_hi /= (freq_hi - freq_lo); @@ -161,11 +161,10 @@ static void od_check_cpu(int cpu, unsigned int load) dbs_info->freq_lo = 0;
/* Check for frequency increase */ - if (load > od_tuners->up_threshold) { + if (load > dbs_data->up_threshold) { /* If switching to max speed, apply sampling_down_factor */ if (policy->cur < policy->max) - dbs_info->rate_mult = - od_tuners->sampling_down_factor; + dbs_info->rate_mult = dbs_data->sampling_down_factor; dbs_freq_increase(policy, policy->max); } else { /* Calculate the next frequency proportional to load */ @@ -195,7 +194,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type;
/* Common NORMAL_SAMPLE setup */ @@ -214,7 +212,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) }
if (!delay) - delay = delay_for_sampling_rate(od_tuners->sampling_rate + delay = delay_for_sampling_rate(dbs_data->sampling_rate * dbs_info->rate_mult);
return delay; @@ -239,11 +237,10 @@ static struct dbs_governor od_dbs_gov; static void update_sampling_rate(struct dbs_data *dbs_data, unsigned int new_rate) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cpumask cpumask; int cpu;
- od_tuners->sampling_rate = new_rate = max(new_rate, + dbs_data->sampling_rate = new_rate = max(new_rate, dbs_data->min_sampling_rate);
/* @@ -348,7 +345,6 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -358,21 +354,20 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, return -EINVAL; }
- od_tuners->up_threshold = input; + dbs_data->up_threshold = input; return count; }
static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input, j; int ret; ret = sscanf(buf, "%u", &input);
if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - od_tuners->sampling_down_factor = input; + dbs_data->sampling_down_factor = input;
/* Reset down sampling multiplier in case it was active */ for_each_online_cpu(j) { @@ -399,10 +394,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, if (input > 1) input = 1;
- if (input == od_tuners->ignore_nice_load) { /* nothing to do */ + if (input == dbs_data->ignore_nice_load) { /* nothing to do */ return count; } - od_tuners->ignore_nice_load = input; + dbs_data->ignore_nice_load = input;
/* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -410,7 +405,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); - if (od_tuners->ignore_nice_load) + if (dbs_data->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
@@ -437,12 +432,12 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, return count; }
-show_store_one(od, sampling_rate); show_store_one(od, io_is_busy); -show_store_one(od, up_threshold); -show_store_one(od, sampling_down_factor); -show_store_one(od, ignore_nice_load); show_store_one(od, powersave_bias); +show_store_one_common(od, sampling_rate); +show_store_one_common(od, up_threshold); +show_store_one_common(od, sampling_down_factor); +show_store_one_common(od, ignore_nice_load); show_one_common(od, min_sampling_rate);
gov_sys_pol_attr_rw(sampling_rate); @@ -504,7 +499,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) put_cpu(); if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ - tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; /* * In nohz/micro accounting case we set the minimum frequency * not depending on HZ, but fixed (very low). The deferred @@ -512,15 +507,15 @@ static int od_init(struct dbs_data *dbs_data, bool notify) */ dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; } else { - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
/* For correct statistics, we need 10 ticks for each measure */ dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); }
- tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice_load = 0; + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + dbs_data->ignore_nice_load = 0; tuners->powersave_bias = default_powersave_bias; tuners->io_is_busy = should_io_be_busy();
The ondemand and conservative governors use the global-attr or freq-attr structures to represent sysfs attributes corresponding to their tunables (which of them is actually used depends on whether or not different policy objects can use the same governor with different tunables at the same time and, consequently, on where those attributes are located in sysfs).
Unfortunately, in the freq-attr case, the standard cpufreq show/store sysfs attribute callbacks are applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That may lead to an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely).
We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. Therefore policy->rwsem cannot be dropped in cpufreq_set_policy() at any point, but the deadlock situation described above must be avoided too.
To that end, use the observation that in principle governor tunables may be represented by the same data type regardless of whether the governor is system-wide or per-policy and introduce a new structure, struct governor_attr, for representing them and new corresponding macros for creating show/store sysfs callbacks for them. Also make their parent kobject use a new kobject type whose default show/store callbacks are not related to the standard core cpufreq ones in any way (and they don't acquire policy->rwsem in particular).
[ Rafael: Written changelog ] Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Tested-by: Juri Lelli juri.lelli@arm.com Tested-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com --- drivers/cpufreq/cpufreq_conservative.c | 72 ++++++++++++---------------------- drivers/cpufreq/cpufreq_governor.c | 68 ++++++++++++++++++++++++++++---- drivers/cpufreq/cpufreq_governor.h | 39 +++++++++++++++++- drivers/cpufreq/cpufreq_ondemand.c | 72 ++++++++++++---------------------- 4 files changed, 147 insertions(+), 104 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4f640b028c94..ed081dbce00c 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -235,54 +235,33 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, return count; }
-show_store_one(cs, down_threshold); -show_store_one(cs, freq_step); -show_store_one_common(cs, sampling_rate); -show_store_one_common(cs, sampling_down_factor); -show_store_one_common(cs, up_threshold); -show_store_one_common(cs, ignore_nice_load); -show_one_common(cs, min_sampling_rate); - -gov_sys_pol_attr_rw(sampling_rate); -gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(up_threshold); -gov_sys_pol_attr_rw(down_threshold); -gov_sys_pol_attr_rw(ignore_nice_load); -gov_sys_pol_attr_rw(freq_step); -gov_sys_pol_attr_ro(min_sampling_rate); - -static struct attribute *dbs_attributes_gov_sys[] = { - &min_sampling_rate_gov_sys.attr, - &sampling_rate_gov_sys.attr, - &sampling_down_factor_gov_sys.attr, - &up_threshold_gov_sys.attr, - &down_threshold_gov_sys.attr, - &ignore_nice_load_gov_sys.attr, - &freq_step_gov_sys.attr, +gov_show_one_common(sampling_rate); +gov_show_one_common(sampling_down_factor); +gov_show_one_common(up_threshold); +gov_show_one_common(ignore_nice_load); +gov_show_one_common(min_sampling_rate); +gov_show_one(cs, down_threshold); +gov_show_one(cs, freq_step); + +gov_attr_rw(sampling_rate); +gov_attr_rw(sampling_down_factor); +gov_attr_rw(up_threshold); +gov_attr_rw(ignore_nice_load); +gov_attr_ro(min_sampling_rate); +gov_attr_rw(down_threshold); +gov_attr_rw(freq_step); + +static struct attribute *cs_attributes[] = { + &min_sampling_rate.attr, + &sampling_rate.attr, + &sampling_down_factor.attr, + &up_threshold.attr, + &down_threshold.attr, + &ignore_nice_load.attr, + &freq_step.attr, NULL };
-static struct attribute_group cs_attr_group_gov_sys = { - .attrs = dbs_attributes_gov_sys, - .name = "conservative", -}; - -static struct attribute *dbs_attributes_gov_pol[] = { - &min_sampling_rate_gov_pol.attr, - &sampling_rate_gov_pol.attr, - &sampling_down_factor_gov_pol.attr, - &up_threshold_gov_pol.attr, - &down_threshold_gov_pol.attr, - &ignore_nice_load_gov_pol.attr, - &freq_step_gov_pol.attr, - NULL -}; - -static struct attribute_group cs_attr_group_gov_pol = { - .attrs = dbs_attributes_gov_pol, - .name = "conservative", -}; - /************************** sysfs end ************************/
static int cs_init(struct dbs_data *dbs_data, bool notify) @@ -331,8 +310,7 @@ static struct dbs_governor cs_dbs_gov = { .owner = THIS_MODULE, }, .governor = GOV_CONSERVATIVE, - .attr_group_gov_sys = &cs_attr_group_gov_sys, - .attr_group_gov_pol = &cs_attr_group_gov_pol, + .kobj_type = { .default_attrs = cs_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b168a32cc8f0..bba9d3fb8103 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -25,12 +25,58 @@ DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex);
-static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov) +static inline struct dbs_data *to_dbs_data(struct kobject *kobj) { - return have_governor_per_policy() ? - gov->attr_group_gov_pol : gov->attr_group_gov_sys; + return container_of(kobj, struct dbs_data, kobj); }
+static inline struct governor_attr *to_gov_attr(struct attribute *attr) +{ + return container_of(attr, struct governor_attr, attr); +} + +static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct dbs_data *dbs_data = to_dbs_data(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret = -EIO; + + if (gattr->show) + ret = gattr->show(dbs_data, buf); + + return ret; +} + +static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct dbs_data *dbs_data = to_dbs_data(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret = -EIO; + + mutex_lock(&dbs_data->mutex); + + if (gattr->store) + ret = gattr->store(dbs_data, buf, count); + + mutex_unlock(&dbs_data->mutex); + + return ret; +} + +/* + * Sysfs Ops for accessing governor attributes. + * + * All show/store invocations for governor specific sysfs attributes, will first + * call the below show/store callbacks and the attribute specific callback will + * be called from within it. + */ +static const struct sysfs_ops governor_sysfs_ops = { + .show = governor_show, + .store = governor_store, +}; + void dbs_check_cpu(struct cpufreq_policy *policy) { int cpu = policy->cpu; @@ -351,6 +397,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) }
dbs_data->usage_count = 1; + mutex_init(&dbs_data->mutex);
ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) @@ -373,10 +420,15 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs;
- ret = sysfs_create_group(get_governor_parent_kobj(policy), - get_sysfs_attr(gov)); - if (ret) + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; + ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, + get_governor_parent_kobj(policy), + gov->gov.name); + if (ret) { + pr_err("cpufreq: Governor initialization failed (dbs_data kobject initialization error %d)\n", + ret); goto reset_gdbs_data; + }
return 0;
@@ -404,8 +456,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) return -EBUSY;
if (!--dbs_data->usage_count) { - sysfs_remove_group(get_governor_parent_kobj(policy), - get_sysfs_attr(gov)); + kobject_put(&dbs_data->kobj);
policy->governor_data = NULL;
@@ -413,6 +464,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) gov->gdbs_data = NULL;
gov->exit(dbs_data, policy->governor->initialized == 1); + mutex_destroy(&dbs_data->mutex); kfree(dbs_data); } else { policy->governor_data = NULL; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 17d2c4282dcd..32b131bb2081 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -160,8 +160,44 @@ struct dbs_data { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; + + struct kobject kobj; + /* Protect concurrent updates to governor tunables from sysfs */ + struct mutex mutex; +}; + +/* Governor's specific attributes */ +struct dbs_data; +struct governor_attr { + struct attribute attr; + ssize_t (*show)(struct dbs_data *dbs_data, char *buf); + ssize_t (*store)(struct dbs_data *dbs_data, const char *buf, + size_t count); };
+#define gov_show_one(_gov, file_name) \ +static ssize_t show_##file_name \ +(struct dbs_data *dbs_data, char *buf) \ +{ \ + struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} + +#define gov_show_one_common(file_name) \ +static ssize_t show_##file_name \ +(struct dbs_data *dbs_data, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} + +#define gov_attr_ro(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define gov_attr_rw(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + /* Common to all CPUs of a policy */ struct policy_dbs_info { struct cpufreq_policy *policy; @@ -236,8 +272,7 @@ struct dbs_governor { #define GOV_ONDEMAND 0 #define GOV_CONSERVATIVE 1 int governor; - struct attribute_group *attr_group_gov_sys; /* one governor - system */ - struct attribute_group *attr_group_gov_pol; /* one governor - policy */ + struct kobj_type kobj_type;
/* * Common data for platforms that don't set diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 13c64b662fa1..e36792f60348 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -432,54 +432,33 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, return count; }
-show_store_one(od, io_is_busy); -show_store_one(od, powersave_bias); -show_store_one_common(od, sampling_rate); -show_store_one_common(od, up_threshold); -show_store_one_common(od, sampling_down_factor); -show_store_one_common(od, ignore_nice_load); -show_one_common(od, min_sampling_rate); - -gov_sys_pol_attr_rw(sampling_rate); -gov_sys_pol_attr_rw(io_is_busy); -gov_sys_pol_attr_rw(up_threshold); -gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(ignore_nice_load); -gov_sys_pol_attr_rw(powersave_bias); -gov_sys_pol_attr_ro(min_sampling_rate); - -static struct attribute *dbs_attributes_gov_sys[] = { - &min_sampling_rate_gov_sys.attr, - &sampling_rate_gov_sys.attr, - &up_threshold_gov_sys.attr, - &sampling_down_factor_gov_sys.attr, - &ignore_nice_load_gov_sys.attr, - &powersave_bias_gov_sys.attr, - &io_is_busy_gov_sys.attr, +gov_show_one_common(sampling_rate); +gov_show_one_common(up_threshold); +gov_show_one_common(sampling_down_factor); +gov_show_one_common(ignore_nice_load); +gov_show_one_common(min_sampling_rate); +gov_show_one(od, io_is_busy); +gov_show_one(od, powersave_bias); + +gov_attr_rw(sampling_rate); +gov_attr_rw(io_is_busy); +gov_attr_rw(up_threshold); +gov_attr_rw(sampling_down_factor); +gov_attr_rw(ignore_nice_load); +gov_attr_rw(powersave_bias); +gov_attr_ro(min_sampling_rate); + +static struct attribute *od_attributes[] = { + &min_sampling_rate.attr, + &sampling_rate.attr, + &up_threshold.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &powersave_bias.attr, + &io_is_busy.attr, NULL };
-static struct attribute_group od_attr_group_gov_sys = { - .attrs = dbs_attributes_gov_sys, - .name = "ondemand", -}; - -static struct attribute *dbs_attributes_gov_pol[] = { - &min_sampling_rate_gov_pol.attr, - &sampling_rate_gov_pol.attr, - &up_threshold_gov_pol.attr, - &sampling_down_factor_gov_pol.attr, - &ignore_nice_load_gov_pol.attr, - &powersave_bias_gov_pol.attr, - &io_is_busy_gov_pol.attr, - NULL -}; - -static struct attribute_group od_attr_group_gov_pol = { - .attrs = dbs_attributes_gov_pol, - .name = "ondemand", -}; - /************************** sysfs end ************************/
static int od_init(struct dbs_data *dbs_data, bool notify) @@ -544,8 +523,7 @@ static struct dbs_governor od_dbs_gov = { .owner = THIS_MODULE, }, .governor = GOV_ONDEMAND, - .attr_group_gov_sys = &od_attr_group_gov_sys, - .attr_group_gov_pol = &od_attr_group_gov_pol, + .kobj_type = { .default_attrs = od_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = od_dbs_timer,
"The previous commit introduced a new set of macros for creating sysfs attributes that represent governor tunables and the old macros used for this purpose are not needed any more, so drop them."
[ Rafael: Written changelog ] Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Tested-by: Juri Lelli juri.lelli@arm.com Tested-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com --- drivers/cpufreq/cpufreq_governor.h | 89 -------------------------------------- 1 file changed, 89 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 32b131bb2081..d46ebcb4f16d 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -41,95 +41,6 @@ /* Ondemand Sampling types */ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
-/* - * Macro for creating governors sysfs routines - * - * - gov_sys: One governor instance per whole system - * - gov_pol: One governor instance per policy - */ - -/* Create attributes */ -#define gov_sys_attr_ro(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0444, show_##_name##_gov_sys, NULL) - -#define gov_sys_attr_rw(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) - -#define gov_pol_attr_ro(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0444, show_##_name##_gov_pol, NULL) - -#define gov_pol_attr_rw(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) - -#define gov_sys_pol_attr_rw(_name) \ - gov_sys_attr_rw(_name); \ - gov_pol_attr_rw(_name) - -#define gov_sys_pol_attr_ro(_name) \ - gov_sys_attr_ro(_name); \ - gov_pol_attr_ro(_name) - -/* Create show/store routines */ -#define show_one(_gov, file_name) \ -static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct _gov##_dbs_tuners *tuners = _gov##_dbs_gov.gdbs_data->tuners; \ - return sprintf(buf, "%u\n", tuners->file_name); \ -} \ - \ -static ssize_t show_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ - return sprintf(buf, "%u\n", tuners->file_name); \ -} - -#define store_one(_gov, file_name) \ -static ssize_t store_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return store_##file_name(dbs_data, buf, count); \ -} \ - \ -static ssize_t store_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, const char *buf, size_t count) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - return store_##file_name(policy_dbs->dbs_data, buf, count); \ -} - -#define show_store_one(_gov, file_name) \ -show_one(_gov, file_name); \ -store_one(_gov, file_name) - -#define show_one_common(_gov, file_name) \ -static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return sprintf(buf, "%u\n", dbs_data->file_name); \ -} \ - \ -static ssize_t show_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - return sprintf(buf, "%u\n", dbs_data->file_name); \ -} - -#define show_store_one_common(_gov, file_name) \ -show_one_common(_gov, file_name); \ -store_one(_gov, file_name) - /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \
Earlier, when the struct freq-attr was used to represent governor attributes, the standard cpufreq show/store sysfs attribute callbacks were applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That could have resulted in an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely).
We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time.
The previous commit, "cpufreq: governor: New sysfs show/store callbacks for governor tunables", fixed the original ABBA deadlock by adding new governor specific show/store callbacks.
We don't have to drop rwsem around invocations of governor event CPUFREQ_GOV_POLICY_EXIT anymore, and original fix can be reverted now.
Fixes: 955ef4833574 ("cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT") Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Reported-by: Juri Lelli juri.lelli@arm.com Tested-by: Juri Lelli juri.lelli@arm.com Tested-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com --- drivers/cpufreq/cpufreq.c | 5 ----- include/linux/cpufreq.h | 4 ---- 2 files changed, 9 deletions(-)
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 29755fcbf200..9c62bf35b9dc 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2204,10 +2204,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; }
- up_write(&policy->rwsem); ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); - if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2223,9 +2220,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, if (!ret) goto out;
- up_write(&policy->rwsem); __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); }
/* new governor failed, so re-start old one */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 11aa93134493..2a029c587975 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -100,10 +100,6 @@ struct cpufreq_policy { * - Any routine that will write to the policy structure and/or may take away * the policy altogether (eg. CPU hotplug), will hold this lock in write * mode before doing so. - * - * Additional rules: - * - Lock should not be held across - * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); */ struct rw_semaphore rwsem;
An instance of 'struct dbs_data' can support multiple 'struct policy_dbs_info' instances. To traverse all policy_dbs supported by a dbs_data, create a list of policy_dbs within dbs_data.
We can traverse this list now, instead of traversing the loop for all online CPUs in update_sampling_rate(), to solve the circular dependency lockdep reported by Juri (and verified by Shilpa) earlier:
====================================================== [ INFO: possible circular locking dependency detected ] 4.4.0+ #445 Not tainted ------------------------------------------------------- trace.sh/1723 is trying to acquire lock: (s_active#48){++++.+}, at: [<c01f78c8>] kernfs_remove_by_name_ns+0x4c/0x94
but task is already holding lock: (od_dbs_cdata.mutex){+.+.+.}, at: [<c05824a0>] cpufreq_governor_dbs+0x34/0x5d4
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (od_dbs_cdata.mutex){+.+.+.}: [<c075b040>] mutex_lock_nested+0x7c/0x434 [<c05824a0>] cpufreq_governor_dbs+0x34/0x5d4 [<c0017c10>] return_to_handler+0x0/0x18
-> #1 (&policy->rwsem){+++++.}: [<c075ca8c>] down_read+0x58/0x94 [<c057c244>] show+0x30/0x60 [<c01f934c>] sysfs_kf_seq_show+0x90/0xfc [<c01f7ad8>] kernfs_seq_show+0x34/0x38 [<c01a22ec>] seq_read+0x1e4/0x4e4 [<c01f8694>] kernfs_fop_read+0x120/0x1a0 [<c01794b4>] __vfs_read+0x3c/0xe0 [<c017a378>] vfs_read+0x98/0x104 [<c017a434>] SyS_read+0x50/0x90 [<c000fd40>] ret_fast_syscall+0x0/0x1c
-> #0 (s_active#48){++++.+}: [<c008238c>] lock_acquire+0xd4/0x20c [<c01f6ae4>] __kernfs_remove+0x288/0x328 [<c01f78c8>] kernfs_remove_by_name_ns+0x4c/0x94 [<c01fa024>] remove_files+0x44/0x88 [<c01fa5a4>] sysfs_remove_group+0x50/0xa4 [<c058285c>] cpufreq_governor_dbs+0x3f0/0x5d4 [<c0017c10>] return_to_handler+0x0/0x18
other info that might help us debug this:
Chain exists of: s_active#48 --> &policy->rwsem --> od_dbs_cdata.mutex
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(od_dbs_cdata.mutex); lock(&policy->rwsem); lock(od_dbs_cdata.mutex); lock(s_active#48);
*** DEADLOCK ***
5 locks held by trace.sh/1723: #0: (sb_writers#6){.+.+.+}, at: [<c017beb8>] __sb_start_write+0xb4/0xc0 #1: (&of->mutex){+.+.+.}, at: [<c01f8418>] kernfs_fop_write+0x6c/0x1c8 #2: (s_active#35){.+.+.+}, at: [<c01f8420>] kernfs_fop_write+0x74/0x1c8 #3: (cpu_hotplug.lock){++++++}, at: [<c0029e6c>] get_online_cpus+0x48/0xb8 #4: (od_dbs_cdata.mutex){+.+.+.}, at: [<c05824a0>] cpufreq_governor_dbs+0x34/0x5d4
stack backtrace: CPU: 2 PID: 1723 Comm: trace.sh Not tainted 4.4.0+ #445 Hardware name: ARM-Versatile Express [<c001883c>] (unwind_backtrace) from [<c0013f50>] (show_stack+0x20/0x24) [<c0013f50>] (show_stack) from [<c044ad90>] (dump_stack+0x80/0xb4) [<c044ad90>] (dump_stack) from [<c0128edc>] (print_circular_bug+0x29c/0x2f0) [<c0128edc>] (print_circular_bug) from [<c0081708>] (__lock_acquire+0x163c/0x1d74) [<c0081708>] (__lock_acquire) from [<c008238c>] (lock_acquire+0xd4/0x20c) [<c008238c>] (lock_acquire) from [<c01f6ae4>] (__kernfs_remove+0x288/0x328) [<c01f6ae4>] (__kernfs_remove) from [<c01f78c8>] (kernfs_remove_by_name_ns+0x4c/0x94) [<c01f78c8>] (kernfs_remove_by_name_ns) from [<c01fa024>] (remove_files+0x44/0x88) [<c01fa024>] (remove_files) from [<c01fa5a4>] (sysfs_remove_group+0x50/0xa4) [<c01fa5a4>] (sysfs_remove_group) from [<c058285c>] (cpufreq_governor_dbs+0x3f0/0x5d4) [<c058285c>] (cpufreq_governor_dbs) from [<c0017c10>] (return_to_handler+0x0/0x18)
This also updates the comment above update_sampling_rate() to make it more relevant to the current state of code.
Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Reported-by: Juri Lelli juri.lelli@arm.com Tested-by: Juri Lelli juri.lelli@arm.com Tested-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com --- drivers/cpufreq/cpufreq_governor.c | 22 ++++++++-- drivers/cpufreq/cpufreq_governor.h | 7 ++- drivers/cpufreq/cpufreq_ondemand.c | 89 +++++++++++++------------------------- 3 files changed, 54 insertions(+), 64 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index bba9d3fb8103..8e53f804a5af 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -384,9 +384,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = -EINVAL; goto free_policy_dbs_info; } - dbs_data->usage_count++; policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; + + mutex_lock(&dbs_data->mutex); + dbs_data->usage_count++; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + mutex_unlock(&dbs_data->mutex); + return 0; }
@@ -396,7 +401,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; }
- dbs_data->usage_count = 1; + INIT_LIST_HEAD(&dbs_data->policy_dbs_list); mutex_init(&dbs_data->mutex);
ret = gov->init(dbs_data, !policy->governor->initialized); @@ -417,9 +422,12 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = dbs_data;
- policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs;
+ policy_dbs->dbs_data = dbs_data; + dbs_data->usage_count = 1; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, get_governor_parent_kobj(policy), @@ -450,12 +458,18 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; + int count;
/* State should be equivalent to INIT */ if (policy_dbs->policy) return -EBUSY;
- if (!--dbs_data->usage_count) { + mutex_lock(&dbs_data->mutex); + list_del(&policy_dbs->list); + count = dbs_data->usage_count--; + mutex_unlock(&dbs_data->mutex); + + if (!count) { kobject_put(&dbs_data->kobj);
policy->governor_data = NULL; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index d46ebcb4f16d..4e77efb7db67 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -73,7 +73,11 @@ struct dbs_data { unsigned int up_threshold;
struct kobject kobj; - /* Protect concurrent updates to governor tunables from sysfs */ + struct list_head policy_dbs_list; + /* + * Protect concurrent updates to governor tunables from sysfs, + * policy_dbs_list and usage_count. + */ struct mutex mutex; };
@@ -125,6 +129,7 @@ struct policy_dbs_info { struct work_struct work; /* dbs_data may be shared between multiple policy objects */ struct dbs_data *dbs_data; + struct list_head list; };
static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index e36792f60348..38301c6b31c7 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -226,84 +226,55 @@ static struct dbs_governor od_dbs_gov; * @new_rate: new sampling rate * * If new rate is smaller than the old, simply updating - * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the + * dbs.sampling_rate might not be appropriate. For example, if the * original sampling_rate was 1 second and the requested new sampling rate is 10 * ms because the user needs immediate reaction from ondemand governor, but not * sure if higher frequency will be required or not, then, the governor may * change the sampling rate too late; up to 1 second later. Thus, if we are * reducing the sampling rate, we need to make the new value effective * immediately. + * + * On the other hand, if new rate is larger than the old, then we may evaluate + * the load too soon, and it might we worth updating sample_delay_ns then as + * well. + * + * This must be called with dbs_data->mutex held, otherwise traversing + * policy_dbs_list isn't safe. */ static void update_sampling_rate(struct dbs_data *dbs_data, unsigned int new_rate) { - struct cpumask cpumask; - int cpu; + struct policy_dbs_info *policy_dbs;
dbs_data->sampling_rate = new_rate = max(new_rate, dbs_data->min_sampling_rate);
/* - * Lock governor so that governor start/stop can't execute in parallel. + * We are operating under dbs_data->mutex and so the list and its + * entries can't be freed concurrently. */ - mutex_lock(&dbs_data_mutex); - - cpumask_copy(&cpumask, cpu_online_mask); - - for_each_cpu(cpu, &cpumask) { - struct cpufreq_policy *policy; - struct od_cpu_dbs_info_s *dbs_info; - struct cpu_dbs_info *cdbs; - struct policy_dbs_info *policy_dbs; - - dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - cdbs = &dbs_info->cdbs; - policy_dbs = cdbs->policy_dbs; - + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + mutex_lock(&policy_dbs->timer_mutex); /* - * A valid policy_dbs and policy_dbs->policy means governor - * hasn't stopped or exited yet. + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), but that + * really doesn't matter. If the read returns a value that's + * too big, the sample will be skipped, but the next invocation + * of dbs_update_util_handler() (when the update has been + * completed) will take a sample. If the returned value is too + * small, the sample will be taken immediately, but that isn't a + * problem, as we want the new rate to take effect immediately + * anyway. + * + * If this runs in parallel with dbs_work_handler(), we may end + * up overwriting the sample_delay_ns value that it has just + * written, but the difference should not be too big and it will + * be corrected next time a sample is taken, so it shouldn't be + * significant. */ - if (!policy_dbs || !policy_dbs->policy) - continue; - - policy = policy_dbs->policy; - - /* clear all CPUs of this policy */ - cpumask_andnot(&cpumask, &cpumask, policy->cpus); - - /* - * Update sampling rate for CPUs whose policy is governed by - * dbs_data. In case of governor_per_policy, only a single - * policy will be governed by dbs_data, otherwise there can be - * multiple policies that are governed by the same dbs_data. - */ - if (dbs_data == policy_dbs->dbs_data) { - mutex_lock(&policy_dbs->timer_mutex); - /* - * On 32-bit architectures this may race with the - * sample_delay_ns read in dbs_update_util_handler(), - * but that really doesn't matter. If the read returns - * a value that's too big, the sample will be skipped, - * but the next invocation of dbs_update_util_handler() - * (when the update has been completed) will take a - * sample. If the returned value is too small, the - * sample will be taken immediately, but that isn't a - * problem, as we want the new rate to take effect - * immediately anyway. - * - * If this runs in parallel with dbs_work_handler(), we - * may end up overwriting the sample_delay_ns value that - * it has just written, but the difference should not be - * too big and it will be corrected next time a sample - * is taken, so it shouldn't be significant. - */ - gov_update_sample_delay(policy_dbs, new_rate); - mutex_unlock(&policy_dbs->timer_mutex); - } + gov_update_sample_delay(policy_dbs, new_rate); + mutex_unlock(&policy_dbs->timer_mutex); } - - mutex_unlock(&dbs_data_mutex); }
static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
On Tuesday, February 09, 2016 09:01:36 AM Viresh Kumar wrote:
An instance of 'struct dbs_data' can support multiple 'struct policy_dbs_info' instances. To traverse all policy_dbs supported by a dbs_data, create a list of policy_dbs within dbs_data.
We can traverse this list now, instead of traversing the loop for all online CPUs in update_sampling_rate(), to solve the circular dependency lockdep reported by Juri (and verified by Shilpa) earlier:
====================================================== [ INFO: possible circular locking dependency detected ] 4.4.0+ #445 Not tainted
trace.sh/1723 is trying to acquire lock: (s_active#48){++++.+}, at: [<c01f78c8>] kernfs_remove_by_name_ns+0x4c/0x94
but task is already holding lock: (od_dbs_cdata.mutex){+.+.+.}, at: [<c05824a0>] cpufreq_governor_dbs+0x34/0x5d4
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (od_dbs_cdata.mutex){+.+.+.}: [<c075b040>] mutex_lock_nested+0x7c/0x434 [<c05824a0>] cpufreq_governor_dbs+0x34/0x5d4 [<c0017c10>] return_to_handler+0x0/0x18
-> #1 (&policy->rwsem){+++++.}: [<c075ca8c>] down_read+0x58/0x94 [<c057c244>] show+0x30/0x60 [<c01f934c>] sysfs_kf_seq_show+0x90/0xfc [<c01f7ad8>] kernfs_seq_show+0x34/0x38 [<c01a22ec>] seq_read+0x1e4/0x4e4 [<c01f8694>] kernfs_fop_read+0x120/0x1a0 [<c01794b4>] __vfs_read+0x3c/0xe0 [<c017a378>] vfs_read+0x98/0x104 [<c017a434>] SyS_read+0x50/0x90 [<c000fd40>] ret_fast_syscall+0x0/0x1c
-> #0 (s_active#48){++++.+}: [<c008238c>] lock_acquire+0xd4/0x20c [<c01f6ae4>] __kernfs_remove+0x288/0x328 [<c01f78c8>] kernfs_remove_by_name_ns+0x4c/0x94 [<c01fa024>] remove_files+0x44/0x88 [<c01fa5a4>] sysfs_remove_group+0x50/0xa4 [<c058285c>] cpufreq_governor_dbs+0x3f0/0x5d4 [<c0017c10>] return_to_handler+0x0/0x18
other info that might help us debug this:
Chain exists of: s_active#48 --> &policy->rwsem --> od_dbs_cdata.mutex
Possible unsafe locking scenario:
CPU0 CPU1 ---- ----
lock(od_dbs_cdata.mutex); lock(&policy->rwsem); lock(od_dbs_cdata.mutex); lock(s_active#48);
*** DEADLOCK ***
5 locks held by trace.sh/1723: #0: (sb_writers#6){.+.+.+}, at: [<c017beb8>] __sb_start_write+0xb4/0xc0 #1: (&of->mutex){+.+.+.}, at: [<c01f8418>] kernfs_fop_write+0x6c/0x1c8 #2: (s_active#35){.+.+.+}, at: [<c01f8420>] kernfs_fop_write+0x74/0x1c8 #3: (cpu_hotplug.lock){++++++}, at: [<c0029e6c>] get_online_cpus+0x48/0xb8 #4: (od_dbs_cdata.mutex){+.+.+.}, at: [<c05824a0>] cpufreq_governor_dbs+0x34/0x5d4
stack backtrace: CPU: 2 PID: 1723 Comm: trace.sh Not tainted 4.4.0+ #445 Hardware name: ARM-Versatile Express [<c001883c>] (unwind_backtrace) from [<c0013f50>] (show_stack+0x20/0x24) [<c0013f50>] (show_stack) from [<c044ad90>] (dump_stack+0x80/0xb4) [<c044ad90>] (dump_stack) from [<c0128edc>] (print_circular_bug+0x29c/0x2f0) [<c0128edc>] (print_circular_bug) from [<c0081708>] (__lock_acquire+0x163c/0x1d74) [<c0081708>] (__lock_acquire) from [<c008238c>] (lock_acquire+0xd4/0x20c) [<c008238c>] (lock_acquire) from [<c01f6ae4>] (__kernfs_remove+0x288/0x328) [<c01f6ae4>] (__kernfs_remove) from [<c01f78c8>] (kernfs_remove_by_name_ns+0x4c/0x94) [<c01f78c8>] (kernfs_remove_by_name_ns) from [<c01fa024>] (remove_files+0x44/0x88) [<c01fa024>] (remove_files) from [<c01fa5a4>] (sysfs_remove_group+0x50/0xa4) [<c01fa5a4>] (sysfs_remove_group) from [<c058285c>] (cpufreq_governor_dbs+0x3f0/0x5d4) [<c058285c>] (cpufreq_governor_dbs) from [<c0017c10>] (return_to_handler+0x0/0x18)
This also updates the comment above update_sampling_rate() to make it more relevant to the current state of code.
Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Reported-by: Juri Lelli juri.lelli@arm.com Tested-by: Juri Lelli juri.lelli@arm.com Tested-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com
drivers/cpufreq/cpufreq_governor.c | 22 ++++++++-- drivers/cpufreq/cpufreq_governor.h | 7 ++- drivers/cpufreq/cpufreq_ondemand.c | 89 +++++++++++++------------------------- 3 files changed, 54 insertions(+), 64 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index bba9d3fb8103..8e53f804a5af 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -384,9 +384,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = -EINVAL; goto free_policy_dbs_info; }
policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs;dbs_data->usage_count++;
mutex_lock(&dbs_data->mutex);
dbs_data->usage_count++;
list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
mutex_unlock(&dbs_data->mutex);
- return 0; }
@@ -396,7 +401,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; }
- dbs_data->usage_count = 1;
- INIT_LIST_HEAD(&dbs_data->policy_dbs_list); mutex_init(&dbs_data->mutex);
ret = gov->init(dbs_data, !policy->governor->initialized); @@ -417,9 +422,12 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = dbs_data;
- policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs;
- policy_dbs->dbs_data = dbs_data;
- dbs_data->usage_count = 1;
- list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
- gov->kobj_type.sysfs_ops = &governor_sysfs_ops; ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, get_governor_parent_kobj(policy),
@@ -450,12 +458,18 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data;
- int count;
/* State should be equivalent to INIT */ if (policy_dbs->policy) return -EBUSY;
- if (!--dbs_data->usage_count) {
- mutex_lock(&dbs_data->mutex);
- list_del(&policy_dbs->list);
- count = dbs_data->usage_count--;
This appears to be planting a bug.
The way you wrote it the decrementation will take place after the assignment, so count will contain the old value.
- mutex_unlock(&dbs_data->mutex);
- if (!count) { kobject_put(&dbs_data->kobj);
policy->governor_data = NULL;
Thanks, Rafael
On 09-02-16, 21:23, Rafael J. Wysocki wrote:
- count = dbs_data->usage_count--;
This appears to be planting a bug.
The way you wrote it the decrementation will take place after the assignment, so count will contain the old value.
Oh crap, what was I smoking :)
On Tue, Feb 9, 2016 at 4:31 AM, Viresh Kumar viresh.kumar@linaro.org wrote:
An instance of 'struct dbs_data' can support multiple 'struct policy_dbs_info' instances. To traverse all policy_dbs supported by a dbs_data, create a list of policy_dbs within dbs_data.
We can traverse this list now, instead of traversing the loop for all online CPUs in update_sampling_rate(), to solve the circular dependency lockdep reported by Juri (and verified by Shilpa) earlier:
In addition to the previous comment, here's my changelog for this patch:
"The dbs_data_mutex lock is currently used in two places. First, cpufreq_governor_dbs() uses it to guarantee mutual exlusion between invocations of governor operations from the core. Second, it is used by ondemand governor's update_sampling_rate() to ensure the stability of data structures walked by it.
The second usage is quite problematic, because update_sampling_rate() is called from a governor sysfs attribute's ->store callback and that leads to a deadlock scenario involving cpufreq_governor_exit() which runs under dbs_data_mutex. Thus it is better to rework the code so update_sampling_rate() doesn't need to acquire dbs_data_mutex.
To that end, rework update_sampling_rate() to walk a list of policy_dbs objects supported by the dbs_data one it has been called for (instead of walking cpu_dbs_info object for all CPUs). The list manipulation is protected with dbs_data->mutex which also is held around the execution of update_sampling_rate(), it is not necessary to hold dbs_data_mutex in that function any more."
Thanks, Rafael
The dbs_data_mutex lock is currently used in two places. First, cpufreq_governor_dbs() uses it to guarantee mutual exclusion between invocations of governor operations from the core. Second, it is used by ondemand governor's update_sampling_rate() to ensure the stability of data structures walked by it.
The second usage is quite problematic, because update_sampling_rate() is called from a governor sysfs attribute's ->store callback and that leads to a deadlock scenario involving cpufreq_governor_exit() which runs under dbs_data_mutex. Thus it is better to rework the code so update_sampling_rate() doesn't need to acquire dbs_data_mutex.
To that end, rework update_sampling_rate() to walk a list of policy_dbs objects supported by the dbs_data one it has been called for (instead of walking cpu_dbs_info object for all CPUs). The list manipulation is protected with dbs_data->mutex which also is held around the execution of update_sampling_rate(), it is not necessary to hold dbs_data_mutex in that function any more.
Reported-by: Juri Lelli juri.lelli@arm.com Reported-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Tested-by: Juri Lelli juri.lelli@arm.com Tested-by: Shilpasri G Bhat shilpa.bhat@linux.vnet.ibm.com [ rjw: Changelog ]
--- V5: - Updated changelog (Thanks again Rafael) - Fixed bug regarding a-- and --a :) --- drivers/cpufreq/cpufreq_governor.c | 22 ++++++++-- drivers/cpufreq/cpufreq_governor.h | 7 ++- drivers/cpufreq/cpufreq_ondemand.c | 89 +++++++++++++------------------------- 3 files changed, 54 insertions(+), 64 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index bba9d3fb8103..3a9dab752148 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -384,9 +384,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = -EINVAL; goto free_policy_dbs_info; } - dbs_data->usage_count++; policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; + + mutex_lock(&dbs_data->mutex); + dbs_data->usage_count++; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + mutex_unlock(&dbs_data->mutex); + return 0; }
@@ -396,7 +401,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; }
- dbs_data->usage_count = 1; + INIT_LIST_HEAD(&dbs_data->policy_dbs_list); mutex_init(&dbs_data->mutex);
ret = gov->init(dbs_data, !policy->governor->initialized); @@ -417,9 +422,12 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = dbs_data;
- policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs;
+ policy_dbs->dbs_data = dbs_data; + dbs_data->usage_count = 1; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, get_governor_parent_kobj(policy), @@ -450,12 +458,18 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; + int count;
/* State should be equivalent to INIT */ if (policy_dbs->policy) return -EBUSY;
- if (!--dbs_data->usage_count) { + mutex_lock(&dbs_data->mutex); + list_del(&policy_dbs->list); + count = --dbs_data->usage_count; + mutex_unlock(&dbs_data->mutex); + + if (!count) { kobject_put(&dbs_data->kobj);
policy->governor_data = NULL; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index d46ebcb4f16d..4e77efb7db67 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -73,7 +73,11 @@ struct dbs_data { unsigned int up_threshold;
struct kobject kobj; - /* Protect concurrent updates to governor tunables from sysfs */ + struct list_head policy_dbs_list; + /* + * Protect concurrent updates to governor tunables from sysfs, + * policy_dbs_list and usage_count. + */ struct mutex mutex; };
@@ -125,6 +129,7 @@ struct policy_dbs_info { struct work_struct work; /* dbs_data may be shared between multiple policy objects */ struct dbs_data *dbs_data; + struct list_head list; };
static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index e36792f60348..38301c6b31c7 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -226,84 +226,55 @@ static struct dbs_governor od_dbs_gov; * @new_rate: new sampling rate * * If new rate is smaller than the old, simply updating - * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the + * dbs.sampling_rate might not be appropriate. For example, if the * original sampling_rate was 1 second and the requested new sampling rate is 10 * ms because the user needs immediate reaction from ondemand governor, but not * sure if higher frequency will be required or not, then, the governor may * change the sampling rate too late; up to 1 second later. Thus, if we are * reducing the sampling rate, we need to make the new value effective * immediately. + * + * On the other hand, if new rate is larger than the old, then we may evaluate + * the load too soon, and it might we worth updating sample_delay_ns then as + * well. + * + * This must be called with dbs_data->mutex held, otherwise traversing + * policy_dbs_list isn't safe. */ static void update_sampling_rate(struct dbs_data *dbs_data, unsigned int new_rate) { - struct cpumask cpumask; - int cpu; + struct policy_dbs_info *policy_dbs;
dbs_data->sampling_rate = new_rate = max(new_rate, dbs_data->min_sampling_rate);
/* - * Lock governor so that governor start/stop can't execute in parallel. + * We are operating under dbs_data->mutex and so the list and its + * entries can't be freed concurrently. */ - mutex_lock(&dbs_data_mutex); - - cpumask_copy(&cpumask, cpu_online_mask); - - for_each_cpu(cpu, &cpumask) { - struct cpufreq_policy *policy; - struct od_cpu_dbs_info_s *dbs_info; - struct cpu_dbs_info *cdbs; - struct policy_dbs_info *policy_dbs; - - dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - cdbs = &dbs_info->cdbs; - policy_dbs = cdbs->policy_dbs; - + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + mutex_lock(&policy_dbs->timer_mutex); /* - * A valid policy_dbs and policy_dbs->policy means governor - * hasn't stopped or exited yet. + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), but that + * really doesn't matter. If the read returns a value that's + * too big, the sample will be skipped, but the next invocation + * of dbs_update_util_handler() (when the update has been + * completed) will take a sample. If the returned value is too + * small, the sample will be taken immediately, but that isn't a + * problem, as we want the new rate to take effect immediately + * anyway. + * + * If this runs in parallel with dbs_work_handler(), we may end + * up overwriting the sample_delay_ns value that it has just + * written, but the difference should not be too big and it will + * be corrected next time a sample is taken, so it shouldn't be + * significant. */ - if (!policy_dbs || !policy_dbs->policy) - continue; - - policy = policy_dbs->policy; - - /* clear all CPUs of this policy */ - cpumask_andnot(&cpumask, &cpumask, policy->cpus); - - /* - * Update sampling rate for CPUs whose policy is governed by - * dbs_data. In case of governor_per_policy, only a single - * policy will be governed by dbs_data, otherwise there can be - * multiple policies that are governed by the same dbs_data. - */ - if (dbs_data == policy_dbs->dbs_data) { - mutex_lock(&policy_dbs->timer_mutex); - /* - * On 32-bit architectures this may race with the - * sample_delay_ns read in dbs_update_util_handler(), - * but that really doesn't matter. If the read returns - * a value that's too big, the sample will be skipped, - * but the next invocation of dbs_update_util_handler() - * (when the update has been completed) will take a - * sample. If the returned value is too small, the - * sample will be taken immediately, but that isn't a - * problem, as we want the new rate to take effect - * immediately anyway. - * - * If this runs in parallel with dbs_work_handler(), we - * may end up overwriting the sample_delay_ns value that - * it has just written, but the difference should not be - * too big and it will be corrected next time a sample - * is taken, so it shouldn't be significant. - */ - gov_update_sample_delay(policy_dbs, new_rate); - mutex_unlock(&policy_dbs->timer_mutex); - } + gov_update_sample_delay(policy_dbs, new_rate); + mutex_unlock(&policy_dbs->timer_mutex); } - - mutex_unlock(&dbs_data_mutex); }
static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
linaro-kernel@lists.linaro.org