MT8173 is an ARMv8-based SoC with 2 clusters. All CPUs in a single cluster share the same power and clock domain. This series tries to add cpufreq support for the MT8173 SoC by using the DT-based cpufreq driver.
From v1 to v2:
1. Add intermediate frequency support in cpufreq-dt driver 2. Use voltage scaling code of cpufreq-dt for little cluster instead of implementation in notifier of mtk-cpufreq driver 3. Code refinement for mtk-cpufreq driver
pi-cheng.chen (4): cpufreq-dt: add clock domain and intermediate frequency support cpufreq: dt-bindings: add bindings for mtk-cpufreq driver cpufreq: mediatek: add Mediatek cpufreq driver ARM64: dts: mediatek: add cpufreq dts for MT8173 SoC
.../devicetree/bindings/cpufreq/cpufreq-mtk.txt | 17 + arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 10 + arch/arm64/boot/dts/mediatek/mt8173.dtsi | 25 ++ drivers/cpufreq/Kconfig.arm | 6 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/cpufreq-dt.c | 68 +++- drivers/cpufreq/mtk-cpufreq.c | 346 +++++++++++++++++++++ include/linux/cpufreq-dt.h | 7 + 8 files changed, 473 insertions(+), 7 deletions(-) create mode 100644 Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt create mode 100644 drivers/cpufreq/mtk-cpufreq.c
In this patch, CPU clock/power domain information is added into the platform_data of cpufreq-dt so that the cpufreq-dt driver can check which CPUs share clock/power. Also, intermediate frequency support is added in this version. Since the program flows of .target_index and .target_intermediate are quite similar, consolidate the flow into a new function to keep readability.
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org --- drivers/cpufreq/cpufreq-dt.c | 68 +++++++++++++++++++++++++++++++++++++++----- include/linux/cpufreq-dt.h | 7 +++++ 2 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index bab67db..5948bdf 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -34,25 +34,37 @@ struct private_data { struct regulator *cpu_reg; struct thermal_cooling_device *cdev; unsigned int voltage_tolerance; /* in percentage */ + unsigned long intermediate_freq; };
-static int set_target(struct cpufreq_policy *policy, unsigned int index) +static unsigned int get_intermediate(struct cpufreq_policy *policy, + unsigned int index) +{ + struct private_data *priv = policy->driver_data; + struct cpufreq_frequency_table *freq_table; + unsigned long freq = clk_get_rate(policy->clk); + + freq_table = cpufreq_frequency_get_table(policy->cpu); + + if (freq == priv->intermediate_freq || + freq_table[index].frequency * 1000 == freq) + return 0; + + return priv->intermediate_freq; +} + +static int set_frequency(struct cpufreq_policy *policy, long freq_Hz) { struct dev_pm_opp *opp; - struct cpufreq_frequency_table *freq_table = policy->freq_table; struct clk *cpu_clk = policy->clk; struct private_data *priv = policy->driver_data; struct device *cpu_dev = priv->cpu_dev; struct regulator *cpu_reg = priv->cpu_reg; unsigned long volt = 0, volt_old = 0, tol = 0; unsigned int old_freq, new_freq; - long freq_Hz, freq_exact; + long freq_exact; int ret;
- freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000); - if (freq_Hz <= 0) - freq_Hz = freq_table[index].frequency * 1000; - freq_exact = freq_Hz; new_freq = freq_Hz / 1000; old_freq = clk_get_rate(cpu_clk) / 1000; @@ -112,6 +124,29 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) return ret; }
+static int target_intermediate(struct cpufreq_policy *policy, + unsigned int index) +{ + struct private_data *priv = policy->driver_data; + long freq_Hz; + + freq_Hz = priv->intermediate_freq; + return set_frequency(policy, freq_Hz); +} + +static int set_target(struct cpufreq_policy *policy, unsigned int index) +{ + struct cpufreq_frequency_table *freq_table = policy->freq_table; + struct clk *cpu_clk = policy->clk; + long freq_Hz; + + freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000); + if (freq_Hz <= 0) + freq_Hz = freq_table[index].frequency * 1000; + + return set_frequency(policy, freq_Hz); +} + static int allocate_resources(int cpu, struct device **cdev, struct regulator **creg, struct clk **cclk) { @@ -296,6 +331,23 @@ static int cpufreq_init(struct cpufreq_policy *policy) pd = cpufreq_get_driver_data(); if (!pd || !pd->independent_clocks) cpumask_setall(policy->cpus); + else if (pd && !list_empty(&pd->domain_list)) { + struct list_head *domain_node; + struct cpufreq_cpu_domain *domain; + + list_for_each(domain_node, &pd->domain_list) { + domain = container_of(domain_node, + struct cpufreq_cpu_domain, node); + if (!cpumask_test_cpu(policy->cpu, &domain->cpus)) + continue; + + if (domain->intermediate_freq) + priv->intermediate_freq = + domain->intermediate_freq; + cpumask_copy(policy->cpus, &domain->cpus); + break; + } + }
of_node_put(np);
@@ -363,6 +415,8 @@ static struct cpufreq_driver dt_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = set_target, .get = cpufreq_generic_get, + .get_intermediate = get_intermediate, + .target_intermediate = target_intermediate, .init = cpufreq_init, .exit = cpufreq_exit, .ready = cpufreq_ready, diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h index 0414009..d6e2097 100644 --- a/include/linux/cpufreq-dt.h +++ b/include/linux/cpufreq-dt.h @@ -10,6 +10,12 @@ #ifndef __CPUFREQ_DT_H__ #define __CPUFREQ_DT_H__
+struct cpufreq_cpu_domain { + struct list_head node; + cpumask_t cpus; + unsigned long intermediate_freq; +}; + struct cpufreq_dt_platform_data { /* * True when each CPU has its own clock to control its @@ -17,6 +23,7 @@ struct cpufreq_dt_platform_data { * clock. */ bool independent_clocks; + struct list_head domain_list; };
#endif /* __CPUFREQ_DT_H__ */
On 4 March 2015 at 14:19, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
In this patch, CPU clock/power domain information is added into the platform_data of cpufreq-dt so that the cpufreq-dt driver can check which CPUs share clock/power. Also, intermediate frequency support is added in this
You should have separate patches for logically separate changes.
version. Since the program flows of .target_index and .target_intermediate are quite similar, consolidate the flow into a new function to keep readability.
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
drivers/cpufreq/cpufreq-dt.c | 68 +++++++++++++++++++++++++++++++++++++++----- include/linux/cpufreq-dt.h | 7 +++++ 2 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index bab67db..5948bdf 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -34,25 +34,37 @@ struct private_data { struct regulator *cpu_reg; struct thermal_cooling_device *cdev; unsigned int voltage_tolerance; /* in percentage */
unsigned long intermediate_freq;
};
-static int set_target(struct cpufreq_policy *policy, unsigned int index) +static unsigned int get_intermediate(struct cpufreq_policy *policy,
unsigned int index)
+{
struct private_data *priv = policy->driver_data;
struct cpufreq_frequency_table *freq_table;
unsigned long freq = clk_get_rate(policy->clk);
This will return current freq, which can also be fetched with policy->cur.
freq_table = cpufreq_frequency_get_table(policy->cpu);
instead, freq_table = policy->freq_table.
Always add a comment over such decision making expressions, on why you chose to return 0.
if (freq == priv->intermediate_freq ||
Looks fine, current freq == intermediate freq..
freq_table[index].frequency * 1000 == freq)
Absolutely wrong, current-freq == requested-freq. Instead it should be:
freq_table[index].frequency * 1000 == priv->intermediate_freq.
return 0;
return priv->intermediate_freq;
+}
+static int set_frequency(struct cpufreq_policy *policy, long freq_Hz) { struct dev_pm_opp *opp;
struct cpufreq_frequency_table *freq_table = policy->freq_table; struct clk *cpu_clk = policy->clk; struct private_data *priv = policy->driver_data; struct device *cpu_dev = priv->cpu_dev; struct regulator *cpu_reg = priv->cpu_reg; unsigned long volt = 0, volt_old = 0, tol = 0; unsigned int old_freq, new_freq;
long freq_Hz, freq_exact;
long freq_exact; int ret;
freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
if (freq_Hz <= 0)
freq_Hz = freq_table[index].frequency * 1000;
freq_exact = freq_Hz; new_freq = freq_Hz / 1000; old_freq = clk_get_rate(cpu_clk) / 1000;
@@ -112,6 +124,29 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) return ret; }
+static int target_intermediate(struct cpufreq_policy *policy,
unsigned int index)
+{
struct private_data *priv = policy->driver_data;
long freq_Hz;
freq_Hz = priv->intermediate_freq;
return set_frequency(policy, freq_Hz);
Instead, return set_frequency(policy, priv->intermediate_freq);
+}
+static int set_target(struct cpufreq_policy *policy, unsigned int index) +{
struct cpufreq_frequency_table *freq_table = policy->freq_table;
struct clk *cpu_clk = policy->clk;
long freq_Hz;
freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
Use policy->clk here directly instead of another local variable.
if (freq_Hz <= 0)
freq_Hz = freq_table[index].frequency * 1000;
Why shouldn't we call clk_round_rate() for intermediate freq as well ? I think, it should be called for it as well.. And so you can save intermediate_freq_index instead of the freq..
return set_frequency(policy, freq_Hz);
+}
static int allocate_resources(int cpu, struct device **cdev, struct regulator **creg, struct clk **cclk) { @@ -296,6 +331,23 @@ static int cpufreq_init(struct cpufreq_policy *policy) pd = cpufreq_get_driver_data(); if (!pd || !pd->independent_clocks) cpumask_setall(policy->cpus);
else if (pd && !list_empty(&pd->domain_list)) {
struct list_head *domain_node;
struct cpufreq_cpu_domain *domain;
list_for_each(domain_node, &pd->domain_list) {
domain = container_of(domain_node,
struct cpufreq_cpu_domain, node);
if (!cpumask_test_cpu(policy->cpu, &domain->cpus))
continue;
if (domain->intermediate_freq)
priv->intermediate_freq =
domain->intermediate_freq;
cpumask_copy(policy->cpus, &domain->cpus);
break;
}
}
Do this in a separate patch.
of_node_put(np);
@@ -363,6 +415,8 @@ static struct cpufreq_driver dt_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = set_target, .get = cpufreq_generic_get,
.get_intermediate = get_intermediate,
.target_intermediate = target_intermediate, .init = cpufreq_init, .exit = cpufreq_exit, .ready = cpufreq_ready,
diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h index 0414009..d6e2097 100644 --- a/include/linux/cpufreq-dt.h +++ b/include/linux/cpufreq-dt.h @@ -10,6 +10,12 @@ #ifndef __CPUFREQ_DT_H__ #define __CPUFREQ_DT_H__
+struct cpufreq_cpu_domain {
struct list_head node;
cpumask_t cpus;
unsigned long intermediate_freq;
This should come from DT instead of platform data.
+};
This struct will die along with the below one as soon as my patches on OPP bindings V2 get merged.
struct cpufreq_dt_platform_data { /* * True when each CPU has its own clock to control its @@ -17,6 +23,7 @@ struct cpufreq_dt_platform_data { * clock. */ bool independent_clocks;
struct list_head domain_list;
Also update the comment to explain what these fields mean.
};
#endif /* __CPUFREQ_DT_H__ */
1.9.1
On 4 March 2015 at 15:45, Viresh Kumar viresh.kumar@linaro.org wrote:
+struct cpufreq_cpu_domain {
struct list_head node;
cpumask_t cpus;
unsigned long intermediate_freq;
This should come from DT instead of platform data.
Well, we are getting this fixed in the OPP bindings now, so for now do it from platform data as you have done. Once OPP-v2 is around, we will get rid of it.
Hi Viresh,
Thanks for reviewing. Please see my reply below:
On 4 March 2015 at 18:15, Viresh Kumar viresh.kumar@linaro.org wrote:
On 4 March 2015 at 14:19, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
In this patch, CPU clock/power domain information is added into the platform_data of cpufreq-dt so that the cpufreq-dt driver can check which CPUs share clock/power. Also, intermediate frequency support is added in this
You should have separate patches for logically separate changes.
Sure. Will do it.
version. Since the program flows of .target_index and .target_intermediate are quite similar, consolidate the flow into a new function to keep readability.
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
drivers/cpufreq/cpufreq-dt.c | 68 +++++++++++++++++++++++++++++++++++++++----- include/linux/cpufreq-dt.h | 7 +++++ 2 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index bab67db..5948bdf 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -34,25 +34,37 @@ struct private_data { struct regulator *cpu_reg; struct thermal_cooling_device *cdev; unsigned int voltage_tolerance; /* in percentage */
unsigned long intermediate_freq;
};
-static int set_target(struct cpufreq_policy *policy, unsigned int index) +static unsigned int get_intermediate(struct cpufreq_policy *policy,
unsigned int index)
+{
struct private_data *priv = policy->driver_data;
struct cpufreq_frequency_table *freq_table;
unsigned long freq = clk_get_rate(policy->clk);
This will return current freq, which can also be fetched with policy->cur.
Will fix it.
freq_table = cpufreq_frequency_get_table(policy->cpu);
instead, freq_table = policy->freq_table.
Will fix it.
Always add a comment over such decision making expressions, on why you chose to return 0.
Will fix it.
if (freq == priv->intermediate_freq ||
Looks fine, current freq == intermediate freq..
freq_table[index].frequency * 1000 == freq)
Absolutely wrong, current-freq == requested-freq. Instead it should be:
freq_table[index].frequency * 1000 == priv->intermediate_freq.
Thanks for correcting. Will fix it.
return 0;
return priv->intermediate_freq;
+}
+static int set_frequency(struct cpufreq_policy *policy, long freq_Hz) { struct dev_pm_opp *opp;
struct cpufreq_frequency_table *freq_table = policy->freq_table; struct clk *cpu_clk = policy->clk; struct private_data *priv = policy->driver_data; struct device *cpu_dev = priv->cpu_dev; struct regulator *cpu_reg = priv->cpu_reg; unsigned long volt = 0, volt_old = 0, tol = 0; unsigned int old_freq, new_freq;
long freq_Hz, freq_exact;
long freq_exact; int ret;
freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
if (freq_Hz <= 0)
freq_Hz = freq_table[index].frequency * 1000;
freq_exact = freq_Hz; new_freq = freq_Hz / 1000; old_freq = clk_get_rate(cpu_clk) / 1000;
@@ -112,6 +124,29 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) return ret; }
+static int target_intermediate(struct cpufreq_policy *policy,
unsigned int index)
+{
struct private_data *priv = policy->driver_data;
long freq_Hz;
freq_Hz = priv->intermediate_freq;
return set_frequency(policy, freq_Hz);
Instead, return set_frequency(policy, priv->intermediate_freq);
Will fix it.
+}
+static int set_target(struct cpufreq_policy *policy, unsigned int index) +{
struct cpufreq_frequency_table *freq_table = policy->freq_table;
struct clk *cpu_clk = policy->clk;
long freq_Hz;
freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
Use policy->clk here directly instead of another local variable.
Will fix it.
if (freq_Hz <= 0)
freq_Hz = freq_table[index].frequency * 1000;
Why shouldn't we call clk_round_rate() for intermediate freq as well ?
Yes. Will do it.
I think, it should be called for it as well.. And so you can save intermediate_freq_index instead of the freq..
Here is the case I wanted to talk to you about at HKG15: In the case of the Mediatek SoC, the intermediate frequency might not be an entry of the OPP table. To elaborate, the source clock node of the CPUs/cluster on the Mediatek SoC is a mux. The mux has several PLLs as parents. When we are doing CPU frequency scaling, the mux should re-parent to another stable PLL, wait until the original parent PLL becomes stable, and then switch back to the original parent. In this case, we could make the intermediate frequency part of the OPP table, but we might not want to. Therefore I save intermediate_freq instead of an intermediate frequency index in the cpufreq_dt_platform_data struct.
BTW, is the case where the intermediate frequency is not necessarily an entry of the OPP table supported in the OPPv2 bindings?
return set_frequency(policy, freq_Hz);
+}
static int allocate_resources(int cpu, struct device **cdev, struct regulator **creg, struct clk **cclk) { @@ -296,6 +331,23 @@ static int cpufreq_init(struct cpufreq_policy *policy) pd = cpufreq_get_driver_data(); if (!pd || !pd->independent_clocks) cpumask_setall(policy->cpus);
else if (pd && !list_empty(&pd->domain_list)) {
struct list_head *domain_node;
struct cpufreq_cpu_domain *domain;
list_for_each(domain_node, &pd->domain_list) {
domain = container_of(domain_node,
struct cpufreq_cpu_domain, node);
if (!cpumask_test_cpu(policy->cpu, &domain->cpus))
continue;
if (domain->intermediate_freq)
priv->intermediate_freq =
domain->intermediate_freq;
cpumask_copy(policy->cpus, &domain->cpus);
break;
}
}
Do this in a separate patch.
Will do it.
of_node_put(np);
@@ -363,6 +415,8 @@ static struct cpufreq_driver dt_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = set_target, .get = cpufreq_generic_get,
.get_intermediate = get_intermediate,
.target_intermediate = target_intermediate, .init = cpufreq_init, .exit = cpufreq_exit, .ready = cpufreq_ready,
diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h index 0414009..d6e2097 100644 --- a/include/linux/cpufreq-dt.h +++ b/include/linux/cpufreq-dt.h @@ -10,6 +10,12 @@ #ifndef __CPUFREQ_DT_H__ #define __CPUFREQ_DT_H__
+struct cpufreq_cpu_domain {
struct list_head node;
cpumask_t cpus;
unsigned long intermediate_freq;
This should come from DT instead of platform data.
+};
This struct will die along with the below one as soon as my patches on OPP bindings V2 get merged.
Sure. Will adapt the new way once it's merged.
struct cpufreq_dt_platform_data { /* * True when each CPU has its own clock to control its @@ -17,6 +23,7 @@ struct cpufreq_dt_platform_data { * clock. */ bool independent_clocks;
struct list_head domain_list;
Also update the comment to explain what these fields mean.
Will do it.
Thanks.
Best Regards, Pi-Cheng
};
#endif /* __CPUFREQ_DT_H__ */
1.9.1
On 5 March 2015 at 09:02, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
In the case of the Mediatek SoC, the intermediate frequency might not be an entry of the OPP table. To elaborate, the source clock node of the CPUs/cluster on the Mediatek SoC is a mux. The mux has several PLLs as parents. When we are doing CPU frequency scaling, the mux should re-parent to another stable PLL, wait until the original parent PLL becomes stable, and then switch back to the original parent. In this case, we could make the intermediate frequency part of the OPP table, but we might not want to. Therefore I save intermediate_freq instead of an intermediate frequency index in the cpufreq_dt_platform_data struct.
Hmm, I remember that discussion. Okay leave it as is.
BTW, is this case that intermediate frequency is not necessarily be one entry of OPP table supported in the OPPv2 bindings?
Not yet, but will add a property for that.
On 5 March 2015 at 11:58, Viresh Kumar viresh.kumar@linaro.org wrote:
On 5 March 2015 at 09:02, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
In the case of the Mediatek SoC, the intermediate frequency might not be an entry of the OPP table. To elaborate, the source clock node of the CPUs/cluster on the Mediatek SoC is a mux. The mux has several PLLs as parents. When we are doing CPU frequency scaling, the mux should re-parent to another stable PLL, wait until the original parent PLL becomes stable, and then switch back to the original parent. In this case, we could make the intermediate frequency part of the OPP table, but we might not want to. Therefore I save intermediate_freq instead of an intermediate frequency index in the cpufreq_dt_platform_data struct.
Hmm, I remember that discussion. Okay leave it as is.
Okay.
BTW, is this case that intermediate frequency is not necessarily be one entry of OPP table supported in the OPPv2 bindings?
Not yet, but will add a property for that.
Thanks for taking this case into consideration.
Best Regards, Pi-Cheng
Add dt-binding for mtk-cpufreq driver so that the driver could get SoC specific information (intermediate_clk for now) from device tree.
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org --- .../devicetree/bindings/cpufreq/cpufreq-mtk.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt new file mode 100644 index 0000000..53bf108 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt @@ -0,0 +1,17 @@ + +Mediatek cpufreq driver +------------------- + +Mediatek SoC cpufreq driver for CPU frequency scaling. + +Required properties: +- compatible: "mediatek,mtk-cpufreq" +- intermediate_clk: The intermediate clock source which CPUs will switch to +during CPU frequency transition. + +Examples: +-------- + mtk-cpufreq { + compatible = "mediatek,mtk-cpufreq"; + intermediate_clk = <&apmixedsys APMIXED_MAINPLL>; + };
On 4 March 2015 at 14:19, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
Add dt-binding for mtk-cpufreq driver so that the driver could get SoC specific information (intermediate_clk for now) from device tree.
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
.../devicetree/bindings/cpufreq/cpufreq-mtk.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt new file mode 100644 index 0000000..53bf108 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mtk.txt @@ -0,0 +1,17 @@
+Mediatek cpufreq driver +-------------------
+Mediatek SoC cpufreq driver for CPU frequency scaling.
+Required properties: +- compatible: "mediatek,mtk-cpufreq" +- intermediate_clk: The intermediate clock source which CPUs will switch to +during CPU frequency transition.
+Examples: +--------
mtk-cpufreq {
compatible = "mediatek,mtk-cpufreq";
intermediate_clk = <&apmixedsys APMIXED_MAINPLL>;
};
Such nodes aren't allowed. NACK.
In this patch, some SoC specific voltage scaling flow is implemented in the cpufreq notifier of mtk-cpufreq driver.
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org --- drivers/cpufreq/Kconfig.arm | 6 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/mtk-cpufreq.c | 346 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 353 insertions(+) create mode 100644 drivers/cpufreq/mtk-cpufreq.c
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 1b06fc4..f421653 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -263,3 +263,9 @@ config ARM_PXA2xx_CPUFREQ This add the CPUFreq driver support for Intel PXA2xx SOCs.
If in doubt, say N. + +config ARM_MTK_CPUFREQ + bool "Mediatek CPUFreq support" + depends on ARCH_MEDIATEK && CPUFREQ_DT && REGULATOR + help + This adds the CPUFreq driver support for Mediatek SoCs. diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 82a1821..05cb596 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o obj-$(CONFIG_ARM_INTEGRATOR) += integrator-cpufreq.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o +obj-$(CONFIG_ARM_MTK_CPUFREQ) += mtk-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o diff --git a/drivers/cpufreq/mtk-cpufreq.c b/drivers/cpufreq/mtk-cpufreq.c new file mode 100644 index 0000000..344d588 --- /dev/null +++ b/drivers/cpufreq/mtk-cpufreq.c @@ -0,0 +1,346 @@ +/* +* Copyright (c) 2015 Linaro Ltd. +* Author: Pi-Cheng Chen pi-cheng.chen@linaro.org +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+*/ + +#include <linux/clk.h> +#include <linux/cpu.h> +#include <linux/cpufreq.h> +#include <linux/cpufreq-dt.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm_opp.h> +#include <linux/regulator/consumer.h> +#include <linux/slab.h> + +#define VOLT_SHIFT_LOWER_LIMIT 100000 +#define VOLT_SHIFT_UPPER_LIMIT 200000 + +struct cpu_opp_table { + unsigned int freq; + int vproc; + int vsram; +}; + +static struct dvfs_info { + struct cpumask cpus; + struct cpu_opp_table *opp_tbl; + struct device *cpu_dev; + struct regulator *proc_reg; + struct regulator *sram_reg; +} *dvfs_info; + +static int cpu_opp_table_get_freq_index(unsigned int freq) +{ + struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl; + int i; + + for (i = 0; opp_tbl[i].freq != 0; i++) { + if (opp_tbl[i].freq >= freq) + return i; + } + + return -1; +} + +static int cpu_opp_table_get_volt_index(unsigned int volt) +{ + struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl; + int i; + + for (i = 0; opp_tbl[i].vproc != -1; i++) + if (opp_tbl[i].vproc >= volt) + return i; + + return -1; +} + +static int get_regulator_voltage_ceil(struct regulator *regulator, int voltage) +{ + int cnt, i, volt = -1; + + cnt = regulator_count_voltages(regulator); + + for (i = 0; i < cnt && volt < voltage; i++) + volt = regulator_list_voltage(regulator, i); + + return volt; +} + +static int mtk_cpufreq_voltage_trace(int old_index, int new_index) +{ + struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl; + int old_vproc, new_vproc, i, j; + + old_vproc = regulator_get_voltage(dvfs_info->proc_reg); + new_vproc = opp_tbl[new_index].vproc; + + if (old_vproc > new_vproc) { + for (i = old_index; i > new_index;) { + for (j = i; j >= new_index; j--) + if (opp_tbl[i].vsram - opp_tbl[j].vproc + > VOLT_SHIFT_UPPER_LIMIT) + break; + i = j + 1; + + regulator_set_voltage_tol(dvfs_info->proc_reg, + opp_tbl[i].vproc, 0); + regulator_set_voltage_tol(dvfs_info->sram_reg, + 
opp_tbl[i].vsram, 0); + } + } else if (old_vproc < new_vproc) { + for (i = old_index; i < new_index;) { + for (j = i; j <= new_index; j++) + if (opp_tbl[j].vsram - opp_tbl[i].vproc + > VOLT_SHIFT_UPPER_LIMIT) + break; + i = j - 1; + + regulator_set_voltage_tol(dvfs_info->sram_reg, + opp_tbl[i].vsram, 0); + regulator_set_voltage_tol(dvfs_info->proc_reg, + opp_tbl[i].vproc, 0); + } + } + + return 0; +} + +static int mtk_cpufreq_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct cpufreq_freqs *freqs = data; + struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl; + int old_vproc, new_vproc, old_index, new_index; + + if (!cpumask_test_cpu(freqs->cpu, &dvfs_info->cpus)) + return NOTIFY_DONE; + + old_vproc = regulator_get_voltage(dvfs_info->proc_reg); + old_index = cpu_opp_table_get_volt_index(old_vproc); + new_index = cpu_opp_table_get_freq_index(freqs->new * 1000); + new_vproc = opp_tbl[new_index].vproc; + + if (old_vproc == new_vproc) + return 0; + + if ((action == CPUFREQ_PRECHANGE && old_vproc < new_vproc) || + (action == CPUFREQ_POSTCHANGE && old_vproc > new_vproc)) + mtk_cpufreq_voltage_trace(old_index, new_index); + + return NOTIFY_OK; +} + +static struct notifier_block mtk_cpufreq_nb = { + .notifier_call = mtk_cpufreq_notify, +}; + +static int cpu_opp_table_init(struct device *dev) +{ + struct device *cpu_dev = dvfs_info->cpu_dev; + struct cpu_opp_table *opp_tbl; + struct dev_pm_opp *opp; + int ret, cnt, i; + unsigned long rate, vproc, vsram; + + ret = of_init_opp_table(cpu_dev); + if (ret) { + dev_err(dev, "Failed to init mtk_opp_table: %d\n", ret); + return ret; + } + + rcu_read_lock(); + + cnt = dev_pm_opp_get_opp_count(cpu_dev); + if (cnt < 0) { + dev_err(cpu_dev, "No OPP table is found: %d", cnt); + ret = cnt; + goto out_free_opp_tbl; + } + + opp_tbl = devm_kcalloc(dev, (cnt + 1), sizeof(struct cpu_opp_table), + GFP_ATOMIC); + if (!opp_tbl) { + ret = -ENOMEM; + goto out_free_opp_tbl; + } + + for (i = 0, rate = 0; i < cnt; i++, 
rate++) { + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + goto out_free_opp_tbl; + } + + vproc = dev_pm_opp_get_voltage(opp); + vproc = get_regulator_voltage_ceil(dvfs_info->proc_reg, vproc); + vsram = vproc + VOLT_SHIFT_LOWER_LIMIT; + vsram = get_regulator_voltage_ceil(dvfs_info->sram_reg, vsram); + + if (vproc < 0 || vsram < 0) { + ret = -EINVAL; + goto out_free_opp_tbl; + } + + opp_tbl[i].freq = rate; + opp_tbl[i].vproc = vproc; + opp_tbl[i].vsram = vsram; + } + + opp_tbl[i].freq = 0; + opp_tbl[i].vproc = -1; + opp_tbl[i].vsram = -1; + dvfs_info->opp_tbl = opp_tbl; + +out_free_opp_tbl: + rcu_read_unlock(); + of_free_opp_table(cpu_dev); + + return ret; +} + +static struct cpufreq_cpu_domain *get_cpu_domain(struct list_head *domain_list, + int cpu) +{ + struct list_head *node; + + list_for_each(node, domain_list) { + struct cpufreq_cpu_domain *domain; + + domain = container_of(node, struct cpufreq_cpu_domain, node); + if (cpumask_test_cpu(cpu, &domain->cpus)) + return domain; + } + + return NULL; +} + +static int mtk_cpufreq_probe(struct platform_device *pdev) +{ + struct clk *inter_clk; + struct cpufreq_dt_platform_data *pd; + struct platform_device *dev; + unsigned long inter_freq; + int cpu, ret; + + inter_clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(inter_clk)) { + if (PTR_ERR(inter_clk) == -EPROBE_DEFER) { + dev_warn(&pdev->dev, "clock not ready. 
defer probeing.\n"); + return -EPROBE_DEFER; + } + + dev_err(&pdev->dev, "Failed to get intermediate clock\n"); + return -ENODEV; + } + inter_freq = clk_get_rate(inter_clk); + + pd = devm_kzalloc(&pdev->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + dvfs_info = devm_kzalloc(&pdev->dev, sizeof(*dvfs_info), GFP_KERNEL); + if (!dvfs_info) + return -ENOMEM; + + pd->independent_clocks = 1, + INIT_LIST_HEAD(&pd->domain_list); + + for_each_possible_cpu(cpu) { + struct device *cpu_dev; + struct cpufreq_cpu_domain *new_domain; + struct regulator *proc_reg, *sram_reg; + + cpu_dev = get_cpu_device(cpu); + + if (!dvfs_info->cpu_dev) { + proc_reg = regulator_get_exclusive(cpu_dev, "proc"); + sram_reg = regulator_get_exclusive(cpu_dev, "sram"); + + if (PTR_ERR(proc_reg) == -EPROBE_DEFER || + PTR_ERR(sram_reg) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + if (!IS_ERR_OR_NULL(proc_reg) && + !IS_ERR_OR_NULL(sram_reg)) { + dvfs_info->cpu_dev = cpu_dev; + dvfs_info->proc_reg = proc_reg; + dvfs_info->sram_reg = sram_reg; + cpumask_copy(&dvfs_info->cpus, + &cpu_topology[cpu].core_sibling); + } + } + + if (get_cpu_domain(&pd->domain_list, cpu)) + continue; + + new_domain = devm_kzalloc(&pdev->dev, sizeof(*new_domain), + GFP_KERNEL); + if (!new_domain) + return -ENOMEM; + + cpumask_copy(&new_domain->cpus, + &cpu_topology[cpu].core_sibling); + new_domain->intermediate_freq = inter_freq; + list_add(&new_domain->node, &pd->domain_list); + } + + if (IS_ERR_OR_NULL(dvfs_info->proc_reg) || + IS_ERR_OR_NULL(dvfs_info->sram_reg)) { + dev_err(&pdev->dev, "Failed to get regulators\n"); + return -ENODEV; + } + + ret = cpu_opp_table_init(&pdev->dev); + if (ret) { + dev_err(&pdev->dev, "Failed to setup cpu_opp_table: %d\n", + ret); + return ret; + } + + ret = cpufreq_register_notifier(&mtk_cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); + if (ret) { + dev_err(&pdev->dev, "Failed to register cpufreq notifier\n"); + return ret; + } + + dev = platform_device_register_data(NULL, 
"cpufreq-dt", -1, pd, + sizeof(*pd)); + if (IS_ERR(dev)) { + dev_err(&pdev->dev, + "Failed to register cpufreq-dt platform device\n"); + return PTR_ERR(dev); + } + + return 0; +} + +static const struct of_device_id mtk_cpufreq_match[] = { + { + .compatible = "mediatek,mtk-cpufreq", + }, + {} +}; +MODULE_DEVICE_TABLE(of, mtk_cpufreq_match); + +static struct platform_driver mtk_cpufreq_platdrv = { + .driver = { + .name = "mtk-cpufreq", + .of_match_table = mtk_cpufreq_match, + }, + .probe = mtk_cpufreq_probe, +}; +module_platform_driver(mtk_cpufreq_platdrv); +
Haven't reviewed it completely yet, but this is all I have done.
On 4 March 2015 at 14:19, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
+static int mtk_cpufreq_notify(struct notifier_block *nb,
unsigned long action, void *data)
+{
struct cpufreq_freqs *freqs = data;
struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl;
There is only one dvfs info ? but there are two clusters, sorry got confused a bit..
int old_vproc, new_vproc, old_index, new_index;
if (!cpumask_test_cpu(freqs->cpu, &dvfs_info->cpus))
return NOTIFY_DONE;
old_vproc = regulator_get_voltage(dvfs_info->proc_reg);
old_index = cpu_opp_table_get_volt_index(old_vproc);
new_index = cpu_opp_table_get_freq_index(freqs->new * 1000);
new_vproc = opp_tbl[new_index].vproc;
if (old_vproc == new_vproc)
return 0;
if ((action == CPUFREQ_PRECHANGE && old_vproc < new_vproc) ||
(action == CPUFREQ_POSTCHANGE && old_vproc > new_vproc))
mtk_cpufreq_voltage_trace(old_index, new_index);
return NOTIFY_OK;
+}
+static struct notifier_block mtk_cpufreq_nb = {
.notifier_call = mtk_cpufreq_notify,
+};
+static int cpu_opp_table_init(struct device *dev) +{
struct device *cpu_dev = dvfs_info->cpu_dev;
struct cpu_opp_table *opp_tbl;
struct dev_pm_opp *opp;
int ret, cnt, i;
unsigned long rate, vproc, vsram;
ret = of_init_opp_table(cpu_dev);
if (ret) {
dev_err(dev, "Failed to init mtk_opp_table: %d\n", ret);
return ret;
}
rcu_read_lock();
cnt = dev_pm_opp_get_opp_count(cpu_dev);
if (cnt < 0) {
dev_err(cpu_dev, "No OPP table is found: %d", cnt);
ret = cnt;
goto out_free_opp_tbl;
}
opp_tbl = devm_kcalloc(dev, (cnt + 1), sizeof(struct cpu_opp_table),
GFP_ATOMIC);
if (!opp_tbl) {
ret = -ENOMEM;
goto out_free_opp_tbl;
}
for (i = 0, rate = 0; i < cnt; i++, rate++) {
opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
if (IS_ERR(opp)) {
ret = PTR_ERR(opp);
goto out_free_opp_tbl;
}
vproc = dev_pm_opp_get_voltage(opp);
vproc = get_regulator_voltage_ceil(dvfs_info->proc_reg, vproc);
vsram = vproc + VOLT_SHIFT_LOWER_LIMIT;
vsram = get_regulator_voltage_ceil(dvfs_info->sram_reg, vsram);
if (vproc < 0 || vsram < 0) {
ret = -EINVAL;
goto out_free_opp_tbl;
}
opp_tbl[i].freq = rate;
opp_tbl[i].vproc = vproc;
opp_tbl[i].vsram = vsram;
}
opp_tbl[i].freq = 0;
opp_tbl[i].vproc = -1;
opp_tbl[i].vsram = -1;
dvfs_info->opp_tbl = opp_tbl;
+out_free_opp_tbl:
rcu_read_unlock();
of_free_opp_table(cpu_dev);
return ret;
+}
+static struct cpufreq_cpu_domain *get_cpu_domain(struct list_head *domain_list,
int cpu)
+{
struct list_head *node;
list_for_each(node, domain_list) {
struct cpufreq_cpu_domain *domain;
domain = container_of(node, struct cpufreq_cpu_domain, node);
if (cpumask_test_cpu(cpu, &domain->cpus))
return domain;
}
return NULL;
+}
+static int mtk_cpufreq_probe(struct platform_device *pdev)
On a dual cluster big LITTLE (your system), how many times is probe getting called ? Once or twice, i.e. for each cluster ??
+{
struct clk *inter_clk;
struct cpufreq_dt_platform_data *pd;
struct platform_device *dev;
unsigned long inter_freq;
int cpu, ret;
inter_clk = clk_get(&pdev->dev, NULL);
How is this supposed to work ? How will pdev->dev give intermediate clock ?
if (IS_ERR(inter_clk)) {
if (PTR_ERR(inter_clk) == -EPROBE_DEFER) {
dev_warn(&pdev->dev, "clock not ready. defer probeing.\n");
return -EPROBE_DEFER;
}
dev_err(&pdev->dev, "Failed to get intermediate clock\n");
return -ENODEV;
}
inter_freq = clk_get_rate(inter_clk);
pd = devm_kzalloc(&pdev->dev, sizeof(*pd), GFP_KERNEL);
if (!pd)
return -ENOMEM;
dvfs_info = devm_kzalloc(&pdev->dev, sizeof(*dvfs_info), GFP_KERNEL);
if (!dvfs_info)
return -ENOMEM;
Instead of two allocations, you could have made pd part of dvfs_info and allocated only once.
pd->independent_clocks = 1,
s/,/; ??
INIT_LIST_HEAD(&pd->domain_list);
for_each_possible_cpu(cpu) {
struct device *cpu_dev;
struct cpufreq_cpu_domain *new_domain;
struct regulator *proc_reg, *sram_reg;
cpu_dev = get_cpu_device(cpu);
This should be done in the below if block only.
if (!dvfs_info->cpu_dev) {
proc_reg = regulator_get_exclusive(cpu_dev, "proc");
sram_reg = regulator_get_exclusive(cpu_dev, "sram");
if (PTR_ERR(proc_reg) == -EPROBE_DEFER ||
PTR_ERR(sram_reg) == -EPROBE_DEFER)
return -EPROBE_DEFER;
if (!IS_ERR_OR_NULL(proc_reg) &&
!IS_ERR_OR_NULL(sram_reg)) {
dvfs_info->cpu_dev = cpu_dev;
dvfs_info->proc_reg = proc_reg;
dvfs_info->sram_reg = sram_reg;
cpumask_copy(&dvfs_info->cpus,
&cpu_topology[cpu].core_sibling);
}
}
if (get_cpu_domain(&pd->domain_list, cpu))
continue;
This isn't required if you do below..
new_domain = devm_kzalloc(&pdev->dev, sizeof(*new_domain),
GFP_KERNEL);
if (!new_domain)
return -ENOMEM;
cpumask_copy(&new_domain->cpus,
&cpu_topology[cpu].core_sibling);
new_domain->intermediate_freq = inter_freq;
list_add(&new_domain->node, &pd->domain_list);
Just issue a 'break' from here as you don't want to let this loop run again.
}
if (IS_ERR_OR_NULL(dvfs_info->proc_reg) ||
IS_ERR_OR_NULL(dvfs_info->sram_reg)) {
dev_err(&pdev->dev, "Failed to get regulators\n");
return -ENODEV;
}
If you really need these, then don't allocate new_domain unless you find a CPU with these regulators..
ret = cpu_opp_table_init(&pdev->dev);
if (ret) {
dev_err(&pdev->dev, "Failed to setup cpu_opp_table: %d\n",
ret);
return ret;
}
ret = cpufreq_register_notifier(&mtk_cpufreq_nb,
CPUFREQ_TRANSITION_NOTIFIER);
if (ret) {
dev_err(&pdev->dev, "Failed to register cpufreq notifier\n");
return ret;
}
Don't want to free OPP table here on error ?
dev = platform_device_register_data(NULL, "cpufreq-dt", -1, pd,
sizeof(*pd));
So this routine is going to be called only once. Then how are you initializing stuff for both the clusters in the upper for loop ? It looked very very confusing.
if (IS_ERR(dev)) {
dev_err(&pdev->dev,
"Failed to register cpufreq-dt platform device\n");
return PTR_ERR(dev);
}
return 0;
+}
+static const struct of_device_id mtk_cpufreq_match[] = {
{
.compatible = "mediatek,mtk-cpufreq",
Can't you use "mediatek,mt8173" here ?
},
{}
+}; +MODULE_DEVICE_TABLE(of, mtk_cpufreq_match);
+static struct platform_driver mtk_cpufreq_platdrv = {
.driver = {
.name = "mtk-cpufreq",
.of_match_table = mtk_cpufreq_match,
},
.probe = mtk_cpufreq_probe,
+}; +module_platform_driver(mtk_cpufreq_platdrv);
Hi Viresh,
Thanks for reviewing. Please see my reply below:
On 4 March 2015 at 19:09, Viresh Kumar viresh.kumar@linaro.org wrote:
Haven't reviewed it completely yet, but this is all I have done.
On 4 March 2015 at 14:19, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
+static int mtk_cpufreq_notify(struct notifier_block *nb,
unsigned long action, void *data)
+{
struct cpufreq_freqs *freqs = data;
struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl;
There is only one dvfs info ? but there are two clusters, sorry got confused a bit..
There are 2 clusters, but only the big cluster need to do voltage scaling in the notifier, since the voltage controlling is done by cpufreq-dt driver in this version. Therefore only one dvfs_info struct here.
int old_vproc, new_vproc, old_index, new_index;
if (!cpumask_test_cpu(freqs->cpu, &dvfs_info->cpus))
return NOTIFY_DONE;
old_vproc = regulator_get_voltage(dvfs_info->proc_reg);
old_index = cpu_opp_table_get_volt_index(old_vproc);
new_index = cpu_opp_table_get_freq_index(freqs->new * 1000);
new_vproc = opp_tbl[new_index].vproc;
if (old_vproc == new_vproc)
return 0;
if ((action == CPUFREQ_PRECHANGE && old_vproc < new_vproc) ||
(action == CPUFREQ_POSTCHANGE && old_vproc > new_vproc))
mtk_cpufreq_voltage_trace(old_index, new_index);
return NOTIFY_OK;
+}
+static struct notifier_block mtk_cpufreq_nb = {
.notifier_call = mtk_cpufreq_notify,
+};
+static int cpu_opp_table_init(struct device *dev) +{
struct device *cpu_dev = dvfs_info->cpu_dev;
struct cpu_opp_table *opp_tbl;
struct dev_pm_opp *opp;
int ret, cnt, i;
unsigned long rate, vproc, vsram;
ret = of_init_opp_table(cpu_dev);
if (ret) {
dev_err(dev, "Failed to init mtk_opp_table: %d\n", ret);
return ret;
}
rcu_read_lock();
cnt = dev_pm_opp_get_opp_count(cpu_dev);
if (cnt < 0) {
dev_err(cpu_dev, "No OPP table is found: %d", cnt);
ret = cnt;
goto out_free_opp_tbl;
}
opp_tbl = devm_kcalloc(dev, (cnt + 1), sizeof(struct cpu_opp_table),
GFP_ATOMIC);
if (!opp_tbl) {
ret = -ENOMEM;
goto out_free_opp_tbl;
}
for (i = 0, rate = 0; i < cnt; i++, rate++) {
opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
if (IS_ERR(opp)) {
ret = PTR_ERR(opp);
goto out_free_opp_tbl;
}
vproc = dev_pm_opp_get_voltage(opp);
vproc = get_regulator_voltage_ceil(dvfs_info->proc_reg, vproc);
vsram = vproc + VOLT_SHIFT_LOWER_LIMIT;
vsram = get_regulator_voltage_ceil(dvfs_info->sram_reg, vsram);
if (vproc < 0 || vsram < 0) {
ret = -EINVAL;
goto out_free_opp_tbl;
}
opp_tbl[i].freq = rate;
opp_tbl[i].vproc = vproc;
opp_tbl[i].vsram = vsram;
}
opp_tbl[i].freq = 0;
opp_tbl[i].vproc = -1;
opp_tbl[i].vsram = -1;
dvfs_info->opp_tbl = opp_tbl;
+out_free_opp_tbl:
rcu_read_unlock();
of_free_opp_table(cpu_dev);
return ret;
+}
+static struct cpufreq_cpu_domain *get_cpu_domain(struct list_head *domain_list,
int cpu)
+{
struct list_head *node;
list_for_each(node, domain_list) {
struct cpufreq_cpu_domain *domain;
domain = container_of(node, struct cpufreq_cpu_domain, node);
if (cpumask_test_cpu(cpu, &domain->cpus))
return domain;
}
return NULL;
+}
+static int mtk_cpufreq_probe(struct platform_device *pdev)
On a dual cluster big LITTLE (your system), how many times is probe getting called ? Once or twice, i.e. for each cluster ??
The probe function will be called only once since it's triggered by the device tree node in the 2nd patch of this series. Though it's not acceptable obviously.
+{
struct clk *inter_clk;
struct cpufreq_dt_platform_data *pd;
struct platform_device *dev;
unsigned long inter_freq;
int cpu, ret;
inter_clk = clk_get(&pdev->dev, NULL);
How is this supposed to work ? How will pdev->dev give intermediate clock ?
It works with the device tree binding in the 2nd patch of this series, too. Since the cpufreq node is not allowed, would you have some suggestions on how to get the intermediate clock source in this case?
if (IS_ERR(inter_clk)) {
if (PTR_ERR(inter_clk) == -EPROBE_DEFER) {
dev_warn(&pdev->dev, "clock not ready. defer probeing.\n");
return -EPROBE_DEFER;
}
dev_err(&pdev->dev, "Failed to get intermediate clock\n");
return -ENODEV;
}
inter_freq = clk_get_rate(inter_clk);
pd = devm_kzalloc(&pdev->dev, sizeof(*pd), GFP_KERNEL);
if (!pd)
return -ENOMEM;
dvfs_info = devm_kzalloc(&pdev->dev, sizeof(*dvfs_info), GFP_KERNEL);
if (!dvfs_info)
return -ENOMEM;
Instead of two allocations, you could have made pd part of dvfs_info and allocated only once.
Will do it.
pd->independent_clocks = 1,
s/,/; ??
It's strange that I didn't get a compiling error here. Will fix it.
INIT_LIST_HEAD(&pd->domain_list);
for_each_possible_cpu(cpu) {
struct device *cpu_dev;
struct cpufreq_cpu_domain *new_domain;
struct regulator *proc_reg, *sram_reg;
cpu_dev = get_cpu_device(cpu);
This should be done in the below if block only.
Will do it.
if (!dvfs_info->cpu_dev) {
proc_reg = regulator_get_exclusive(cpu_dev, "proc");
sram_reg = regulator_get_exclusive(cpu_dev, "sram");
if (PTR_ERR(proc_reg) == -EPROBE_DEFER ||
PTR_ERR(sram_reg) == -EPROBE_DEFER)
return -EPROBE_DEFER;
if (!IS_ERR_OR_NULL(proc_reg) &&
!IS_ERR_OR_NULL(sram_reg)) {
dvfs_info->cpu_dev = cpu_dev;
dvfs_info->proc_reg = proc_reg;
dvfs_info->sram_reg = sram_reg;
cpumask_copy(&dvfs_info->cpus,
&cpu_topology[cpu].core_sibling);
}
}
if (get_cpu_domain(&pd->domain_list, cpu))
continue;
This isn't required if you do below..
Please see below.
new_domain = devm_kzalloc(&pdev->dev, sizeof(*new_domain),
GFP_KERNEL);
if (!new_domain)
return -ENOMEM;
cpumask_copy(&new_domain->cpus,
&cpu_topology[cpu].core_sibling);
new_domain->intermediate_freq = inter_freq;
list_add(&new_domain->node, &pd->domain_list);
Just issue a 'break' from here as you don't want to let this loop run again.
Please see below.
}
if (IS_ERR_OR_NULL(dvfs_info->proc_reg) ||
IS_ERR_OR_NULL(dvfs_info->sram_reg)) {
dev_err(&pdev->dev, "Failed to get regulators\n");
return -ENODEV;
}
If you really need these, then don't allocate new_domain unless you find a CPU with these regulators..
Please see below.
ret = cpu_opp_table_init(&pdev->dev);
if (ret) {
dev_err(&pdev->dev, "Failed to setup cpu_opp_table: %d\n",
ret);
return ret;
}
ret = cpufreq_register_notifier(&mtk_cpufreq_nb,
CPUFREQ_TRANSITION_NOTIFIER);
if (ret) {
dev_err(&pdev->dev, "Failed to register cpufreq notifier\n");
return ret;
}
Don't want to free OPP table here on error ?
Please correct me if I was wrong. Since the OPP table in the dvfs_info is allocated by devm_kzalloc(), it is supposed to be freed if the probe function failed, isn't it?
And the OPP table initialized by of_init_opp_table() in cpu_opp_table_init() was freed right before the function return since it will be initialized again in the cpufreq-dt driver.
dev = platform_device_register_data(NULL, "cpufreq-dt", -1, pd,
sizeof(*pd));
So this routine is going to be called only once. Then how are you initializing stuff for both the clusters in the upper for loop ? It looked very very confusing.
Please let me clarify this here. We have two clusters, one for big and another for little cores. For the little cores' cluster, only one voltage source needs to be controlled when doing CPU DVFS. Therefore the voltage scaling of little cores' cluster is done in the cpufreq-dt. But for the big cores' cluster, there are two voltage sources here to be controlled and these two voltage sources need to be scaled up and down in an SoC-specific manner which is implemented in the mtk_cpufreq_voltage_trace() function. Hence, we put the voltage scaling of big cores' cluster in the cpufreq notifier and that's also why we need a mtk-cpufreq driver in addition to cpufreq-dt.
In the confusing loop above, I am trying to solve two problems: 1. to find out which CPUs share the same clock / power domains among all CPUs 2. to initialize the dvfs_info which is only needed by big cores' cluster
I think that's why the loop looks so confusing. Maybe doing it in two separate loops will make the code more readable? I'll try it in next version.
if (IS_ERR(dev)) {
dev_err(&pdev->dev,
"Failed to register cpufreq-dt platform device\n");
return PTR_ERR(dev);
}
return 0;
+}
+static const struct of_device_id mtk_cpufreq_match[] = {
{
.compatible = "mediatek,mtk-cpufreq",
Can't you use "mediatek,mt8173" here ?
Again, the device tree binding in the 2nd patch of this series. I am trying to get the intermediate clock source from the device tree. The reason why I am doing this is the intermediate clock source might be different among different Mediatek SoCs. Either different clock ID or different intermediate frequency. I want to keep the flexibility of the driver so I am trying to specify the intermediate clock source in the device tree. I think I need to find out some other way to do it since it's not allowed to do it by creating a "cpufreq node" in device tree.
Thanks again for reviewing.
Best Regards, Pi-Cheng
},
{}
+}; +MODULE_DEVICE_TABLE(of, mtk_cpufreq_match);
+static struct platform_driver mtk_cpufreq_platdrv = {
.driver = {
.name = "mtk-cpufreq",
.of_match_table = mtk_cpufreq_match,
},
.probe = mtk_cpufreq_probe,
+}; +module_platform_driver(mtk_cpufreq_platdrv);
On 5 March 2015 at 12:57, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
On 4 March 2015 at 19:09, Viresh Kumar viresh.kumar@linaro.org wrote: There are 2 clusters, but only the big cluster need to do voltage scaling in the notifier, since the voltage controlling is done by cpufreq-dt driver in this version. Therefore only one dvfs_info struct here.
Do you really think it's readable enough that way? You should have added some comments on how this is working. Also, what about putting this stuff in your regulator driver, so that you don't really have to do this in PRE/POST notifiers.
inter_clk = clk_get(&pdev->dev, NULL);
How is this supposed to work ? How will pdev->dev give intermediate clock ?
It works with the the device tree binding in the 2nd patch of this series, too. Since the cpufreq node is not allowed, would you have some suggestions on how to get the intermediate clock source in this case?
How exactly? I am not doubting your work, just that I don't know how that DT binding will reflect here with clock_get for pdev->dev..
pd->independent_clocks = 1,
s/,/; ??
It's strange that I didn't get a compiling error here. Will fix it.
It's a perfectly valid statement :) and so no errors. Both will execute as they will in case of ';', just that output of the latter one will be returned. But there is no variable on LHS (left-hand-side) and so the value doesn't matter.
Don't want to free OPP table here on error ?
Please correct me if I was wrong. Since the OPP table in the dvfs_info is allocated by devm_kzalloc(), it is supposed to be freed if the probe function failed, isn't it?
And the OPP table initialized by of_init_opp_table() in cpu_opp_table_init() was freed right before the function return since it will be initialized again in the cpufreq-dt driver.
Okay, I was talking about this only and I missed it. We probably need to fix this in OPP library so that multiple callers are allowed.
dev = platform_device_register_data(NULL, "cpufreq-dt", -1, pd,
sizeof(*pd));
So this routine is going to be called only once. Then how are you initializing stuff for both the clusters in the upper for loop ? It looked very very confusing.
Please let me clarify this here. We have two clusters, one for big and another for little cores. For the little cores' cluster, only one voltage source needs to be controlled when doing CPU DVFS. Therefore the voltage scaling of little cores' cluster is done in the cpufreq-dt. But for the big cores' cluster, there are two voltage sources here to be controlled and these two voltage source need to be scaled up and down in a SoC specific manner which is implemented in the mtk_cpufreq_voltage_trace() function. Hence, we put the voltage scaling of big cores' cluster in the cpufreq notifier and that's also why we need a mtk-cpufreq driver in addition to cpufreq-dt.
In the confusing loop above, I am trying to solve two problems:
- to find out which CPUs shares the same clock / power domains among all CPUs
- to initialize the dvfs_info which is only needed by big cores' cluster
I think that's why the loop looks so confusing. Maybe doing it in two separate loops will make the code more readable? I'll try it in next version.
Yes.
+cc Sascha
On 5 March 2015 at 17:55, Viresh Kumar viresh.kumar@linaro.org wrote:
On 5 March 2015 at 12:57, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
On 4 March 2015 at 19:09, Viresh Kumar viresh.kumar@linaro.org wrote: There are 2 clusters, but only the big cluster need to do voltage scaling in the notifier, since the voltage controlling is done by cpufreq-dt driver in this version. Therefore only one dvfs_info struct here.
Do you really think its readable enough that way? You must have added some comments on how this is working. Also, what about putting this stuff in your regulator driver, so that you don't really have to do this in PRE/POST notifiers.
Okay. I will add comments to describe some details about this. About putting those stuff into regulator driver, I think you mean creating a "virtual regulator device" and put all the voltage controlling complex into the driver, right? Maybe it's a good idea in this case, but I am not sure if this kind of virtual regulator is acceptable. And the flexibility might be an issue, since we might use different PMIC for same SoC on different board.
inter_clk = clk_get(&pdev->dev, NULL);
How is this supposed to work ? How will pdev->dev give intermediate clock ?
It works with the the device tree binding in the 2nd patch of this series, too. Since the cpufreq node is not allowed, would you have some suggestions on how to get the intermediate clock source in this case?
How exactly? I am not doubting your work, just that I don't know how that DT binding will reflect here with clock_get for pdev->dev..
Please correct me if I was wrong. IIUC, It does: clk_get() -> __of_clk_get_by_name() -> __of_clk_get() The "mtk-cpufreq" device tree node specified the intermediate clock source in "clocks" property. And the pdev here came from the "mtk-cpufreq" device tree node, so we can get the "clock specifier" by calling of_parse_phandle_with_args() to find "clocks" property in __of_clk_get().
pd->independent_clocks = 1,
s/,/; ??
It's strange that I didn't get a compiling error here. Will fix it.
Its a perfectly valid statement :) and so no errors. Both will execute as they will in case of ';', just that output of the later one will be returned. But there in no variable on LHS (left-hand-side) and so the value doesn't matter.
Thanks for your explanation. :)
Don't want to free OPP table here on error ?
Please correct me if I was wrong. Since the OPP table in the dvfs_info is allocated by devm_kzalloc(), it is supposed to be freed if the probe function failed, isn't it?
And the OPP table initialized by of_init_opp_table() in cpu_opp_table_init() was freed right before the function return since it will be initialized again in the cpufreq-dt driver.
Okay, I was talking about this only and I missed it. We probably need to fix this in OPP library so that multiple callers are allowed.
dev = platform_device_register_data(NULL, "cpufreq-dt", -1, pd,
sizeof(*pd));
So this routine is going to be called only once. Then how are you initializing stuff for both the clusters in the upper for loop ? It looked very very confusing.
Please let me clarify this here. We have two clusters, one for big and another for little cores. For the little cores' cluster, only one voltage source needs to be controlled when doing CPU DVFS. Therefore the voltage scaling of little cores' cluster is done in the cpufreq-dt. But for the big cores' cluster, there are two voltage sources here to be controlled and these two voltage source need to be scaled up and down in a SoC specific manner which is implemented in the mtk_cpufreq_voltage_trace() function. Hence, we put the voltage scaling of big cores' cluster in the cpufreq notifier and that's also why we need a mtk-cpufreq driver in addition to cpufreq-dt.
In the confusing loop above, I am trying to solve two problems:
- to find out which CPUs shares the same clock / power domains among all CPUs
- to initialize the dvfs_info which is only needed by big cores' cluster
I think that's why the loop looks so confusing. Maybe doing it in two separate loops will make the code more readable? I'll try it in next version.
Yes.
Combining comments and suggestions from you and Sascha[1], I conclude some architectural changes are going to be made in the next version:
1. Use set_rate hook instead of determine_rate in clk driver, and switch to intermediate PLL parent and back to original CPU PLL parent explicitly in set_rate 2. Therefore we don't need intermediate frequency support in cpufreq-dt to implement cpufreq support for Mediatek SoC 3. Use clk notifier to handle voltage controlling corresponding to intermediate clock rate 4. Due to 3. we need to move all voltage controlling part back into the notifier in mtk-cpufreq (Voltage controlling for little cores' cluster is handled in cpufreq-dt in this version.)
And I have some other questions: 1. According to the discussion[1], should we keep on working on the intermediate frequency support in cpufreq-dt? 2. Will the code be simpler to have a Mediatek cpufreq driver to handle all CPU DVFS complexity instead of cpufreq-dt in the situation that all voltage scaling things need to be done in the clk / cpufreq notifier of mtk-cpufreq driver?
[1] http://marc.info/?l=linux-kernel&m=142546618015551&w=2
Best Regards, Pi-Cheng
On 6 March 2015 at 11:19, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
On 5 March 2015 at 17:55, Viresh Kumar viresh.kumar@linaro.org wrote:
About putting those stuff into regulator driver, I think you mean creating a "virtual regulator device" and put all the voltage controlling complex into the driver, right? Maybe it's a good idea in this case, but I am sure if this kind of virtual regulator is acceptable.
@Mark: Is this allowed to create virtual regulator for a CPU ?
And the flexibility might be an issue, since we might use different PMIC for same SoC on different board.
We can talk about that separately once Mark replies to my query.
Combining comments and suggestions from you and Sascha[1], I conclude some architectural changes are going to be made in the next version:
- Use set_rate hook instead of determine_rate in clk driver, and
switch to intermediate PLL parent and back to original CPU PLL parent explicitly in set_rate
Lets wait for Russell's answer to the query I posted before making any progress here.
On Tue, Mar 10, 2015 at 08:20:43AM +0530, Viresh Kumar wrote:
Please don't send upstream e-mail to my work account, I use this address pretty consistently for upstream. Upstream mail to my work account frequently ends up unread.
On 6 March 2015 at 11:19, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
On 5 March 2015 at 17:55, Viresh Kumar viresh.kumar@linaro.org wrote:
About putting those stuff into regulator driver, I think you mean creating a "virtual regulator device" and put all the voltage controlling complex into the driver, right? Maybe it's a good idea in this case, but I am sure if this kind of virtual regulator is acceptable.
@Mark: Is this allowed to create virtual regulator for a CPU ?
I don't really know what the above means or what problem it's supposed to solve.
On 11 March 2015 at 16:23, Mark Brown broonie@kernel.org wrote:
On Tue, Mar 10, 2015 at 08:20:43AM +0530, Viresh Kumar wrote:
Please don't send upstream e-mail to my work account, I use this address pretty consistently for upstream. Upstream mail to my work account frequently ends up unread.
Sorry about that, I did exactly opposite of this earlier :(
On 6 March 2015 at 11:19, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
On 5 March 2015 at 17:55, Viresh Kumar viresh.kumar@linaro.org wrote:
About putting those stuff into regulator driver, I think you mean creating a "virtual regulator device" and put all the voltage controlling complex into the driver, right? Maybe it's a good idea in this case, but I am sure if this kind of virtual regulator is acceptable.
@Mark: Is this allowed to create virtual regulator for a CPU ?
I don't really know what the above means or what problem it's supposed to solve.
On mediatek platform, they need to configure two regulators in order to change DVFS state of the big cluster. The generic cpufreq-dt driver and earlier OPP bindings have support for a single regulator only and so what Pi-cheng tried to do is, - Configure one of the regulators using cpufreq-dt - And other one using cpufreq frequency change notifiers
This looks awkward..
What I suggested was to create another virtual regulator for CPU which will eventually configure both the regulators. And so the question that such virtual regulators are allowed or not.
Am Mittwoch, den 11.03.2015, 16:33 +0530 schrieb Viresh Kumar:
On 11 March 2015 at 16:23, Mark Brown broonie@kernel.org wrote:
On Tue, Mar 10, 2015 at 08:20:43AM +0530, Viresh Kumar wrote:
Please don't send upstream e-mail to my work account, I use this address pretty consistently for upstream. Upstream mail to my work account frequently ends up unread.
Sorry about that, I did exactly opposite of this earlier :(
On 6 March 2015 at 11:19, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
On 5 March 2015 at 17:55, Viresh Kumar viresh.kumar@linaro.org wrote:
About putting those stuff into regulator driver, I think you mean creating a "virtual regulator device" and put all the voltage controlling complex into the driver, right? Maybe it's a good idea in this case, but I am sure if this kind of virtual regulator is acceptable.
@Mark: Is this allowed to create virtual regulator for a CPU ?
I don't really know what the above means or what problem it's supposed to solve.
On mediatek platform, they need to configure two regulators in order to change DVFS state of the big cluster. The generic cpufreq-dt driver and earlier OPP bindings have support for a single regulator only and so what Pi-cheng tried to do is,
- Configure one of the regulators using cpufreq-dt
- And other one using cpufreq frequency change notifiers
This looks awkward..
What I suggested was to create another virtual regulator for CPU which will eventually configure both the regulators. And so the question that such virtual regulators are allowed or not.
Instead of creating virtual regulators I would be strongly in favor of reviving the voltage-domain work. That would allow us to push all those voltage dependencies we have seen on various SoCs into the domain handling code and don't care about it in the drivers.
In that case cpufreq-dt wouldn't control a regulator directly, but request a specific voltage from the domain the CPUs are located in and those in turn would control the regulators supplying them.
Regards, Lucas
On 11 March 2015 at 17:12, Lucas Stach l.stach@pengutronix.de wrote:
Instead of creating virtual regulators I would be strongly in favor of reviving the voltage-domain work. That would allow us to push all those voltage dependencies we have seen on various SoCs into the domain handling code and don't care about it in the drivers.
In that case cpufreq-dt wouldn't control a regulator directly, but request a specific voltage from the domain the CPUs are located in and those in turn would control the regulators supplying them.
I agree that it would be the right approach but who is going to do that stuff ?
I think until the time we revive the voltage-domain stuff we need to support mediatek's driver. And probably a virtual regulator is the best approach unless someone else comes up with another idea.
On Wed, Mar 11, 2015 at 05:16:42PM +0530, Viresh Kumar wrote:
I think until the time we revive the voltage-domain stuff we need to support mediatek's driver. And probably a virtual regulator is the best approach unless someone else comes up with another idea.
Why not just write a custom cpufreq driver if it's too hard to abstract?
On Wed, Mar 11, 2015 at 04:33:49PM +0530, Viresh Kumar wrote:
On mediatek platform, they need to configure two regulators in order to change DVFS state of the big cluster. The generic cpufreq-dt driver and earlier OPP bindings have support for a single regulator only and so what Pi-cheng tried to do is,
- Configure one of the regulators using cpufreq-dt
- And other one using cpufreq frequency change notifiers
This looks awkward..
What I suggested was to create another virtual regulator for CPU which will eventually configure both the regulators. And so the question that such virtual regulators are allowed or not.
Ugh, no - that's a hideous bodge which is only going to create trouble later. Remember, DT is an ABI and should describe the hardware so if we're doing bodges that are visible there to shoehorn things onto our implementation that's bad. The concerns that Pi-Cheng had about what happens if the PMIC gets changed definitely seem relevant here too.
On 11 March 2015 at 18:15, Mark Brown broonie@kernel.org wrote:
Ugh, no - that's a hideous bodge which is only going to create trouble later. Remember, DT is an ABI and should describe the hardware so if we're doing bodges that are visible there to shoehorn things onto our implementation that's bad. The concerns that Pi-Cheng had about what happens if the PMIC gets changed definitely seem relevant here too.
Hmm..
Why not just write a custom cpufreq driver if it's too hard to abstract?
Hmm, probably all that can be solved with the new OPP bindings where we can have support for multiple regulator or clock sources to the CPU.
@Pi-cheng: How are you going to pass voltages for both the regulators as OPPs today only support a single regulator, in case you have to write your own driver.
On Thu, Mar 12, 2015 at 5:28 PM, Viresh Kumar viresh.kumar@linaro.org wrote:
On 11 March 2015 at 18:15, Mark Brown broonie@kernel.org wrote:
Ugh, no - that's a hideous bodge which is only going to create trouble later. Remember, DT is an ABI and should describe the hardware so if we're doing bodges that are visible there to shoehorn things onto our implementation that's bad. The concerns that Pi-Cheng had about what happens if the PMIC gets changed definitely seem relevant here too.
Hmm..
Why not just write a custom cpufreq driver if it's too hard to abstract?
Hmm, probably all that can be solved with the new OPP bindings where we can have support for multiple regulator or clock sources to the CPU.
@Pi-cheng: In case you have to write your own driver, how are you going to pass voltages for both the regulators, given that OPPs today only support a single regulator?
The voltages of the two regulators must always satisfy the constraint: 100mV < Vsram - Vproc < 200mV. For now, I just calculate the OPPs of Vsram from the OPPs of Vproc.
Another thing I should mention: if the voltage difference between two adjacent OPPs is greater than 100mV, we need to set the regulator to some voltages which are not in the OPP table, considering the limitation above. I think that will make it more difficult to model such a flow in a generic framework.
Best Regards, Pi-Cheng
Linux-mediatek mailing list Linux-mediatek@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-mediatek
On 12 March 2015 at 16:45, Pi-Cheng Chen pi-cheng.chen@linaro.org wrote:
The voltages of the two regulators must always satisfy the constraint: 100mV < Vsram - Vproc < 200mV. For now, I just calculate the OPPs of Vsram from the OPPs of Vproc.
Another thing I should mention: if the voltage difference between two adjacent OPPs is greater than 100mV, we need to set the regulator to some voltages which are not in the OPP table, considering the limitation above. I think that will make it more difficult to model such a flow in a generic framework.
Actually the problem is that OPP tables are insufficient for such cases, and we are adding work-arounds to fix that. Though it will get fixed with the new bindings we are adding.
I am confused about what we should be doing here. Even if you write your own driver, you will be abusing DT with incorrect information.
Even if you go around adding a new driver, I would like you to fall back to cpufreq-dt once the new OPP bindings are in place.
On Wed, Mar 04, 2015 at 04:49:15PM +0800, pi-cheng.chen wrote:
+static int cpu_opp_table_get_freq_index(unsigned int freq) +{
- struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl;
- int i;
- for (i = 0; opp_tbl[i].freq != 0; i++) {
if (opp_tbl[i].freq >= freq)
return i;
- }
- return -1;
My "return -1" detector fired on this...
+static int cpu_opp_table_get_volt_index(unsigned int volt) +{
- struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl;
- int i;
- for (i = 0; opp_tbl[i].vproc != -1; i++)
if (opp_tbl[i].vproc >= volt)
return i;
- return -1;
And this.
+static int mtk_cpufreq_notify(struct notifier_block *nb,
unsigned long action, void *data)
+{
- struct cpufreq_freqs *freqs = data;
- struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl;
- int old_vproc, new_vproc, old_index, new_index;
- if (!cpumask_test_cpu(freqs->cpu, &dvfs_info->cpus))
return NOTIFY_DONE;
- old_vproc = regulator_get_voltage(dvfs_info->proc_reg);
- old_index = cpu_opp_table_get_volt_index(old_vproc);
- new_index = cpu_opp_table_get_freq_index(freqs->new * 1000);
- new_vproc = opp_tbl[new_index].vproc;
Let's say that cpu_opp_table_get_freq_index() returns -1. We then do no error checking on this, and access the memory immediately preceding opp_tbl[0].
Since we'll be loading garbage from opp_tbl[-1], this probably means that mtk_cpufreq_voltage_trace() will go wrong. Your method of using the vproc values to work out which direction we should walk between old_index...new_index means that we could end up walking through almost the whole UINT_MAX range to wrap around to the new index.
Yet again, "return -1" proves to be a sure sign of a bug.
On 10 March 2015 at 00:28, Russell King - ARM Linux linux@arm.linux.org.uk wrote:
On Wed, Mar 04, 2015 at 04:49:15PM +0800, pi-cheng.chen wrote:
+static int cpu_opp_table_get_freq_index(unsigned int freq) +{
struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl;
int i;
for (i = 0; opp_tbl[i].freq != 0; i++) {
if (opp_tbl[i].freq >= freq)
return i;
}
return -1;
My "return -1" detector fired on this...
+static int cpu_opp_table_get_volt_index(unsigned int volt) +{
struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl;
int i;
for (i = 0; opp_tbl[i].vproc != -1; i++)
if (opp_tbl[i].vproc >= volt)
return i;
return -1;
And this.
+static int mtk_cpufreq_notify(struct notifier_block *nb,
unsigned long action, void *data)
+{
struct cpufreq_freqs *freqs = data;
struct cpu_opp_table *opp_tbl = dvfs_info->opp_tbl;
int old_vproc, new_vproc, old_index, new_index;
if (!cpumask_test_cpu(freqs->cpu, &dvfs_info->cpus))
return NOTIFY_DONE;
old_vproc = regulator_get_voltage(dvfs_info->proc_reg);
old_index = cpu_opp_table_get_volt_index(old_vproc);
new_index = cpu_opp_table_get_freq_index(freqs->new * 1000);
new_vproc = opp_tbl[new_index].vproc;
Let's say that cpu_opp_table_get_freq_index() returns -1. We then do no error checking on this, and access the memory immediately preceding opp_tbl[0].
Since we'll be loading garbage from opp_tbl[-1], this probably means that mtk_cpufreq_voltage_trace() will go wrong. Your method of using the vproc values to work out which direction we should walk between old_index...new_index means that we could end up walking through almost the whole UINT_MAX range to wrap around to the new index.
Yet again, "return -1" proves to be a sure sign of a bug.
Hi Russell,
Thanks for your review. I'll fix it in the next version.
Best Regards, Pi-Cheng
-- FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up according to speedtest.net.
This patch illustrates how to enable the mtk-cpufreq driver for a Mediatek SoC in the device tree, using MT8173 as an example. This patch was tested on the MT8173 EVB with several patches which are not yet posted on a public mailing list.
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org --- arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 10 ++++++++++ arch/arm64/boot/dts/mediatek/mt8173.dtsi | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+)
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index b57f095..cc3b954 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -417,3 +417,13 @@ status = "okay"; clock-frequency = <100000>; }; + +&cpu0 { + cpu-supply = <&mt6397_vpca15_reg>; + voltage-tolerance = <1>; +}; + +&cpu2 { + proc-supply = <&da9211_vcpu_reg>; + sram-supply = <&mt6397_vsramca7_reg>; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index dd0a445..4ad75a6 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -51,6 +51,16 @@ device_type = "cpu"; compatible = "arm,cortex-a53"; reg = <0x000>; + clocks = <&infracfg INFRA_CA53SEL>; + operating-points = < + 1508000 1109000 + 1404000 1083000 + 1183000 1028000 + 1105000 1009000 + 1001000 983000 + 702000 908000 + 507000 859000 + >; };
cpu1: cpu@1 { @@ -65,6 +75,16 @@ compatible = "arm,cortex-a57"; reg = <0x100>; enable-method = "psci"; + clocks = <&infracfg INFRA_CA57SEL>; + operating-points = < + 1807000 1089000 + 1612000 1049000 + 1404000 1007000 + 1209000 968000 + 1001000 927000 + 702000 867000 + 507000 828000 + >; };
cpu3: cpu@101 { @@ -75,6 +95,11 @@ }; };
+ cpufreq { + compatible = "mediatek,mtk-cpufreq"; + clocks = <&apmixedsys APMIXED_MAINPLL>; + }; + psci { compatible = "arm,psci"; method = "smc";
linaro-kernel@lists.linaro.org