Hi Tamas
Thanks for the patch. I have tested this on my Juno board and it works fine. I have some comments inline.
On 16/06/2022 16:18, Tamas Zsoldos wrote:
etm4x devices cannot be successfully probed when their CPU is offline. This adds a mechanism to delay the probing in this case and try again once the CPU is brought online.
It may be worth elaborating this a bit. e.g,
"For e.g., if the system is booted with maxcpus=n, the ETM won't be probed on CPUs n+1, even when the CPUs are brought up online later"
, >
Signed-off-by: Tamas Zsoldos tamas.zsoldos@arm.com
.../coresight/coresight-etm4x-core.c | 195 ++++++++++++++---- 1 file changed, 155 insertions(+), 40 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 7f416a12000e..f0bff08e9de9 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -70,6 +70,14 @@ struct etm4_init_arg { struct csdev_access *csa; }; +struct etm4_delayed_probe {
- u32 etm_pid;
- struct device *dev;
+};
+static DEFINE_PER_CPU(struct etm4_delayed_probe *, delayed_probe); +static enum cpuhp_state hp_probe;
I am wondering if we can reuse the existing hotplug notifier ? See more on this below.
- /*
- Check if TRCSSPCICRn(i) is implemented for a given instance.
@@ -1938,48 +1946,20 @@ static void etm4_pm_clear(void) } } -static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) +static int etm4_add_coresight_dev(struct device *dev,
struct csdev_access *access)
Could this be : etm4_add_coresight_dev(struct etm4_init_arg *init_arg)
That provides us with "access". Also if replace
init_arg.drvdata field with init_arg.dev
we can use that for etm4_init_arch_data() and etm4_add_coresight_dev.
we can get to drvdata from the dev via, dev_get_drvdata(dev).
{ int ret; struct coresight_platform_data *pdata = NULL;
- struct etmv4_drvdata *drvdata;
- struct etmv4_drvdata *drvdata = dev_get_drvdata(dev); struct coresight_desc desc = { 0 };
- struct etm4_init_arg init_arg = { 0 }; u8 major, minor; char *type_name;
- drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata)
return -ENOMEM;
- dev_set_drvdata(dev, drvdata);
- if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE)
pm_save_enable = coresight_loses_context_with_cpu(dev) ?
PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
- if (pm_save_enable != PARAM_PM_SAVE_NEVER) {
drvdata->save_state = devm_kmalloc(dev,
sizeof(struct etmv4_save_state), GFP_KERNEL);
if (!drvdata->save_state)
return -ENOMEM;
- }
- drvdata->base = base;
- spin_lock_init(&drvdata->spinlock);
- drvdata->cpu = coresight_get_cpu(dev);
- if (drvdata->cpu < 0)
return drvdata->cpu;
- init_arg.drvdata = drvdata;
- init_arg.csa = &desc.access;
- init_arg.pid = etm_pid;
return -EINVAL;
- if (smp_call_function_single(drvdata->cpu,
etm4_init_arch_data, &init_arg, 1))
dev_err(dev, "ETM arch init failed\n");
- desc.access = *access;
if (!drvdata->arch) return -EINVAL; @@ -2050,6 +2030,69 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) return 0; } +static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) +{
- struct etmv4_drvdata *drvdata;
- struct csdev_access access = { 0 };
- struct etm4_init_arg init_arg = { 0 };
- struct etm4_delayed_probe *delayed;
- drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
- if (!drvdata)
return -ENOMEM;
- dev_set_drvdata(dev, drvdata);
- if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE)
pm_save_enable = coresight_loses_context_with_cpu(dev) ?
PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
- if (pm_save_enable != PARAM_PM_SAVE_NEVER) {
drvdata->save_state = devm_kmalloc(dev,
sizeof(struct etmv4_save_state), GFP_KERNEL);
if (!drvdata->save_state)
return -ENOMEM;
- }
- drvdata->base = base;
- spin_lock_init(&drvdata->spinlock);
- drvdata->cpu = coresight_get_cpu(dev);
- if (drvdata->cpu < 0)
return drvdata->cpu;
- init_arg.drvdata = drvdata;
- init_arg.csa = &access;
- init_arg.pid = etm_pid;
- /*
* Serialize against CPUHP callbacks to avoid race condition
* between the smp call and saving the delayed probe.
*/
- cpus_read_lock();
- if (smp_call_function_single(drvdata->cpu,
etm4_init_arch_data, &init_arg, 1)) {
/* The CPU was offline, try again once it comes online. */
delayed = devm_kmalloc(dev, sizeof(*delayed), GFP_KERNEL);
if (!delayed) {
cpus_read_unlock();
return -ENOMEM;
}
delayed->etm_pid = etm_pid;
delayed->dev = dev;
per_cpu(delayed_probe, drvdata->cpu) = delayed;
cpus_read_unlock();
return 0;
- }
- cpus_read_unlock();
- return etm4_add_coresight_dev(dev, &access);
+}
- static int etm4_probe_amba(struct amba_device *adev, const struct amba_id *id) { void __iomem *base;
@@ -2088,6 +2131,64 @@ static int etm4_probe_platform_dev(struct platform_device *pdev) return ret; } +static int etm4_probe_cpu(unsigned int cpu) +{
- int ret;
- struct etm4_delayed_probe di;
- struct csdev_access access = { 0 };
- struct etm4_init_arg init_arg;
- struct etm4_delayed_probe *dp = *this_cpu_ptr(&delayed_probe);
This could be called from the existing etm4_starting_cpu() ?
static int etm4_starting_cpu(unsigned int cpu) { if (!etmdrvdata[cpu]) - return 0; + return etm4_probe_cpu(cpu);
spin_lock(&etmdrvdata[cpu]->spinlock); if (!etmdrvdata[cpu]->os_unlock) etm4_os_unlock(etmdrvdata[cpu]);
if (local_read(&etmdrvdata[cpu]->mode)) etm4_enable_hw(etmdrvdata[cpu]); spin_unlock(&etmdrvdata[cpu]->spinlock);
}
- if (!dp)
return 0;
- di = *dp;
- devm_kfree(di.dev, dp);
- *this_cpu_ptr(&delayed_probe) = NULL;
- ret = pm_runtime_resume_and_get(di.dev);
- if (ret < 0) {
dev_err(di.dev, "Failed to get PM runtime!\n");
return 0;
- }
- init_arg.drvdata = dev_get_drvdata(di.dev);
- if (!init_arg.drvdata)
return 0;
- init_arg.csa = &access;
- init_arg.pid = di.etm_pid;
- etm4_init_arch_data(&init_arg);
- etm4_add_coresight_dev(di.dev, &access);
- pm_runtime_put(di.dev);
- return 0;
+}
+static int etm4_setup_cpuhp_probe(void) +{
- int ret;
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"arm/coresight4:delayed_probe",
etm4_probe_cpu, NULL);
- if (ret > 0) {
hp_probe = ret;
return 0;
- }
- return ret;
+}
+static void etm4_remove_cpuhp_probe(void) +{
- if (hp_probe)
cpuhp_remove_state_nocalls(hp_probe);
- hp_probe = 0;
+}
And we could get rid of the above ^^
static struct amba_cs_uci_id uci_id_etm4[] = { { /* ETMv4 UCI data */ @@ -2102,16 +2203,20 @@ static void clear_etmdrvdata(void *info) int cpu = *(int *)info; etmdrvdata[cpu] = NULL;
- per_cpu(delayed_probe, cpu) = NULL; }
static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata) {
- etm_perf_symlink(drvdata->csdev, false);
- bool had_dev; /*
*/ cpus_read_lock();
- Taking hotplug lock here to avoid racing between etm4_remove_dev()
- and CPU hotplug call backs.
- had_dev = etmdrvdata[drvdata->cpu];
Is this ever false ? Why do we need this change ? How does this patch affect the tear down ?
Suzuki
Hi Suzuki,
On 6/23/22 11:25, Suzuki K Poulose wrote:
Hi Tamas
Thanks for the patch. I have tested this on my Juno board and it works fine. I have some comments inline.
Thank you for taking a look. I will send out a v2 based on your suggestions.
On 16/06/2022 16:18, Tamas Zsoldos wrote:
etm4x devices cannot be successfully probed when their CPU is offline. This adds a mechanism to delay the probing in this case and try again once the CPU is brought online.
It may be worth elaborating this a bit. e.g,
"For e.g., if the system is booted with maxcpus=n, the ETM won't be probed on CPUs n+1, even when the CPUs are brought up online later"
, >
Signed-off-by: Tamas Zsoldos tamas.zsoldos@arm.com
.../coresight/coresight-etm4x-core.c | 195 ++++++++++++++---- 1 file changed, 155 insertions(+), 40 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 7f416a12000e..f0bff08e9de9 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -70,6 +70,14 @@ struct etm4_init_arg { struct csdev_access *csa; }; +struct etm4_delayed_probe { + u32 etm_pid; + struct device *dev; +};
+static DEFINE_PER_CPU(struct etm4_delayed_probe *, delayed_probe); +static enum cpuhp_state hp_probe;
I am wondering if we can reuse the existing hotplug notifier ? See more on this below.
/* * Check if TRCSSPCICRn(i) is implemented for a given instance. * @@ -1938,48 +1946,20 @@ static void etm4_pm_clear(void) } } -static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) +static int etm4_add_coresight_dev(struct device *dev, + struct csdev_access *access)
Could this be : etm4_add_coresight_dev(struct etm4_init_arg *init_arg)
That provides us with "access". Also if replace
init_arg.drvdata field with init_arg.dev
we can use that for etm4_init_arch_data() and etm4_add_coresight_dev.
we can get to drvdata from the dev via, dev_get_drvdata(dev).
{ int ret; struct coresight_platform_data *pdata = NULL; - struct etmv4_drvdata *drvdata; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev); struct coresight_desc desc = { 0 }; - struct etm4_init_arg init_arg = { 0 }; u8 major, minor; char *type_name; - drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) - return -ENOMEM;
- dev_set_drvdata(dev, drvdata);
- if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE) - pm_save_enable = coresight_loses_context_with_cpu(dev) ? - PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
- if (pm_save_enable != PARAM_PM_SAVE_NEVER) { - drvdata->save_state = devm_kmalloc(dev, - sizeof(struct etmv4_save_state), GFP_KERNEL); - if (!drvdata->save_state) - return -ENOMEM; - }
- drvdata->base = base;
- spin_lock_init(&drvdata->spinlock);
- drvdata->cpu = coresight_get_cpu(dev); - if (drvdata->cpu < 0) - return drvdata->cpu;
- init_arg.drvdata = drvdata; - init_arg.csa = &desc.access; - init_arg.pid = etm_pid; + return -EINVAL; - if (smp_call_function_single(drvdata->cpu, - etm4_init_arch_data, &init_arg, 1)) - dev_err(dev, "ETM arch init failed\n"); + desc.access = *access; if (!drvdata->arch) return -EINVAL; @@ -2050,6 +2030,69 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) return 0; } +static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) +{
+ struct etmv4_drvdata *drvdata; + struct csdev_access access = { 0 }; + struct etm4_init_arg init_arg = { 0 }; + struct etm4_delayed_probe *delayed;
+ drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM;
+ dev_set_drvdata(dev, drvdata);
+ if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE) + pm_save_enable = coresight_loses_context_with_cpu(dev) ? + PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
+ if (pm_save_enable != PARAM_PM_SAVE_NEVER) { + drvdata->save_state = devm_kmalloc(dev, + sizeof(struct etmv4_save_state), GFP_KERNEL); + if (!drvdata->save_state) + return -ENOMEM; + }
+ drvdata->base = base;
+ spin_lock_init(&drvdata->spinlock);
+ drvdata->cpu = coresight_get_cpu(dev); + if (drvdata->cpu < 0) + return drvdata->cpu;
+ init_arg.drvdata = drvdata; + init_arg.csa = &access; + init_arg.pid = etm_pid;
+ /* + * Serialize against CPUHP callbacks to avoid race condition + * between the smp call and saving the delayed probe. + */ + cpus_read_lock(); + if (smp_call_function_single(drvdata->cpu, + etm4_init_arch_data, &init_arg, 1)) { + /* The CPU was offline, try again once it comes online. */ + delayed = devm_kmalloc(dev, sizeof(*delayed), GFP_KERNEL); + if (!delayed) { + cpus_read_unlock(); + return -ENOMEM; + }
+ delayed->etm_pid = etm_pid; + delayed->dev = dev;
+ per_cpu(delayed_probe, drvdata->cpu) = delayed;
+ cpus_read_unlock(); + return 0; + } + cpus_read_unlock();
+ return etm4_add_coresight_dev(dev, &access); +}
static int etm4_probe_amba(struct amba_device *adev, const struct amba_id *id) { void __iomem *base; @@ -2088,6 +2131,64 @@ static int etm4_probe_platform_dev(struct platform_device *pdev) return ret; } +static int etm4_probe_cpu(unsigned int cpu) +{ + int ret; + struct etm4_delayed_probe di; + struct csdev_access access = { 0 }; + struct etm4_init_arg init_arg; + struct etm4_delayed_probe *dp = *this_cpu_ptr(&delayed_probe);
This could be called from the existing etm4_starting_cpu() ?
etm4_starting_cpu() is too early (it results in a "bad: scheduling from the idle thread!" through the AMBA pm runtime), but etm4_online_cpu() works.
static int etm4_starting_cpu(unsigned int cpu) { if (!etmdrvdata[cpu]) - return 0; + return etm4_probe_cpu(cpu);
spin_lock(&etmdrvdata[cpu]->spinlock); if (!etmdrvdata[cpu]->os_unlock) etm4_os_unlock(etmdrvdata[cpu]);
if (local_read(&etmdrvdata[cpu]->mode)) etm4_enable_hw(etmdrvdata[cpu]); spin_unlock(&etmdrvdata[cpu]->spinlock);
}
+ if (!dp) + return 0;
+ di = *dp; + devm_kfree(di.dev, dp); + *this_cpu_ptr(&delayed_probe) = NULL;
+ ret = pm_runtime_resume_and_get(di.dev); + if (ret < 0) { + dev_err(di.dev, "Failed to get PM runtime!\n"); + return 0; + }
+ init_arg.drvdata = dev_get_drvdata(di.dev); + if (!init_arg.drvdata) + return 0;
+ init_arg.csa = &access; + init_arg.pid = di.etm_pid; + etm4_init_arch_data(&init_arg);
+ etm4_add_coresight_dev(di.dev, &access);
+ pm_runtime_put(di.dev); + return 0; +}
+static int etm4_setup_cpuhp_probe(void) +{ + int ret;
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "arm/coresight4:delayed_probe", + etm4_probe_cpu, NULL);
+ if (ret > 0) { + hp_probe = ret; + return 0; + } + return ret; +}
+static void etm4_remove_cpuhp_probe(void) +{ + if (hp_probe) + cpuhp_remove_state_nocalls(hp_probe);
+ hp_probe = 0; +}
And we could get rid of the above ^^
static struct amba_cs_uci_id uci_id_etm4[] = { { /* ETMv4 UCI data */ @@ -2102,16 +2203,20 @@ static void clear_etmdrvdata(void *info) int cpu = *(int *)info; etmdrvdata[cpu] = NULL; + per_cpu(delayed_probe, cpu) = NULL; } static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata) { - etm_perf_symlink(drvdata->csdev, false); + bool had_dev; /* * Taking hotplug lock here to avoid racing between etm4_remove_dev() * and CPU hotplug call backs. */ cpus_read_lock();
+ had_dev = etmdrvdata[drvdata->cpu];
Is this ever false ? Why do we need this change ? How does this patch affect the tear down ?
Yes, it will be false when a delayed probe is pending. Since the patch introduces a new state the driver can be in (the delayed probe), it needs to make sure it does not try to undo things that have not been done yet. This is what this condition is meant to do: it basically checks if etm4_add_coresight_dev() has already run for this device.
It would be probably easier to understand if it just checked if there's a delayed probe, I will change it to that.
Thanks, Tamás