On Wed, Apr 21, 2021 at 02:04:13PM +0200, Daniel Kiss wrote:
ETR might fill up the buffer sooner than an event makes perf to trigger the synchronisation especially in system wide trace. Polling runs periodically to sync the ETR buffer. Period is configurable via sysfs, disabled by default.
Signed-off-by: Daniel Kiss daniel.kiss@arm.com Signed-off-by: Branislav Rankov Branislav.Rankov@arm.com
.../testing/sysfs-bus-coresight-devices-tmc | 8 + drivers/hwtracing/coresight/Makefile | 2 +- .../hwtracing/coresight/coresight-etm-perf.c | 8 + .../coresight/coresight-etr-perf-polling.c | 316 ++++++++++++++++++ .../coresight/coresight-etr-perf-polling.h | 42 +++ .../hwtracing/coresight/coresight-tmc-core.c | 2 + .../hwtracing/coresight/coresight-tmc-etr.c | 9 + 7 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 drivers/hwtracing/coresight/coresight-etr-perf-polling.c create mode 100644 drivers/hwtracing/coresight/coresight-etr-perf-polling.h
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc index 6aa527296c710..4ca7af22a3686 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc @@ -91,3 +91,11 @@ Contact: Mathieu Poirier mathieu.poirier@linaro.org Description: (RW) Size of the trace buffer for TMC-ETR when used in SYSFS mode. Writable only for TMC-ETR configurations. The value should be aligned to the kernel pagesize.
+What: /sys/bus/coresight/devices/<memory_map>.tmc/polling/period +Date: April 2021 +KernelVersion: 5.13 +Contact: Daniel Kiss daniel.kiss@arm.com +Description: (RW) Time in milliseconds when the TMC-ETR is synced.
Default value is 0, means the feature is disabled.
Writable only for TMC-ETR configurations.
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile index d60816509755c..4df90b71d98cd 100644 --- a/drivers/hwtracing/coresight/Makefile +++ b/drivers/hwtracing/coresight/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_CORESIGHT) += coresight.o coresight-y := coresight-core.o coresight-etm-perf.o coresight-platform.o \
coresight-sysfs.o
coresight-sysfs.o coresight-etr-perf-polling.o
obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o coresight-tmc-y := coresight-tmc-core.o coresight-tmc-etf.o \ coresight-tmc-etr.o diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 78a55fc2bcab5..910a99944eea8 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -19,6 +19,7 @@ #include <linux/workqueue.h> #include "coresight-etm-perf.h" +#include "coresight-etr-perf-polling.h" #include "coresight-priv.h" static struct pmu etm_pmu; @@ -438,6 +439,8 @@ static void etm_event_start(struct perf_event *event, int flags) /* Tell the perf core the event is alive */ event->hw.state = 0;
- etr_perf_polling_event_start(event, event_data, handle);
- /* Finally enable the tracer */ if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) goto fail_disable_path;
@@ -497,6 +500,8 @@ static void etm_event_stop(struct perf_event *event, int mode) if (!sink) return;
- etr_perf_polling_event_stop(event, event_data);
- /* stop tracer */ source_ops(csdev)->disable(csdev, event);
@@ -741,6 +746,8 @@ int __init etm_perf_init(void) etm_pmu.addr_filters_validate = etm_addr_filters_validate; etm_pmu.nr_addr_filters = ETM_ADDR_CMP_MAX;
- etr_perf_polling_init();
- ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1); if (ret == 0) etm_perf_up = true;
@@ -750,5 +757,6 @@ int __init etm_perf_init(void) void __exit etm_perf_exit(void) {
- etr_perf_polling_exit(); perf_pmu_unregister(&etm_pmu);
} diff --git a/drivers/hwtracing/coresight/coresight-etr-perf-polling.c b/drivers/hwtracing/coresight/coresight-etr-perf-polling.c new file mode 100644 index 0000000000000..aa0352908873a --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-etr-perf-polling.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +/*
- Copyright(C) 2021 Arm Limited. All rights reserved.
- Author: Daniel Kiss daniel.kiss@arm.com
- */
+#include <linux/coresight.h> +#include <linux/coresight-pmu.h> +#include <linux/cpumask.h> +#include <linux/device.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/percpu-defs.h> +#include <linux/perf_event.h> +#include <linux/slab.h> +#include <linux/stringhash.h> +#include <linux/types.h> +#include <linux/workqueue.h>
+#include "coresight-etr-perf-polling.h" +#include "coresight-priv.h" +#include "coresight-tmc.h"
+#if defined(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) || \
- defined(CONFIG_CORESIGHT_LINK_AND_SINK_TMC_MODULE)
It would be good to add a new config option "CONFIG_CORESIGHT_ETM_PERF_POLL" so that developers can selectively enable it when building the kernel.
+struct polling_event_list {
- struct perf_event *perf_event;
- struct etm_event_data *etm_event_data;
- struct perf_output_handle *ctx_handle;
- void (*tmc_etr_reset_hw)(struct tmc_drvdata *);
- struct list_head list;
+};
+struct polling {
- int cpu;
- struct list_head polled_events;
Based on the structure definition, every CPU has its own polling structure, and there can be multiple polled events for one CPU. In theory, should every CPU have only one perf event for polling?
If so, it's not necessary to create an event list for every CPU; in other words, we can create a single list which can be used to maintain all events across all CPUs.
- struct delayed_work delayed_work;
+};
Every CPU has its own delayed work; a potential issue is that it's hard to synchronize across CPUs, and as the CPU count increases, the situation will get worse. For example, when polling for multiple CPUs, there might be no chance to stop all tracers attached to the CPUs.
I understand this patch simply captures the trace data for the first CPU which registered its handler in the driver; IOW, we have no chance to stop all tracers to record a clean trace (here "clean" means there are no mixed behaviours where one CPU is reading trace data from the ETR buffer while tracers on other CPUs are still writing data into the ETR buffer).
+static atomic_t period; +static spinlock_t spinlock_re; +static struct list_head registered_events;
+static DEFINE_PER_CPU(struct polling, polling);
+static ssize_t period_show(struct device *dev, struct device_attribute *attr,
char *buf)
+{
- int temp;
- struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
- if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
return -EPERM;
IMHO, this check is redundant.
- temp = atomic_read(&period);
- return sprintf(buf, "%i\n", temp);
+}
+static ssize_t period_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
+{
- int temp = 0;
- struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
- if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
return -EPERM;
Ditto.
- if ((1 == sscanf(buf, "%i", &temp)) && (temp >= 0))
atomic_set(&period, temp);
- return count;
+}
+static DEVICE_ATTR_RW(period);
+static struct attribute *coresight_tmc_polling_attrs[] = {
- &dev_attr_period.attr,
- NULL,
+}; +const struct attribute_group coresight_tmc_polling_group = {
- .attrs = coresight_tmc_polling_attrs,
- .name = "polling",
+}; +EXPORT_SYMBOL_GPL(coresight_tmc_polling_group);
Don't need to export the symbol.
+static inline void polling_sched_worker(struct polling *p) +{
- int tickrate = atomic_read(&period);
- if (!list_empty(&p->polled_events) && (tickrate > 0))
schedule_delayed_work_on(p->cpu, &p->delayed_work,
msecs_to_jiffies(tickrate));
+}
+static inline bool is_etr_related(struct etm_event_data *etm_event_data, int cpu) +{
- struct list_head *path;
- struct coresight_device *sink;
- struct tmc_drvdata *drvdata;
- path = etm_event_cpu_path(etm_event_data, cpu);
- if (WARN_ON(!path))
return false;
- sink = coresight_get_sink(path);
- if (WARN_ON(!sink))
return false;
- drvdata = dev_get_drvdata(sink->dev.parent);
- if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
return false;
- return true;
+}
I understand these operations are duplicated from etm_event_start(); we could avoid the duplicated code if we arrange the code more reasonably.
I suggest below flow:
- From the function etm_setup_aux(), insert an AUX event into the polling list; - From the function etm_event_start(), enable the AUX event, so this AUX event will be polled periodically; - From the function etm_event_stop(), disable the AUX event, so the event will not be polled anymore; - From the function etm_free_aux(), remove the AUX event from the polling list.
+/*
- Adds the event to the polled events list.
- */
+void etr_perf_polling_event_start(struct perf_event *event,
struct etm_event_data *etm_event_data,
struct perf_output_handle *ctx_handle)
+{
- int cpu = smp_processor_id();
- struct polling *p = per_cpu_ptr(&polling, cpu);
- struct polling_event_list *element;
- struct list_head *i, *tmp;
- if (!is_etr_related(etm_event_data, cpu))
return;
- spin_lock(&spinlock_re);
- list_for_each_safe (i, tmp, ®istered_events) {
Spurious space before the parenthesis; also, it would be better to use list_for_each_entry_safe().
element = list_entry(i, struct polling_event_list, list);
if (element->ctx_handle == ctx_handle) {
element->perf_event = event;
element->etm_event_data = etm_event_data;
list_del(&element->list);
spin_unlock(&spinlock_re);
list_add(&element->list, &p->polled_events);
polling_sched_worker(p);
return;
}
- }
- spin_unlock(&spinlock_re);
+} +EXPORT_SYMBOL_GPL(etr_perf_polling_event_start);
No need to export the symbol.
+/*
- Removes the event from the to be polled events list.
- */
+void etr_perf_polling_event_stop(struct perf_event *event,
struct etm_event_data *etm_event_data)
+{
- int cpu = smp_processor_id();
- struct list_head *i, *tmp;
- struct polling *p = per_cpu_ptr(&polling, cpu);
- if (!is_etr_related(etm_event_data, cpu))
return;
- list_for_each_safe (i, tmp, &p->polled_events) {
struct polling_event_list *element =
list_entry(i, struct polling_event_list, list);
if (element->perf_event == event) {
list_del(&element->list);
element->perf_event = NULL;
element->etm_event_data = NULL;
spin_lock(&spinlock_re);
list_add(&element->list, ®istered_events);
spin_unlock(&spinlock_re);
if (list_empty(&p->polled_events)) {
cancel_delayed_work(&p->delayed_work);
}
return;
}
- }
+} +EXPORT_SYMBOL_GPL(etr_perf_polling_event_stop);
Ditto.
+/*
- The polling worker is a workqueue job which is periodically
- woken up to update the perf aux buffer from the etr shrink.
- */
+static void etr_perf_polling_worker(struct work_struct *work) +{
- unsigned long flags;
- int cpu = smp_processor_id();
- struct polling *p = per_cpu_ptr(&polling, cpu);
- struct list_head *i, *tmp;
- if (!atomic_read(&period))
return;
- /*
* Scheduling would do the same from the perf hooks,
* this should be done in one go.
*/
- local_irq_save(flags);
- preempt_disable();
The locking usage is questionable...
local_irq_save() disables local interrupts, which also implies preemption is disabled; after disabling interrupts, it's needless to disable preemption again.
Here neither local_irq_save() nor preempt_disable() is the right locking primitive to use; instead, it should use the paired functions spin_lock(&spinlock_re) / spin_unlock(&spinlock_re), because what needs protecting is the polling list.
- /* Perf requires rcu lock. */
- rcu_read_lock();
Though there is a comment, I still don't understand why rcu_read_lock() is needed here. Which critical resource does it protect?
- polling_sched_worker(p);
- list_for_each_safe (i, tmp, &p->polled_events) {
struct list_head *path;
struct coresight_device *sink;
struct polling_event_list *element =
list_entry(i, struct polling_event_list, list);
path = etm_event_cpu_path(element->etm_event_data, cpu);
if (WARN_ON(!path))
continue;
sink = coresight_get_sink(path);
if (WARN_ON(!sink))
continue;
When inserting the event into the polling list, the code has already checked the path and validated the sink, so there's no need to check the path again here.
if (sink_ops(sink)->update_buffer) {
int size, refcnt;
struct tmc_drvdata *drvdata = dev_get_drvdata(sink->dev.parent);
/*
* Act as now we are the only users of the sink. Due to the locks
* we are safe.
*/
refcnt = atomic_xchg(sink->refcnt, 1);
size = sink_ops(sink)->update_buffer(
sink, element->ctx_handle,
element->etm_event_data->snk_config);
refcnt = atomic_xchg(sink->refcnt, refcnt);
This is tricky. This change is like a workaround; it would be better to refactor the code around "sink->refcnt" — maybe we can refactor the code to track the sink's reference counter in the polling list rather than in the low-level driver.
/*
* Restart the trace.
*/
if (element->tmc_etr_reset_hw)
element->tmc_etr_reset_hw(drvdata);
WARN_ON(size < 0);
if (size > 0) {
struct etm_event_data *new_event_data;
perf_aux_output_end(element->ctx_handle, size);
new_event_data = perf_aux_output_begin(
element->ctx_handle,
element->perf_event);
if (WARN_ON(new_event_data == NULL))
continue;
element->etm_event_data = new_event_data;
WARN_ON(new_event_data->snk_config !=
element->etm_event_data->snk_config);
}
}
- }
- rcu_read_unlock();
- preempt_enable();
- local_irq_restore(flags);
+}
+void etr_perf_polling_handle_register(struct perf_output_handle *handle,
void (*tmc_etr_reset_hw)(struct tmc_drvdata *drvdata))
+{
- struct polling_event_list *element;
- element = kmalloc(sizeof(*element), GFP_KERNEL);
- if (WARN_ON(!element))
return;
- memset(element, 0, sizeof(*element));
- element->ctx_handle = handle;
- element->tmc_etr_reset_hw = tmc_etr_reset_hw;
- spin_lock(&spinlock_re);
- list_add(&element->list, ®istered_events);
- spin_unlock(&spinlock_re);
+} +EXPORT_SYMBOL_GPL(etr_perf_polling_handle_register);
+void etr_perf_polling_handle_deregister(struct perf_output_handle *handle) +{
- struct list_head *i, *tmp;
- spin_lock(&spinlock_re);
- list_for_each_safe (i, tmp, ®istered_events) {
struct polling_event_list *element =
list_entry(i, struct polling_event_list, list);
if (element->ctx_handle == handle) {
list_del(&element->list);
spin_unlock(&spinlock_re);
kfree(element);
return;
}
- }
- spin_unlock(&spinlock_re);
+} +EXPORT_SYMBOL_GPL(etr_perf_polling_handle_deregister);
+void etr_perf_polling_init(void) +{
- int cpu;
- spin_lock_init(&spinlock_re);
- INIT_LIST_HEAD(®istered_events);
- atomic_set(&period, 0);
- for_each_possible_cpu (cpu) {
struct polling *p = per_cpu_ptr(&polling, cpu);
p->cpu = cpu;
INIT_LIST_HEAD(&p->polled_events);
INIT_DELAYED_WORK(&p->delayed_work, etr_perf_polling_worker);
- }
+} +EXPORT_SYMBOL_GPL(etr_perf_polling_init);
+void etr_perf_polling_exit(void) +{
- int cpu;
- for_each_possible_cpu (cpu) {
struct polling *p = per_cpu_ptr(&polling, cpu);
cancel_delayed_work_sync(&p->delayed_work);
WARN_ON(!list_empty(&p->polled_events));
- }
- WARN_ON(!list_empty(®istered_events));
+} +EXPORT_SYMBOL_GPL(etr_perf_polling_exit);
+#endif diff --git a/drivers/hwtracing/coresight/coresight-etr-perf-polling.h b/drivers/hwtracing/coresight/coresight-etr-perf-polling.h new file mode 100644 index 0000000000000..5917e1fa408bb --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-etr-perf-polling.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*
- Copyright(C) 2021 Arm Limited. All rights reserved.
- Author: Daniel Kiss daniel.kiss@arm.com
- */
+#ifndef _CORESIGHT_ETM_PERF_POLLING_H +#define _CORESIGHT_ETM_PERF_POLLING_H
+#include <linux/coresight.h> +#include <linux/perf_event.h> +#include "coresight-etm-perf.h" +#include "coresight-tmc.h"
+#if defined(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) || \
- defined(CONFIG_CORESIGHT_LINK_AND_SINK_TMC_MODULE)
+void etr_perf_polling_init(void); +void etr_perf_polling_exit(void); +void etr_perf_polling_handle_register(struct perf_output_handle *handle,
void (*tmc_etr_reset_hw)(struct tmc_drvdata *drvdata));
+void etr_perf_polling_handle_deregister(struct perf_output_handle *handle); +void etr_perf_polling_event_start(struct perf_event *event,
struct etm_event_data *etm_event_data,
struct perf_output_handle *ctx_handle);
+void etr_perf_polling_event_stop(struct perf_event *event,
struct etm_event_data *etm_event_data);
+extern const struct attribute_group coresight_tmc_polling_group; +#define CORESIGHT_TMP_POLLING_GROUP &coresight_tmc_polling_group,
+#else /* !CONFIG_CORESIGHT_LINK_AND_SINK_TMC */ +#define etr_perf_polling_init() +#define etr_perf_polling_exit() +#define etr_perf_polling_handle_register(...) +#define etr_perf_polling_handle_deregister(...) +#define etr_perf_polling_event_start(...) +#define etr_perf_polling_event_stop(...) +#define CORESIGHT_TMP_POLLING_GROUP +#endif
+#endif diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index 74c6323d4d6ab..51e705ef3ffa3 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -26,6 +26,7 @@ #include "coresight-priv.h" #include "coresight-tmc.h" +#include "coresight-etr-perf-polling.h" DEFINE_CORESIGHT_DEVLIST(etb_devs, "tmc_etb"); DEFINE_CORESIGHT_DEVLIST(etf_devs, "tmc_etf"); @@ -365,6 +366,7 @@ static const struct attribute_group coresight_tmc_mgmt_group = { static const struct attribute_group *coresight_tmc_groups[] = { &coresight_tmc_group, &coresight_tmc_mgmt_group,
- CORESIGHT_TMP_POLLING_GROUP
This is a bit weird to me. It would be more readable as:
#ifdef CONFIG_CORESIGHT_ETM_PERF_POLL &coresight_tmc_polling_group, #endif
NULL, }; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index bf9f6311d8663..021b594e38e71 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -16,6 +16,7 @@ #include <linux/vmalloc.h> #include "coresight-catu.h" #include "coresight-etm-perf.h" +#include "coresight-etr-perf-polling.h" #include "coresight-priv.h" #include "coresight-tmc.h" @@ -1139,6 +1140,12 @@ void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) drvdata->etr_buf = NULL; } +static void tmc_etr_reset_hw(struct tmc_drvdata *drvdata) +{
- __tmc_etr_disable_hw(drvdata);
- __tmc_etr_enable_hw(drvdata);
+}
static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) { int ret = 0; @@ -1630,6 +1637,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) drvdata->mode = CS_MODE_PERF; drvdata->perf_buf = etr_perf->etr_buf; drvdata->perf_handle = handle;
atomic_inc(csdev->refcnt); }etr_perf_polling_handle_register(handle, tmc_etr_reset_hw);
@@ -1677,6 +1685,7 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev) drvdata->mode = CS_MODE_DISABLED; /* Reset perf specific data */ drvdata->perf_buf = NULL;
- etr_perf_polling_handle_deregister(drvdata->perf_handle); drvdata->perf_handle = NULL;
spin_unlock_irqrestore(&drvdata->spinlock, flags); -- 2.25.1