Hi Mike,
Here is the second patchset that adds barrier packets to traces collected from ETB and ETR devices. It applies cleanly on top of perf-opencsd-master (4.12-rc1). Let me know how those work out for you.
Thanks, Mathieu
Mathieu Poirier (6): coresight: Correct buffer lost increment coresight: etf: Add barrier packet for synchronisation coresight: etb10: Remove useless conversion to LE coresight: etb10: Add barrier packet for synchronisation coresight: etr: Correct buffer lost increment coresight: etr: Add barrier packet for synchronisation
drivers/hwtracing/coresight/coresight-etb10.c | 36 ++++++++++++++----------- drivers/hwtracing/coresight/coresight-priv.h | 2 ++ drivers/hwtracing/coresight/coresight-tmc-etf.c | 15 +++++++++-- drivers/hwtracing/coresight/coresight-tmc-etr.c | 15 +++++++++-- drivers/hwtracing/coresight/coresight.c | 8 ++++++ 5 files changed, 57 insertions(+), 19 deletions(-)
Many conditions may cause synchronisation to be lost when updating the perf ring buffer but the end result is still the same: synchronisation is lost. As such there is no need to increment the lost count for each condition; incrementing it just once will suffice.
Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org --- drivers/hwtracing/coresight/coresight-etb10.c | 10 +++++++--- drivers/hwtracing/coresight/coresight-tmc-etf.c | 8 ++++++-- 2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d5b96423e1a5..d9c233135d6d 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -353,6 +353,7 @@ static void etb_update_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config) { + bool lost = false; int i, cur; u8 *buf_ptr; u32 read_ptr, write_ptr, capacity; @@ -384,7 +385,7 @@ static void etb_update_buffer(struct coresight_device *csdev, (unsigned long)write_ptr);
write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1); - perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + lost = true; }
/* @@ -395,7 +396,7 @@ static void etb_update_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + ETB_STATUS_REG); if (status & ETB_STATUS_RAM_FULL) { - perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + lost = true; to_read = capacity; read_ptr = write_ptr; } else { @@ -428,9 +429,12 @@ static void etb_update_buffer(struct coresight_device *csdev, if (read_ptr > (drvdata->buffer_depth - 1)) read_ptr -= drvdata->buffer_depth; /* let the decoder know we've skipped ahead */ - perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + lost = true; }
+ if (lost) + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + /* finally tell HW where we want to start reading from */ writel_relaxed(read_ptr, drvdata->base + ETB_RAM_READ_POINTER);
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index e3b9fb82eb8d..2e0fb5b9372c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -369,6 +369,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config) { + bool lost = false; int i, cur; u32 *buf_ptr; u32 read_ptr, write_ptr; @@ -397,7 +398,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + TMC_STS); if (status & TMC_STS_FULL) { - perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + lost = true; to_read = drvdata->size; } else { to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); @@ -442,9 +443,12 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, read_ptr -= drvdata->size; /* Tell the HW */ writel_relaxed(read_ptr, drvdata->base + TMC_RRP); - perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + lost = true; }
+ if (lost) + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + cur = buf->cur; offset = buf->offset;
When a buffer overflow happens, the synchronisation packets usually present at the beginning of the buffer are lost, a situation that prevents the decoder from knowing the context of the traces being decoded.
This patch adds a barrier packet at the beginning of an overflowed buffer. That packet is used by the decoding library as a marker that forces re-synchronisation.
Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org --- drivers/hwtracing/coresight/coresight-priv.h | 2 ++ drivers/hwtracing/coresight/coresight-tmc-etf.c | 7 +++++++ drivers/hwtracing/coresight/coresight.c | 8 ++++++++ 3 files changed, 17 insertions(+)
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 5f662d82052c..3e25b1dd1a1a 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -56,6 +56,8 @@ static ssize_t name##_show(struct device *_dev, \ } \ static DEVICE_ATTR_RO(name)
+extern const u32 barrier_pkt[5]; + enum etm_addr_type { ETM_ADDR_TYPE_NONE, ETM_ADDR_TYPE_SINGLE, diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 2e0fb5b9372c..0e3024d314ef 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -371,6 +371,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, { bool lost = false; int i, cur; + const u32 *barrier; u32 *buf_ptr; u32 read_ptr, write_ptr; u32 status, to_read; @@ -451,12 +452,18 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
cur = buf->cur; offset = buf->offset; + barrier = barrier_pkt;
/* for every byte to read */ for (i = 0; i < to_read; i += 4) { buf_ptr = buf->data_pages[cur] + offset; *buf_ptr = readl_relaxed(drvdata->base + TMC_RRD);
+ if (lost && *barrier) { + *buf_ptr = *barrier; + barrier++; + } + offset += 4; if (offset >= PAGE_SIZE) { offset = 0; diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 6a0202b7384f..672d0a81293b 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -53,6 +53,14 @@ static DEFINE_PER_CPU(struct list_head *, tracer_path); */ static struct list_head *stm_path;
+/* + * When losing synchronisation a new barrier packet needs to be inserted at + * beginning of the data collected in a buffer. That way the decoder knows that + * it needs to look for another sync sequence. + */ +const u32 barrier_pkt[5] = {0x7fffffff, 0x7fffffff, + 0x7fffffff, 0x7fffffff, 0x0}; + static int coresight_id_match(struct device *dev, void *data) { int trace_id, i_trace_id;
Internal CoreSight components are rendering trace data in little-endian format. As such there is no need to convert the data once more, hence removing the extra step.
Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org --- drivers/hwtracing/coresight/coresight-etb10.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d9c233135d6d..5a653b95fde1 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -202,8 +202,7 @@ static void etb_dump_hw(struct etb_drvdata *drvdata) { int i; u8 *buf_ptr; - u32 read_data, depth; - u32 read_ptr, write_ptr; + u32 depth, read_ptr, write_ptr; u32 frame_off, frame_endoff;
CS_UNLOCK(drvdata->base); @@ -231,12 +230,9 @@ static void etb_dump_hw(struct etb_drvdata *drvdata) depth = drvdata->buffer_depth; buf_ptr = drvdata->buf; for (i = 0; i < depth; i++) { - read_data = readl_relaxed(drvdata->base + - ETB_RAM_READ_DATA_REG); - *buf_ptr++ = read_data >> 0; - *buf_ptr++ = read_data >> 8; - *buf_ptr++ = read_data >> 16; - *buf_ptr++ = read_data >> 24; + *(u32*)buf_ptr = readl_relaxed(drvdata->base + + ETB_RAM_READ_DATA_REG); + buf_ptr += 4; }
if (frame_off) { @@ -444,10 +440,8 @@ static void etb_update_buffer(struct coresight_device *csdev, buf_ptr = buf->data_pages[cur] + offset; read_data = readl_relaxed(drvdata->base + ETB_RAM_READ_DATA_REG); - *buf_ptr++ = read_data >> 0; - *buf_ptr++ = read_data >> 8; - *buf_ptr++ = read_data >> 16; - *buf_ptr++ = read_data >> 24; + *(u32*)buf_ptr = read_data; + buf_ptr += 4;
offset += 4; if (offset >= PAGE_SIZE) {
Following what was done for ETF, this patch adds a barrier packet at the beginning of an overflow buffer. That packet is used by the decoder as a marker to force re-synchronisation.
Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org --- drivers/hwtracing/coresight/coresight-etb10.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 5a653b95fde1..1d3c817d9f2b 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -352,6 +352,7 @@ static void etb_update_buffer(struct coresight_device *csdev, bool lost = false; int i, cur; u8 *buf_ptr; + const u32 *barrier; u32 read_ptr, write_ptr, capacity; u32 status, read_data, to_read; unsigned long offset; @@ -436,10 +437,17 @@ static void etb_update_buffer(struct coresight_device *csdev,
cur = buf->cur; offset = buf->offset; + barrier = barrier_pkt; + for (i = 0; i < to_read; i += 4) { buf_ptr = buf->data_pages[cur] + offset; read_data = readl_relaxed(drvdata->base + ETB_RAM_READ_DATA_REG); + if (lost && *barrier) { + read_data = *barrier; + barrier++; + } + *(u32*)buf_ptr = read_data; buf_ptr += 4;
Many conditions may cause synchronisation to be lost when updating the perf ring buffer but the end result is still the same: synchronisation is lost. As such there is no need to increment the lost count for each condition; incrementing it just once will suffice.
This patch should not be upstreamed.
Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index e761cf85e98d..49f7686edf06 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -357,6 +357,7 @@ static void tmc_update_etr_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config) { + bool lost = false; int i, cur; u32 *buf_ptr; u32 read_ptr, write_ptr; @@ -385,7 +386,7 @@ static void tmc_update_etr_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + TMC_STS); if (status & TMC_STS_FULL) { - perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + lost = true; to_read = drvdata->size; } else { to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); @@ -433,9 +434,12 @@ static void tmc_update_etr_buffer(struct coresight_device *csdev, read_ptr -= drvdata->size; /* Tell the HW */ writel_relaxed(read_ptr, drvdata->base + TMC_RRP); - perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + lost = true; }
+ if (lost) + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + cur = buf->cur; offset = buf->offset;
This patch adds a barrier packet at the beginning of an overflowed buffer. That packet is used by the decoding library as a marker that forces re-synchronisation.
This patch should not be upstreamed.
Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 49f7686edf06..7fc2781b5a7a 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -359,6 +359,7 @@ static void tmc_update_etr_buffer(struct coresight_device *csdev, { bool lost = false; int i, cur; + const u32 *barrier; u32 *buf_ptr; u32 read_ptr, write_ptr; u32 status, to_read; @@ -442,12 +443,18 @@ static void tmc_update_etr_buffer(struct coresight_device *csdev,
cur = buf->cur; offset = buf->offset; + barrier = barrier_pkt;
/* for every byte to read */ for (i = 0; i < to_read; i += 4) { buf_ptr = buf->data_pages[cur] + offset; *buf_ptr = readl_relaxed(drvdata->base + TMC_RRD);
+ if (lost && *barrier) { + *buf_ptr = *barrier; + barrier++; + } + offset += 4; if (offset >= PAGE_SIZE) { offset = 0;