This patchset consists of refactoring to allow the decoder to be created in advance when the AUX records are iterated over. The AUX record flags are used to communicate whether the data is formatted or not which is the reason this refactoring is required.
These changes result in some simplifications, removal of early exit conditions etc.
A change was also made to --dump-raw-trace code to allow the formatted/unformatted status to persist and for the decoder to not be continually deleted and recreated.
The changes apply on top of the previous patchset "[PATCH v7 0/2] perf cs-etm: Split Coresight decode by aux records".
James Clark (6): perf cs-etm: Refactor initialisation of kernel start address perf cs-etm: Split setup and timestamp search functions perf cs-etm: Only setup queues when they are modified perf cs-etm: Suppress printing when resetting decoder perf cs-etm: Use existing decoder instead of resetting it perf cs-etm: Pass unformatted flag to decoder
.../perf/util/cs-etm-decoder/cs-etm-decoder.c | 10 +- tools/perf/util/cs-etm.c | 174 ++++++++---------- 2 files changed, 84 insertions(+), 100 deletions(-)
The kernel start address is already cached in the machine struct once it is initialised, so storing it in the cs_etm struct is unnecessary.
It also depends on kernel maps being available to be initialised. Therefore cs_etm__setup_queues() isn't an appropriate place to call it because it could be called before processing starts. It would be better to initialise it at the point when it is needed, then we can be sure that all the necessary maps are available. Also by calling machine__kernel_start() multiple times it can be initialised at some point, even if it failed to initialise previously due to missing maps.
In a later commit cs_etm__setup_queues() will be moved which is the motivation for this change.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index bc1f64873c8f..4c69ef391f60 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -62,7 +62,6 @@ struct cs_etm_auxtrace { u64 instructions_sample_period; u64 instructions_id; u64 **metadata; - u64 kernel_start; unsigned int pmu_type; };
@@ -691,7 +690,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
machine = etmq->etm->machine;
- if (address >= etmq->etm->kernel_start) { + if (address >= machine__kernel_start(machine)) { if (machine__is_host(machine)) return PERF_RECORD_MISC_KERNEL; else @@ -901,9 +900,6 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) unsigned int i; int ret;
- if (!etm->kernel_start) - etm->kernel_start = machine__kernel_start(etm->machine); - for (i = 0; i < etm->queues.nr_queues; i++) { ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); if (ret)
This refactoring has some benefits: * Decoding is done to find the timestamp. If we want to print errors when maps aren't available, then doing it from cs_etm__setup_queue() may cause warnings to be printed. * The cs_etm__setup_queue() flow is shared between timed and timeless modes, so it needs to be guarded by an if statement which can now be removed. * Allows moving the setup queues function earlier. * If data was piped in, then not all queues would be filled so it wouldn't have worked properly anyway. Now it waits for flush so data in all queues will be available.
The motivation for this is to decouple setup functions with ones that involve decoding. That way we can move the setup function earlier when the formatted/unformatted trace information is available.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 41 ++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 4c69ef391f60..426e99c07ca9 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -809,29 +809,32 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, unsigned int queue_nr) { - int ret = 0; - unsigned int cs_queue_nr; - u8 trace_chan_id; - u64 cs_timestamp; struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq) - goto out; + return 0;
etmq = cs_etm__alloc_queue(etm);
- if (!etmq) { - ret = -ENOMEM; - goto out; - } + if (!etmq) + return -ENOMEM;
queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; etmq->offset = 0;
- if (etm->timeless_decoding) - goto out; + return 0; +} + +static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, + struct cs_etm_queue *etmq, + unsigned int queue_nr) +{ + int ret = 0; + unsigned int cs_queue_nr; + u8 trace_chan_id; + u64 cs_timestamp;
/* * We are under a CPU-wide trace scenario. As such we need to know @@ -2218,13 +2221,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) { int ret = 0; - unsigned int cs_queue_nr, queue_nr; + unsigned int cs_queue_nr, queue_nr, i; u8 trace_chan_id; u64 cs_timestamp; struct auxtrace_queue *queue; struct cs_etm_queue *etmq; struct cs_etm_traceid_queue *tidq;
+ /* + * Pre-populate the heap with one entry from each queue so that we can + * start processing in time order across all queues. + */ + for (i = 0; i < etm->queues.nr_queues; i++) { + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); + if (ret) + return ret; + } + while (1) { if (!etm->heap.heap_cnt) goto out;
Continually creating queues in cs_etm__process_event() is unnecessary. They only need to be created when a buffer for a new CPU or thread is encountered. This can be in two places, when building the queues in advance in cs_etm__process_auxtrace_info(), or in cs_etm__process_auxtrace_event() when data_queued is false and the index wasn't available (pipe mode).
This change will allow the 'formatted' decoder setting to applied when iterating over aux records in a later commit.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 54 +++++++++++----------------------------- 1 file changed, 14 insertions(+), 40 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 426e99c07ca9..2d07e52ffd3c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -96,7 +96,6 @@ struct cs_etm_queue { /* RB tree for quick conversion between traceID and metadata pointers */ static struct intlist *traceid_list;
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); @@ -564,7 +563,6 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { - int ret; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -574,11 +572,6 @@ static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL;
- ret = cs_etm__update_queues(etm); - - if (ret < 0) - return ret; - if (etm->timeless_decoding) return cs_etm__process_timeless_queues(etm, -1);
@@ -898,30 +891,6 @@ static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, return ret; }
-static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) -{ - unsigned int i; - int ret; - - for (i = 0; i < etm->queues.nr_queues; i++) { - ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); - if (ret) - return ret; - } - - return 0; -} - -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) -{ - if (etm->queues.new_data) { - etm->queues.new_data = false; - return cs_etm__setup_queues(etm); - } - - return 0; -} - static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) @@ -2395,7 +2364,6 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - int err = 0; u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2414,12 +2382,6 @@ static int cs_etm__process_event(struct perf_session *session, else sample_kernel_timestamp = 0;
- if (sample_kernel_timestamp || etm->timeless_decoding) { - err = cs_etm__update_queues(etm); - if (err) - return err; - } - /* * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because @@ -2476,6 +2438,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, int fd = perf_data__fd(session->data); bool is_pipe = perf_data__is_pipe(session->data); int err; + int idx = event->auxtrace.idx;
if (is_pipe) data_offset = 0; @@ -2490,6 +2453,11 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err;
+ err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx); + if (err) + return err; + if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { cs_etm__dump_event(etm, buffer); @@ -2732,6 +2700,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o struct perf_record_auxtrace *auxtrace_event; union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; + __u32 idx;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2793,8 +2762,13 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); - return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, - file_offset, NULL); + err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, + file_offset, NULL); + if (err) + return err; + + idx = auxtrace_event->idx; + return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx); }
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
The decoder is quite noisy when being reset. In a future commit, dump-raw-trace will use a code path that resets the decoder rather than creating a new one, so printing has to be suppressed to not flood the output.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3e1a05bc82cc..ed1f0326f859 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -35,6 +35,7 @@ struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); + bool suppress_printing; dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; @@ -74,9 +75,10 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) ocsd_datapath_resp_t dp_ret;
decoder->prev_return = OCSD_RESP_CONT; - + decoder->suppress_printing = true; dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); + decoder->suppress_printing = false; if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) return -1;
@@ -146,8 +148,10 @@ static void cs_etm_decoder__print_str_cb(const void *p_context, const char *msg, const int str_len) { - if (p_context && str_len) - ((struct cs_etm_decoder *)p_context)->packet_printer(msg); + const struct cs_etm_decoder *decoder = p_context; + + if (p_context && str_len && !decoder->suppress_printing) + decoder->packet_printer(msg); }
static int
When dumping trace, the decoder is continually deleted and recreated to decode each buffer. To support both formatted and unformatted trace in a later commit, the decoder will be configured in advance.
This commit removes the deletion of the decoder and allows the formatted/unformatted setting to persist.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2d07e52ffd3c..760050ea936d 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -508,14 +508,11 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, return ret; }
-static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, +static void cs_etm__dump_event(struct cs_etm_queue *etmq, struct auxtrace_buffer *buffer) { int ret; const char *color = PERF_COLOR_BLUE; - struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params; - struct cs_etm_decoder *decoder; size_t buffer_used = 0;
fprintf(stdout, "\n"); @@ -523,29 +520,11 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, ". ... CoreSight ETM Trace data: size %zu bytes\n", buffer->size);
- /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); - - if (!t_params) - return; - - if (cs_etm__init_trace_params(t_params, etm)) - goto out_free; - - /* Set decoder parameters to simply print the trace packets */ - if (cs_etm__init_decoder_params(&d_params, NULL, - CS_ETM_OPERATION_PRINT)) - goto out_free; - - decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - - if (!decoder) - goto out_free; do { size_t consumed;
ret = cs_etm_decoder__process_data_block( - decoder, buffer->offset, + etmq->decoder, buffer->offset, &((u8 *)buffer->data)[buffer_used], buffer->size - buffer_used, &consumed); if (ret) @@ -554,10 +533,7 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, buffer_used += consumed; } while (buffer_used < buffer->size);
- cs_etm_decoder__free(decoder); - -out_free: - zfree(&t_params); + cs_etm_decoder__reset(etmq->decoder); }
static int cs_etm__flush_events(struct perf_session *session, @@ -769,7 +745,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
/* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, - CS_ETM_OPERATION_DECODE)) + dump_trace ? CS_ETM_OPERATION_PRINT : + CS_ETM_OPERATION_DECODE)) goto out_free;
etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); @@ -2422,7 +2399,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm, for (i = 0; i < etm->queues.nr_queues; ++i) list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) if (buf->reference == event->reference) - cs_etm__dump_event(etm, buf); + cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); }
static int cs_etm__process_auxtrace_event(struct perf_session *session, @@ -2460,7 +2437,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { - cs_etm__dump_event(etm, buffer); + cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); auxtrace_buffer__put_data(buffer); } } else if (dump_trace)
The TRBE (Trace Buffer Extension) feature allows a separate trace buffer for each trace source, therefore the trace wouldn't need to be formatted. The driver was introduced in commit 3fbf7f011f24 ("coresight: sink: Add TRBE driver").
The formatted/unformatted mode is encoded in one of the flags of the AUX record. The first AUX record encountered for each event is used to determine the mode, and this will persist for the remaining trace that is either decoded or dumped.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 42 +++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 760050ea936d..62769a84a53f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -461,13 +461,14 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, }
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm) + struct cs_etm_auxtrace *etm, + int decoders_per_cpu) { int i; u32 etmidr; u64 architecture;
- for (i = 0; i < etm->num_cpu; i++) { + for (i = 0; i < decoders_per_cpu; i++) { architecture = etm->metadata[i][CS_ETM_MAGIC];
switch (architecture) { @@ -488,7 +489,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq, - enum cs_etm_decoder_operation mode) + enum cs_etm_decoder_operation mode, + bool formatted) { int ret = -EINVAL;
@@ -498,7 +500,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq; - d_params->formatted = true; + d_params->formatted = formatted; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true; @@ -720,11 +722,13 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, return len; }
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + bool formatted) { struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; + int decoders_per_cpu = formatted ? etm->num_cpu : 1;
etmq = zalloc(sizeof(*etmq)); if (!etmq) @@ -735,21 +739,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) goto out_free;
/* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + t_params = zalloc(sizeof(*t_params) * decoders_per_cpu);
if (!t_params) goto out_free;
- if (cs_etm__init_trace_params(t_params, etm)) + if (cs_etm__init_trace_params(t_params, etm, decoders_per_cpu)) goto out_free;
/* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, dump_trace ? CS_ETM_OPERATION_PRINT : - CS_ETM_OPERATION_DECODE)) + CS_ETM_OPERATION_DECODE, + formatted)) goto out_free;
- etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + etmq->decoder = cs_etm_decoder__new(decoders_per_cpu, &d_params, + t_params);
if (!etmq->decoder) goto out_free; @@ -777,14 +783,15 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, - unsigned int queue_nr) + unsigned int queue_nr, + bool formatted) { struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq) return 0;
- etmq = cs_etm__alloc_queue(etm); + etmq = cs_etm__alloc_queue(etm, formatted);
if (!etmq) return -ENOMEM; @@ -2430,8 +2437,14 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err;
+ /* + * Knowing if the trace is formatted or not requires a lookup of + * the aux record so only works in non-piped mode where data is + * queued in cs_etm__queue_aux_records(). Always assume + * formatted in piped mode (true). + */ err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], - idx); + idx, true); if (err) return err;
@@ -2678,6 +2691,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; __u32 idx; + bool formatted;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2745,7 +2759,9 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o return err;
idx = auxtrace_event->idx; - return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx); + formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); + return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx, formatted); }
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
On Tue, Jul 13, 2021 at 04:40:03PM +0100, James Clark wrote:
The kernel start address is already cached in the machine struct once it is initialised, so storing it in the cs_etm struct is unnecessary.
It also depends on kernel maps being available to be initialised. Therefore cs_etm__setup_queues() isn't an appropriate place to call it because it could be called before processing starts. It would be better to initialise it at the point when it is needed, then we can be sure that all the necessary maps are available. Also by calling machine__kernel_start() multiple times it can be initialised at some point, even if it failed to initialise previously due to missing maps.
In a later commit cs_etm__setup_queues() will be moved which is the motivation for this change.
Signed-off-by: James Clark james.clark@arm.com
Reviewed-by: Mathieu Poirier mathieu.poirier@linaro.org
tools/perf/util/cs-etm.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index bc1f64873c8f..4c69ef391f60 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -62,7 +62,6 @@ struct cs_etm_auxtrace { u64 instructions_sample_period; u64 instructions_id; u64 **metadata;
- u64 kernel_start; unsigned int pmu_type;
}; @@ -691,7 +690,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) machine = etmq->etm->machine;
- if (address >= etmq->etm->kernel_start) {
- if (address >= machine__kernel_start(machine)) { if (machine__is_host(machine)) return PERF_RECORD_MISC_KERNEL; else
@@ -901,9 +900,6 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) unsigned int i; int ret;
- if (!etm->kernel_start)
etm->kernel_start = machine__kernel_start(etm->machine);
- for (i = 0; i < etm->queues.nr_queues; i++) { ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); if (ret)
-- 2.28.0
On Tue, Jul 13, 2021 at 04:40:04PM +0100, James Clark wrote:
This refactoring has some benefits:
- Decoding is done to find the timestamp. If we want to print errors when maps aren't available, then doing it from cs_etm__setup_queue() may cause warnings to be printed.
- The cs_etm__setup_queue() flow is shared between timed and timeless modes, so it needs to be guarded by an if statement which can now be removed.
- Allows moving the setup queues function earlier.
- If data was piped in, then not all queues would be filled so it wouldn't have worked properly anyway. Now it waits for flush so data in all queues will be available.
The motivation for this is to decouple setup functions with ones that involve decoding. That way we can move the setup function earlier when the formatted/unformatted trace information is available.
Signed-off-by: James Clark james.clark@arm.com
tools/perf/util/cs-etm.c | 41 ++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-)
Reviewed-by: Mathieu Poirier mathieu.poirier@linaro.org
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 4c69ef391f60..426e99c07ca9 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -809,29 +809,32 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, unsigned int queue_nr) {
- int ret = 0;
- unsigned int cs_queue_nr;
- u8 trace_chan_id;
- u64 cs_timestamp; struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq)
goto out;
return 0;
etmq = cs_etm__alloc_queue(etm);
- if (!etmq) {
ret = -ENOMEM;
goto out;
- }
- if (!etmq)
return -ENOMEM;
queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; etmq->offset = 0;
- if (etm->timeless_decoding)
goto out;
- return 0;
+}
+static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
struct cs_etm_queue *etmq,
unsigned int queue_nr)
+{
- int ret = 0;
- unsigned int cs_queue_nr;
- u8 trace_chan_id;
- u64 cs_timestamp;
/* * We are under a CPU-wide trace scenario. As such we need to know @@ -2218,13 +2221,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) { int ret = 0;
- unsigned int cs_queue_nr, queue_nr;
- unsigned int cs_queue_nr, queue_nr, i; u8 trace_chan_id; u64 cs_timestamp; struct auxtrace_queue *queue; struct cs_etm_queue *etmq; struct cs_etm_traceid_queue *tidq;
- /*
* Pre-populate the heap with one entry from each queue so that we can
* start processing in time order across all queues.
*/
- for (i = 0; i < etm->queues.nr_queues; i++) {
etmq = etm->queues.queue_array[i].priv;
if (!etmq)
continue;
ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
if (ret)
return ret;
- }
- while (1) { if (!etm->heap.heap_cnt) goto out;
-- 2.28.0
On Tue, Jul 13, 2021 at 04:40:05PM +0100, James Clark wrote:
Continually creating queues in cs_etm__process_event() is unnecessary. They only need to be created when a buffer for a new CPU or thread is encountered. This can be in two places, when building the queues in advance in cs_etm__process_auxtrace_info(), or in cs_etm__process_auxtrace_event() when data_queued is false and the index wasn't available (pipe mode).
This change will allow the 'formatted' decoder setting to applied when iterating over aux records in a later commit.
Signed-off-by: James Clark james.clark@arm.com
tools/perf/util/cs-etm.c | 54 +++++++++++----------------------------- 1 file changed, 14 insertions(+), 40 deletions(-)
Reviewed-by: Mathieu Poirier mathieu.poirier@linaro.org
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 426e99c07ca9..2d07e52ffd3c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -96,7 +96,6 @@ struct cs_etm_queue { /* RB tree for quick conversion between traceID and metadata pointers */ static struct intlist *traceid_list; -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); @@ -564,7 +563,6 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) {
- int ret; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
@@ -574,11 +572,6 @@ static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL;
- ret = cs_etm__update_queues(etm);
- if (ret < 0)
return ret;
- if (etm->timeless_decoding) return cs_etm__process_timeless_queues(etm, -1);
@@ -898,30 +891,6 @@ static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, return ret; } -static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) -{
- unsigned int i;
- int ret;
- for (i = 0; i < etm->queues.nr_queues; i++) {
ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
if (ret)
return ret;
- }
- return 0;
-}
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) -{
- if (etm->queues.new_data) {
etm->queues.new_data = false;
return cs_etm__setup_queues(etm);
- }
- return 0;
-}
static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) @@ -2395,7 +2364,6 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) {
- int err = 0; u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace,
@@ -2414,12 +2382,6 @@ static int cs_etm__process_event(struct perf_session *session, else sample_kernel_timestamp = 0;
- if (sample_kernel_timestamp || etm->timeless_decoding) {
err = cs_etm__update_queues(etm);
if (err)
return err;
- }
- /*
- Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
- need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
@@ -2476,6 +2438,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, int fd = perf_data__fd(session->data); bool is_pipe = perf_data__is_pipe(session->data); int err;
int idx = event->auxtrace.idx;
if (is_pipe) data_offset = 0; @@ -2490,6 +2453,11 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err;
err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
idx);
if (err)
return err;
- if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { cs_etm__dump_event(etm, buffer);
@@ -2732,6 +2700,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o struct perf_record_auxtrace *auxtrace_event; union perf_event auxtrace_fragment; __u64 aux_offset, aux_size;
- __u32 idx;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2793,8 +2762,13 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
file_offset, NULL);
err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
file_offset, NULL);
if (err)
return err;
idx = auxtrace_event->idx;
}return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx);
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ -- 2.28.0
On Tue, Jul 13, 2021 at 04:40:06PM +0100, James Clark wrote:
The decoder is quite noisy when being reset. In a future commit, dump-raw-trace will use a code path that resets the decoder rather than creating a new one, so printing has to be suppressed to not flood the output.
Signed-off-by: James Clark james.clark@arm.com
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
Reviewed-by: Mathieu Poirier mathieu.poirier@linaro.org
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3e1a05bc82cc..ed1f0326f859 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -35,6 +35,7 @@ struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg);
- bool suppress_printing; dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return;
@@ -74,9 +75,10 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) ocsd_datapath_resp_t dp_ret; decoder->prev_return = OCSD_RESP_CONT;
- decoder->suppress_printing = true; dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL);
- decoder->suppress_printing = false; if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) return -1;
@@ -146,8 +148,10 @@ static void cs_etm_decoder__print_str_cb(const void *p_context, const char *msg, const int str_len) {
- if (p_context && str_len)
((struct cs_etm_decoder *)p_context)->packet_printer(msg);
- const struct cs_etm_decoder *decoder = p_context;
- if (p_context && str_len && !decoder->suppress_printing)
decoder->packet_printer(msg);
} static int -- 2.28.0
On Tue, Jul 13, 2021 at 04:40:07PM +0100, James Clark wrote:
When dumping trace, the decoder is continually deleted and recreated to decode each buffer. To support both formatted and unformatted trace in a later commit, the decoder will be configured in advance.
This commit removes the deletion of the decoder and allows the formatted/unformatted setting to persist.
Signed-off-by: James Clark james.clark@arm.com
Reviewed-by: Mathieu Poirier mathieu.poirier@linaro.org
tools/perf/util/cs-etm.c | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2d07e52ffd3c..760050ea936d 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -508,14 +508,11 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, return ret; } -static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, +static void cs_etm__dump_event(struct cs_etm_queue *etmq, struct auxtrace_buffer *buffer) { int ret; const char *color = PERF_COLOR_BLUE;
- struct cs_etm_decoder_params d_params;
- struct cs_etm_trace_params *t_params;
- struct cs_etm_decoder *decoder; size_t buffer_used = 0;
fprintf(stdout, "\n"); @@ -523,29 +520,11 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, ". ... CoreSight ETM Trace data: size %zu bytes\n", buffer->size);
- /* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
- if (!t_params)
return;
- if (cs_etm__init_trace_params(t_params, etm))
goto out_free;
- /* Set decoder parameters to simply print the trace packets */
- if (cs_etm__init_decoder_params(&d_params, NULL,
CS_ETM_OPERATION_PRINT))
goto out_free;
- decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
- if (!decoder)
do { size_t consumed;goto out_free;
ret = cs_etm_decoder__process_data_block(
decoder, buffer->offset,
if (ret)etmq->decoder, buffer->offset, &((u8 *)buffer->data)[buffer_used], buffer->size - buffer_used, &consumed);
@@ -554,10 +533,7 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, buffer_used += consumed; } while (buffer_used < buffer->size);
- cs_etm_decoder__free(decoder);
-out_free:
- zfree(&t_params);
- cs_etm_decoder__reset(etmq->decoder);
} static int cs_etm__flush_events(struct perf_session *session, @@ -769,7 +745,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) /* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq,
CS_ETM_OPERATION_DECODE))
dump_trace ? CS_ETM_OPERATION_PRINT :
goto out_free;CS_ETM_OPERATION_DECODE))
etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); @@ -2422,7 +2399,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm, for (i = 0; i < etm->queues.nr_queues; ++i) list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) if (buf->reference == event->reference)
cs_etm__dump_event(etm, buf);
cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
} static int cs_etm__process_auxtrace_event(struct perf_session *session, @@ -2460,7 +2437,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) {
cs_etm__dump_event(etm, buffer);
} else if (dump_trace)cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); auxtrace_buffer__put_data(buffer); }
-- 2.28.0
On Tue, Jul 13, 2021 at 04:40:08PM +0100, James Clark wrote:
The TRBE (Trace Buffer Extension) feature allows a separate trace buffer for each trace source, therefore the trace wouldn't need to be formatted. The driver was introduced in commit 3fbf7f011f24 ("coresight: sink: Add TRBE driver").
The formatted/unformatted mode is encoded in one of the flags of the AUX record. The first AUX record encountered for each event is used to determine the mode, and this will persist for the remaining trace that is either decoded or dumped.
Signed-off-by: James Clark james.clark@arm.com
tools/perf/util/cs-etm.c | 42 +++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 760050ea936d..62769a84a53f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -461,13 +461,14 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, } static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
struct cs_etm_auxtrace *etm)
struct cs_etm_auxtrace *etm,
int decoders_per_cpu)
{ int i; u32 etmidr; u64 architecture;
- for (i = 0; i < etm->num_cpu; i++) {
- for (i = 0; i < decoders_per_cpu; i++) { architecture = etm->metadata[i][CS_ETM_MAGIC];
switch (architecture) { @@ -488,7 +489,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq,
enum cs_etm_decoder_operation mode)
enum cs_etm_decoder_operation mode,
bool formatted)
{ int ret = -EINVAL; @@ -498,7 +500,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq;
- d_params->formatted = true;
- d_params->formatted = formatted; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true;
@@ -720,11 +722,13 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
bool formatted)
{ struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq;
- int decoders_per_cpu = formatted ? etm->num_cpu : 1;
etmq = zalloc(sizeof(*etmq)); if (!etmq) @@ -735,21 +739,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) goto out_free; /* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
- t_params = zalloc(sizeof(*t_params) * decoders_per_cpu);
if (!t_params) goto out_free;
- if (cs_etm__init_trace_params(t_params, etm))
- if (cs_etm__init_trace_params(t_params, etm, decoders_per_cpu)) goto out_free;
/* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, dump_trace ? CS_ETM_OPERATION_PRINT :
CS_ETM_OPERATION_DECODE))
CS_ETM_OPERATION_DECODE,
goto out_free;formatted))
- etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
- etmq->decoder = cs_etm_decoder__new(decoders_per_cpu, &d_params,
t_params);
if (!etmq->decoder) goto out_free; @@ -777,14 +783,15 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue,
unsigned int queue_nr)
unsigned int queue_nr,
bool formatted)
{ struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) return 0;
- etmq = cs_etm__alloc_queue(etm);
- etmq = cs_etm__alloc_queue(etm, formatted);
if (!etmq) return -ENOMEM; @@ -2430,8 +2437,14 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err;
/*
* Knowing if the trace is formatted or not requires a lookup of
* the aux record so only works in non-piped mode where data is
* queued in cs_etm__queue_aux_records(). Always assume
* formatted in piped mode (true).
err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],*/
idx);
if (err) return err;idx, true);
@@ -2678,6 +2691,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; __u32 idx;
- bool formatted;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2745,7 +2759,9 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o return err; idx = auxtrace_event->idx;
return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx);
formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
}idx, formatted);
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
I'm running out of time with this one. I'll continue tomorrow.
-- 2.28.0
On Tue, Jul 13, 2021 at 04:40:08PM +0100, James Clark wrote:
The TRBE (Trace Buffer Extension) feature allows a separate trace buffer for each trace source, therefore the trace wouldn't need to be formatted. The driver was introduced in commit 3fbf7f011f24 ("coresight: sink: Add TRBE driver").
The formatted/unformatted mode is encoded in one of the flags of the AUX record. The first AUX record encountered for each event is used to determine the mode, and this will persist for the remaining trace that is either decoded or dumped.
Signed-off-by: James Clark james.clark@arm.com
tools/perf/util/cs-etm.c | 42 +++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 760050ea936d..62769a84a53f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -461,13 +461,14 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, } static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
struct cs_etm_auxtrace *etm)
struct cs_etm_auxtrace *etm,
int decoders_per_cpu)
{ int i; u32 etmidr; u64 architecture;
- for (i = 0; i < etm->num_cpu; i++) {
- for (i = 0; i < decoders_per_cpu; i++) { architecture = etm->metadata[i][CS_ETM_MAGIC];
switch (architecture) { @@ -488,7 +489,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq,
enum cs_etm_decoder_operation mode)
enum cs_etm_decoder_operation mode,
bool formatted)
{ int ret = -EINVAL; @@ -498,7 +500,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq;
- d_params->formatted = true;
- d_params->formatted = formatted; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true;
@@ -720,11 +722,13 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
bool formatted)
{ struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq;
- int decoders_per_cpu = formatted ? etm->num_cpu : 1;
I really tripped on the name "decoders_per_cpu", to a point where I had to review the current code before looking at this patch. I find the "_per_cpu" part especially puzzling. In the end the variable determines the amount of decoders to instantiate for a specific queue...
Couldn't it be just "decoders"? Or maybe it just needs a little comment to disambiguate things?
Thanks, Mathieu
etmq = zalloc(sizeof(*etmq)); if (!etmq) @@ -735,21 +739,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) goto out_free; /* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
- t_params = zalloc(sizeof(*t_params) * decoders_per_cpu);
if (!t_params) goto out_free;
- if (cs_etm__init_trace_params(t_params, etm))
- if (cs_etm__init_trace_params(t_params, etm, decoders_per_cpu)) goto out_free;
/* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, dump_trace ? CS_ETM_OPERATION_PRINT :
CS_ETM_OPERATION_DECODE))
CS_ETM_OPERATION_DECODE,
goto out_free;formatted))
- etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
- etmq->decoder = cs_etm_decoder__new(decoders_per_cpu, &d_params,
t_params);
if (!etmq->decoder) goto out_free; @@ -777,14 +783,15 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue,
unsigned int queue_nr)
unsigned int queue_nr,
bool formatted)
{ struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) return 0;
- etmq = cs_etm__alloc_queue(etm);
- etmq = cs_etm__alloc_queue(etm, formatted);
if (!etmq) return -ENOMEM; @@ -2430,8 +2437,14 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err;
/*
* Knowing if the trace is formatted or not requires a lookup of
* the aux record so only works in non-piped mode where data is
* queued in cs_etm__queue_aux_records(). Always assume
* formatted in piped mode (true).
err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],*/
idx);
if (err) return err;idx, true);
@@ -2678,6 +2691,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; __u32 idx;
- bool formatted;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2745,7 +2759,9 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o return err; idx = auxtrace_event->idx;
return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx);
formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
}idx, formatted);
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ -- 2.28.0
On 20/07/2021 16:45, Mathieu Poirier wrote:
On Tue, Jul 13, 2021 at 04:40:08PM +0100, James Clark wrote:
The TRBE (Trace Buffer Extension) feature allows a separate trace buffer for each trace source, therefore the trace wouldn't need to be formatted. The driver was introduced in commit 3fbf7f011f24 ("coresight: sink: Add TRBE driver").
The formatted/unformatted mode is encoded in one of the flags of the AUX record. The first AUX record encountered for each event is used to determine the mode, and this will persist for the remaining trace that is either decoded or dumped.
Signed-off-by: James Clark james.clark@arm.com
tools/perf/util/cs-etm.c | 42 +++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 760050ea936d..62769a84a53f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -461,13 +461,14 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, } static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
struct cs_etm_auxtrace *etm)
struct cs_etm_auxtrace *etm,
int decoders_per_cpu)
{ int i; u32 etmidr; u64 architecture;
- for (i = 0; i < etm->num_cpu; i++) {
- for (i = 0; i < decoders_per_cpu; i++) { architecture = etm->metadata[i][CS_ETM_MAGIC];
switch (architecture) { @@ -488,7 +489,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq,
enum cs_etm_decoder_operation mode)
enum cs_etm_decoder_operation mode,
bool formatted)
{ int ret = -EINVAL; @@ -498,7 +500,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq;
- d_params->formatted = true;
- d_params->formatted = formatted; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true;
@@ -720,11 +722,13 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
bool formatted)
{ struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq;
- int decoders_per_cpu = formatted ? etm->num_cpu : 1;
I really tripped on the name "decoders_per_cpu", to a point where I had to review the current code before looking at this patch. I find the "_per_cpu" part especially puzzling. In the end the variable determines the amount of decoders to instantiate for a specific queue...
Couldn't it be just "decoders"? Or maybe it just needs a little comment to disambiguate things?
Yeah I think just decoders will be good if I add a comment explaining why there is a difference with formatted vs unformatted. I will re-submit with the change.
Thanks for the reviews.
James
Thanks, Mathieu
etmq = zalloc(sizeof(*etmq)); if (!etmq) @@ -735,21 +739,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) goto out_free; /* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
- t_params = zalloc(sizeof(*t_params) * decoders_per_cpu);
if (!t_params) goto out_free;
- if (cs_etm__init_trace_params(t_params, etm))
- if (cs_etm__init_trace_params(t_params, etm, decoders_per_cpu)) goto out_free;
/* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, dump_trace ? CS_ETM_OPERATION_PRINT :
CS_ETM_OPERATION_DECODE))
CS_ETM_OPERATION_DECODE,
goto out_free;formatted))
- etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
- etmq->decoder = cs_etm_decoder__new(decoders_per_cpu, &d_params,
t_params);
if (!etmq->decoder) goto out_free; @@ -777,14 +783,15 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue,
unsigned int queue_nr)
unsigned int queue_nr,
bool formatted)
{ struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) return 0;
- etmq = cs_etm__alloc_queue(etm);
- etmq = cs_etm__alloc_queue(etm, formatted);
if (!etmq) return -ENOMEM; @@ -2430,8 +2437,14 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err;
/*
* Knowing if the trace is formatted or not requires a lookup of
* the aux record so only works in non-piped mode where data is
* queued in cs_etm__queue_aux_records(). Always assume
* formatted in piped mode (true).
err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],*/
idx);
if (err) return err;idx, true);
@@ -2678,6 +2691,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; __u32 idx;
- bool formatted;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2745,7 +2759,9 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o return err; idx = auxtrace_event->idx;
return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx);
formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
}idx, formatted);
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ -- 2.28.0