The following patches fix opening perf.data files that have timestamps (ordered data), aren't recorded with --per-thread, and that have discontinuous data in a single aux trace buffer.
I have some open questions: * Can cs_etm__update_queues() be removed from cs_etm__flush_events()? * Why does the second commit start making some files process correctly? * Is it ok to wait for the flush to start processing? Previously processing happened when the first aux record was delivered to cs_etm__process_event(). * Do the aux records need to be saved into a new buffer or can they be pulled from elsewhere?
I also have some further changes to make so that per-thread mode works, where the cpu field of the sample is set to -1. And when there are no timestamps cs_etm__process_timeless_queues() is used, which is a completely different code path.
Thanks James
James Clark (5): perf cs-etm: Split up etm queue setup function perf cs-etm: Only search timestamp in current sample's queue. perf cs-etm: Save aux records in each etm queue perf cs-etm: don't process queues until cs_etm__flush_events perf cs-etm: split decode by aux records.
tools/perf/util/cs-etm.c | 200 +++++++++++++++++++++++---------------- 1 file changed, 121 insertions(+), 79 deletions(-)
Refactor the function into separate allocation and timestamp search parts. Later the timestamp search will be done multiple times.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 60 +++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 29 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a2a369e2fbb6..440001cdd3b8 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -765,33 +765,12 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) return NULL; }
-static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, - struct auxtrace_queue *queue, - unsigned int queue_nr) +static int cs_etm__seach_first_timestamp(struct cs_etm_queue *etmq) { int ret = 0; + u64 timestamp; unsigned int cs_queue_nr; u8 trace_chan_id; - u64 timestamp; - struct cs_etm_queue *etmq = queue->priv; - - if (list_empty(&queue->head) || etmq) - goto out; - - etmq = cs_etm__alloc_queue(etm); - - if (!etmq) { - ret = -ENOMEM; - goto out; - } - - queue->priv = etmq; - etmq->etm = etm; - etmq->queue_nr = queue_nr; - etmq->offset = 0; - - if (etm->timeless_decoding) - goto out;
/* * We are under a CPU-wide trace scenario. As such we need to know @@ -808,7 +787,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, */ ret = cs_etm__get_data_block(etmq); if (ret <= 0) - goto out; + return ret;
/* * Run decoder on the trace block. The decoder will stop when @@ -817,7 +796,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, */ ret = cs_etm__decode_data_block(etmq); if (ret) - goto out; + return ret;
/* * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all @@ -849,10 +828,33 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * Note that packets decoded above are still in the traceID's packet * queue and will be processed in cs_etm__process_queues(). */ - cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); -out: - return ret; + cs_queue_nr = TO_CS_QUEUE_NR(etmq->queue_nr, trace_chan_id); + return auxtrace_heap__add(&etmq->etm->heap, cs_queue_nr, timestamp); +} + +static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct cs_etm_queue *etmq = queue->priv; + + if (list_empty(&queue->head) || etmq) + return 0; + + etmq = cs_etm__alloc_queue(etm); + + if (!etmq) + return -ENOMEM; + + queue->priv = etmq; + etmq->etm = etm; + etmq->queue_nr = queue_nr; + etmq->offset = 0; + + if (etm->timeless_decoding) + return 0; + else + return cs_etm__seach_first_timestamp(etmq); }
static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
Change initial timestamp search to only operate on the queue related to the current event. In a later change the bounds of the aux record will also be used to reset the decoder and the record is only relevant to a single queue.
This doesn't work in --per-thread mode, where cpu == -1, so a further change will be required.
Also this change makes some files that had coresight data but didn't synthesise any events start working and generating events. I'm not sure of the reason for that. I'd expect this change to only affect the ordering of events.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 440001cdd3b8..9ebe43d60d1e 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -97,7 +97,7 @@ struct cs_etm_queue { /* RB tree for quick conversion between traceID and metadata pointers */ static struct intlist *traceid_list;
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); @@ -524,7 +524,7 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { - int ret; + //int ret; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -534,10 +534,12 @@ static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL;
- ret = cs_etm__update_queues(etm); + // TODO: does this need to be here? It is already called in cs_etm__process_event + // when the aux records are available. + //ret = cs_etm__update_queues(etm);
- if (ret < 0) - return ret; + //if (ret < 0) + // return ret;
if (etm->timeless_decoding) return cs_etm__process_timeless_queues(etm, -1); @@ -851,10 +853,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, etmq->queue_nr = queue_nr; etmq->offset = 0;
- if (etm->timeless_decoding) - return 0; - else - return cs_etm__seach_first_timestamp(etmq); + return 0; }
static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) @@ -874,14 +873,20 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) return 0; }
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu) { + int ret; if (etm->queues.new_data) { etm->queues.new_data = false; - return cs_etm__setup_queues(etm); + ret = cs_etm__setup_queues(etm); + if (ret) + return ret; }
- return 0; + if (!etm->timeless_decoding) + return cs_etm__seach_first_timestamp(etm->queues.queue_array[cpu].priv); + else + return 0; }
static inline @@ -2358,8 +2363,9 @@ static int cs_etm__process_event(struct perf_session *session, else timestamp = 0;
- if (timestamp || etm->timeless_decoding) { - err = cs_etm__update_queues(etm); + if ((timestamp || etm->timeless_decoding) + && event->header.type == PERF_RECORD_AUX) { + err = cs_etm__update_queues(etm, sample->cpu); if (err) return err; }
The aux records will be used to set the bounds of decoding in a later commit. In the future we may also want to use the flags of each record to control decoding.
Do these need to be saved in their entirety, or can pointers to each record safely be saved instead for later access?
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 9ebe43d60d1e..efe418a7c82e 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -92,12 +92,16 @@ struct cs_etm_queue { /* Conversion between traceID and index in traceid_queues array */ struct intlist *traceid_queues_list; struct cs_etm_traceid_queue **traceid_queues; + int aux_record_list_len; + int aux_record_list_idx; + struct perf_record_aux *aux_record_list; };
/* RB tree for quick conversion between traceID and metadata pointers */ static struct intlist *traceid_list;
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu); +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu, + struct perf_record_aux *aux_record); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); @@ -593,6 +597,7 @@ static void cs_etm__free_queue(void *priv)
cs_etm_decoder__free(etmq->decoder); cs_etm__free_traceid_queues(etmq); + free(etmq->aux_record_list); free(etmq); }
@@ -767,13 +772,22 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) return NULL; }
-static int cs_etm__seach_first_timestamp(struct cs_etm_queue *etmq) +static int cs_etm__seach_first_timestamp(struct cs_etm_queue *etmq, + struct perf_record_aux *aux_record) { int ret = 0; u64 timestamp; unsigned int cs_queue_nr; u8 trace_chan_id;
+ etmq->aux_record_list = reallocarray(etmq->aux_record_list, + etmq->aux_record_list_len+1, + sizeof(*etmq->aux_record_list)); + if (!etmq->aux_record_list) + return -ENOMEM; + + etmq->aux_record_list[etmq->aux_record_list_len++] = *aux_record; + /* * We are under a CPU-wide trace scenario. As such we need to know * when the code that generated the traces started to execute so that @@ -873,7 +887,7 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) return 0; }
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu) +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu, struct perf_record_aux *aux) { int ret; if (etm->queues.new_data) { @@ -884,7 +898,7 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu) }
if (!etm->timeless_decoding) - return cs_etm__seach_first_timestamp(etm->queues.queue_array[cpu].priv); + return cs_etm__seach_first_timestamp(etm->queues.queue_array[cpu].priv, aux); else return 0; } @@ -2365,7 +2379,7 @@ static int cs_etm__process_event(struct perf_session *session,
if ((timestamp || etm->timeless_decoding) && event->header.type == PERF_RECORD_AUX) { - err = cs_etm__update_queues(etm, sample->cpu); + err = cs_etm__update_queues(etm, sample->cpu, &event->aux); if (err) return err; }
To make sure processing happens in the correct order, queue processing shouldn't start until every aux queue has had its first timestamp found.
Now that we're only searching for timestamps within each aux record, we need to wait until all aux records are delivered before starting the processing.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index efe418a7c82e..0aaa1f6d2822 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2394,10 +2394,6 @@ static int cs_etm__process_event(struct perf_session *session, else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) return cs_etm__process_switch_cpu_wide(etm, event);
- if (!etm->timeless_decoding && - event->header.type == PERF_RECORD_AUX) - return cs_etm__process_queues(etm); - return 0; }
The trace data between aux records is not continuous, so the decoder must be reset between each record to ensure that parsing happens correctly and without any early exits.
Signed-off-by: James Clark james.clark@arm.com --- tools/perf/util/cs-etm.c | 108 ++++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 42 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 0aaa1f6d2822..b0f464a50e2f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -95,6 +95,7 @@ struct cs_etm_queue { int aux_record_list_len; int aux_record_list_idx; struct perf_record_aux *aux_record_list; + bool timestamp_found; };
/* RB tree for quick conversion between traceID and metadata pointers */ @@ -788,6 +789,9 @@ static int cs_etm__seach_first_timestamp(struct cs_etm_queue *etmq,
etmq->aux_record_list[etmq->aux_record_list_len++] = *aux_record;
+ if (etmq->timestamp_found) + return 0; + /* * We are under a CPU-wide trace scenario. As such we need to know * when the code that generated the traces started to execute so that @@ -796,56 +800,60 @@ static int cs_etm__seach_first_timestamp(struct cs_etm_queue *etmq, * timestamp. The timestamp is then added to the auxtrace min heap * in order to know what nibble (of all the etmqs) to decode first. */ - while (1) { - /* - * Fetch an aux_buffer from this etmq. Bail if no more - * blocks or an error has been encountered. - */ - ret = cs_etm__get_data_block(etmq); - if (ret <= 0) - return ret; - - /* - * Run decoder on the trace block. The decoder will stop when - * encountering a timestamp, a full packet queue or the end of - * trace for that block. - */ - ret = cs_etm__decode_data_block(etmq); + /* + * Fetch an aux_buffer from this etmq. Bail if no more + * blocks or an error has been encountered. + */ + if (etmq->aux_record_list[etmq->aux_record_list_idx].aux_size <= 0) { + etmq->aux_record_list_idx++; + ret = cs_etm_decoder__reset(etmq->decoder); if (ret) return ret; + } + ret = cs_etm__get_data_block(etmq); + if (ret <= 0) + return ret;
- /* - * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all - * the timestamp calculation for us. - */ - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + /* + * Run decoder on the trace block. The decoder will stop when + * encountering a timestamp, a full packet queue or the end of + * trace for that block. + */ + ret = cs_etm__decode_data_block(etmq); + if (ret) + return ret;
- /* We found a timestamp, no need to continue. */ - if (timestamp) - break; + /* + * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all + * the timestamp calculation for us. + */ + timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+ /* We found a timestamp, no need to continue. */ + if (timestamp) { /* - * We didn't find a timestamp so empty all the traceid packet - * queues before looking for another timestamp packet, either - * in the current data block or a new one. Packets that were - * just decoded are useless since no timestamp has been - * associated with them. As such simply discard them. + * We have a timestamp. Add it to the min heap to reflect when + * instructions conveyed by the range packets of this traceID queue + * started to execute. Once the same has been done for all the traceID + * queues of each etmq, redenring and decoding can start in + * chronological order. + * + * Note that packets decoded above are still in the traceID's packet + * queue and will be processed in cs_etm__process_queues(). */ - cs_etm__clear_all_packet_queues(etmq); + etmq->timestamp_found = true; + cs_queue_nr = TO_CS_QUEUE_NR(etmq->queue_nr, trace_chan_id); + return auxtrace_heap__add(&etmq->etm->heap, cs_queue_nr, timestamp); } - /* - * We have a timestamp. Add it to the min heap to reflect when - * instructions conveyed by the range packets of this traceID queue - * started to execute. Once the same has been done for all the traceID - * queues of each etmq, redenring and decoding can start in - * chronological order. - * - * Note that packets decoded above are still in the traceID's packet - * queue and will be processed in cs_etm__process_queues(). + * We didn't find a timestamp so empty all the traceid packet + * queues before looking for another timestamp packet, either + * in the current data block or a new one. Packets that were + * just decoded are useless since no timestamp has been + * associated with them. As such simply discard them. */ - cs_queue_nr = TO_CS_QUEUE_NR(etmq->queue_nr, trace_chan_id); - return auxtrace_heap__add(&etmq->etm->heap, cs_queue_nr, timestamp); + cs_etm__clear_all_packet_queues(etmq); + return 0; }
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, @@ -2012,6 +2020,15 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) { int ret = 0; size_t processed = 0; + u64 decode_size; + + if (etmq->aux_record_list_idx >= etmq->aux_record_list_len || + etmq->aux_record_list[etmq->aux_record_list_idx].aux_size > etmq->buf_len) { + // Assume that aux records always equally divide up the aux buffer + // so aux_size should never exceed the remaining buffer to decode. + ret = -1; + goto out; + }
/* * Packets are decoded and added to the decoder's packet queue @@ -2020,10 +2037,11 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) * operations that stop processing are a timestamp packet or a full * decoder buffer queue. */ + decode_size = etmq->aux_record_list[etmq->aux_record_list_idx].aux_size; ret = cs_etm_decoder__process_data_block(etmq->decoder, etmq->offset, &etmq->buf[etmq->buf_used], - etmq->buf_len, + decode_size, &processed); if (ret) goto out; @@ -2031,7 +2049,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) etmq->offset += processed; etmq->buf_used += processed; etmq->buf_len -= processed; - + etmq->aux_record_list[etmq->aux_record_list_idx].aux_size -= processed; out: return ret; } @@ -2247,6 +2265,12 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) * if need be. */ refetch: + if (etmq->aux_record_list[etmq->aux_record_list_idx].aux_size <= 0) { + etmq->aux_record_list_idx++; + ret = cs_etm_decoder__reset(etmq->decoder); + if (ret) + return ret; + } ret = cs_etm__get_data_block(etmq); if (ret < 0) goto out;