Add support for CPU-wide trace scenarios by correlating range packets with timestamp packets. That way range packets received on different CPUs can be processed and synthesized in chronological order, using a min heap keyed on each queue's next timestamp.
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
---
 tools/perf/util/cs-etm.c | 161 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 153 insertions(+), 8 deletions(-)
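
For reviewers: the ordering scheme used below seeds a min heap with the timestamp of each queue's next decoded block, pops the queue with the smallest timestamp, processes its packets and re-inserts the queue with its following timestamp. A minimal, self-contained sketch of that idea (toy code, not perf internals; names such as toy_queue and heap_push are purely illustrative):

/*
 * Toy model of min-heap based chronological processing across per-CPU
 * queues: seed the heap with each queue's first timestamp, always pop
 * the smallest entry, "process" that queue, then push its next block.
 */
#include <stdio.h>
#include <stdint.h>

#define NR_QUEUES 3

struct toy_queue {
	const uint64_t *ts;	/* timestamps of successive blocks */
	int nr, next;
};

struct heap_item { uint64_t ts; int queue_nr; };

static struct heap_item heap[NR_QUEUES];
static int heap_cnt;

static void heap_push(uint64_t ts, int queue_nr)
{
	int i = heap_cnt++;

	/* sift-up so the smallest timestamp stays at heap[0] */
	while (i && heap[(i - 1) / 2].ts > ts) {
		heap[i] = heap[(i - 1) / 2];
		i = (i - 1) / 2;
	}
	heap[i] = (struct heap_item){ .ts = ts, .queue_nr = queue_nr };
}

static struct heap_item heap_pop(void)
{
	struct heap_item top = heap[0], last = heap[--heap_cnt];
	int i = 0;

	/* sift-down the last element into the hole left at the root */
	while (1) {
		int child = 2 * i + 1;

		if (child >= heap_cnt)
			break;
		if (child + 1 < heap_cnt && heap[child + 1].ts < heap[child].ts)
			child++;
		if (last.ts <= heap[child].ts)
			break;
		heap[i] = heap[child];
		i = child;
	}
	heap[i] = last;
	return top;
}

int main(void)
{
	/* per-CPU timestamps, deliberately interleaved across queues */
	static const uint64_t cpu0[] = { 100, 400, 700 };
	static const uint64_t cpu1[] = { 200, 300 };
	static const uint64_t cpu2[] = { 150, 600 };
	struct toy_queue queues[NR_QUEUES] = {
		{ cpu0, 3, 0 }, { cpu1, 2, 0 }, { cpu2, 2, 0 },
	};
	int i;

	/* Seed the heap with each queue's first timestamp. */
	for (i = 0; i < NR_QUEUES; i++)
		heap_push(queues[i].ts[queues[i].next++], i);

	/* Drain in chronological order, refilling while blocks remain. */
	while (heap_cnt) {
		struct heap_item top = heap_pop();
		struct toy_queue *q = &queues[top.queue_nr];

		printf("process queue %d at time %llu\n",
		       top.queue_nr, (unsigned long long)top.ts);
		if (q->next < q->nr)
			heap_push(q->ts[q->next++], top.queue_nr);
	}
	return 0;
}

Built stand-alone, this prints the per-queue timestamps in strictly increasing order, which is the property cs_etm__process_queues() needs when queues map to different CPUs.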
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index f75942acfdc3..91d50023a590 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -89,8 +89,12 @@ struct cs_etm_queue {
 };
 
 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
 					    pid_t tid, u64 time_);
+static int cs_etm__get_trace(struct cs_etm_queue *etmq);
+static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
 
 static void cs_etm__packet_dump(const char *pkt_string)
 {
@@ -253,15 +257,16 @@ static int cs_etm__flush_events(struct perf_session *session,
 	if (!tool->ordered_events)
 		return -EINVAL;
 
-	if (!etm->timeless_decoding)
-		return -EINVAL;
-
 	ret = cs_etm__update_queues(etm);
 
 	if (ret < 0)
 		return ret;
 
-	return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1);
+	if (etm->timeless_decoding)
+		return cs_etm__process_timeless_queues(etm, -1,
+						       MAX_TIMESTAMP - 1);
+
+	return cs_etm__process_queues(etm);
 }
 
 static void cs_etm__free_queue(void *priv)
@@ -465,6 +470,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 			       unsigned int queue_nr)
 {
 	int ret = 0, cpu = queue->cpu;
+	u64 timestamp;
 	struct cs_etm_queue *etmq = queue->priv;
 
 	if (list_empty(&queue->head) || etmq)
@@ -486,6 +492,66 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 	etmq->offset = 0;
 	etmq->period_instructions = 0;
 
+	if (etm->timeless_decoding)
+		goto out;
+
+	/*
+	 * We are under a CPU-wide trace scenario.  As such we need to know
+	 * when the code that generated the traces started to execute so that
+	 * it can be correlated with execution on other CPUs.  So we get a
+	 * handle on the beginning of the traces and decode until we find a
+	 * timestamp.  Subtracted from that timestamp is the number of
+	 * instructions that were executed in the range packet(s) that have
+	 * been reported, yielding a fairly good estimate of when the trace
+	 * started.
+	 */
+
+	while (1) {
+		/*
+		 * Fetch an aux_buffer from this etmq.  Bail if no more
+		 * blocks or an error has been encountered.
+		 */
+		ret = cs_etm__get_data_block(etmq);
+		if (ret <= 0)
+			goto out;
+
+		/*
+		 * Run the decoder on the trace block.  The decoder stops when
+		 * it encounters a timestamp or the end of trace for the block.
+		 */
+		ret = cs_etm__decode_data_block(etmq);
+		if (ret)
+			goto out;
+
+		/* The decoder code does all the timestamp calculation for us */
+		timestamp = cs_etm_decoder__get_timestamp(etmq->decoder);
+
+		/* We found a timestamp, no need to continue. */
+		if (timestamp)
+			break;
+
+		/*
+		 * We didn't find a timestamp, so empty the decoder packet
+		 * queue before fetching another data block.  Packets that
+		 * were decoded are useless since no timestamp has been
+		 * associated with them.
+		 */
+		cs_etm_decoder__clear_buffer(etmq->decoder);
+	}
+
+
+	/*
+	 * Add to the min heap the time at which execution of traces in the
+	 * first range packet started.  Once the same has been done for each
+	 * etmq, rendering and decoding can start in chronological order.
+	 *
+	 * Note that decoded packets are still in the decoder's packet queue
+	 * and will be processed in cs_etm__process_queues().
+	 */
+	ret = auxtrace_heap__add(&etm->heap, queue_nr, timestamp);
+	if (ret)
+		goto out;
+
 out:
 	return ret;
 }
@@ -1246,6 +1312,83 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
 	return 0;
 }
 
+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
+{
+	int ret = 0;
+	unsigned int queue_nr;
+	u64 timestamp;
+	struct auxtrace_queue *queue;
+	struct cs_etm_queue *etmq;
+
+	while (1) {
+		if (!etm->heap.heap_cnt)
+			goto out;
+
+		/* Take the entry at the top of the min heap */
+		queue_nr = etm->heap.heap_array[0].queue_nr;
+		queue = &etm->queues.queue_array[queue_nr];
+		etmq = queue->priv;
+
+		auxtrace_heap__pop(&etm->heap);
+
+		/*
+		 * Packets associated with this timestamp are already in
+		 * the decoder queue, so process them.
+		 */
+		ret = cs_etm__process_decoder_queue(etmq);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * Packets for this timestamp have been processed, time to
+		 * move on to the next timestamp, fetching a new aux_buffer
+		 * if need be.
+		 */
+		ret = cs_etm__get_data_block(etmq);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * No more blocks to process in this queue, simply move on
+		 * with the other queues.
+		 */
+		if (!ret)
+			continue;
+
+		ret = cs_etm__decode_data_block(etmq);
+		if (ret)
+			goto out;
+
+		timestamp = cs_etm_decoder__get_timestamp(etmq->decoder);
+
+		if (!timestamp) {
+			/*
+			 * Looks like there are no more traces to process...
+			 * Empty the decoder queue.
+			 */
+			ret = cs_etm__process_decoder_queue(etmq);
+
+			/*
+			 * Generate an instruction sample with the remaining
+			 * branchstack entries.
+			 */
+			cs_etm__flush(etmq);
+			continue;
+		}
+
+		/*
+		 * Add to the min heap the timestamp for packets that have
+		 * just been decoded.  They will be processed and synthesized
+		 * during the next call to cs_etm__process_decoder_queue() for
+		 * this queue.
+		 */
+		ret = auxtrace_heap__add(&etm->heap, queue_nr, timestamp);
+	}
+
+out:
+	return ret;
+}
+
 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
 					union perf_event *event)
 {
@@ -1328,9 +1471,6 @@ static int cs_etm__process_event(struct perf_session *session,
 		return -EINVAL;
 	}
 
-	if (!etm->timeless_decoding)
-		return -EINVAL;
-
 	if (sample->time && (sample->time != (u64) -1))
 		timestamp = sample->time;
 	else
@@ -1342,11 +1482,16 @@ static int cs_etm__process_event(struct perf_session *session,
 			return err;
 	}
 
-	if (event->header.type == PERF_RECORD_EXIT)
+	if (etm->timeless_decoding &&
+	    event->header.type == PERF_RECORD_EXIT)
 		return cs_etm__process_timeless_queues(etm, event->fork.tid,
 						       sample->time);
 
+	if (!etm->timeless_decoding &&
+	    event->header.type == PERF_RECORD_AUX)
+		return cs_etm__process_queues(etm);
+
 	if (event->header.type == PERF_RECORD_ITRACE_START)
 		return cs_etm__process_itrace_start(etm, event);
 	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
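
For reviewers: the comment added to cs_etm__setup_queue() estimates when tracing started by subtracting the instructions already executed in the leading range packet(s) from the first timestamp found. A rough, self-contained sketch of that estimate (toy code, not the decoder implementation; the one-instruction-per-timestamp-unit assumption and the names below are illustrative only):

#include <stdio.h>
#include <stdint.h>

/* A decoded range packet, reduced to the only field that matters here. */
struct range_pkt {
	uint64_t instr_count;	/* instructions covered by the range */
};

/*
 * Estimate the time at which tracing started: take the first timestamp
 * seen and walk back by the instructions executed in the range packets
 * decoded before it (assuming one instruction per timestamp unit).
 */
static uint64_t estimate_trace_start(uint64_t first_timestamp,
				     const struct range_pkt *ranges,
				     int nr_ranges)
{
	uint64_t executed = 0;
	int i;

	for (i = 0; i < nr_ranges; i++)
		executed += ranges[i].instr_count;

	/* Guard against wrapping below zero. */
	return executed < first_timestamp ? first_timestamp - executed : 0;
}

int main(void)
{
	/* Range packets decoded before the first timestamp packet. */
	static const struct range_pkt ranges[] = { { 120 }, { 64 }, { 16 } };
	uint64_t start = estimate_trace_start(100000, ranges, 3);

	printf("estimated trace start: %llu\n", (unsigned long long)start);
	return 0;
}

The result is the value fed to auxtrace_heap__add() for each queue, so queues begin decoding in the order their traces actually started executing.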