This patch fixes the branch instruction samples synthesized from CoreSight
ETM trace:

 - The branch stack should only include taken branches; non-taken
   conditional branches are now excluded
 - Use the number of instructions in each range reported by the decoder
   when determining the period between samples
 - Correct the address reported for each sample
Signed-off-by: Robert Walker <robert.walker@arm.com>
---
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.c |  4 +-
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.h |  2 +-
 tools/perf/util/cs-etm.c                        | 87 +++++++++++++------------
 3 files changed, 47 insertions(+), 46 deletions(-)
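
[Not for the commit message] As a review aid, below is a minimal standalone
model of the new instruction-count accounting in cs_etm__sample(). The ranges
and the period are made-up values; the real code takes its ranges from the
OpenCSD decoder rather than a static table.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define INSN_SIZE	4	/* A64 instructions are always 4 bytes */

struct range { uint64_t start_addr, end_addr; };

int main(void)
{
	/* Hypothetical executed ranges, one per CS_ETM_RANGE packet */
	struct range ranges[] = {
		{ 0x400000, 0x400028 },	/* 10 instructions */
		{ 0x400100, 0x400110 },	/*  4 instructions */
		{ 0x400200, 0x400230 },	/* 12 instructions */
	};
	uint64_t period = 8;		/* etm->instructions_sample_period */
	uint64_t period_insns = 0;	/* etmq->period_insns */
	unsigned int i;

	for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
		uint64_t insns = (ranges[i].end_addr -
				  ranges[i].start_addr) / INSN_SIZE;

		period_insns += insns;
		if (period_insns >= period) {
			/* Instructions executed beyond the period boundary */
			uint64_t insns_over = period_insns - period;
			/*
			 * Step back from the last instruction of the range
			 * to the instruction that completed the period.
			 */
			uint64_t addr = ranges[i].end_addr - INSN_SIZE -
					insns_over * INSN_SIZE;

			printf("sample: ip=%#" PRIx64 " period=%" PRIu64 "\n",
			       addr, period);
			/* Carry the excess into the next period */
			period_insns = insns_over;
		}
	}
	return 0;
}

With a period of 8 this prints ip=0x40001c (the 8th instruction of the first
range) and ip=0x400204 (the 2nd instruction of the third range), matching the
addr = end_addr - 4 - (insns_over << 2) computation in the patch. Like the
patch, the model emits at most one sample per range, so an oversized range
carries its excess count forward into the next period.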
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 8abf10d..bcbda7d 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -166,13 +166,13 @@ static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
 	switch (elem->last_i_type) {
 	case OCSD_INSTR_BR:
 	case OCSD_INSTR_BR_INDIRECT:
-		decoder->packet_buffer[et].last_instruction_is_branch = true;
+		decoder->packet_buffer[et].last_instruction_is_taken_branch = elem->last_instr_exec;
 		break;
 	case OCSD_INSTR_OTHER:
 	case OCSD_INSTR_ISB:
 	case OCSD_INSTR_DSB_DMB:
 	default:
-		decoder->packet_buffer[et].last_instruction_is_branch = false;
+		decoder->packet_buffer[et].last_instruction_is_taken_branch = false;
 		break;
 	}
 	decoder->packet_buffer[et].exc = false;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 4fd5a9b..c889078 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -49,7 +49,7 @@ struct cs_etm_packet {
 	enum cs_etm_sample_type sample_type;
 	uint64_t start_addr;
 	uint64_t end_addr;
-	bool last_instruction_is_branch;
+	bool last_instruction_is_taken_branch;
 	bool exc;
 	bool exc_ret;
 	int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 24783b7..e9e00a8 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -89,6 +89,7 @@ struct cs_etm_queue {
 	u64 offset;
 	bool eot;
 	bool kernel_mapped;
+	u64 period_insns;
 	/*
 	 * Stack of branches in reverse chronological order that will be copied
	 * to a last branch event sample.
@@ -379,6 +380,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
 	struct cs_etm_decoder_params d_params;
 	struct cs_etm_trace_params *t_params;
 	struct cs_etm_queue *etmq;
+	size_t szp = sizeof(struct cs_etm_packet);
 	size_t i;

 	etmq = zalloc(sizeof(struct cs_etm_queue));
@@ -396,9 +398,11 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
 		etmq->chain = NULL;
 	}

+	etmq->packet = zalloc(szp);
+	if (!etmq->packet)
+		goto out_free;
 	if (etm->synth_opts.last_branch) {
 		size_t sz = sizeof(struct branch_stack);
-		size_t szp = sizeof(struct cs_etm_packet);

 		sz += etm->synth_opts.last_branch_sz *
 		      sizeof(struct branch_entry);
@@ -411,9 +415,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
 		etmq->prev_packet = zalloc(szp);
 		if (!etmq->prev_packet)
 			goto out_free;
-		etmq->packet = zalloc(szp);
-		if (!etmq->packet)
-			goto out_free;
 	}

 	etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
@@ -457,6 +458,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,

 	etmq->offset = 0;
 	etmq->eot = false;
+	etmq->period_insns = 0;

 	return etmq;

@@ -741,29 +743,25 @@ static struct cs_etm_cache_entry *cs_etm__cache_lookup(struct dso *dso,
 }
 #endif

-static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq)
+static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, u64 addr, u64 period, u32 cpu)
 {
 	int ret = 0;
 	struct cs_etm_auxtrace *etm = etmq->etm;
 	union perf_event *event = etmq->event_buf;
 	struct perf_sample sample = {.ip = 0,};
-	struct cs_etm_packet *packet = etmq->packet;
-	uint64_t start_addr = packet->start_addr;
-	uint64_t end_addr = packet->end_addr;

 	event->sample.header.type = PERF_RECORD_SAMPLE;
 	event->sample.header.misc = PERF_RECORD_MISC_USER;
 	event->sample.header.size = sizeof(struct perf_event_header);

-	sample.ip = start_addr;
+	sample.ip = addr;
 	sample.pid = etmq->pid;
 	sample.tid = etmq->tid;
-	sample.addr = end_addr;
 	sample.id = etmq->etm->instructions_id;
 	sample.stream_id = etmq->etm->instructions_id;
-	sample.period = (end_addr - start_addr) >> 2;
-	sample.cpu = packet->cpu;
+	sample.period = period;
+	sample.cpu = cpu;
 	sample.flags = 0; // etmq->flags;
 	sample.insn_len = 1; // etmq->insn_len;
 	sample.cpumode = event->header.misc;

@@ -964,6 +962,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, int *cpu)
 	struct cs_etm_auxtrace *etm = etmq->etm;
 	struct cs_etm_packet *tmp;
 	int err = cs_etm_decoder__get_packet(etmq->decoder, etmq->packet);
+	u64 insns;

 	/*
 	 * If there is no sample, it returns err = -1, no real error.
@@ -971,29 +970,38 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, int *cpu)
 	if (err)
 		return err;

-	if (etm->synth_opts.last_branch) {
-		*cpu = etmq->packet->cpu;
+	/* A64 instructions are always 4 bytes */
+	insns = (etmq->packet->end_addr - etmq->packet->start_addr) >> 2;
+	etmq->period_insns += insns;

-		/*
-		 * FIXME: as the trace sampling does not work for now, (for
-		 * example perf inject --itrace=i100us will not generate events
-		 * every 100 micro-seconds), generate a last branch event after
-		 * having decoded last_branch_sz branch samples. This condition
-		 * should be rewritten as "if reached sampling period".
-		 */
-		if (etmq->last_branch_rb->nr ==
-		    etm->synth_opts.last_branch_sz) {
-			err = cs_etm__synth_instruction_sample(etmq);
-			if (err)
-				return err;
-		}
-
-		/*
-		 * Record a branch when the last instruction in PREV_PACKET is a
-		 * branch.
-		 */
-		if (etmq->prev_packet->last_instruction_is_branch)
-			cs_etm__update_last_branch_rb(etmq);
+	*cpu = etmq->packet->cpu;
+
+	/*
+	 * Record a branch when the last instruction in PREV_PACKET is a
+	 * branch.
+	 */
+	if (etm->synth_opts.last_branch &&
+	    etmq->prev_packet->last_instruction_is_taken_branch)
+		cs_etm__update_last_branch_rb(etmq);
+
+	if (etmq->period_insns >= etm->instructions_sample_period) {
+		/* Emit instruction sample periodically */
+		/* TODO: allow period to be defined in cycles and clock time */
+
+		u64 insns_over = etmq->period_insns - etm->instructions_sample_period;
+		u64 addr = etmq->packet->end_addr - 4 - (insns_over << 2);
+
+		err = cs_etm__synth_instruction_sample(etmq,
+						       addr,
+						       etm->instructions_sample_period,
+						       etmq->packet->cpu);
+		if (err)
+			return err;
+
+		etmq->period_insns = insns_over;
+	}
+
+	if (etm->synth_opts.last_branch) {
 		/*
 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
 		 * the next incoming packet.
@@ -1001,12 +1009,9 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, int *cpu)
 		tmp = etmq->packet;
 		etmq->packet = etmq->prev_packet;
 		etmq->prev_packet = tmp;
-	} else if (etmq->packet->sample_type & CS_ETM_RANGE) {
-		err = cs_etm__synth_instruction_sample(etmq);
-		if (err)
-			return err;
 	}
-	return 0;
+
+	return 0;
 }

 static int cs_etm__run_decoder(struct cs_etm_queue *etmq, u64 *timestamp)
@@ -1053,11 +1058,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq, u64 *timestamp)
 			struct branch_stack *bs = etmq->last_branch_rb;
 			struct branch_entry *be = &bs->entries[etmq->last_branch_pos];

-			etmq->packet->cpu = cpu;
-			etmq->packet->start_addr = be->to;
-			etmq->packet->end_addr = be->to + 4;
-
-			err = cs_etm__synth_instruction_sample(etmq);
+			err = cs_etm__synth_instruction_sample(etmq, be->to, 1, cpu);
 			if (err)
 				return err;
 		}
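
[Not for the commit message] A toy model of the taken-branch change, with the
same caveat: the struct below is illustrative only, and in the real decoder
the taken/not-taken information is OpenCSD's last_instr_exec flag on the
generic trace element.

#include <stdbool.h>
#include <stdio.h>

struct packet {
	bool last_instr_is_branch;	/* range ends in a branch instruction */
	bool last_instr_exec;		/* ...and that branch was executed */
};

/*
 * Previously the first flag alone decided, so a conditional branch that
 * failed its condition still pushed a bogus entry onto the branch stack.
 * The patch folds both conditions into last_instruction_is_taken_branch.
 */
static bool record_in_branch_stack(const struct packet *prev)
{
	return prev->last_instr_is_branch && prev->last_instr_exec;
}

int main(void)
{
	struct packet taken = { true, true };
	struct packet not_taken = { true, false };

	printf("taken branch recorded: %d\n",
	       record_in_branch_stack(&taken));		/* prints 1 */
	printf("not-taken branch recorded: %d\n",
	       record_in_branch_stack(&not_taken));	/* prints 0 */
	return 0;
}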