On Mon, Nov 13, 2017 at 03:11:42PM +0000, Robert Walker wrote:
This patch fixes the branch instruction samples produced from CoreSight ETM trace:
- The branch stack should only include taken branches and exclude non-taken branches
- Use the number of instructions in each range reported by the decoder when determining the period between samples
- Correct the address reported for each sample
Signed-off-by: Robert Walker <robert.walker@arm.com>
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 +- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 2 +- tools/perf/util/cs-etm.c | 87 +++++++++++++------------ 3 files changed, 47 insertions(+), 46 deletions(-)
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 8abf10d..bcbda7d 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -166,13 +166,13 @@ static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, switch (elem->last_i_type) { case OCSD_INSTR_BR: case OCSD_INSTR_BR_INDIRECT:
decoder->packet_buffer[et].last_instruction_is_branch = true;
break; case OCSD_INSTR_OTHER: case OCSD_INSTR_ISB: case OCSD_INSTR_DSB_DMB: default:decoder->packet_buffer[et].last_instruction_is_taken_branch = elem->last_instr_exec;
decoder->packet_buffer[et].last_instruction_is_branch = false;
break; } decoder->packet_buffer[et].exc = false;decoder->packet_buffer[et].last_instruction_is_taken_branch = false;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 4fd5a9b..c889078 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -49,7 +49,7 @@ struct cs_etm_packet { enum cs_etm_sample_type sample_type; uint64_t start_addr; uint64_t end_addr;
- bool last_instruction_is_branch;
- bool last_instruction_is_taken_branch;
Wouldn't "last_instruction_is_branch_taken" be better?
bool exc; bool exc_ret; int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 24783b7..e9e00a8 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -89,6 +89,7 @@ struct cs_etm_queue { u64 offset; bool eot; bool kernel_mapped;
u64 period_insns;
Please add a comment to describe what "insns" means.
/* * Stack of branches in reverse chronological order that will be copied * to a last branch event sample. @@ -379,6 +380,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params; struct cs_etm_queue *etmq;
size_t szp = sizeof(struct cs_etm_packet); size_t i;
etmq = zalloc(sizeof(struct cs_etm_queue)); @@ -396,9 +398,11 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, etmq->chain = NULL; }
- etmq->packet = zalloc(szp);
- if (!etmq->packet)
if (etm->synth_opts.last_branch) { size_t sz = sizeof(struct branch_stack);goto out_free;
size_t szp = sizeof(struct cs_etm_packet);
sz += etm->synth_opts.last_branch_sz * sizeof(struct branch_entry); @@ -411,9 +415,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, etmq->prev_packet = zalloc(szp); if (!etmq->prev_packet) goto out_free;
etmq->packet = zalloc(szp);
if (!etmq->packet)
}goto out_free;
etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); @@ -457,6 +458,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, etmq->offset = 0; etmq->eot = false;
etmq->period_insns = 0;
return etmq; @@ -741,29 +743,25 @@ static struct cs_etm_cache_entry *cs_etm__cache_lookup(struct dso *dso, } #endif -static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq) +static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, u64 addr, u64 period, u32 cpu) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; union perf_event *event = etmq->event_buf; struct perf_sample sample = {.ip = 0,};
- struct cs_etm_packet *packet = etmq->packet;
uint64_t start_addr = packet->start_addr;
uint64_t end_addr = packet->end_addr;
event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header);
sample.ip = start_addr;
sample.ip = addr; sample.pid = etmq->pid; sample.tid = etmq->tid;
sample.addr = end_addr; sample.id = etmq->etm->instructions_id; sample.stream_id = etmq->etm->instructions_id;
sample.period = (end_addr - start_addr) >> 2;
sample.cpu = packet->cpu;
sample.period = period;
sample.cpumode = event->header.misc;sample.cpu = cpu; sample.flags = 0; // etmq->flags; sample.insn_len = 1; // etmq->insn_len;
@@ -964,6 +962,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, int *cpu) struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; int err = cs_etm_decoder__get_packet(etmq->decoder, etmq->packet);
- u64 insns;
/* * If there is no sample, it returns err = -1, no real error. @@ -971,29 +970,38 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, int *cpu) if (err) return err;
- if (etm->synth_opts.last_branch) {
*cpu = etmq->packet->cpu;
- /* A64 instructions are always 4 bytes *
What about other processors?
- insns = (etmq->packet->end_addr - etmq->packet->start_addr) >> 2;
Please add comments when using hard-coded numbers.
- etmq->period_insns += insns;
/*
* FIXME: as the trace sampling does not work for now, (for
* example perf inject --itrace=i100us will not generate events
* every 100 micro-seconds), generate a last branch event after
* having decoded last_branch_sz branch samples. This condition
* should be rewritten as "if reached sampling period".
*/
if (etmq->last_branch_rb->nr ==
etm->synth_opts.last_branch_sz) {
err = cs_etm__synth_instruction_sample(etmq);
if (err)
return err;
}
/*
* Record a branch when the last instruction in PREV_PACKET is a
* branch.
*/
if (etmq->prev_packet->last_instruction_is_branch)
cs_etm__update_last_branch_rb(etmq);
- *cpu = etmq->packet->cpu;
- /*
* Record a branch when the last instruction in PREV_PACKET is a
* branch.
*/
- if (etm->synth_opts.last_branch &&
etmq->prev_packet->last_instruction_is_taken_branch)
cs_etm__update_last_branch_rb(etmq);
- if (etmq->period_insns >= etm->instructions_sample_period) {
/* Emit instruction sample periodically */
/* TODO: allow period to be defined in cycles and clock time */
u64 insns_over = etmq->period_insns - etm->instructions_sample_period;
u64 addr = etmq->packet->end_addr - 4 - (insns_over<<2);
1) Please add comments when using hard-coded numbers.
2) s/insns_over<<2/insns_over << 2
err = cs_etm__synth_instruction_sample(etmq,
addr,
etm->instructions_sample_period,
etmq->packet->cpu);
if (err)
return err;
etmq->period_insns = insns_over;
- }
- if (etm->synth_opts.last_branch) { /*
- Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
- the next incoming packet.
@@ -1001,12 +1009,9 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, int *cpu) tmp = etmq->packet; etmq->packet = etmq->prev_packet; etmq->prev_packet = tmp;
- } else if (etmq->packet->sample_type & CS_ETM_RANGE) {
err = cs_etm__synth_instruction_sample(etmq);
if (err)
}return err;
return 0;
- return 0;
}
Since cs_etm__sample() is used for trace decoding should we also apply this patch to the master branch?
I wanted to test your implementation but (8bbf7ee5d6cb perf inject: correct recording of branch address and destination) breaks normal CS trace decoding. Would you mind having a look?
Be mindful that I rebased the autoFDO tree to v4.14, so SHA's will have changed.
Thanks, Mathieu
static int cs_etm__run_decoder(struct cs_etm_queue *etmq, u64 *timestamp) @@ -1053,11 +1058,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq, u64 *timestamp) struct branch_stack *bs = etmq->last_branch_rb; struct branch_entry *be = &bs->entries[etmq->last_branch_pos];
etmq->packet->cpu = cpu;
etmq->packet->start_addr = be->to;
etmq->packet->end_addr = be->to + 4;
err = cs_etm__synth_instruction_sample(etmq);
if (err) return err; }err = cs_etm__synth_instruction_sample(etmq, be->to, 1, cpu);
-- 1.9.1
CoreSight mailing list CoreSight@lists.linaro.org https://lists.linaro.org/mailman/listinfo/coresight