Accurate instruction sampling for T32 code requires counting the instructions in each executed range by examining every instruction, because T32 instructions can be either 2 or 4 bytes long and the count therefore cannot be derived from the size of the address range alone. With OpenCSD 0.9.0 and later, this counting is performed by the OpenCSD library while it decodes the trace. When building against an older version of OpenCSD, this patch makes perf itself count the instructions in each range emitted by the decoder library, which is slower than having the library do it during decode.
Signed-off-by: Robert Walker <robert.walker@arm.com> --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 64 +++++++++++++++++++++---- 1 file changed, 56 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 260f3b3a..446033e 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -314,29 +314,77 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
#ifdef HAVE_CSTRACE_INSTR_INFO
-static int cs_etm_decoder__instr_count(const ocsd_generic_trace_elem *elem) +static int cs_etm_decoder__instr_count(struct cs_etm_decoder *decoder __maybe_unused, + const ocsd_generic_trace_elem *elem) { return elem->num_instr_range; }
-static int cs_etm_decoder__last_instr_size(const ocsd_generic_trace_elem *elem) +static int cs_etm_decoder__last_instr_size(struct cs_etm_decoder *decoder __maybe_unused, + const ocsd_generic_trace_elem *elem) { return elem->last_instr_sz; }
#else
-static int cs_etm_decoder__instr_count(const ocsd_generic_trace_elem *elem) +static inline +int cs_etm_decoder__t32_instr_size(struct cs_etm_decoder *decoder, + u64 addr) +{ + u8 instrBytes[2]; + + decoder->mem_access(decoder->data, + addr, + ARRAY_SIZE(instrBytes), + instrBytes); + + /* T32 instruction size is indicated by bits[15:11] of the first + * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 + * denote a 32-bit instruction. + */ + return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; +} + +static int cs_etm_decoder__instr_count(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem) { - /* Assume a 4-byte instruction size - will be wrong for T32 */ WARN_ONCE(elem->isa == ocsd_isa_thumb2, - "Instruction counts not available for T32. Please upgrade to OpenCSD >= 0.9.0\n"); + "Using slow method for T32 instruction counts. Please upgrade to OpenCSD >= 0.9.0\n"); + + if (elem->isa == ocsd_isa_thumb2) { + /* Count each instruction for T32 */ + u64 addr = elem->st_addr; + u64 count = 0; + + while (addr < elem->en_addr) { + addr += cs_etm_decoder__t32_instr_size(decoder, addr); + count++; + } + return count; + } + return (elem->en_addr - elem->st_addr) / 4; }
static -int cs_etm_decoder__last_instr_size(const ocsd_generic_trace_elem *elem __maybe_unused) +int cs_etm_decoder__last_instr_size(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem) { + if (elem->isa == ocsd_isa_thumb2) { + /* + * T32 instructions can be either 2 or 4 bytes + */ + if (elem->en_addr - elem->st_addr == 2) + /* Only one 2 byte instruction in packet*/ + return 2; + else if (cs_etm_decoder__t32_instr_size(decoder, elem->en_addr - 4) == 4) + return 4; + else + return 2; + } + + /* Otherwise a 4 byte instruction size (A32/A64) */ return 4; }
@@ -377,7 +425,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
packet->start_addr = elem->st_addr; packet->end_addr = elem->en_addr; - packet->instr_count = cs_etm_decoder__instr_count(elem); + packet->instr_count = cs_etm_decoder__instr_count(decoder, elem);
switch (elem->last_i_type) { case OCSD_INSTR_BR: @@ -392,7 +440,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, break; }
- packet->last_instr_size = cs_etm_decoder__last_instr_size(elem); + packet->last_instr_size = cs_etm_decoder__last_instr_size(decoder, elem);
return ret; }