On Mon, 1 Oct 2018 at 02:19, Robert Walker robert.walker@arm.com wrote:
On 29/09/18 09:35, leo.yan@linaro.org wrote:
On Thu, Sep 27, 2018 at 03:03:03PM +0100, Mike Leach wrote:
Hi Rob, On Thu, 27 Sep 2018 at 11:24, Robert Walker robert.walker@arm.com wrote:
This patch adds support for generating instruction samples from trace of AArch32 programs using the A32 and T32 instruction sets.
T32 has variable 2 or 4 byte instruction size, so the conversion between addresses and instruction counts requires extra information from the trace decoder, requiring version 0.9.0 of OpenCSD. A check for the new version member has been added to the feature check for OpenCSD. Where only the older version of OpenCSD is available, the previous behaviour of assuming 4 byte instruction size is used.
Signed-off-by: Robert Walker robert.walker@arm.com
tools/build/Makefile.feature | 3 +- tools/build/feature/Makefile | 4 ++ tools/build/feature/test-libopencsd-numinstr.c | 15 ++++++ tools/perf/Makefile.config | 3 ++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 58 ++++++++++++++++++++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++ tools/perf/util/cs-etm.c | 71 +++++++++++-------------- 7 files changed, 123 insertions(+), 41 deletions(-) create mode 100644 tools/build/feature/test-libopencsd-numinstr.c
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index f216b2f..5e8d108 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -68,7 +68,8 @@ FEATURE_TESTS_BASIC := \ sched_getcpu \ sdt \ setns \
libopencsd
libopencsd \
libopencsd-numinstr
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0516259..2cb7ef6 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -56,6 +56,7 @@ FILES= \ test-sched_getcpu.bin \ test-setns.bin \ test-libopencsd.bin \
test-libopencsd-numinstr.bin \ test-clang.bin \ test-llvm.bin \ test-llvm-version.bin
@@ -114,6 +115,9 @@ $(OUTPUT)test-libopencsd.bin: $(BUILD) # -lopencsd_c_api -lopencsd provided by # $(FEATURE_CHECK_LDFLAGS-libopencsd)
+$(OUTPUT)test-libopencsd-numinstr.bin:
$(BUILD) -lopencsd_c_api -lopencsd
- DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
Per my comments previously
- if no opencsd lib is found, then we don't build the feature in - as
before - report lib not found etc.
- if opencsd, but wrong version - don't build feature in, but output a
version info message - e.g. need 0.9.x or later.
diff --git a/tools/build/feature/test-libopencsd-numinstr.c b/tools/build/feature/test-libopencsd-numinstr.c new file mode 100644 index 0000000..606de62 --- /dev/null +++ b/tools/build/feature/test-libopencsd-numinstr.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <opencsd/c_api/opencsd_c_api.h>
+int main(void) +{
/*
* Requires ocsd_generic_trace_elem.num_instr_range introduced in
* OpenCSD 0.9.0
*/
ocsd_generic_trace_elem elem;
(void)elem.num_instr_range;
(void)ocsd_get_version();
return 0;
+} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f6d1a03..b0ba3ca 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -383,6 +383,9 @@ ifndef NO_CORESIGHT endif endif endif
ifeq ($(feature-libopencsd-numinstr), 1)
CFLAGS += -DHAVE_CSTRACE_INSTR_INFO
endif endif
ifndef NO_LIBELF
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 938def6..260f3b3a 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -17,6 +17,7 @@ #include "cs-etm-decoder.h" #include "intlist.h" #include "util.h" +#include "asm/bug.h"
#define MAX_BUFFER 1024
@@ -263,9 +264,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->tail = 0; decoder->packet_count = 0; for (i = 0; i < MAX_BUFFER; i++) {
decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
decoder->packet_buffer[i].instr_count = 0; decoder->packet_buffer[i].last_instr_taken_branch = false;
decoder->packet_buffer[i].last_instr_size = 0; decoder->packet_buffer[i].exc = false; decoder->packet_buffer[i].exc_ret = false; decoder->packet_buffer[i].cpu = INT_MIN;
@@ -294,11 +298,13 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_count++;
decoder->packet_buffer[et].sample_type = sample_type;
decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; decoder->packet_buffer[et].cpu = *((int *)inode->priv); decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
decoder->packet_buffer[et].instr_count = 0;
Is the initialization of decoder->packet_buffer[et].last_instr_size to 0 missing here?
And also last_instr_taken_branch - these fields are only used in cs-etm.c if the packet type is CS_ETM_RANGE - so they will be set by cs_etm_decoder__buffer_range(). However, I will add initializers for these.
if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT;
@@ -306,6 +312,36 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_CONT; }
+#ifdef HAVE_CSTRACE_INSTR_INFO
+static int cs_etm_decoder__instr_count(const ocsd_generic_trace_elem *elem) +{
return elem->num_instr_range;
+}
+static int cs_etm_decoder__last_instr_size(const ocsd_generic_trace_elem *elem) +{
return elem->last_instr_sz;
+}
+#else
+static int cs_etm_decoder__instr_count(const ocsd_generic_trace_elem *elem) +{
/* Assume a 4-byte instruction size - will be wrong for T32 */
WARN_ONCE(elem->isa == ocsd_isa_thumb2,
"Instruction counts not available for T32. Please upgrade to OpenCSD >= 0.9.0\n");
In this case, should the program report a failure and exit directly?
Why not also check for the A32 instruction case here? Were A32 instructions already supported in version 0.8.x and before? I understand that A32 and A64 both have a 4-byte instruction width, but I don't know whether the old OpenCSD lib supports decoding both of them.
The old version of OpenCSD did support A32 and T32 - samples from A32 trace will work with the current version of perf as the assumption of 4 byte instruction sizes for A64 also applies to A32.
return (elem->en_addr - elem->st_addr) / 4;
+}
+static +int cs_etm_decoder__last_instr_size(const ocsd_generic_trace_elem *elem __maybe_unused) +{
return 4;
+}
+#endif
alternate not needed if you decide not to build against insufficient version.
Just curious: is the API ocsd_get_version() provided by all OpenCSD versions?
It seems to me that this patch is meant to keep backwards compatibility with v0.8.x. I think we can still support building perf with an old version of OpenCSD: at runtime the perf cs_etm decoder can get the OpenCSD version number by using the API ocsd_get_version() and check whether the OpenCSD version is sufficient for decoding the current trace data; if not, it reports an error and exits the program directly.
ocsd_get_version() is provided by all versions of OpenCSD. The features added in 0.9.x to provide the instruction counts add a new member to the struct that describes each range of executed instructions - so the new code doesn't compile if the old library is installed. Calling ocsd_get_version() could be used to check for the unlikely case that perf has been compiled against 0.9.x, but is now being run against an older version - is this worth doing?
I don't think so - if someone is going to do this kind of thing they deserve to see the SW explode on them.
static ocsd_datapath_resp_t cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, const ocsd_generic_trace_elem *elem, @@ -321,8 +357,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
packet = &decoder->packet_buffer[decoder->tail];
switch (elem->isa) {
case ocsd_isa_aarch64:
packet->isa = CS_ETM_ISA_A64;
break;
case ocsd_isa_arm:
packet->isa = CS_ETM_ISA_A32;
break;
case ocsd_isa_thumb2:
packet->isa = CS_ETM_ISA_T32;
break;
case ocsd_isa_tee:
case ocsd_isa_jazelle:
case ocsd_isa_custom:
case ocsd_isa_unknown:
default:
packet->isa = CS_ETM_ISA_UNKNOWN;
}
packet->start_addr = elem->st_addr; packet->end_addr = elem->en_addr;
packet->instr_count = cs_etm_decoder__instr_count(elem);
switch (elem->last_i_type) { case OCSD_INSTR_BR: case OCSD_INSTR_BR_INDIRECT:
@@ -336,6 +392,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, break; }
packet->last_instr_size = cs_etm_decoder__last_instr_size(elem);
}return ret;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 612b575..9351bd1 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -28,11 +28,21 @@ enum cs_etm_sample_type { CS_ETM_TRACE_ON = 1 << 1, };
+enum cs_etm_isa {
CS_ETM_ISA_UNKNOWN,
CS_ETM_ISA_A64,
CS_ETM_ISA_A32,
CS_ETM_ISA_T32,
+};
- struct cs_etm_packet { enum cs_etm_sample_type sample_type;
enum cs_etm_isa isa; u64 start_addr; u64 end_addr;
u32 instr_count; u8 last_instr_taken_branch;
u8 last_instr_size; u8 exc; u8 exc_ret; int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2ae6402..fcaa73f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -31,14 +31,6 @@
#define MAX_TIMESTAMP (~0ULL)
-/*
- A64 instructions are always 4 bytes
- Only A64 is supported, so can use this constant for converting between
- addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
- struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues;
@@ -492,21 +484,16 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) etmq->last_branch_rb->nr = 0; }
-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) -{
/* Returns 0 for the CS_ETM_TRACE_ON packet */
if (packet->sample_type == CS_ETM_TRACE_ON)
return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
u64 addr) {
u8 instrBytes[2];
/*
* The packet records the execution range with an exclusive end address
*
* A64 instructions are constant size, so the last executed
* instruction is A64_INSTR_SIZE before the end address
* Will need to do instruction level decode for T32 instructions as
* they can be variable size (not yet supported).
cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
/* T32 instruction size is indicated by bits[15:11] of the first
* 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
* denote a 32-bit instruction. */
return packet->end_addr - A64_INSTR_SIZE;
return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
Could we use macros to replace 0xF8/0xE8 to make this more readable?
I don't think adding a macro will make this any more readable - there's a detailed comment about the encoding above.
I agree with Robert. In this case all the information is there to understand what is happening.
}
static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) @@ -518,27 +505,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) return packet->start_addr; }
-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) +static inline +u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) {
/*
* Only A64 instructions are currently supported, so can get
* instruction count by dividing.
* Will need to do instruction level decode for T32 instructions as
* they can be variable size (not yet supported).
*/
return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
/* Returns 0 for the CS_ETM_TRACE_ON packet */
if (packet->sample_type == CS_ETM_TRACE_ON)
return 0;
}return packet->end_addr - packet->last_instr_size;
-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet, +static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
{const struct cs_etm_packet *packet, u64 offset)
/*
* Only A64 instructions are currently supported, so can get
* instruction address by muliplying.
* Will need to do instruction level decode for T32 instructions as
* they can be variable size (not yet supported).
*/
return packet->start_addr + offset * A64_INSTR_SIZE;
if (packet->isa == CS_ETM_ISA_T32) {
u64 addr = packet->start_addr;
while (offset > 0) {
addr += cs_etm__t32_instr_size(etmq, addr);
offset--;
Should be: offset -= cs_etm__t32_instr_size(etmq, addr)?
No - offset is the number of the instruction we want to find the address of - e.g. the decoder will tell us that the block has 100 instructions and we want to generate a sample on the 57th, so we step 57 instructions through the block.
}
return addr;
}
/* Assume a 4 byte instruction size (A32/A64) */
return packet->start_addr + offset * 4;
}
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -867,9 +859,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; int ret;
u64 instrs_executed;
u64 instrs_executed = etmq->packet->instr_count;
instrs_executed = cs_etm__instr_count(etmq->packet); etmq->period_instructions += instrs_executed; /*
@@ -899,7 +890,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) * executed, but PC has not advanced to next instruction) */ u64 offset = (instrs_executed - instrs_over - 1);
u64 addr = cs_etm__instr_addr(etmq->packet, offset);
u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
If you are passing etmq, do you also need to pass etmq->packet?
Mike
ret = cs_etm__synth_instruction_sample( etmq, addr, etm->instructions_sample_period);
-- 2.7.4
CoreSight mailing list CoreSight@lists.linaro.org https://lists.linaro.org/mailman/listinfo/coresight
-- Mike Leach Principal Engineer, ARM Ltd. Manchester Design Centre. UK _______________________________________________ CoreSight mailing list CoreSight@lists.linaro.org https://lists.linaro.org/mailman/listinfo/coresight
CoreSight mailing list CoreSight@lists.linaro.org https://lists.linaro.org/mailman/listinfo/coresight