Hi Mathieu,
On 01/10/18 19:00, Mathieu Poirier wrote:
Hi Robert and thanks for this second revision.
On Thu, 27 Sep 2018 at 04:24, Robert Walker robert.walker@arm.com wrote:
This patch adds support for generating instruction samples from trace of AArch32 programs using the A32 and T32 instruction sets.
T32 has variable 2 or 4 byte instruction size, so the conversion between addresses and instruction counts requires extra information from the trace decoder, requiring version 0.9.0 of OpenCSD. A check for the new version member has been added to the feature check for OpenCSD. Where only the older version of OpenCSD is available, the previous behaviour of assuming 4 byte instruction size is used.
Signed-off-by: Robert Walker robert.walker@arm.com
tools/build/Makefile.feature | 3 +- tools/build/feature/Makefile | 4 ++ tools/build/feature/test-libopencsd-numinstr.c | 15 ++++++ tools/perf/Makefile.config | 3 ++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 58 ++++++++++++++++++++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++ tools/perf/util/cs-etm.c | 71 +++++++++++-------------- 7 files changed, 123 insertions(+), 41 deletions(-) create mode 100644 tools/build/feature/test-libopencsd-numinstr.c
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index f216b2f..5e8d108 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -68,7 +68,8 @@ FEATURE_TESTS_BASIC := \ sched_getcpu \ sdt \ setns \
libopencsd
libopencsd \
libopencsd-numinstr
I understand what you're doing here but it won't fly with the upstream guy. The new #define in the openCSD library needs to come before this patchset. That way functionality that needs the newest version of the library can be conditionally compiled without this extra step and a new HAVE_CSTRACE_INSTR_INFO.
So just to be clear: * We do a new release of OpenCSD (0.9.3?) that exports some macros describing its version. You previously suggested including of the ocsd_if_version.h header in the OpenCSD install - we can't do a '#include "opencsd/ocsd_if_version.h"' directly from the perf code without breaking compilation when using the older library versions without the header (0.9.2 or older), so maybe opencsd/c_api/opencsd_c_api.h would be a good place to put the macros (or include ocsd_if_version.h from). * We don't need to make any changes to the feature detection in perf * util/cs-etm-decoder/cs-etm-decoder.c can use the version macros to determine whether to set HAVE_CSTRACE_INSTR_INFO within that file and hence whether to use the extra info available from the decoder.
This sounds OK to me - we get rid of the extra overhead of feature checks in the perf makefiles. The only downside is that it requires a new version of OpenCSD, so anyone with 0.9.x will have to update to enable the feature even though their version already has the necessary support. But if it makes adding new features easier in future then it's worth doing.
Also, please run your patches through checkpatch.pl before sending them out.
Sorry - I think there were a few long lines left to fix - will be fixed in the next version.
Regards
Rob
Regards, Mathieu
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0516259..2cb7ef6 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -56,6 +56,7 @@ FILES= \ test-sched_getcpu.bin \ test-setns.bin \ test-libopencsd.bin \
test-libopencsd-numinstr.bin \ test-clang.bin \ test-llvm.bin \ test-llvm-version.bin
@@ -114,6 +115,9 @@ $(OUTPUT)test-libopencsd.bin: $(BUILD) # -lopencsd_c_api -lopencsd provided by # $(FEATURE_CHECK_LDFLAGS-libopencsd)
+$(OUTPUT)test-libopencsd-numinstr.bin:
$(BUILD) -lopencsd_c_api -lopencsd
- DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
diff --git a/tools/build/feature/test-libopencsd-numinstr.c b/tools/build/feature/test-libopencsd-numinstr.c new file mode 100644 index 0000000..606de62 --- /dev/null +++ b/tools/build/feature/test-libopencsd-numinstr.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <opencsd/c_api/opencsd_c_api.h>
+int main(void) +{
/*
* Requires ocsd_generic_trace_elem.num_instr_range introduced in
* OpenCSD 0.9.0
*/
ocsd_generic_trace_elem elem;
(void)elem.num_instr_range;
(void)ocsd_get_version();
return 0;
+} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f6d1a03..b0ba3ca 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -383,6 +383,9 @@ ifndef NO_CORESIGHT endif endif endif
ifeq ($(feature-libopencsd-numinstr), 1)
CFLAGS += -DHAVE_CSTRACE_INSTR_INFO
endif endif
ifndef NO_LIBELF
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 938def6..260f3b3a 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -17,6 +17,7 @@ #include "cs-etm-decoder.h" #include "intlist.h" #include "util.h" +#include "asm/bug.h"
#define MAX_BUFFER 1024
@@ -263,9 +264,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->tail = 0; decoder->packet_count = 0; for (i = 0; i < MAX_BUFFER; i++) {
decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
decoder->packet_buffer[i].instr_count = 0; decoder->packet_buffer[i].last_instr_taken_branch = false;
decoder->packet_buffer[i].last_instr_size = 0; decoder->packet_buffer[i].exc = false; decoder->packet_buffer[i].exc_ret = false; decoder->packet_buffer[i].cpu = INT_MIN;
@@ -294,11 +298,13 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_count++;
decoder->packet_buffer[et].sample_type = sample_type;
decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; decoder->packet_buffer[et].cpu = *((int *)inode->priv); decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
decoder->packet_buffer[et].instr_count = 0; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT;
@@ -306,6 +312,36 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_CONT; }
+#ifdef HAVE_CSTRACE_INSTR_INFO
+static int cs_etm_decoder__instr_count(const ocsd_generic_trace_elem *elem) +{
return elem->num_instr_range;
+}
+static int cs_etm_decoder__last_instr_size(const ocsd_generic_trace_elem *elem) +{
return elem->last_instr_sz;
+}
+#else
+static int cs_etm_decoder__instr_count(const ocsd_generic_trace_elem *elem) +{
/* Assume a 4-byte instruction size - will be wrong for T32 */
WARN_ONCE(elem->isa == ocsd_isa_thumb2,
"Instruction counts not available for T32. Please upgrade to OpenCSD >= 0.9.0\n");
return (elem->en_addr - elem->st_addr) / 4;
+}
+static +int cs_etm_decoder__last_instr_size(const ocsd_generic_trace_elem *elem __maybe_unused) +{
return 4;
+}
+#endif
- static ocsd_datapath_resp_t cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, const ocsd_generic_trace_elem *elem,
@@ -321,8 +357,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
packet = &decoder->packet_buffer[decoder->tail];
switch (elem->isa) {
case ocsd_isa_aarch64:
packet->isa = CS_ETM_ISA_A64;
break;
case ocsd_isa_arm:
packet->isa = CS_ETM_ISA_A32;
break;
case ocsd_isa_thumb2:
packet->isa = CS_ETM_ISA_T32;
break;
case ocsd_isa_tee:
case ocsd_isa_jazelle:
case ocsd_isa_custom:
case ocsd_isa_unknown:
default:
packet->isa = CS_ETM_ISA_UNKNOWN;
}
packet->start_addr = elem->st_addr; packet->end_addr = elem->en_addr;
packet->instr_count = cs_etm_decoder__instr_count(elem);
switch (elem->last_i_type) { case OCSD_INSTR_BR: case OCSD_INSTR_BR_INDIRECT:
@@ -336,6 +392,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, break; }
packet->last_instr_size = cs_etm_decoder__last_instr_size(elem);
}return ret;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 612b575..9351bd1 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -28,11 +28,21 @@ enum cs_etm_sample_type { CS_ETM_TRACE_ON = 1 << 1, };
+enum cs_etm_isa {
CS_ETM_ISA_UNKNOWN,
CS_ETM_ISA_A64,
CS_ETM_ISA_A32,
CS_ETM_ISA_T32,
+};
- struct cs_etm_packet { enum cs_etm_sample_type sample_type;
enum cs_etm_isa isa; u64 start_addr; u64 end_addr;
u32 instr_count; u8 last_instr_taken_branch;
u8 last_instr_size; u8 exc; u8 exc_ret; int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2ae6402..fcaa73f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -31,14 +31,6 @@
#define MAX_TIMESTAMP (~0ULL)
-/*
- A64 instructions are always 4 bytes
- Only A64 is supported, so can use this constant for converting between
- addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
- struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues;
@@ -492,21 +484,16 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) etmq->last_branch_rb->nr = 0; }
-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) -{
/* Returns 0 for the CS_ETM_TRACE_ON packet */
if (packet->sample_type == CS_ETM_TRACE_ON)
return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
u64 addr) {
u8 instrBytes[2];
/*
* The packet records the execution range with an exclusive end address
*
* A64 instructions are constant size, so the last executed
* instruction is A64_INSTR_SIZE before the end address
* Will need to do instruction level decode for T32 instructions as
* they can be variable size (not yet supported).
cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
/* T32 instruction size is indicated by bits[15:11] of the first
* 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
* denote a 32-bit instruction. */
return packet->end_addr - A64_INSTR_SIZE;
return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}
static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
@@ -518,27 +505,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) return packet->start_addr; }
-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) +static inline +u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) {
/*
* Only A64 instructions are currently supported, so can get
* instruction count by dividing.
* Will need to do instruction level decode for T32 instructions as
* they can be variable size (not yet supported).
*/
return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
/* Returns 0 for the CS_ETM_TRACE_ON packet */
if (packet->sample_type == CS_ETM_TRACE_ON)
return 0;
}return packet->end_addr - packet->last_instr_size;
-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet, +static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
{const struct cs_etm_packet *packet, u64 offset)
/*
* Only A64 instructions are currently supported, so can get
* instruction address by muliplying.
* Will need to do instruction level decode for T32 instructions as
* they can be variable size (not yet supported).
*/
return packet->start_addr + offset * A64_INSTR_SIZE;
if (packet->isa == CS_ETM_ISA_T32) {
u64 addr = packet->start_addr;
while (offset > 0) {
addr += cs_etm__t32_instr_size(etmq, addr);
offset--;
}
return addr;
}
/* Assume a 4 byte instruction size (A32/A64) */
return packet->start_addr + offset * 4;
}
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -867,9 +859,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; int ret;
u64 instrs_executed;
u64 instrs_executed = etmq->packet->instr_count;
instrs_executed = cs_etm__instr_count(etmq->packet); etmq->period_instructions += instrs_executed; /*
@@ -899,7 +890,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) * executed, but PC has not advanced to next instruction) */ u64 offset = (instrs_executed - instrs_over - 1);
u64 addr = cs_etm__instr_addr(etmq->packet, offset);
u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset); ret = cs_etm__synth_instruction_sample( etmq, addr, etm->instructions_sample_period);
-- 2.7.4
CoreSight mailing list CoreSight@lists.linaro.org https://lists.linaro.org/mailman/listinfo/coresight