This patch adds the mechanism required to quickly look up the CPU number associated with a traceID. That way the CPU that executed the code conveyed by a decoded packet can be identified without having to do unnecessary translations.
Using this new functionality, the "cs-trace-disasm.py" script is enhanced to output the file and CPU number the code has been executed on:
FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab57fd80: 910003e0 mov x0, sp 7fab57fd84: 94000d53 bl 7fab5832d0 <free@plt+0x3790> FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab5832d0: d11203ff sub sp, sp, #0x480 FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab5832d4: a9ba7bfd stp x29, x30, [sp,#-96]! 7fab5832d8: 910003fd mov x29, sp 7fab5832dc: a90363f7 stp x23, x24, [sp,#48] 7fab5832e0: 9101e3b7 add x23, x29, #0x78 7fab5832e4: a90573fb stp x27, x28, [sp,#80] 7fab5832e8: a90153f3 stp x19, x20, [sp,#16] 7fab5832ec: aa0003fb mov x27, x0 7fab5832f0: 910a82e1 add x1, x23, #0x2a0 7fab5832f4: a9025bf5 stp x21, x22, [sp,#32] 7fab5832f8: a9046bf9 stp x25, x26, [sp,#64] 7fab5832fc: 910102e0 add x0, x23, #0x40 7fab583300: f800841f str xzr, [x0],#8 7fab583304: eb01001f cmp x0, x1 7fab583308: 54ffffc1 b.ne 7fab583300 <free@plt+0x37c0>
Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org --- tools/perf/scripts/python/cs-trace-disasm.py | 2 + tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 15 +++++- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 + tools/perf/util/cs-etm.c | 70 ++++++++++++++++++++++++- tools/perf/util/cs-etm.h | 7 +++ 5 files changed, 92 insertions(+), 3 deletions(-)
diff --git a/tools/perf/scripts/python/cs-trace-disasm.py b/tools/perf/scripts/python/cs-trace-disasm.py index 175ae4bfa732..429d0d2d7a23 100644 --- a/tools/perf/scripts/python/cs-trace-disasm.py +++ b/tools/perf/scripts/python/cs-trace-disasm.py @@ -82,6 +82,7 @@ def process_event(t): if (len(disasm_cache) > cache_size): disasm_cache.clear();
+ cpu = format(sample['cpu'], "d"); addr_range = format(sample['ip'],"x") + ":" + format(sample['addr'],"x");
try: @@ -103,6 +104,7 @@ def process_event(t): disasm_output = check_output(disasm).split('\n') disasm_cache[addr_range] = disasm_output;
+ print "FILE: %s\tCPU: %s" % (dso, cpu); for line in disasm_output: m = disasm_re.search(line) if (m != None) : diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 2cc7ccd3c22a..c6f23d63590d 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -16,10 +16,13 @@ * with this program. If not, see http://www.gnu.org/licenses/. */
+#include <linux/err.h> #include <stdlib.h>
+#include "../cs-etm.h" #include "cs-etm-decoder.h" #include "../util.h" +#include "../util/intlist.h"
#include "c_api/opencsd_c_api.h" #include "ocsd_if_types.h" @@ -102,10 +105,12 @@ int cs_etm_decoder__flush(struct cs_etm_decoder *decoder)
static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, const ocsd_generic_trace_elem *elem, + const uint8_t trace_chan_id, enum cs_etm_sample_type sample_type) { int err = 0; uint32_t et = 0; + struct int_node *inode = NULL;
if (decoder == NULL) return -1;
@@ -116,12 +121,18 @@ static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, if (err) return err;
et = decoder->end_tail; + /* Search the RB tree for the cpu associated with this traceID */ + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return PTR_ERR(inode);
decoder->packet_buffer[et].sample_type = sample_type; decoder->packet_buffer[et].start_addr = elem->st_addr; decoder->packet_buffer[et].end_addr = elem->en_addr; decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; + decoder->packet_buffer[et].cpu = *((int*)inode->priv); + et = (et + 1) & (MAX_BUFFER - 1);
decoder->end_tail = et; @@ -177,7 +188,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( //decoder->discontinuity = true; //break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - cs_etm_decoder__buffer_packet(decoder,elem, CS_ETM_RANGE); + cs_etm_decoder__buffer_packet(decoder,elem, + trace_chan_id, CS_ETM_RANGE); resp = OCSD_RESP_WAIT; break; case OCSD_GEN_TRC_ELEM_EXCEPTION: @@ -409,6 +421,7 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; decoder->packet_buffer[i].exc = false; decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; } }
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 2d1013bcdbab..38c5ae84ed07 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -51,6 +51,7 @@ struct cs_etm_packet { uint64_t end_addr; bool exc; bool exc_ret; + int cpu; };
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1194e8a34a21..551adc7bca6f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -15,6 +15,7 @@ * this program. If not, see http://www.gnu.org/licenses/. */
+#include <linux/err.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/bitops.h> @@ -29,6 +30,7 @@ #include "evlist.h" #include "machine.h" #include "util.h" +#include "util/intlist.h" #include "color.h" #include "cs-etm.h" #include "cs-etm-decoder/cs-etm-decoder.h" @@ -234,12 +236,20 @@ static void cs_etm__free(struct perf_session *session) {
size_t i; + struct int_node *inode, *tmp; struct cs_etm_auxtrace *aux = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); auxtrace_heap__free(&aux->heap); cs_etm__free_events(session); session->auxtrace = NULL; + + /* First remove all traceID/CPU# nodes from the RB tree */ + intlist__for_each_safe(inode, tmp, traceid_list) + intlist__remove(traceid_list, inode); + /* Then the RB tree itself */ + intlist__delete(traceid_list); + //thread__delete(aux->unknown_thread); for (i = 0; i < aux->num_cpu; ++i) { zfree(&aux->metadata[i]); @@ -613,7 +623,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.id = etmq->etm->instructions_id; sample.stream_id = etmq->etm->instructions_id; sample.period = (end_addr - start_addr) >> 2; - sample.cpu = etmq->cpu; + sample.cpu = packet->cpu; sample.flags = 0; // etmq->flags; sample.insn_len = 1; // etmq->insn_len; sample.cpumode = event->header.misc; @@ -1327,12 +1337,19 @@ int cs_etm__process_auxtrace_info(union perf_event *event, size_t priv_size = 0; size_t num_cpu; struct cs_etm_auxtrace *etm = 0; - int err = 0; + int err = 0, idx = -1; u64 *ptr; u64 *hdr = NULL; u64 **metadata = NULL; size_t i,j,k; unsigned pmu_type; + struct int_node *inode; + + /* + * sizeof(auxtrace_info_event::type) + + * sizeof(auxtrace_info_event::reserved) == 8 + */ + info_header_size = 8;
if (total_size < (event_header_size + info_header_size)) return -EINVAL; @@ -1356,7 +1373,20 @@ int cs_etm__process_auxtrace_info(union perf_event *event, return -EINVAL; }
+ /* + * Create an RB tree for traceID-CPU# tuple. Since the conversion has + * to be made for each packet that gets decoded optimizing access in + * anything other than a sequential array is worth doing. + */ + traceid_list = intlist__new(NULL); + if (!traceid_list) + return -ENOMEM; + metadata = zalloc(sizeof(u64 *) * num_cpu); + if (!metadata) { + err = -ENOMEM; + goto err_free_traceid_list; + }
if (metadata == NULL) { return -EINVAL; @@ -1370,6 +1400,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event, for (k = 0; k < CS_ETM_PRIV_MAX; k++) { metadata[j][k] = ptr[i+k]; } + + /* The traceID is our handle */ + idx = metadata[j][CS_ETM_ETMIDR]; i += CS_ETM_PRIV_MAX; } else if (ptr[i] == __perf_cs_etmv4_magic) { metadata[j] = zalloc(sizeof(u64)*CS_ETMV4_PRIV_MAX); @@ -1378,8 +1411,33 @@ int cs_etm__process_auxtrace_info(union perf_event *event, for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) { metadata[j][k] = ptr[i+k]; } + + /* The traceID is our handle */ + idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; i += CS_ETMV4_PRIV_MAX; } + + /* Get an RB node for this CPU */ + inode = intlist__findnew(traceid_list, idx); + + /* Something went wrong, no need to continue */ + if (!inode) { + err = PTR_ERR(inode); + goto err_free_metadata; + } + + /* + * The node for that CPU should not have been taken already. + * Backout if that's the case. + */ + if (inode->priv) { + err = -EINVAL; + goto err_free_metadata; + } + + /* All good, associate the traceID with the CPU# */ + inode->priv = &metadata[j][CS_ETM_CPU]; + }
if (i*8 != priv_size) @@ -1464,5 +1522,13 @@ err_free_queues: session->auxtrace = NULL; err_free: free(etm); +err_free_metadata: + /* No need to check @metadata[j], free(NULL) is supported */ + for (j = 0; j < num_cpu; ++j) + free(metadata[j]); + free(metadata); +err_free_traceid_list: + intlist__delete(traceid_list); + return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 7b683a024690..ec6ff78f1905 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -18,6 +18,10 @@ #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ #define INCLUDE__UTIL_PERF_CS_ETM_H__
+#include "util/event.h" +#include "util/intlist.h" +#include "util/session.h" + /* Versionning header in case things need tro change in the future. That way * decoding of old snapshot is still possible. */ @@ -61,6 +65,9 @@ enum { CS_ETMV4_PRIV_MAX, };
+/* RB tree for quick conversion between traceID and CPUs */ +struct intlist *traceid_list; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024)