The exception packet appears as one element with 'elem_type' == OCSD_GEN_TRC_ELEM_EXCEPTION or OCSD_GEN_TRC_ELEM_EXCEPTION_RET, which present for exception entry and exit respectively. The decoder set packet fields 'packet->exc' and 'packet->exc_ret' to indicate the exception packets; but exception packets don't have dedicated sample type and shares the same sample type CS_ETM_RANGE with normal instruction packets.
As result, the exception packets are taken as normal instruction packets and this introduces confusion to mix different packet types. Furthermore, these instruction range packets will be processed for branch sample only when 'packet->last_instr_taken_branch' is true, otherwise they will be omitted, this can introduce mess for exception and exception returning due we don't have complete address range info for context switching.
To process exception packets properly, this patch introduce two new sample type: CS_ETM_EXCEPTION and CS_ETM_EXCEPTION_RET; for these two kind packets, they will be handled by cs_etm__exception(). The func cs_etm__exception() forces to set previous CS_ETM_RANGE packet flag 'prev_packet->last_instr_taken_branch' to true, this matches well with the program flow when the exception is trapped from user space to kernel space, no matter if the most recent flow has branch taken or not; this is also safe for returning to user space after exception handling.
After exception packets have their own sample type, the packet fields 'packet->exc' and 'packet->exc_ret' aren't needed anymore, so remove them.
Signed-off-by: Leo Yan leo.yan@linaro.org --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 26 +++++++++++++++++------ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++----- tools/perf/util/cs-etm.c | 28 +++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 11 deletions(-)
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 24aabf0..c1715ff 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -264,8 +264,6 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[i].last_instr_taken_branch = false; - decoder->packet_buffer[i].exc = false; - decoder->packet_buffer[i].exc_ret = false; decoder->packet_buffer[i].cpu = INT_MIN; } } @@ -292,8 +290,6 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_count++;
decoder->packet_buffer[et].sample_type = sample_type; - decoder->packet_buffer[et].exc = false; - decoder->packet_buffer[et].exc_ret = false; decoder->packet_buffer[et].cpu = *((int *)inode->priv); decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; @@ -345,6 +341,22 @@ cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder, CS_ETM_TRACE_ON); }
+static ocsd_datapath_resp_t +cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) +{ + return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_EXCEPTION); +} + +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) +{ + return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_EXCEPTION_RET); +} + static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, const ocsd_trc_index_t indx __maybe_unused, @@ -370,10 +382,12 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: - decoder->packet_buffer[decoder->tail].exc = true; + resp = cs_etm_decoder__buffer_exception(decoder, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: - decoder->packet_buffer[decoder->tail].exc_ret = true; + resp = cs_etm_decoder__buffer_exception_ret(decoder, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: case OCSD_GEN_TRC_ELEM_EO_TRACE: diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 108dc9d..cb57756 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -25,9 +25,11 @@ struct cs_etm_buffer { };
enum cs_etm_sample_type { - CS_ETM_EMPTY = 0, - CS_ETM_RANGE = 1 << 0, - CS_ETM_TRACE_ON = 1 << 1, + CS_ETM_EMPTY = 0, + CS_ETM_RANGE = 1 << 0, + CS_ETM_TRACE_ON = 1 << 1, + CS_ETM_EXCEPTION = 1 << 2, + CS_ETM_EXCEPTION_RET = 1 << 3, };
struct cs_etm_packet { @@ -35,8 +37,6 @@ struct cs_etm_packet { u64 start_addr; u64 end_addr; u8 last_instr_taken_branch; - u8 exc; - u8 exc_ret; int cpu; };
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2ae6402..b85100b 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -942,6 +942,25 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) return 0; }
+static int cs_etm__exception(struct cs_etm_queue *etmq) +{ + /* + * When the exception packet is inserted, whether the last instruction + * in previous range packet is taken branch or not, we need to force + * to set 'prev_packet->last_instr_taken_branch' to true. This ensures + * to generate branch sample for the instruction range before the + * exception is trapped to kernel or before the exception returning. + * + * The exception packet includes the dummy address values, so don't + * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful + * for generating instruction and branch samples. + */ + if (etmq->prev_packet->sample_type == CS_ETM_RANGE) + etmq->prev_packet->last_instr_taken_branch = true; + + return 0; +} + static int cs_etm__flush(struct cs_etm_queue *etmq) { int err = 0; @@ -1057,6 +1076,15 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) */ cs_etm__sample(etmq); break; + case CS_ETM_EXCEPTION: + case CS_ETM_EXCEPTION_RET: + /* + * If the exception packet is coming, + * make sure the previous instruction + * range packet to be handled properly. + */ + cs_etm__exception(etmq); + break; case CS_ETM_TRACE_ON: /* * Discontinuity in trace, flush
This commit adds python script to parse CoreSight tracing event and use command 'objdump' for disassembled lines, finally we can generate readable program execution flow for reviewing tracing data.
The script receives CoreSight tracing packet with below format:
+------------+------------+------------+ packet(n): | addr | ip | cpu | +------------+------------+------------+ packet(n+1): | addr | ip | cpu | +------------+------------+------------+
packet::ip is the last address of current branch instruction and packet::addr presents the start address of the next coming branch instruction. So for one branch instruction which starts in packet(n), its execution flow starts from packet(n)::addr and it stops at packet(n+1)::ip. As results we need to combine the two continuous packets to generate the instruction range, this is the rationale for the script implementation:
[ sample(n)::addr .. sample(n+1)::ip ]
Credits to Tor Jeremiassen who have written the script skeleton and provides the ideas for reading symbol file according to build-id, creating memory map for dso and basic packet handling. Mathieu Poirier contributed fixes for build-id and memory map bugs. The detailed development history for this script you can find from [1]. Based on Tor and Mathieu work, the script is updated samples handling for the corrected sample format. Another minor enhancement is to support for without build-id case, the script can parse kernel symbols with option '-k' for vmlinux file path.
[1] https://github.com/Linaro/perf-opencsd/commits/perf-opencsd-v4.15/tools/perf...
Co-authored-by: Tor Jeremiassen tor@ti.com Co-authored-by: Mathieu Poirier mathieu.poirier@linaro.org Signed-off-by: Leo Yan leo.yan@linaro.org --- tools/perf/scripts/python/arm-cs-trace-disasm.py | 235 +++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 tools/perf/scripts/python/arm-cs-trace-disasm.py
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py new file mode 100644 index 0000000..1239ab4 --- /dev/null +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -0,0 +1,235 @@ +# arm-cs-trace-disasm.py: ARM CoreSight Trace Dump With Disassember +# SPDX-License-Identifier: GPL-2.0 +# +# Tor Jeremiassen tor@ti.com is original author who wrote script +# skeleton, Mathieu Poirier mathieu.poirier@linaro.org contributed +# fixes for build-id and memory map; Leo Yan leo.yan@linaro.org +# updated the packet parsing with new samples format. + +import os +import sys +import re +from subprocess import * +from optparse import OptionParser, make_option + +# Command line parsing + +option_list = [ + # formatting options for the bottom entry of the stack + make_option("-k", "--vmlinux", dest="vmlinux_name", + help="Set path to vmlinux file"), + make_option("-d", "--objdump", dest="objdump_name", + help="Set path to objdump executable file"), + make_option("-v", "--verbose", dest="verbose", + action="store_true", default=False, + help="Enable debugging log") +] + +parser = OptionParser(option_list=option_list) +(options, args) = parser.parse_args() + +if (options.objdump_name == None): + sys.exit("No objdump executable file specified - use -d or --objdump option") + +# Initialize global dicts and regular expression + +build_ids = dict() +mmaps = dict() +disasm_cache = dict() +cpu_data = dict() +disasm_re = re.compile("^\s*([0-9a-fA-F]+):") +disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s<.*>:") +cache_size = 32*1024 +prev_cpu = -1 + +def parse_buildid(): + global build_ids + + buildid_regex = "([a-fA-f0-9]+)[ \t]([^ \n]+)" + buildid_re = re.compile(buildid_regex) + + results = check_output(["perf", "buildid-list"]).split('\n'); + for line in results: + m = buildid_re.search(line) + if (m == None): + continue; + + id_name = m.group(2) + id_num = m.group(1) + + if (id_name == "[kernel.kallsyms]") : + append = "/kallsyms" + elif (id_name == "[vdso]") : + append = "/vdso" + else: + append = "/elf" + + build_ids[id_name] = os.environ['PERF_BUILDID_DIR'] + \ + "/" + id_name + "/" + id_num + append; + # Replace duplicate slash chars to single slash char + build_ids[id_name] = build_ids[id_name].replace('//', '/', 1) + + if ((options.vmlinux_name == None) and ("[kernel.kallsyms]" in build_ids)): + print "kallsyms cannot be used to dump assembler" + + # Set vmlinux path to replace kallsyms file, if without buildid we still + # can use vmlinux to prase kernel symbols + if ((options.vmlinux_name != None)): + build_ids['[kernel.kallsyms]'] = options.vmlinux_name; + +def parse_mmap(): + global mmaps + + # Check mmap for PERF_RECORD_MMAP and PERF_RECORD_MMAP2 + mmap_regex = "PERF_RECORD_MMAP.* -?[0-9]+/[0-9]+: [(0x[0-9a-fA-F]+)((0x[0-9a-fA-F]+)).*:\s.*\s(\S*)" + mmap_re = re.compile(mmap_regex) + + results = check_output("perf script --show-mmap-events | fgrep PERF_RECORD_MMAP", shell=True).split('\n') + for line in results: + m = mmap_re.search(line) + if (m != None): + if (m.group(3) == '[kernel.kallsyms]_text'): + dso = '[kernel.kallsyms]' + else: + dso = m.group(3) + + start = int(m.group(1),0) + end = int(m.group(1),0) + int(m.group(2),0) + mmaps[dso] = [start, end] + +def find_dso_mmap(addr): + global mmaps + + for key, value in mmaps.items(): + if (addr >= value[0] and addr < value[1]): + return key + + return None + +def read_disam(dso, start_addr, stop_addr): + global mmaps + global build_ids + + addr_range = start_addr + ":" + stop_addr; + + # Don't let the cache get too big, clear it when it hits max size + if (len(disasm_cache) > cache_size): + disasm_cache.clear(); + + try: + disasm_output = disasm_cache[addr_range]; + except: + try: + fname = build_ids[dso]; + except KeyError: + sys.exit("cannot find symbol file for " + dso) + + disasm = [ options.objdump_name, "-d", "-z", + "--start-address="+start_addr, + "--stop-address="+stop_addr, fname ] + + disasm_output = check_output(disasm).split('\n') + disasm_cache[addr_range] = disasm_output; + + return disasm_output + +def dump_disam(dso, start_addr, stop_addr): + for line in read_disam(dso, start_addr, stop_addr): + m = disasm_func_re.search(line) + if (m != None): + print "\t",line + continue + + m = disasm_re.search(line) + if (m == None): + continue; + + print "\t",line + +def dump_packet(sample): + print "Packet = { cpu: 0x%d addr: 0x%x phys_addr: 0x%x ip: 0x%x " \ + "pid: %d tid: %d period: %d time: %d }" % \ + (sample['cpu'], sample['addr'], sample['phys_addr'], \ + sample['ip'], sample['pid'], sample['tid'], \ + sample['period'], sample['time']) + +def trace_begin(): + print 'ARM CoreSight Trace Data Assembler Dump' + parse_buildid() + parse_mmap() + +def trace_end(): + print 'End' + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) + +def process_event(param_dict): + global cache_size + global options + global prev_cpu + + sample = param_dict["sample"] + + if (options.verbose == True): + dump_packet(sample) + + # If period doesn't equal to 1, this packet is for instruction sample + # packet, we need drop this synthetic packet. + if (sample['period'] != 1): + print "Skip synthetic instruction sample" + return + + cpu = format(sample['cpu'], "d"); + + # Initialize CPU data if it's empty, and directly return back + # if this is the first tracing event for this CPU. + if (cpu_data.get(str(cpu) + 'addr') == None): + cpu_data[str(cpu) + 'addr'] = format(sample['addr'], "#x") + prev_cpu = cpu + return + + # The format for packet is: + # + # +------------+------------+------------+ + # sample_prev: | addr | ip | cpu | + # +------------+------------+------------+ + # sample_next: | addr | ip | cpu | + # +------------+------------+------------+ + # + # We need to combine the two continuous packets to get the instruction + # range for sample_prev::cpu: + # + # [ sample_prev::addr .. sample_next::ip ] + # + # For this purose, sample_prev::addr is stored into cpu_data structure + # and read back for 'start_addr' when the new packet comes, and we need + # to use sample_next::ip to calculate 'stop_addr', plusing extra 4 for + # 'stop_addr' is for the sake of objdump so the final assembler dump can + # include last instruction for sample_next::ip. + + start_addr = cpu_data[str(prev_cpu) + 'addr'] + stop_addr = format(sample['ip'] + 4, "#x") + + # Record for previous sample packet + cpu_data[str(cpu) + 'addr'] = format(sample['addr'], "#x") + prev_cpu = cpu + + # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 + if (int(start_addr, 0) == 0 and int(stop_addr, 0) == 4): + print "CPU%s: CS_ETM_TRACE_ON packet is inserted" % cpu + return + + # Sanity checking dso for start_addr and stop_addr + prev_dso = find_dso_mmap(int(start_addr, 0)) + next_dso = find_dso_mmap(int(stop_addr, 0)) + + # If cannot find dso so cannot dump assembler, bail out + if (prev_dso == None or next_dso == None): + print "Address range [ %s .. %s ]: failed to find dso" % (start_addr, stop_addr) + return + elif (prev_dso != next_dso): + print "Address range [ %s .. %s ]: isn't in same dso" % (start_addr, stop_addr) + return + + dump_disam(prev_dso, start_addr, stop_addr)
This commit documents CoreSight trace disassembler usage and gives example for it.
Signed-off-by: Leo Yan leo.yan@linaro.org --- Documentation/trace/coresight.txt | 55 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+)
diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt index 1d74ad0..6bcff5a 100644 --- a/Documentation/trace/coresight.txt +++ b/Documentation/trace/coresight.txt @@ -425,6 +425,61 @@ root@genericarmv8:~#
Details on how to use the generic STM API can be found here [2].
+ +Tracing data disassembler +------------------------- + +'perf script' supports to use script to parse tracing packet and rely on +objdump executable (e.g. aarch64-linux-gnu-objdump) for disassembled lines, +this can convert tracing data to readable program execution flow for easily +reviewing tracing data. + +The CoreSight trace disassembler is located in the folder: +tools/perf/scripts/python/arm-cs-trace-disasm.py. This script support below +options: + + -d, --objdump: Set path to objdump executable, this option is + mandatory. + -k, --vmlinux: Set path to vmlinux file. + -v, --verbose: Enable debugging log, after enable this option the + script dumps every event data. + +Below is one example for using python script to dump CoreSight trace +disassembler: + + $ perf script -s arm-cs-trace-disasm.py -i perf.data \ + -F cpu,event,ip,addr,sym -- -d aarch64-linux-gnu-objdump \ + -k ./vmlinux > cs-disasm.log + +Below is one example for the disassembler log: + +ARM CoreSight Trace Data Assembler Dump + ffff000008a5f2dc <etm4_enable_hw+0x344>: + ffff000008a5f2dc: 340000a0 cbz w0, ffff000008a5f2f0 <etm4_enable_hw+0x358> + ffff000008a5f2f0 <etm4_enable_hw+0x358>: + ffff000008a5f2f0: f9400260 ldr x0, [x19] + ffff000008a5f2f4: d5033f9f dsb sy + ffff000008a5f2f8: 913ec000 add x0, x0, #0xfb0 + ffff000008a5f2fc: b900001f str wzr, [x0] + ffff000008a5f300: f9400bf3 ldr x19, [sp, #16] + ffff000008a5f304: a8c27bfd ldp x29, x30, [sp], #32 + ffff000008a5f308: d65f03c0 ret + ffff000008a5fa18 <etm4_enable+0x1b0>: + ffff000008a5fa18: 14000025 b ffff000008a5faac <etm4_enable+0x244> + ffff000008a5faac <etm4_enable+0x244>: + ffff000008a5faac: b9406261 ldr w1, [x19, #96] + ffff000008a5fab0: 52800015 mov w21, #0x0 // #0 + ffff000008a5fab4: f901ca61 str x1, [x19, #912] + ffff000008a5fab8: 2a1503e0 mov w0, w21 + ffff000008a5fabc: 3940e261 ldrb w1, [x19, #56] + ffff000008a5fac0: f901ce61 str x1, [x19, #920] + ffff000008a5fac4: a94153f3 ldp x19, x20, [sp, #16] + ffff000008a5fac8: a9425bf5 ldp x21, x22, [sp, #32] + ffff000008a5facc: a94363f7 ldp x23, x24, [sp, #48] + ffff000008a5fad0: a8c47bfd ldp x29, x30, [sp], #64 + ffff000008a5fad4: d65f03c0 ret + + [1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm [2]. Documentation/trace/stm.txt [3]. https://github.com/Linaro/perf-opencsd