This commit adds python script to parse CoreSight tracing event and use command 'objdump' for disassembled lines, finally we can generate readable program execution flow for reviewing tracing data.
The script receives CoreSight tracing packet with below format:
+------------+------------+------------+ packet(n): | addr | ip | cpu | +------------+------------+------------+ packet(n+1): | addr | ip | cpu | +------------+------------+------------+
packet::ip is the last address of current branch instruction and packet::addr presents the start address of the next coming branch instruction. So for one branch instruction which starts in packet(n), its execution flow starts from packet(n)::addr and it stops at packet(n+1)::ip. As results we need to combine the two continuous packets to generate the instruction range, this is the rationale for the script implementation:
[ sample(n)::addr .. sample(n+1)::ip ]
Except for the normal branch flow, this script also parses program flow for exception handling and for the exception level change flow from user space to kernel:
- We can easily decide it's an exception handling if the stop address is in kernel's vector table. For this case, we need to divide the program range into two parts, the first part in before exception handling, and the second part is for exception handling.
- For the program flow from user space to kernel, it might be a system call or a trap, so the script reads ahead 16 instructions to check if includes 'svc' instruction or not. If the prediction includes 'svc' instruction the script prints out the flow until 'svc' instruction, otherwise it only prints the instruction which causes trap.
Credits to Tor Jeremiassen who have written the script skeleton and provides the ideas for reading symbol file according to build-id, creating memory map for dso and basic packet handling. Mathieu Poirier contributed fixes for build-id and memory map bugs. The detailed development history for this script you can find from [1]. Based on Tor and Mathieu work, the script is enhanced to support exception handling and program flow from user space to kernel as described above. Another minor enhancement is to support for without build-id case, the script can parse kernel symbols with option '-k' for vmlinux file path.
[1] https://github.com/Linaro/perf-opencsd/commits/perf-opencsd-v4.15/tools/perf...
Co-authored-by: Tor Jeremiassen tor@ti.com Co-authored-by: Mathieu Poirier mathieu.poirier@linaro.org Signed-off-by: Leo Yan leo.yan@linaro.org --- tools/perf/scripts/python/arm-cs-trace-disasm.py | 324 +++++++++++++++++++++++ 1 file changed, 324 insertions(+) create mode 100644 tools/perf/scripts/python/arm-cs-trace-disasm.py
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py new file mode 100644 index 0000000..4f71ee6 --- /dev/null +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -0,0 +1,324 @@ +# arm-cs-trace-disasm.py: ARM CoreSight Trace Dump With Disassember +# SPDX-License-Identifier: GPL-2.0 +# +# Tor Jeremiassen tor@ti.com is original author who wrote script +# skeleton, Mathieu Poirier mathieu.poirier@linaro.org contributed +# fixes for build-id and memory map; Leo Yan leo.yan@linaro.org +# enhanced the packet parsing to support exception handling and the +# flow from user space to kernel switching. + +import os +import sys +import re +from subprocess import * +from optparse import OptionParser, make_option + +# Command line parsing + +option_list = [ + # formatting options for the bottom entry of the stack + make_option("-k", "--vmlinux", dest="vmlinux_name", + help="Set path to vmlinux file"), + make_option("-d", "--objdump", dest="objdump_name", + help="Set path to objdump executable file"), + make_option("-v", "--verbose", dest="verbose", + action="store_true", default=False, + help="Enable debugging log") +] + +parser = OptionParser(option_list=option_list) +(options, args) = parser.parse_args() + +if (options.objdump_name == None): + sys.exit("No objdump executable file specified - use -d or --objdump option") + +# Initialize global dicts and regular expression + +build_ids = dict() +mmaps = dict() +disasm_cache = dict() +cpu_data = dict() +disasm_re = re.compile("^\s*([0-9a-fA-F]+):") +disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s<.*>:") +cache_size = 32*1024 +vectors_start = 0 +vectors_end = 0 +prev_cpu = -1 + +def parse_buildid(): + global build_ids + + buildid_regex = "([a-fA-f0-9]+)[ \t]([^ \n]+)" + buildid_re = re.compile(buildid_regex) + + results = check_output(["perf", "buildid-list"]).split('\n'); + for line in results: + m = buildid_re.search(line) + if (m == None): + continue; + + id_name = m.group(2) + id_num = m.group(1) + + if (id_name == "[kernel.kallsyms]") : + append = "/kallsyms" + elif (id_name == "[vdso]") : + append = "/vdso" + else: + append = "/elf" + + build_ids[id_name] = os.environ['PERF_BUILDID_DIR'] + \ + "/" + id_name + "/" + id_num + append; + # Replace duplicate slash chars to single slash char + build_ids[id_name] = build_ids[id_name].replace('//', '/', 1) + + if ((options.vmlinux_name == None) and ("[kernel.kallsyms]" in build_ids)): + print "kallsyms cannot be used to dump assembler" + + # Set vmlinux path to replace kallsyms file, if without buildid we still + # can use vmlinux to prase kernel symbols + if ((options.vmlinux_name != None)): + build_ids['[kernel.kallsyms]'] = options.vmlinux_name; + +def parse_mmap(): + global mmaps + + # Check mmap for PERF_RECORD_MMAP and PERF_RECORD_MMAP2 + mmap_regex = "PERF_RECORD_MMAP.* -?[0-9]+/[0-9]+: [(0x[0-9a-fA-F]+)((0x[0-9a-fA-F]+)).*:\s.*\s(\S*)" + mmap_re = re.compile(mmap_regex) + + results = check_output("perf script --show-mmap-events | fgrep PERF_RECORD_MMAP", shell=True).split('\n') + for line in results: + m = mmap_re.search(line) + if (m != None): + if (m.group(3) == '[kernel.kallsyms]_text'): + dso = '[kernel.kallsyms]' + else: + dso = m.group(3) + + start = int(m.group(1),0) + end = int(m.group(1),0) + int(m.group(2),0) + mmaps[dso] = [start, end] + +def parse_vectors(): + global vectors_start + global vectors_end + + if (options.vmlinux_name == None): + return + + vectors_regex = "([0-9a-fA-F]+)\sg.*.text\s*[0-9a-fA-F]+\s(vectors)" + vectors_re = re.compile(vectors_regex) + + fname = options.vmlinux_name + disasm = [ options.objdump_name, "-t", fname ] + disasm_output = check_output(disasm).split('\n') + + for line in disasm_output: + m = vectors_re.search(line) + if (m != None): + vectors_start = int("0x"+m.group(1), 0) + vectors_end = vectors_start + 0x800 + +def find_dso_mmap(addr): + global mmaps + + for key, value in mmaps.items(): + if (addr >= value[0] and addr < value[1]): + return key + + return None + +def read_disam(dso, start_addr, stop_addr): + global mmaps + global build_ids + + addr_range = start_addr + ":" + stop_addr; + + # Don't let the cache get too big, clear it when it hits max size + if (len(disasm_cache) > cache_size): + disasm_cache.clear(); + + try: + disasm_output = disasm_cache[addr_range]; + except: + try: + fname = build_ids[dso]; + except KeyError: + sys.exit("cannot find symbol file for " + dso) + + disasm = [ options.objdump_name, "-d", "-z", + "--start-address="+start_addr, + "--stop-address="+stop_addr, fname ] + + disasm_output = check_output(disasm).split('\n') + disasm_cache[addr_range] = disasm_output; + + return disasm_output + +def dump_disam(dso, start_addr, stop_addr, check_svc): + for line in read_disam(dso, start_addr, stop_addr): + m = disasm_func_re.search(line) + if (m != None): + print "\t",line + continue + + m = disasm_re.search(line) + if (m == None): + continue; + + print "\t",line + + if ((check_svc == True) and "svc" in line): + return + +def is_svc_call(dso, start_addr, stop_addr): + for line in read_disam(dso, start_addr, stop_addr): + m = disasm_re.search(line) + if (m == None): + continue; + + if "svc" in line: + return True + + return False + +def in_vector_table(addr): + global vectors_start + global vectors_end + + if (addr >= vectors_start and addr < vectors_end): + return True + + return False + +def is_exception_handling(start_addr, stop_addr): + if (in_vector_table(int(start_addr, 0)) == False and + in_vector_table(int(stop_addr, 0)) == True): + return True + + return False + +def dump_packet(sample): + print "Packet = { cpu: 0x%d addr: 0x%x phys_addr: 0x%x ip: 0x%x " \ + "pid: %d tid: %d period: %d time: %d }" % \ + (sample['cpu'], sample['addr'], sample['phys_addr'], \ + sample['ip'], sample['pid'], sample['tid'], \ + sample['period'], sample['time']) + +def trace_begin(): + print 'CoreSight Trace Data Assembler Dump (ARM64)' + parse_buildid() + parse_mmap() + parse_vectors() + +def trace_end(): + print 'End' + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) + +def process_event(param_dict): + global cache_size + global options + global prev_cpu + + sample = param_dict["sample"] + + if (options.verbose == True): + dump_packet(sample) + + # If period doesn't equal to 1, this packet is for instruction sample + # packet, we need drop this synthetic packet. + if (sample['period'] != 1): + print "Skip synthetic instruction sample" + return + + cpu = format(sample['cpu'], "d"); + + # Initialize CPU data if it's empty, and directly return back + # if this is the first tracing event for this CPU. + if (cpu_data.get(str(cpu) + 'addr') == None): + cpu_data[str(cpu) + 'addr'] = format(sample['addr'], "#x") + prev_cpu = cpu + return + + # The format for packet is: + # + # +------------+------------+------------+ + # sample_prev: | addr | ip | cpu | + # +------------+------------+------------+ + # sample_next: | addr | ip | cpu | + # +------------+------------+------------+ + # + # We need to combine the two continuous packets to get the instruction + # range for sample_prev::cpu: + # + # [ sample_prev::addr .. sample_next::ip ] + # + # For this purose, sample_prev::addr is stored into cpu_data structure + # and read back for 'start_addr' when the new packet comes, and we need + # to use sample_next::ip to calculate 'stop_addr', plusing extra 4 for + # 'stop_addr' is for the sake of objdump so the final assembler dump can + # include last instruction for sample_next::ip. + + start_addr = cpu_data[str(prev_cpu) + 'addr'] + stop_addr = format(sample['ip'] + 4, "#x") + + # Find dso for start_addr and stop_addr, if these two addresses have + # different dso that means there have switch between kernel space + # and user space. + + prev_dso = find_dso_mmap(int(start_addr, 0)) + next_dso = find_dso_mmap(int(stop_addr, 0)) + + # If cannot find dso so cannot dump assembler, bail out + if (prev_dso == None or next_dso == None): + print "Address range [ %s .. %s ]: dso is not found" % (start_addr, stop_addr) + prev_cpu = cpu + return + + if (prev_dso != next_dso): + + # If 'prev_dso' doesn't equal to 'next_dso', this might be below cases: + # - synchronous exception with 'svc' instruction; + # - synchronous exception and trap to kernel (data abort, etc); + # - asynchronous exception for interrupt. + # + # Check if the next 16 insts (16 insts x 4 bytes =64 bytes) have included + # 'svc' insctuction and for 'svc' instruction, we print out the flow from + # start address until to 'svc' instruction. + + stop_addr = format((int(start_addr, 0) + 64), "#x") + + is_svc = is_svc_call(prev_dso, start_addr, stop_addr) + + # Otherwise we only print single instruction. + if (is_svc == False): + stop_addr = format((int(start_addr, 0) + 4), "#x") + + dump_disam(prev_dso, start_addr, stop_addr, is_svc) + + elif (is_exception_handling(start_addr, stop_addr) == True): + + # If this flow is to trigger exception handling, we need divide + # to print the flow into two parts: + # - the flow before exception; + # - the flow for exception handling, every exception handling entry + # is aligned with 0x80. + + stop_addr = format((int(start_addr, 0) + 4), "#x") + + dump_disam(prev_dso, start_addr, stop_addr, False) + + start_addr = format(sample['ip'] & ~(0x80 - 1), "#x") + stop_addr = format(sample['ip'] + 4, "#x") + + dump_disam(prev_dso, start_addr, stop_addr, False) + + else: + # This is normal case to dump range [ start_addr .. stop_addr ] + dump_disam(prev_dso, start_addr, stop_addr, False) + + cpu_data[str(cpu) + 'addr'] = format(sample['addr'], "#x") + prev_cpu = cpu