Changes in V6: - In arm-cs-trace-disasm.py, zero map_pgoff for kernel files. Add map_pgoff to start/end address for dso not found message. - Added "Reviewed-by" trailer for patches 1-3 previously reviewed by Leo Yan in V4 and V5.
Changes in V5: - In symbol-elf.c, branch to exit_close label if open file. - In trace_event_python.c, correct indentation. set_sym_in_dict call parameter "map_pgoff" renamed as "addr_map_pgoff" to match local naming.
Changes in V4: - In trace-event-python.c, fixed perf-tools-next merge problem.
Changes in V3: - Rebased to linux-perf-tools branch. - Squash symbol-elf.c and symbol.h into same commit. - In map.c, merge dso__is_pie() call into existing if statement. - In arm-cs-trace-disasm.py, remove debug artifacts.
Changes in V2: - In dso__is_pie() (symbol-elf.c), Decrease indentation, add null pointer checks per Leo Yan review. - Updated mailing list distribution.
Steve Clevenger (4): Add dso__is_pie call to identify ELF PIE Force MAPPING_TYPE__IDENTIY for PIE Add map pgoff to python dictionary based on MAPPING_TYPE Adjust objdump start/end range per map pgoff parameter
.../scripts/python/arm-cs-trace-disasm.py | 12 ++-- tools/perf/util/map.c | 4 +- .../scripting-engines/trace-event-python.c | 13 +++- tools/perf/util/symbol-elf.c | 61 +++++++++++++++++++ tools/perf/util/symbol.h | 1 + 5 files changed, 82 insertions(+), 9 deletions(-)
Extract map_pgoff parameter from the dictionary, and adjust start/end range passed to objdump based on the value.
The start_addr/stop_addr address checks are changed to print a warning only if verbose == True. This script repeatedly sees a zero value passed in for start_addr = cpu_data[str(cpu) + 'addr']
These zero values are not a new problem. The start_addr/stop_addr warning clutters the instruction trace output, hence this change.
Signed-off-by: Steve Clevenger scclevenger@os.amperecomputing.com --- tools/perf/scripts/python/arm-cs-trace-disasm.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index 7aff02d84ffb..35a2ab60ca12 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -187,6 +187,7 @@ def process_event(param_dict): dso_start = get_optional(param_dict, "dso_map_start") dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol") + map_pgoff = get_optional(param_dict, "map_pgoff")
cpu = sample["cpu"] ip = sample["ip"] @@ -249,11 +250,13 @@ def process_event(param_dict): return
if (start_addr < int(dso_start) or start_addr > int(dso_end)): - print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso)) + if (options.verbose == True): + print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso)) return
if (stop_addr < int(dso_start) or stop_addr > int(dso_end)): - print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso)) + if (options.verbose == True): + print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso)) return
if (options.objdump_name != None): @@ -262,13 +265,14 @@ def process_event(param_dict): # vm_start to zero. if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0 + map_pgoff = 0 else: dso_vm_start = int(dso_start)
dso_fname = get_dso_file_path(dso, dso_bid) if path.exists(dso_fname): - print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) + print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff) else: - print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr)) + print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + map_pgoff, stop_addr + map_pgoff))
print_srccode(comm, param_dict, sample, symbol, dso)
On 05/09/2024 1:11 am, Steve Clevenger wrote:
Extract map_pgoff parameter from the dictionary, and adjust start/end range passed to objdump based on the value.
The start_addr/stop_addr address checks are changed to print a warning only if verbose == True. This script repeatedly sees a zero value passed in for start_addr = cpu_data[str(cpu) + 'addr']
These zero values are not a new problem. The start_addr/stop_addr warning clutters the instruction trace output, hence this change.
Signed-off-by: Steve Clevenger scclevenger@os.amperecomputing.com
tools/perf/scripts/python/arm-cs-trace-disasm.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index 7aff02d84ffb..35a2ab60ca12 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -187,6 +187,7 @@ def process_event(param_dict): dso_start = get_optional(param_dict, "dso_map_start") dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol")
- map_pgoff = get_optional(param_dict, "map_pgoff")
cpu = sample["cpu"] ip = sample["ip"] @@ -249,11 +250,13 @@ def process_event(param_dict): return if (start_addr < int(dso_start) or start_addr > int(dso_end)):
print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso))
if (options.verbose == True):
returnprint("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso))
if (stop_addr < int(dso_start) or stop_addr > int(dso_end)):
print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
if (options.verbose == True):
print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
What about skipping printing if the address is 0 rather than if it's not verbose? Seems like a zero is expected because that's for discontinuities, but other non zero addresses that don't match are a genuine issue.
And there is already an early exit for zero addresses above, maybe that just needs to be fixed to make it more general. But this one probably does need the verbosity change:
# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 if (start_addr == 0 and stop_addr == 4): print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) return
Also minor nit, but I think this change in verbosity is a separate change to the change to add the map offset.
return
if (options.objdump_name != None): @@ -262,13 +265,14 @@ def process_event(param_dict): # vm_start to zero. if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0
else: dso_vm_start = int(dso_start)map_pgoff = 0
dso_fname = get_dso_file_path(dso, dso_bid) if path.exists(dso_fname):
print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
else:print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff)
print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + map_pgoff, stop_addr + map_pgoff))
I get this error here for this line:
$ perf script -k vmlinux -s python:tools/perf/scripts/python/arm-cs\ -trace-disasm.py -- -d -k vmlinux
Traceback (most recent call last): File "tools/perf/scripts/python/arm-cs-trace-disasm.py", line 331, in process_event print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff) TypeError: unsupported operand type(s) for +: 'int' and 'str' Fatal Python error: handler_call_die: problem in Python trace event handler Python runtime state: initialized
Current thread 0x000075db4c249400 (most recent call first): <no Python frame>
Extension modules: perf_trace_context, apt_pkg (total: 2) Aborted (core dumped)
On 9/5/2024 6:45 AM, James Clark wrote:
On 05/09/2024 1:11 am, Steve Clevenger wrote:
Extract map_pgoff parameter from the dictionary, and adjust start/end range passed to objdump based on the value.
The start_addr/stop_addr address checks are changed to print a warning only if verbose == True. This script repeatedly sees a zero value passed in for start_addr = cpu_data[str(cpu) + 'addr']
These zero values are not a new problem. The start_addr/stop_addr warning clutters the instruction trace output, hence this change.
Signed-off-by: Steve Clevenger scclevenger@os.amperecomputing.com
tools/perf/scripts/python/arm-cs-trace-disasm.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/ perf/scripts/python/arm-cs-trace-disasm.py index 7aff02d84ffb..35a2ab60ca12 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -187,6 +187,7 @@ def process_event(param_dict): dso_start = get_optional(param_dict, "dso_map_start") dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol") + map_pgoff = get_optional(param_dict, "map_pgoff") cpu = sample["cpu"] ip = sample["ip"] @@ -249,11 +250,13 @@ def process_event(param_dict): return if (start_addr < int(dso_start) or start_addr > int(dso_end)): - print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso)) + if (options.verbose == True): + print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso)) return if (stop_addr < int(dso_start) or stop_addr > int(dso_end)): - print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso)) + if (options.verbose == True): + print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
What about skipping printing if the address is 0 rather than if it's not verbose? Seems like a zero is expected because that's for discontinuities, but other non zero addresses that don't match are a genuine issue.
And there is already an early exit for zero addresses above, maybe that just needs to be fixed to make it more general. But this one probably does need the verbosity change:
# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 if (start_addr == 0 and stop_addr == 4): print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) return
Also minor nit, but I think this change in verbosity is a separate change to the change to add the map offset.
In my experience, non-zero start addresses outside the dso address range are exceedingly rare. I don't recall the last time I saw one. To avoid having to pick through thousands of generated messages with zero start addresses, I'll filter these out.
I also don't recall ever seeing the CS_ETM_TRACE_ON notification message embedded in the instruction trace output. And a typical trace capture has many thousands of TRACE_ON packets. I only have Ampere HW to use for CoreSight trace, but how valid is the test with start_addr=0 and stop_addr=4? I'll change the script to optionally print the message in verbose mode for these conditions.
return if (options.objdump_name != None): @@ -262,13 +265,14 @@ def process_event(param_dict): # vm_start to zero. if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0 + map_pgoff = 0 else: dso_vm_start = int(dso_start) dso_fname = get_dso_file_path(dso, dso_bid) if path.exists(dso_fname): - print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) + print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff) else: - print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr)) + print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + map_pgoff, stop_addr + map_pgoff))
I get this error here for this line:
$ perf script -k vmlinux -s python:tools/perf/scripts/python/arm-cs\ -trace-disasm.py -- -d -k vmlinux
Traceback (most recent call last): File "tools/perf/scripts/python/arm-cs-trace-disasm.py", line 331, in process_event print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff) TypeError: unsupported operand type(s) for +: 'int' and 'str' Fatal Python error: handler_call_die: problem in Python trace event handler Python runtime state: initialized
Current thread 0x000075db4c249400 (most recent call first): <no Python frame>
Extension modules: perf_trace_context, apt_pkg (total: 2) Aborted (core dumped)
My mistake. I intermixed the types, and didn't catch it during test. I Look for Version 7 to address your comments.
Steve C.
Add map_pgoff parameter to python dictionary so it can be seen by the python script, arm-cs-trace-disasm.py. map_pgoff is forced to zero in the dictionary if file type is MAPPING_TYPE__IDENTITY. Otherwise, the map_pgoff value is directly added to the dictionary.
Signed-off-by: Steve Clevenger scclevenger@os.amperecomputing.com Reviewed-by: Leo Yan leo.yan@arm.com --- .../util/scripting-engines/trace-event-python.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 6971dd6c231f..7b96504d5406 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -798,7 +798,8 @@ static int set_regs_in_dict(PyObject *dict, static void set_sym_in_dict(PyObject *dict, struct addr_location *al, const char *dso_field, const char *dso_bid_field, const char *dso_map_start, const char *dso_map_end, - const char *sym_field, const char *symoff_field) + const char *sym_field, const char *symoff_field, + const char *map_pgoff) { char sbuild_id[SBUILD_ID_SIZE];
@@ -814,6 +815,12 @@ static void set_sym_in_dict(PyObject *dict, struct addr_location *al, PyLong_FromUnsignedLong(map__start(al->map))); pydict_set_item_string_decref(dict, dso_map_end, PyLong_FromUnsignedLong(map__end(al->map))); + if (al->map->mapping_type == MAPPING_TYPE__DSO) + pydict_set_item_string_decref(dict, map_pgoff, + PyLong_FromUnsignedLongLong(al->map->pgoff)); + else + pydict_set_item_string_decref(dict, map_pgoff, + PyLong_FromUnsignedLongLong(0)); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -900,7 +907,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", - "symbol", "symoff"); + "symbol", "symoff", "map_pgoff");
pydict_set_item_string_decref(dict, "callchain", callchain);
@@ -925,7 +932,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyBool_FromLong(1)); set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", "addr_dso_map_start", "addr_dso_map_end", - "addr_symbol", "addr_symoff"); + "addr_symbol", "addr_symoff", "addr_map_pgoff"); }
if (sample->flags)
Use dso__is_pie() to check whether the DSO file is a Position Independent Executable (PIE). If PIE, change the MAPPING_TYPE to MAPPING_TYPE__IDENTITY so a zero map pgoff (text offset) is passed into the script.
Signed-off-by: Steve Clevenger scclevenger@os.amperecomputing.com Reviewed-by: Leo Yan leo.yan@arm.com --- tools/perf/util/map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index e781c8d56a9a..c846faec177b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -173,8 +173,8 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, assert(!dso__kernel(dso)); map__init(result, start, start + len, pgoff, dso, prot, flags);
- if (anon || no_dso) { - map->mapping_type = MAPPING_TYPE__IDENTITY; + if (anon || no_dso || dso__is_pie(dso)) { + map__set_mapping_type(map, MAPPING_TYPE__IDENTITY);
/* * Set memory without DSO as loaded. All map__find_*
Add dso__is_pie global to read the .dynamic section DT_FLAGS_1 entry for the DF_1_PIE flag. This identifies position executable code.
Signed-off-by: Steve Clevenger scclevenger@os.amperecomputing.com Reviewed-by: Leo Yan leo.yan@arm.com --- tools/perf/util/symbol-elf.c | 61 ++++++++++++++++++++++++++++++++++++ tools/perf/util/symbol.h | 1 + 2 files changed, 62 insertions(+)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index e398abfd13a0..babe47976922 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -662,6 +662,67 @@ static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf, return err; }
+/* + * Check dynamic section DT_FLAGS_1 for a Position Independent + * Executable (PIE). + */ +bool dso__is_pie(struct dso *dso) +{ + Elf *elf = NULL; + Elf_Scn *scn = NULL; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + bool is_pie = false; + char dso_path[PATH_MAX]; + int fd = -1; + + if (!dso || (elf_version(EV_CURRENT) == EV_NONE)) + goto exit; // false + + dso__build_id_filename(dso, dso_path, sizeof(dso_path), false); + + fd = open(dso_path, O_RDONLY); + + if (fd < 0) { + pr_debug("%s: cannot read cached %s.\n", __func__, dso_path); + goto exit; // false + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + gelf_getehdr(elf, &ehdr); + + if (ehdr.e_type == ET_DYN) { + Elf_Data *data; + GElf_Dyn *entry; + int n_entries = shdr.sh_size / sizeof(GElf_Dyn); + + scn = elf_section_by_name(elf, &ehdr, &shdr, ".dynamic", NULL); + if (!scn) + goto exit_close; // false + + data = (Elf_Data *) elf_getdata(scn, NULL); + if (!data || !data->d_buf) + goto exit_close; // false + + // check DT_FLAGS_1 + for (int i = 0; i < n_entries; i++) { + entry = ((GElf_Dyn *) data->d_buf) + i; + if (entry->d_tag == DT_FLAGS_1) { + if ((entry->d_un.d_val & DF_1_PIE) != 0) { + is_pie = true; + break; + } + } + } // end for + } + +exit_close: + elf_end(elf); + close(fd); +exit: + return is_pie; +} + /* * We need to check if we have a .dynsym, so that we can handle the * .plt, synthesizing its symbols, that aren't on the symtabs (be it diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 3fb5d146d9b1..33ea2596ce31 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -127,6 +127,7 @@ void dso__insert_symbol(struct dso *dso, struct symbol *sym); void dso__delete_symbol(struct dso *dso, struct symbol *sym); +bool dso__is_pie(struct dso *dso);
struct symbol *dso__find_symbol(struct dso *dso, u64 addr); struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr);