On 16/06/2026 3:51 pm, Leo Yan wrote:
Add a CoreSight shell test for synthesized callchains.
The test uses the new callchain workload to generate a trace and decodes it with callchain synthesis enabled. It then verifies that the instruction samples show the expected callchain pushes and pops.
Use control FIFOs so that tracing starts only around the workload, which keeps the trace data small. The test runs only when the cs_etm event is available and the user has root permission.
After:
perf test 138 -vvv 138: CoreSight synthesized callchain: ---- start ---- test child forked, pid 35581 Callchain flow matched: l1=4642868 l2=4642880 l3=4642895 l4=4642919 l5=4670494 l6=4670500 l7=4670520 ---- end(0) ---- 138: CoreSight synthesized callchain : Ok
Assisted-by: Codex:GPT-5.5
Signed-off-by: Leo Yan <leo.yan@arm.com>
tools/perf/Documentation/perf-test.txt | 6 +- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/shell/coresight/callchain.sh | 172 ++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 2 + tools/perf/tests/workloads/callchain.c | 33 +++++ 6 files changed, 213 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index 81c8525f594680d814f80e6f88bcce8d867bb350..859df74e62efc4b1e80da13ae8e053356f68ae54 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -57,7 +57,8 @@ OPTIONS --workload=:: Run a built-in workload, to list them use '--list-workloads', current ones include: noploop, thloop, leafloop, sqrtloop, brstack, datasym,
- context_switch_loop, deterministic, named_threads and landlock.
+ context_switch_loop, deterministic, named_threads, landlock and
+ callchain.
Used with the shell script regression tests. @@ -69,7 +70,8 @@ OPTIONS 'named_threads' accepts the number of threads and the number of loops to do in each thread.
- The datasym, landlock and deterministic workloads don't accept any.
+ The datasym, landlock, deterministic and callchain workloads don't accept
+ any.
--list-workloads:: List the available workloads to use with -w/--workload. diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 7e75f590f225e3284980829707ca8d916c98cada..1d1f38127e05429a27f31beda814f2b5f5a75089 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -168,6 +168,7 @@ static struct test_workload *workloads[] = { &workload__jitdump, &workload__context_switch_loop, &workload__deterministic,
+ &workload__callchain,
#ifdef HAVE_RUST_SUPPORT &workload__code_with_type, diff --git a/tools/perf/tests/shell/coresight/callchain.sh b/tools/perf/tests/shell/coresight/callchain.sh new file mode 100755 index 0000000000000000000000000000000000000000..13cca7dc11184002e3ddc058c0d0ffa1c458c483 --- /dev/null +++ b/tools/perf/tests/shell/coresight/callchain.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# CoreSight synthesized callchain (exclusive) +# SPDX-License-Identifier: GPL-2.0
+glb_err=1
+if ! tmpdir=$(mktemp -d /tmp/perf-cs-callchain-test.XXXXXX); then
- echo "mktemp failed"
- exit 1
+fi
+cleanup_files() +{
- rm -rf "$tmpdir"
+}
+trap cleanup_files EXIT +trap 'cleanup_files; exit $glb_err' TERM INT
+skip_if_system_is_not_ready() +{
- perf list | grep -Pzq 'cs_etm//' || {
+ echo "[Skip] cs_etm event is not available" >&2
+ return 2
+ }
- # Requires root for trace in kernel
- [ "$(id -u)" = 0 ] || {
+ echo "[Skip] No root permission" >&2
+ return 2
+ }
- return 0
+}
+record_trace() +{
- local data=$1
- local script=$2
- local cf="$tmpdir/ctl"
- local af="$tmpdir/ack"
- mkfifo "$cf" "$af"
- perf record -o "$data" -e cs_etm// --per-thread -D -1 --control fifo:"$cf","$af" -- \
+ perf test --record-ctl fifo:"$cf","$af" -w callchain >/dev/null 2>&1 &&
+ # It is safe to use 'i3i' with a three-instruction interval, since the
- # workload is compiled with -O0.
- perf script --itrace=g16i3il64 -i "$data" > "$script"
Is there a reason we don't generate callstacks on branch samples and use --itrace=g16bl64? That removes the magic number 3 and reduces the output file size and test runtime a bit.
All I had to do was copy the same "if (etm->synth_opts.callchain) { ..." block to cs_etm__synth_branch_sample(). The test's grep patterns don't exactly match the branch sample format, so the test fails as-is, but I'm sure that could be fixed.
I suppose there is value in testing instruction output, but maybe we can add the option for users to add callstacks to branch samples, even if it's not tested.
+}
+callchain_regex_1() +{
- printf '%s' \
+'perf[[:space:]]+[0-9]+[[:space:]]+[[0-9]+][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain_foo+0x[[:xdigit:]]+ (.*/perf)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain+0x[[:xdigit:]]+ (.*/perf)\n'\ +'([[:space:]]+[[:xdigit:]]+ .*\n)*' +}
+callchain_regex_2() +{
- printf '%s' \
+'perf[[:space:]]+[0-9]+[[:space:]]+[[0-9]+][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall+0x[[:xdigit:]]+ (.*/perf)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain_foo+0x[[:xdigit:]]+ (.*/perf)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain+0x[[:xdigit:]]+ (.*/perf)\n'\ +'([[:space:]]+[[:xdigit:]]+ .*\n)*' +}
+callchain_regex_3() +{
- printf '%s' \
+'perf[[:space:]]+[0-9]+[[:space:]]+[[0-9]+][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\ +'[[:space:]]+[[:xdigit:]]+ syscall(@plt)?+0x[[:xdigit:]]+ (.*)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall+0x[[:xdigit:]]+ (.*/perf)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain_foo+0x[[:xdigit:]]+ (.*/perf)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain+0x[[:xdigit:]]+ (.*/perf)\n'\ +'([[:space:]]+[[:xdigit:]]+ .*\n)*' +}
+callchain_regex_4() +{
- printf '%s' \
+'perf[[:space:]]+[0-9]+[[:space:]]+[[0-9]+][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\ +'[[:space:]]+[[:xdigit:]]+ .*+0x[[:xdigit:]]+ ([kernel.kallsyms])\n'\ +'[[:space:]]+[[:xdigit:]]+ syscall(@plt)?+0x[[:xdigit:]]+ (.*)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall+0x[[:xdigit:]]+ (.*/perf)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain_foo+0x[[:xdigit:]]+ (.*/perf)\n'\ +'[[:space:]]+[[:xdigit:]]+ callchain+0x[[:xdigit:]]+ (.*/perf)\n'\ +'([[:space:]]+[[:xdigit:]]+ .*\n)*' +}
+find_after_line() +{
- local regex="$1"
- local file="$2"
- local start="$3"
- local offset
- local line
- # Search in byte offset
- offset=$(
+ tail -n +"$start" "$file" |
+ grep -Pzob -m1 "$regex" |
+ tr '\0' '\n' |
+ sed -n 's/^\([0-9][0-9]*\):.*/\1/p;q'
+ )
- if [ -z "$offset" ]; then
+ echo "Failed to match regex after line $start" >&2
+ echo "Regex:" >&2
+ printf '%s\n' "$regex" >&2
+ echo "Context from line $start:" >&2
+ sed -n "${start},$((start + 100))p" "$file" >&2
+ return 1
+ fi
- # Convert from offset to line
- line=$(
+ tail -n +"$start" "$file" |
+ head -c "$offset" |
+ wc -l
+ )
- echo "$((start + line))"
+}
+check_callchain_flow() +{
- local file="$1"
- local l1 l2 l3 l4 l5 l6 l7
- # Callchain push
- l1=$(find_after_line "$(callchain_regex_1)" "$file" 1) || return 1
- l2=$(find_after_line "$(callchain_regex_2)" "$file" "$((l1 + 1))") || return 1
- l3=$(find_after_line "$(callchain_regex_3)" "$file" "$((l2 + 1))") || return 1
- l4=$(find_after_line "$(callchain_regex_4)" "$file" "$((l3 + 1))") || return 1
- # Callchain pop
- l5=$(find_after_line "$(callchain_regex_3)" "$file" "$((l4 + 1))") || return 1
- l6=$(find_after_line "$(callchain_regex_2)" "$file" "$((l5 + 1))") || return 1
- l7=$(find_after_line "$(callchain_regex_1)" "$file" "$((l6 + 1))") || return 1
- echo "Callchain flow matched:"
- echo " l1=$l1 l2=$l2 l3=$l3 l4=$l4 l5=$l5 l6=$l6 l7=$l7"
- return 0
+}
+run_test() +{
- local data=$tmpdir/perf.data
- local script=$tmpdir/perf.script
- if ! record_trace "$data" "$script"; then
+ echo "perf record/script failed"
+ return
+ fi
- check_callchain_flow "$script" || return
- glb_err=0
+}
+skip_if_system_is_not_ready || exit 2
+run_test
+exit $glb_err diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 7cedf05be544ad79a99e86d30dfa4f7b01ca0837..cee9e6b62dcc838c864bbe76efe3b638ed75b134 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -248,6 +248,7 @@ DECLARE_WORKLOAD(inlineloop); DECLARE_WORKLOAD(jitdump); DECLARE_WORKLOAD(context_switch_loop); DECLARE_WORKLOAD(deterministic); +DECLARE_WORKLOAD(callchain); #ifdef HAVE_RUST_SUPPORT DECLARE_WORKLOAD(code_with_type); diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index 7bb4b9829ba245740c8967e6bf3235614cdd55a3..048e371eb63e316453b6b46ebd0a02794c3d25d7 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -13,6 +13,7 @@ perf-test-y += inlineloop.o perf-test-y += jitdump.o perf-test-y += context_switch_loop.o perf-test-y += deterministic.o +perf-test-y += callchain.o ifeq ($(CONFIG_RUST_SUPPORT),y) perf-test-y += code_with_type.o @@ -27,3 +28,4 @@ CFLAGS_traploop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_inlineloop.o = -g -O2 CFLAGS_deterministic.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_named_threads.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE +CFLAGS_callchain.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/callchain.c b/tools/perf/tests/workloads/callchain.c new file mode 100644 index 0000000000000000000000000000000000000000..3951423d8115e9efb49af8ba2586001fc6f02761 --- /dev/null +++ b/tools/perf/tests/workloads/callchain.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <sys/syscall.h> +#include <unistd.h> +#include "../tests.h"
+/*
+ * Mark as noinline to establish the call chain, and avoid the static
+ * annotation to prevent LTO from renaming the functions.
+ */
+noinline void callchain_do_syscall(void); +noinline void callchain_foo(void); +noinline int callchain(int argc, const char **argv);
+noinline void callchain_do_syscall(void) +{
- syscall(SYS_getpid);
+}
+noinline void callchain_foo(void) +{
- callchain_do_syscall();
+}
+noinline int callchain(int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
- callchain_foo();
- return 0;
+}
+DEFINE_WORKLOAD(callchain);