Re: [PATCH v9 9/9] perf test: Add Arm CoreSight callchain test

17 Jun 2026

On 16/06/2026 3:51 pm, Leo Yan wrote:
...
Add a CoreSight shell test for synthesized callchains.
The test uses the new callchain workload to generate a trace and decodes
it with callchain synthesis enabled. It then verifies that the instruction
samples show the expected callchain push and pop.
Use control FIFOs so tracing starts only around the workload, which
keeps the trace data small. The test runs only when the cs_etm event is
available and the user has root permission.
After:
perf test 138 -vvv
   138: CoreSight synthesized callchain:
   ---- start ----
   test child forked, pid 35581
   Callchain flow matched:
     l1=4642868 l2=4642880 l3=4642895 l4=4642919 l5=4670494 l6=4670500 l7=4670520
   ---- end(0) ----
   138: CoreSight synthesized callchain                                                                           : Ok
Assisted-by: Codex:GPT-5.5
Signed-off-by: Leo Yan leo.yan@arm.com

tools/perf/Documentation/perf-test.txt        |   6 +-
  tools/perf/tests/builtin-test.c               |   1 +
  tools/perf/tests/shell/coresight/callchain.sh | 172 ++++++++++++++++++++++++++
  tools/perf/tests/tests.h                      |   1 +
  tools/perf/tests/workloads/Build              |   2 +
  tools/perf/tests/workloads/callchain.c        |  33 +++++
  6 files changed, 213 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index 81c8525f594680d814f80e6f88bcce8d867bb350..859df74e62efc4b1e80da13ae8e053356f68ae54 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -57,7 +57,8 @@ OPTIONS
  --workload=::
   Run a built-in workload, to list them use '--list-workloads', current
   ones include: noploop, thloop, leafloop, sqrtloop, brstack, datasym,

context_switch_loop, deterministic, named_threads and landlock.


context_switch_loop, deterministic, named_threads, landlock and
callchain.

Used with the shell script regression tests.
  
@@ -69,7 +70,8 @@ OPTIONS
   'named_threads' accepts the number of threads and the number of loops to
   do in each thread.

The datasym, landlock and deterministic workloads don't accept any.


The datasym, landlock, deterministic and callchain workloads don't accept
any.

--list-workloads::
   List the available workloads to use with -w/--workload.
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 7e75f590f225e3284980829707ca8d916c98cada..1d1f38127e05429a27f31beda814f2b5f5a75089 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -168,6 +168,7 @@ static struct test_workload *workloads[] = {
   &workload__jitdump,
   &workload__context_switch_loop,
   &workload__deterministic,

&workload__callchain,

#ifdef HAVE_RUST_SUPPORT
   &workload__code_with_type,
diff --git a/tools/perf/tests/shell/coresight/callchain.sh b/tools/perf/tests/shell/coresight/callchain.sh
new file mode 100755
index 0000000000000000000000000000000000000000..13cca7dc11184002e3ddc058c0d0ffa1c458c483
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/callchain.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+# CoreSight synthesized callchain (exclusive)
+# SPDX-License-Identifier: GPL-2.0



+glb_err=1



+if ! tmpdir=$(mktemp -d /tmp/perf-cs-callchain-test.XXXXXX); then

echo "mktemp failed"
exit 1

+fi



+cleanup_files()
+{

rm -rf "$tmpdir"

+}



+trap cleanup_files EXIT
+trap 'cleanup_files; exit $glb_err' TERM INT



+skip_if_system_is_not_ready()
+{

perf list | grep -Pzq 'cs_etm//' || {
echo "[Skip] cs_etm event is not available" >&2


return 2


}

# Requires root for trace in kernel
[ "$(id -u)" = 0 ] || {
echo "[Skip] No root permission" >&2


return 2


}

return 0

+}



+record_trace()
+{

local data=$1
local script=$2

local cf="$tmpdir/ctl"
local af="$tmpdir/ack"

mkfifo "$cf" "$af"

perf record -o "$data" -e cs_etm// --per-thread -D -1 --control fifo:"$cf","$af" -- \
perf test --record-ctl fifo:"$cf","$af" -w callchain >/dev/null 2>&1 &&



# It is safe to use 'i3i' with a three-instruction interval, since the
# workload is compiled with -O0.
perf script --itrace=g16i3il64 -i "$data" > "$script"

Is there a reason we don't generate callstacks on branch samples and use 
--itrace=g16bl64? That removes the magic number 3 and reduces the output 
file size and test runtime a bit.
To try it, all I had to do was copy the same "if (etm->synth_opts.callchain) { ..." 
block into cs_etm__synth_branch_sample(). With that change the test fails, 
seemingly because its grep patterns don't exactly match the branch sample 
format, but I'm sure that could be fixed.
I suppose there is value in testing instruction output, but maybe we can 
add the option for users to add callstacks to branch samples, even if 
it's not tested.
...
+}



+callchain_regex_1()
+{

printf '%s' \

+'perf[[:space:]]+[0-9]+[[:space:]]+[[0-9]+][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain_foo+0x[[:xdigit:]]+ (.*/perf)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain+0x[[:xdigit:]]+ (.*/perf)\n'\
+'([[:space:]]+[[:xdigit:]]+ .*\n)*'
+}



+callchain_regex_2()
+{

printf '%s' \

+'perf[[:space:]]+[0-9]+[[:space:]]+[[0-9]+][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall+0x[[:xdigit:]]+ (.*/perf)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain_foo+0x[[:xdigit:]]+ (.*/perf)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain+0x[[:xdigit:]]+ (.*/perf)\n'\
+'([[:space:]]+[[:xdigit:]]+ .*\n)*'
+}



+callchain_regex_3()
+{

printf '%s' \

+'perf[[:space:]]+[0-9]+[[:space:]]+[[0-9]+][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\
+'[[:space:]]+[[:xdigit:]]+ syscall(@plt)?+0x[[:xdigit:]]+ (.*)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall+0x[[:xdigit:]]+ (.*/perf)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain_foo+0x[[:xdigit:]]+ (.*/perf)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain+0x[[:xdigit:]]+ (.*/perf)\n'\
+'([[:space:]]+[[:xdigit:]]+ .*\n)*'
+}



+callchain_regex_4()
+{

printf '%s' \

+'perf[[:space:]]+[0-9]+[[:space:]]+[[0-9]+][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\
+'[[:space:]]+[[:xdigit:]]+ .*+0x[[:xdigit:]]+ ([kernel.kallsyms])\n'\
+'[[:space:]]+[[:xdigit:]]+ syscall(@plt)?+0x[[:xdigit:]]+ (.*)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall+0x[[:xdigit:]]+ (.*/perf)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain_foo+0x[[:xdigit:]]+ (.*/perf)\n'\
+'[[:space:]]+[[:xdigit:]]+ callchain+0x[[:xdigit:]]+ (.*/perf)\n'\
+'([[:space:]]+[[:xdigit:]]+ .*\n)*'
+}



+find_after_line()
+{

local regex="$1"
local file="$2"
local start="$3"
local offset
local line

# Search in byte offset
offset=$(
tail -n +"$start" "$file" |


grep -Pzob -m1 "$regex" |


tr '\0' '\n' |


sed -n 's/^\([0-9][0-9]*\):.*/\1/p;q'


)

if [ -z "$offset" ]; then
echo "Failed to match regex after line $start" >&2


echo "Regex:" >&2


printf '%s\n' "$regex" >&2


echo "Context from line $start:" >&2


sed -n "${start},$((start + 100))p" "$file" >&2


return 1


fi

# Convert from offset to line
line=$(
tail -n +"$start" "$file" |


head -c "$offset" |


wc -l


)

echo "$((start + line))"

+}



+check_callchain_flow()
+{

local file="$1"
local l1 l2 l3 l4 l5 l6 l7

# Callchain push
l1=$(find_after_line "$(callchain_regex_1)" "$file" 1) || return 1
l2=$(find_after_line "$(callchain_regex_2)" "$file" "$((l1 + 1))") || return 1
l3=$(find_after_line "$(callchain_regex_3)" "$file" "$((l2 + 1))") || return 1
l4=$(find_after_line "$(callchain_regex_4)" "$file" "$((l3 + 1))") || return 1

# Callchain pop
l5=$(find_after_line "$(callchain_regex_3)" "$file" "$((l4 + 1))") || return 1
l6=$(find_after_line "$(callchain_regex_2)" "$file" "$((l5 + 1))") || return 1
l7=$(find_after_line "$(callchain_regex_1)" "$file" "$((l6 + 1))") || return 1

echo "Callchain flow matched:"
echo "  l1=$l1 l2=$l2 l3=$l3 l4=$l4 l5=$l5 l6=$l6 l7=$l7"

return 0

+}



+run_test()
+{

local data=$tmpdir/perf.data
local script=$tmpdir/perf.script

if ! record_trace "$data" "$script"; then
echo "perf record/script failed"


return


fi

check_callchain_flow "$script" || return

glb_err=0

+}



+skip_if_system_is_not_ready || exit 2



+run_test



+exit $glb_err
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 7cedf05be544ad79a99e86d30dfa4f7b01ca0837..cee9e6b62dcc838c864bbe76efe3b638ed75b134 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -248,6 +248,7 @@ DECLARE_WORKLOAD(inlineloop);
  DECLARE_WORKLOAD(jitdump);
  DECLARE_WORKLOAD(context_switch_loop);
  DECLARE_WORKLOAD(deterministic);
+DECLARE_WORKLOAD(callchain);
  
  #ifdef HAVE_RUST_SUPPORT
  DECLARE_WORKLOAD(code_with_type);
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index 7bb4b9829ba245740c8967e6bf3235614cdd55a3..048e371eb63e316453b6b46ebd0a02794c3d25d7 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -13,6 +13,7 @@ perf-test-y += inlineloop.o
  perf-test-y += jitdump.o
  perf-test-y += context_switch_loop.o
  perf-test-y += deterministic.o
+perf-test-y += callchain.o
  
  ifeq ($(CONFIG_RUST_SUPPORT),y)
      perf-test-y += code_with_type.o
@@ -27,3 +28,4 @@ CFLAGS_traploop.o         = -g -O0 -fno-inline -U_FORTIFY_SOURCE
  CFLAGS_inlineloop.o       = -g -O2
  CFLAGS_deterministic.o    = -g -O0 -fno-inline -U_FORTIFY_SOURCE
  CFLAGS_named_threads.o    = -g -O0 -fno-inline -U_FORTIFY_SOURCE
+CFLAGS_callchain.o        = -g -O0 -fno-inline -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/callchain.c b/tools/perf/tests/workloads/callchain.c
new file mode 100644
index 0000000000000000000000000000000000000000..3951423d8115e9efb49af8ba2586001fc6f02761
--- /dev/null
+++ b/tools/perf/tests/workloads/callchain.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include "../tests.h"



+/*
+ * Mark as noinline to establish the call chain, and avoid the static
+ * annotation to prevent LTO from renaming the functions.
+ */

+noinline void callchain_do_syscall(void);
+noinline void callchain_foo(void);
+noinline int callchain(int argc, const char **argv);



+noinline void callchain_do_syscall(void)
+{

syscall(SYS_getpid);

+}



+noinline void callchain_foo(void)
+{

callchain_do_syscall();

+}



+noinline int callchain(int argc __maybe_unused,

       const char **argv __maybe_unused)



+{

callchain_foo();

return 0;

+}



+DEFINE_WORKLOAD(callchain);

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

Re: [PATCH v9 9/9] perf test: Add Arm CoreSight callchain test