Hi, Jiri. Sorry for my oversight, the optimized compile condition didn't work properly.
Could you try to replace the `__attribute__((optimize("O2")))` with `#pragma GCC optimize("O2")` in usdt_o2.c and verify it one more time? I guess it'll help. In fact, the usdt1 argument spec generated by the `__attribute__((optimize("O2")))` on my machine was `8@%rax`, not `8@(%rdx,%rax,8)`.
For more detail:
- #if defined(__GNUC__) && !defined(__clang__)
- __attribute__((optimize("O2")))
+#if (defined(__GNUC__) && !defined(__clang__))
+#pragma GCC optimize("O2")
+#else
+#pragma message("non-gcc compiler: the correct probes might not be installed")
+#endif
Thanks
At 2025-08-15 20:44:32, "Jiri Olsa" olsajiri@gmail.com wrote:
On Thu, Aug 14, 2025 at 04:07:39PM +0000, Jiawei Zhao wrote:
usdt_o2 is intended to exercise the SIB (Scale-Index-Base) argument handling in libbpf's USDT path. With GCC 13 this reliably produced a SIB-form argument (e.g. 8@(%rdx,%rax,8)), but with newer GCC (e.g. 15) the compiler frequently optimizes the probe argument into a plain register (e.g. 8@%rax) or a stack slot, so the test stops covering the SIB code path and becomes flaky across toolchains.
Force a SIB memory operand in the probe by:
- placing the base pointer into %rdx and the index into %rax using an empty inline asm with output constraints ("=d", "=a") and matching inputs
- immediately passing base[idx] to STAP_PROBE1.
- enabling this only on x86 platforms.
This makes the compiler encode the operand as SIB (base + index*8), which in .note.stapsdt shows up as 8@(%rdx,%rax,8) regardless of GCC version. A memory clobber and noinline prevent reordering/re-allocation around the probe site.
This change is x86_64-specific and does not alter program semantics; it only stabilizes the USDT argument shape so the test consistently validates SIB handling. Clang historically prefers stack temporaries for such operands, but the selftests build with GCC, and this keeps behavior stable across GCC versions without introducing a separate .S file.
Signed-off-by: Jiawei Zhao phoenix500526@163.com
.../selftests/bpf/prog_tests/usdt_o2.c | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c index f02dcf5188ab..e46d5743ad24 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c @@ -15,11 +15,19 @@ __attribute__((optimize("O2"))) int lets_test_this(int); static volatile __u64 array[1] = {test_value}; -static __always_inline void trigger_func(void) +static noinline void trigger_func(void) { +#if defined(__x86_64__) || defined(__i386__) /* Base address + offset + (index * scale) */
-	for (volatile int i = 0; i <= 0; i++)
-		STAP_PROBE1(test, usdt1, array[i]);
+	/* Force SIB addressing with inline assembly */
+	const __u64 *base;
+	__u32 idx;
+	/* binding base to %rdx and idx to %rax */
+	asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : "memory");
+	STAP_PROBE1(test, usdt1, base[idx]);
hum, I still end up with
stapsdt 0x0000002a NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt1 Location: 0x00000000007674c9, Base: 0x00000000035bc698, Semaphore: 0x0000000000000000 Arguments: 8@%rax
disasm being:
static noinline void trigger_func(void) { 76749f: 55 push %rbp 7674a0: 48 89 e5 mov %rsp,%rbp /* Base address + offset + (index * scale) */ /* Force SIB addressing with inline assembly */ const __u64 *base; __u32 idx; /* binding base to %rdx and idx to %rax */ asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : "memory"); 7674a3: ba 20 49 9c 03 mov $0x39c4920,%edx 7674a8: b8 00 00 00 00 mov $0x0,%eax 7674ad: 48 89 55 f8 mov %rdx,-0x8(%rbp) 7674b1: 89 45 f4 mov %eax,-0xc(%rbp) STAP_PROBE1(test, usdt1, base[idx]); 7674b4: 8b 45 f4 mov -0xc(%rbp),%eax 7674b7: 48 8d 14 c5 00 00 00 lea 0x0(,%rax,8),%rdx 7674be: 00 7674bf: 48 8b 45 f8 mov -0x8(%rbp),%rax 7674c3: 48 01 d0 add %rdx,%rax 7674c6: 48 8b 00 mov (%rax),%rax 7674c9: 90 nop #else STAP_PROBE1(test, usdt1, array[0]); #endif } 7674ca: 90 nop 7674cb: 5d pop %rbp 7674cc: c3 ret
I wonder if we could also try to bring in Andrii's usdt.h [1] and overload usdt arguments like outlined in the hack below (full code in [2])
we will probably need a smarter and more sustainable change, but I guess you get the idea
jirka
[1] https://github.com/anakryiko/usdt [2] git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git usdt_hack
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c index e46d5743ad24..7bb098c37de5 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c @@ -4,6 +4,8 @@
#include "../sdt.h"
#include "test_usdt_o2.skel.h"
+#define USDT_ARGS ".asciz \"(,%%rax,8)\"\n"
+#include "usdt.h"
#if defined(__GNUC__) && !defined(__clang__) __attribute__((optimize("O2"))) @@ -28,6 +30,7 @@ static noinline void trigger_func(void) #else STAP_PROBE1(test, usdt1, array[0]); #endif
+	USDT(krava, test1, 1, 2);
}
static void basic_sib_usdt(void) diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h index 549d1f774810..960ebd6aa88b 100644 --- a/tools/testing/selftests/bpf/usdt.h +++ b/tools/testing/selftests/bpf/usdt.h @@ -403,6 +403,10 @@ struct usdt_sema { volatile unsigned short active; }; __asm__ __volatile__ ("" :: "m" (sema)); #endif
+#ifndef USDT_ARGS +#define USDT_ARGS __usdt_asm_args(__VA_ARGS__) +#endif
/* main USDT definition (nop and .note.stapsdt metadata) */ #define __usdt_probe(group, name, sema_def, sema, ...) do { \ sema_def(sema) \ @@ -418,7 +422,7 @@ struct usdt_sema { volatile unsigned short active; }; __usdt_asm1( __usdt_asm_addr sema) \ __usdt_asm_strz(group) \ __usdt_asm_strz(name) \
-	__usdt_asm_args(__VA_ARGS__)						\
+	USDT_ARGS								\
	__usdt_asm1(		.ascii "\0")					\
	__usdt_asm1(994:	.balign 4)					\
	__usdt_asm1(		.popsection)					\