When using GCC on x86-64 to compile an usdt prog with -O1 or higher optimization, the compiler will generate SIB addressing mode for global array and PC-relative addressing mode for global variable, e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
The current USDT implementation in libbpf cannot parse these two formats, causing `bpf_program__attach_usdt()` to fail with -ENOENT (unrecognized register).
This patch series adds support for SIB addressing mode in USDT probes. The main changes include: - add correct handling logic for SIB-addressed arguments in `parse_usdt_arg`. - add an usdt_o2 test case to cover SIB addressing mode.
Testing shows that the SIB probe correctly generates 8@(%rcx,%rax,8) argument spec and passes all validation checks.
The modification history of this patch series: Change since v1: - refactor the code to make it more readable - modify the commit message to explain why and how
Change since v2: - fix the `scale` uninitialized error
Change since v3: - force -O2 optimization for usdt.test.o to generate SIB addressing usdt and pass all test cases.
Change since v4: - split the patch into two parts, one for the fix and the other for the test
Change since v5: - Only enable optimization for x86 architecture to generate SIB addressing usdt argument spec.
Change since v6: - Add an usdt_o2 test case to cover SIB addressing mode. - Reinstate the usdt.c test case.
Change since v7: - Refactor modifications to __bpf_usdt_arg_spec to avoid increasing its size, achieving better compatibility - Fix some minor code style issues - Refactor the usdt_o2 test case, removing semaphore and adding GCC attribute to force -O2 optimization
Change since v8: - Refactor the usdt_o2 test case, using assembly to force SIB addressing mode.
Change since v9: - Only enable the usdt_o2 test case on x86_64 and i386 architectures since the SIB addressing mode is only supported on x86_64 and i386.
Change since v10: - Replace `__attribute__((optimize("O2")))` with `#pragma GCC optimize("O1")` to fix the issue where the optimized compilation condition works improperly. - Renamed test case usdt_o2 and relevant files name to usdt_o1 in that O1 level optimization is enough to generate SIB addressing usdt argument spec.
Change since v11: - Replace `STAP_PROBE1` with `STAP_PROBE_ASM` - Use bit fields instead of bit shifting operations - Merge the usdt_o1 test case into the usdt test case
Change since v12: - This patch is same with the v12 but with a new version number.
Jiawei Zhao (2): libbpf: fix USDT SIB argument handling causing unrecognized register error selftests/bpf: Enrich subtest_basic_usdt case in selftests to cover SIB handling logic
tools/lib/bpf/usdt.bpf.h | 47 ++++++++++++++- tools/lib/bpf/usdt.c | 58 +++++++++++++++++-- tools/testing/selftests/bpf/prog_tests/usdt.c | 44 +++++++++++++- tools/testing/selftests/bpf/progs/test_usdt.c | 30 ++++++++++ 4 files changed, 170 insertions(+), 9 deletions(-)
On x86-64, USDT arguments can be specified using Scale-Index-Base (SIB) addressing, e.g. "1@-96(%rbp,%rax,8)". The current USDT implementation in libbpf cannot parse this format, causing `bpf_program__attach_usdt()` to fail with -ENOENT (unrecognized register).
This patch fixes this by implementing the necessary changes: - add correct handling for SIB-addressed arguments in `bpf_usdt_arg`. - add adaptive support to `__bpf_usdt_arg_type` and `__bpf_usdt_arg_spec` to represent SIB addressing parameters.
Signed-off-by: Jiawei Zhao phoenix500526@163.com --- tools/lib/bpf/usdt.bpf.h | 47 ++++++++++++++++++++++++++++++-- tools/lib/bpf/usdt.c | 58 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 2a7865c8e3fe..263168d57286 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -4,6 +4,7 @@ #define __USDT_BPF_H__
#include <linux/errno.h> +#include <asm/byteorder.h> #include "bpf_helpers.h" #include "bpf_tracing.h"
@@ -34,13 +35,34 @@ enum __bpf_usdt_arg_type { BPF_USDT_ARG_CONST, BPF_USDT_ARG_REG, BPF_USDT_ARG_REG_DEREF, + BPF_USDT_ARG_SIB, };
+/* + * This struct layout is designed specifically to be backwards/forward + * compatible between libbpf versions for ARG_CONST, ARG_REG, and + * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+. + */ struct __bpf_usdt_arg_spec { /* u64 scalar interpreted depending on arg_type, see below */ __u64 val_off; /* arg location case, see bpf_usdt_arg() for details */ - enum __bpf_usdt_arg_type arg_type; + enum __bpf_usdt_arg_type arg_type: 8; +#if defined(__LITTLE_ENDIAN_BITFIELD) + /* index register offset within struct pt_regs (high 12 bits) */ + __u16 idx_reg_off: 12, + /* scale factor for index register (1, 2, 4, or 8) (low 4 bits) */ + scale: 4; +#elif defined(__BIG_ENDIAN_BITFIELD) + /* scale factor for index register (1, 2, 4, or 8) (high 4 bits) */ + __u16 scale: 4, + /* index register offset within struct pt_regs (low 12 bits) */ + idx_reg_off: 12; +#else +#error "Please fix <asm/byteorder.h>" +#endif + /* reserved for future use, keeps reg_off offset stable */ + __u8 reserved; /* offset of referenced register within struct pt_regs */ short reg_off; /* whether arg should be interpreted as signed value */ @@ -149,7 +171,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) { struct __bpf_usdt_spec *spec; struct __bpf_usdt_arg_spec *arg_spec; - unsigned long val; + unsigned long val, idx; int err, spec_id;
*res = 0; @@ -202,6 +224,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) return err; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ val >>= arg_spec->arg_bitshift; +#endif + break; + case BPF_USDT_ARG_SIB: + /* Arg is in memory addressed by SIB (Scale-Index-Base) mode + * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first + * fetch the base register contents and the index register + * contents from pt_regs. Then we calculate the final address + * as base + (index * scale) + offset, and do a user-space + * probe read to fetch the argument value. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx * arg_spec->scale) + arg_spec->val_off)); + if (err) + return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; #endif break; default: diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 3373b9d45ac4..730a896a566b 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -9,6 +9,7 @@ #include <unistd.h> #include <linux/ptrace.h> #include <linux/kernel.h> +#include <asm/byteorder.h>
/* s8 will be marked as poison while it's a reg of riscv */ #if defined(__riscv) @@ -200,12 +201,23 @@ enum usdt_arg_type { USDT_ARG_CONST, USDT_ARG_REG, USDT_ARG_REG_DEREF, + USDT_ARG_SIB, };
/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ struct usdt_arg_spec { __u64 val_off; - enum usdt_arg_type arg_type; + enum usdt_arg_type arg_type: 8; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 idx_reg_off: 12, + scale: 4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 scale: 4, + idx_reg_off: 12; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __u8 reserved; /* keep reg_off offset stable */ short reg_off; bool arg_signed; char arg_bitshift; @@ -1283,11 +1295,46 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { - char reg_name[16]; - int len, reg_off; - long off; + char reg_name[16] = {0}, idx_reg_name[16] = {0}; + int len, reg_off, idx_reg_off, scale = 1; + long off = 0; + + if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n", + arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 || + sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n", + arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 || + sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n", + arg_sz, &off, reg_name, idx_reg_name, &len) == 4 || + sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n", + arg_sz, reg_name, idx_reg_name, &len) == 3 + ) { + /* + * Scale Index Base case: + * 1@-96(%rbp,%rax,8) + * 1@(%rbp,%rax,8) + * 1@-96(%rbp,%rax) + * 1@(%rbp,%rax) + */ + arg->arg_type = USDT_ARG_SIB; + arg->val_off = off;
- if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) { + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + + idx_reg_off = calc_pt_regs_off(idx_reg_name); + if (idx_reg_off < 0) + return idx_reg_off; + /* validate scale factor and set fields directly */ + if (scale != 1 && scale != 2 && scale != 4 && scale != 8) { + pr_warn("usdt: invalid SIB scale %d, expected 1,2,4,8; defaulting to 1\n", scale); + return -EINVAL; + } + arg->idx_reg_off = idx_reg_off; + arg->scale = scale; + } else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", + arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; @@ -1306,6 +1353,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG; + /* register read has no memory offset */ arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao phoenix500526@163.com wrote:
On x86-64, USDT arguments can be specified using Scale-Index-Base (SIB) addressing, e.g. "1@-96(%rbp,%rax,8)". The current USDT implementation in libbpf cannot parse this format, causing `bpf_program__attach_usdt()` to fail with -ENOENT (unrecognized register).
This patch fixes this by implementing the necessary changes:
- add correct handling for SIB-addressed arguments in `bpf_usdt_arg`.
- add adaptive support to `__bpf_usdt_arg_type` and `__bpf_usdt_arg_spec` to represent SIB addressing parameters.
Signed-off-by: Jiawei Zhao phoenix500526@163.com
tools/lib/bpf/usdt.bpf.h | 47 ++++++++++++++++++++++++++++++-- tools/lib/bpf/usdt.c | 58 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 2a7865c8e3fe..263168d57286 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -4,6 +4,7 @@ #define __USDT_BPF_H__
#include <linux/errno.h> +#include <asm/byteorder.h> #include "bpf_helpers.h" #include "bpf_tracing.h"
@@ -34,13 +35,34 @@ enum __bpf_usdt_arg_type { BPF_USDT_ARG_CONST, BPF_USDT_ARG_REG, BPF_USDT_ARG_REG_DEREF,
BPF_USDT_ARG_SIB,
};
+/*
- This struct layout is designed specifically to be backwards/forward
- compatible between libbpf versions for ARG_CONST, ARG_REG, and
- ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
- */
struct __bpf_usdt_arg_spec { /* u64 scalar interpreted depending on arg_type, see below */ __u64 val_off; /* arg location case, see bpf_usdt_arg() for details */
enum __bpf_usdt_arg_type arg_type;
enum __bpf_usdt_arg_type arg_type: 8;
this needs to be inside the #if/#elif/#else, I believe. When it previously was a 4 byte field, BPF_USDT_ARG_REG = 1 would be `0x01, 0x00, 0x00, 0x00` in memory on little endian and `0x00, 0x00, 0x00, 0x01` on big endian. So you need to reorder it such that on big endian it's the last field.
pw-bot: cr
+#if defined(__LITTLE_ENDIAN_BITFIELD)
I'm not sure whether compiler itself defines __LITTLE_ENDIAN_BITFIELD. Throughout libbpf we use #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, let's do that here as well?
/* index register offset within struct pt_regs (high 12 bits) */
__u16 idx_reg_off: 12,
/* scale factor for index register (1, 2, 4, or 8) (low 4 bits) */
scale: 4;
nit: don't do comma-separated bitfields. compiler will combine them as necessary, even if they are declared as separate fields. so just:
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ enum __bpf_usdt_arg_type arg_type: 8; __u16 idx_reg_off: 12; __u16 idx_reg_shift: 4; __u8 __reserved: 8; #else __u8 __reserved: 8; __u16 idx_reg_off: 12; __u16 idx_reg_shift: 4; enum __bpf_usdt_arg_type arg_type: 8; #endif
Note that we don't need to change order of idx_reg_off and idx_reg_shift, as they are new additions (and they don't have to be consistent between big and little endian)
+#elif defined(__BIG_ENDIAN_BITFIELD)
/* scale factor for index register (1, 2, 4, or 8) (high 4 bits) */
__u16 scale: 4,
/* index register offset within struct pt_regs (low 12 bits) */
idx_reg_off: 12;
+#else +#error "Please fix <asm/byteorder.h>" +#endif
let's drop the fix suggestion, isn't asm/byteorder.h kernel-specific header?.. I'm fine assuming only big or little endian system
/* reserved for future use, keeps reg_off offset stable */
__u8 reserved; /* offset of referenced register within struct pt_regs */ short reg_off; /* whether arg should be interpreted as signed value */
@@ -149,7 +171,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) { struct __bpf_usdt_spec *spec; struct __bpf_usdt_arg_spec *arg_spec;
unsigned long val;
unsigned long val, idx; int err, spec_id; *res = 0;
@@ -202,6 +224,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) return err; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ val >>= arg_spec->arg_bitshift; +#endif
break;
case BPF_USDT_ARG_SIB:
/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
* (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
* fetch the base register contents and the index register
* contents from pt_regs. Then we calculate the final address
* as base + (index * scale) + offset, and do a user-space
* probe read to fetch the argument value.
*/
err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
if (err)
return err;
err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
if (err)
return err;
err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx * arg_spec->scale) + arg_spec->val_off));
hm.. I thought we discussed recording the number of bits to shift by, no? It's not too big of a deal, we can afford 4 bits (instead of 2 that would be enough for bit shift), but any specific reason you prefer multiplication here?
if (err)
return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
val >>= arg_spec->arg_bitshift;
#endif break; default:
[...]
if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
reg_off = calc_pt_regs_off(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
idx_reg_off = calc_pt_regs_off(idx_reg_name);
if (idx_reg_off < 0)
return idx_reg_off;
/* validate scale factor and set fields directly */
if (scale != 1 && scale != 2 && scale != 4 && scale != 8) {
pr_warn("usdt: invalid SIB scale %d, expected 1,2,4,8; defaulting to 1\n", scale);
"defaulting to 1" is very confusing, why? (and please use spaces after comma)
return -EINVAL;
}
arg->idx_reg_off = idx_reg_off;
arg->scale = scale;
} else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off;
@@ -1306,6 +1353,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG;
/* register read has no memory offset */ arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name);
-- 2.43.0
hm.. I thought we discussed recording the number of bits to shift by, no? It's not too big of a deal, we can afford 4 bits (instead of 2 that would be enough for bit shift), but any specific reason you prefer multiplication here?
Using multiplication is more straightforward since the format of a SIB address looks like `8@(%rdx,%rax,8)`. We can directly store the 8 as a scale instead of translating it to the number of bits to shift by.
"defaulting to 1" is very confusing, why? (and please use spaces after comma)
Yes, it's misleading to put it down here. The default value of scale is 1 since the scale can be omitted when the scale is 1. I remove it in the next version.
At 2025-08-23 06:43:06, "Andrii Nakryiko" andrii.nakryiko@gmail.com wrote:
On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao phoenix500526@163.com wrote:
On x86-64, USDT arguments can be specified using Scale-Index-Base (SIB) addressing, e.g. "1@-96(%rbp,%rax,8)". The current USDT implementation in libbpf cannot parse this format, causing `bpf_program__attach_usdt()` to fail with -ENOENT (unrecognized register).
This patch fixes this by implementing the necessary changes:
- add correct handling for SIB-addressed arguments in `bpf_usdt_arg`.
- add adaptive support to `__bpf_usdt_arg_type` and `__bpf_usdt_arg_spec` to represent SIB addressing parameters.
Signed-off-by: Jiawei Zhao phoenix500526@163.com
tools/lib/bpf/usdt.bpf.h | 47 ++++++++++++++++++++++++++++++-- tools/lib/bpf/usdt.c | 58 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 2a7865c8e3fe..263168d57286 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -4,6 +4,7 @@ #define __USDT_BPF_H__
#include <linux/errno.h> +#include <asm/byteorder.h> #include "bpf_helpers.h" #include "bpf_tracing.h"
@@ -34,13 +35,34 @@ enum __bpf_usdt_arg_type { BPF_USDT_ARG_CONST, BPF_USDT_ARG_REG, BPF_USDT_ARG_REG_DEREF,
BPF_USDT_ARG_SIB,
};
+/*
- This struct layout is designed specifically to be backwards/forward
- compatible between libbpf versions for ARG_CONST, ARG_REG, and
- ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
- */
struct __bpf_usdt_arg_spec { /* u64 scalar interpreted depending on arg_type, see below */ __u64 val_off; /* arg location case, see bpf_usdt_arg() for details */
enum __bpf_usdt_arg_type arg_type;
enum __bpf_usdt_arg_type arg_type: 8;
this needs to be inside the #if/#elif/#else, I believe. When it previously was a 4 byte field, BPF_USDT_ARG_REG = 1 would be `0x01, 0x00, 0x00, 0x00` in memory on little endian and `0x00, 0x00, 0x00, 0x01` on big endian. So you need to reorder it such that on big endian it's the last field.
pw-bot: cr
+#if defined(__LITTLE_ENDIAN_BITFIELD)
I'm not sure whether compiler itself defines __LITTLE_ENDIAN_BITFIELD. Throughout libbpf we use #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, let's do that here as well?
/* index register offset within struct pt_regs (high 12 bits) */
__u16 idx_reg_off: 12,
/* scale factor for index register (1, 2, 4, or 8) (low 4 bits) */
scale: 4;
nit: don't do comma-separated bitfields. compiler will combine them as necessary, even if they are declared as separate fields. so just:
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ enum __bpf_usdt_arg_type arg_type: 8; __u16 idx_reg_off: 12; __u16 idx_reg_shift: 4; __u8 __reserved: 8; #else __u8 __reserved: 8; __u16 idx_reg_off: 12; __u16 idx_reg_shift: 4; enum __bpf_usdt_arg_type arg_type: 8; #endif
Note that we don't need to change order of idx_reg_off and idx_reg_shift, as they are new additions (and they don't have to be consistent between big and little endian)
+#elif defined(__BIG_ENDIAN_BITFIELD)
/* scale factor for index register (1, 2, 4, or 8) (high 4 bits) */
__u16 scale: 4,
/* index register offset within struct pt_regs (low 12 bits) */
idx_reg_off: 12;
+#else +#error "Please fix <asm/byteorder.h>" +#endif
let's drop the fix suggestion, isn't asm/byteorder.h kernel-specific header?.. I'm fine assuming only big or little endian system
/* reserved for future use, keeps reg_off offset stable */
__u8 reserved; /* offset of referenced register within struct pt_regs */ short reg_off; /* whether arg should be interpreted as signed value */
@@ -149,7 +171,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) { struct __bpf_usdt_spec *spec; struct __bpf_usdt_arg_spec *arg_spec;
unsigned long val;
unsigned long val, idx; int err, spec_id; *res = 0;
@@ -202,6 +224,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) return err; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ val >>= arg_spec->arg_bitshift; +#endif
break;
case BPF_USDT_ARG_SIB:
/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
* (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
* fetch the base register contents and the index register
* contents from pt_regs. Then we calculate the final address
* as base + (index * scale) + offset, and do a user-space
* probe read to fetch the argument value.
*/
err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
if (err)
return err;
err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
if (err)
return err;
err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx * arg_spec->scale) + arg_spec->val_off));
hm.. I thought we discussed recording the number of bits to shift by, no? It's not too big of a deal, we can afford 4 bits (instead of 2 that would be enough for bit shift), but any specific reason you prefer multiplication here?
if (err)
return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
val >>= arg_spec->arg_bitshift;
#endif break; default:
[...]
if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
reg_off = calc_pt_regs_off(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
idx_reg_off = calc_pt_regs_off(idx_reg_name);
if (idx_reg_off < 0)
return idx_reg_off;
/* validate scale factor and set fields directly */
if (scale != 1 && scale != 2 && scale != 4 && scale != 8) {
pr_warn("usdt: invalid SIB scale %d, expected 1,2,4,8; defaulting to 1\n", scale);
"defaulting to 1" is very confusing, why? (and please use spaces after comma)
return -EINVAL;
}
arg->idx_reg_off = idx_reg_off;
arg->scale = scale;
} else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off;
@@ -1306,6 +1353,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG;
/* register read has no memory offset */ arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name);
-- 2.43.0
When using GCC on x86-64 to compile an usdt prog with -O1 or higher optimization, the compiler will generate SIB addressing mode for global array and PC-relative addressing mode for global variable, e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
In this patch: - enrich subtest_basic_usdt test case to cover SIB addressing usdt argument spec handling logic
Signed-off-by: Jiawei Zhao phoenix500526@163.com --- tools/testing/selftests/bpf/prog_tests/usdt.c | 44 ++++++++++++++++++- tools/testing/selftests/bpf/progs/test_usdt.c | 30 +++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 9057e983cc54..c04b416aa4a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -25,6 +25,10 @@ unsigned short test_usdt0_semaphore SEC(".probes"); unsigned short test_usdt3_semaphore SEC(".probes"); unsigned short test_usdt12_semaphore SEC(".probes");
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__)) +unsigned short test_usdt_sib_semaphore SEC(".probes"); +#endif + static void __always_inline trigger_func(int x) { long y = 42;
@@ -40,12 +44,29 @@ static void __always_inline trigger_func(int x) { } }
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__)) +static __attribute__((optimize("O1"))) void trigger_sib_spec(void) +{ + /* Base address + offset + (index * scale) */ + /* Force SIB addressing with inline assembly */ + asm volatile( + "# probe point with memory access\n" + STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2)) + "# end probe point" + : + : "d"(nums), "a"(0) + : "memory" + ); +} +#endif + static void subtest_basic_usdt(void) { LIBBPF_OPTS(bpf_usdt_opts, opts); struct test_usdt *skel; struct test_usdt__bss *bss; int err, i; + const __u64 expected_cookie = 0xcafedeadbeeffeed;
skel = test_usdt__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -59,20 +80,29 @@ static void subtest_basic_usdt(void) goto cleanup;
/* usdt0 won't be auto-attached */ - opts.usdt_cookie = 0xcafedeadbeeffeed; + opts.usdt_cookie = expected_cookie; skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, 0 /*self*/, "/proc/self/exe", "test", "usdt0", &opts); if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link")) goto cleanup;
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__)) + opts.usdt_cookie = expected_cookie; + skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib, + 0 /*self*/, "/proc/self/exe", + "test", "usdt_sib", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link")) + goto cleanup; +#endif + trigger_func(1);
ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called"); ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called"); ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
- ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie"); + ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie"); ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt"); ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret"); ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size"); @@ -156,6 +186,16 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__)) + trigger_sib_spec(); + ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called"); + ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie"); + ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt"); + ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg"); + ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret"); + ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size"); +#endif + cleanup: test_usdt__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c index 096488f47fbc..b5f883bca66b 100644 --- a/tools/testing/selftests/bpf/progs/test_usdt.c +++ b/tools/testing/selftests/bpf/progs/test_usdt.c @@ -107,4 +107,34 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5, return 0; }
+ +int usdt_sib_called; +u64 usdt_sib_cookie; +int usdt_sib_arg_cnt; +int usdt_sib_arg_ret; +u64 usdt_sib_arg; +int usdt_sib_arg_size; + +// Note: usdt_sib is only tested on x86-related architectures, so it requires +// manual attach since auto-attach will panic tests under other architectures +SEC("usdt") +int usdt_sib(struct pt_regs *ctx) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt_sib_called, 1); + + usdt_sib_cookie = bpf_usdt_cookie(ctx); + usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx); + + usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp); + usdt_sib_arg = (short)tmp; + usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0); + + return 0; +} + char _license[] SEC("license") = "GPL";
On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao phoenix500526@163.com wrote:
When using GCC on x86-64 to compile an usdt prog with -O1 or higher optimization, the compiler will generate SIB addressing mode for global array and PC-relative addressing mode for global variable, e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
In this patch:
- enrich subtest_basic_usdt test case to cover SIB addressing usdt argument spec handling logic
Signed-off-by: Jiawei Zhao phoenix500526@163.com
tools/testing/selftests/bpf/prog_tests/usdt.c | 44 ++++++++++++++++++- tools/testing/selftests/bpf/progs/test_usdt.c | 30 +++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 9057e983cc54..c04b416aa4a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -25,6 +25,10 @@ unsigned short test_usdt0_semaphore SEC(".probes"); unsigned short test_usdt3_semaphore SEC(".probes"); unsigned short test_usdt12_semaphore SEC(".probes");
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
does clang define __GNUC__ as well? otherwise why !define(__clang__) ?
+unsigned short test_usdt_sib_semaphore SEC(".probes"); +#endif
static void __always_inline trigger_func(int x) { long y = 42;
@@ -40,12 +44,29 @@ static void __always_inline trigger_func(int x) { } }
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__)) +static __attribute__((optimize("O1"))) void trigger_sib_spec(void)
you use assembly directly, so optimize() should be irrelevant, no?
So we can make this non-GCC specific, right?
+{
/* Base address + offset + (index * scale) */
/* Force SIB addressing with inline assembly */
asm volatile(
"# probe point with memory access\n"
STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
is it guaranteed that nums address will end up in rdx and a in rax?...
I'd feel more comfortable if you explicitly set up rdx and rax in assembly, then add USDT with STAP_PROBE_ASM. That should be possible with embedded assembly, no?
"# end probe point"
why these unnecessary comments embedded in the assembly?...
:
: "d"(nums), "a"(0)
: "memory"
);
+}
[...]
+int usdt_sib_called; +u64 usdt_sib_cookie; +int usdt_sib_arg_cnt; +int usdt_sib_arg_ret; +u64 usdt_sib_arg; +int usdt_sib_arg_size;
+// Note: usdt_sib is only tested on x86-related architectures, so it requires +// manual attach since auto-attach will panic tests under other architectures
don't use c++ style comments
+SEC("usdt") +int usdt_sib(struct pt_regs *ctx) +{
long tmp;
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
__sync_fetch_and_add(&usdt_sib_called, 1);
usdt_sib_cookie = bpf_usdt_cookie(ctx);
usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
usdt_sib_arg = (short)tmp;
usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
return 0;
+}
char _license[] SEC("license") = "GPL";
2.43.0
does clang define __GNUC__ as well? otherwise why !define(__clang__) ?
Yes, clang does defind __GNUC__ .
you use assembly directly, so optimize() should be irrelevant, no?
So we can make this non-GCC specific, right?
Yes, I'll make it in the next version.
is it guaranteed that nums address will end up in rdx and a in rax?...
I'd feel more comfortable if you explicitly set up rdx and rax in assembly, then add USDT with STAP_PROBE_ASM. That should be possible with embedded assembly, no?
I think it will in that the input operand constrain `"d"(nums), "a"(0)` implies the nums address will end up in rdx while the index 0 will be in rax.
So I think we don't need to explicitly set up rdx and rax again.
why these unnecessary comments embedded in the assembly?...
Because there is "before" and "after" in the `STAP_PROBE_ASM` example. It's OK to remove it.
At 2025-08-23 06:59:49, "Andrii Nakryiko" andrii.nakryiko@gmail.com wrote:
On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao phoenix500526@163.com wrote:
When using GCC on x86-64 to compile an usdt prog with -O1 or higher optimization, the compiler will generate SIB addressing mode for global array and PC-relative addressing mode for global variable, e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
In this patch:
- enrich subtest_basic_usdt test case to cover SIB addressing usdt argument spec handling logic
Signed-off-by: Jiawei Zhao phoenix500526@163.com
tools/testing/selftests/bpf/prog_tests/usdt.c | 44 ++++++++++++++++++- tools/testing/selftests/bpf/progs/test_usdt.c | 30 +++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 9057e983cc54..c04b416aa4a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -25,6 +25,10 @@ unsigned short test_usdt0_semaphore SEC(".probes"); unsigned short test_usdt3_semaphore SEC(".probes"); unsigned short test_usdt12_semaphore SEC(".probes");
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
does clang define __GNUC__ as well? otherwise why !define(__clang__) ?
+unsigned short test_usdt_sib_semaphore SEC(".probes"); +#endif
static void __always_inline trigger_func(int x) { long y = 42;
@@ -40,12 +44,29 @@ static void __always_inline trigger_func(int x) { } }
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__)) +static __attribute__((optimize("O1"))) void trigger_sib_spec(void)
you use assembly directly, so optimize() should be irrelevant, no?
So we can make this non-GCC specific, right?
+{
/* Base address + offset + (index * scale) */
/* Force SIB addressing with inline assembly */
asm volatile(
"# probe point with memory access\n"
STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
is it guaranteed that nums address will end up in rdx and a in rax?...
I'd feel more comfortable if you explicitly set up rdx and rax in assembly, then add USDT with STAP_PROBE_ASM. That should be possible with embedded assembly, no?
"# end probe point"
why these unnecessary comments embedded in the assembly?...
:
: "d"(nums), "a"(0)
: "memory"
);
+}
[...]
+int usdt_sib_called; +u64 usdt_sib_cookie; +int usdt_sib_arg_cnt; +int usdt_sib_arg_ret; +u64 usdt_sib_arg; +int usdt_sib_arg_size;
+// Note: usdt_sib is only tested on x86-related architectures, so it requires +// manual attach since auto-attach will panic tests under other architectures
don't use c++ style comments
+SEC("usdt") +int usdt_sib(struct pt_regs *ctx) +{
long tmp;
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
__sync_fetch_and_add(&usdt_sib_called, 1);
usdt_sib_cookie = bpf_usdt_cookie(ctx);
usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
usdt_sib_arg = (short)tmp;
usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
return 0;
+}
char _license[] SEC("license") = "GPL";
2.43.0
STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
I think such hard-coding, for example -2@(%%rdx,%%rax,2), is not very maintainable. Essentially, STAP_PROBE_ASM just performs simple text substitution. But in SIB addressing, both scale and size depend on the actual parameter type.
For instance, in -2@(%%rdx,%%rax,2), the -2 and 2 come from the fact that nums is an array of type short: the size of a short is 2, so the scale is 2; and since short is a signed type, the size is -2.
STAP_PROBE_ASM is expanded at the preprocessing stage, while at that stage we cannot compute the actual size of nums. If the type of nums changes in the future, it could lead to errors.
At 2025-08-23 06:59:49, "Andrii Nakryiko" andrii.nakryiko@gmail.com wrote:
On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao phoenix500526@163.com wrote:
When using GCC on x86-64 to compile an usdt prog with -O1 or higher optimization, the compiler will generate SIB addressing mode for global array and PC-relative addressing mode for global variable, e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
In this patch:
- enrich subtest_basic_usdt test case to cover SIB addressing usdt argument spec handling logic
Signed-off-by: Jiawei Zhao phoenix500526@163.com
tools/testing/selftests/bpf/prog_tests/usdt.c | 44 ++++++++++++++++++- tools/testing/selftests/bpf/progs/test_usdt.c | 30 +++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 9057e983cc54..c04b416aa4a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -25,6 +25,10 @@ unsigned short test_usdt0_semaphore SEC(".probes"); unsigned short test_usdt3_semaphore SEC(".probes"); unsigned short test_usdt12_semaphore SEC(".probes");
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
does clang define __GNUC__ as well? otherwise why !define(__clang__) ?
+unsigned short test_usdt_sib_semaphore SEC(".probes"); +#endif
static void __always_inline trigger_func(int x) { long y = 42;
@@ -40,12 +44,29 @@ static void __always_inline trigger_func(int x) { } }
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__)) +static __attribute__((optimize("O1"))) void trigger_sib_spec(void)
you use assembly directly, so optimize() should be irrelevant, no?
So we can make this non-GCC specific, right?
+{
/* Base address + offset + (index * scale) */
/* Force SIB addressing with inline assembly */
asm volatile(
"# probe point with memory access\n"
STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
is it guaranteed that nums address will end up in rdx and a in rax?...
I'd feel more comfortable if you explicitly set up rdx and rax in assembly, then add USDT with STAP_PROBE_ASM. That should be possible with embedded assembly, no?
"# end probe point"
why these unnecessary comments embedded in the assembly?...
:
: "d"(nums), "a"(0)
: "memory"
);
+}
[...]
+int usdt_sib_called; +u64 usdt_sib_cookie; +int usdt_sib_arg_cnt; +int usdt_sib_arg_ret; +u64 usdt_sib_arg; +int usdt_sib_arg_size;
+// Note: usdt_sib is only tested on x86-related architectures, so it requires +// manual attach since auto-attach will panic tests under other architectures
don't use c++ style comments
+SEC("usdt") +int usdt_sib(struct pt_regs *ctx) +{
long tmp;
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
__sync_fetch_and_add(&usdt_sib_called, 1);
usdt_sib_cookie = bpf_usdt_cookie(ctx);
usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
usdt_sib_arg = (short)tmp;
usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
return 0;
+}
char _license[] SEC("license") = "GPL";
2.43.0
linux-kselftest-mirror@lists.linaro.org