hm.. I thought we discussed recording the number of bits to shift by, no? It's not too big of a deal, we can afford 4 bits (instead of 2 that would be enough for bit shift), but any specific reason you prefer multiplication here?
Using multiplication is more straightforward since the format of a SIB address looks like `8@(%rdx,%rax,8)`. We can directly store the 8 as a scale instead of translating it to the number of bits to shift by.
"defaulting to 1" is very confusing, why? (and please use spaces after comma)
Yes, it's misleading to put it down here. The default value of scale is 1 since the scale can be omitted when the scale is 1. I remove it in the next version.
At 2025-08-23 06:43:06, "Andrii Nakryiko" andrii.nakryiko@gmail.com wrote:
On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao phoenix500526@163.com wrote:
On x86-64, USDT arguments can be specified using Scale-Index-Base (SIB) addressing, e.g. "1@-96(%rbp,%rax,8)". The current USDT implementation in libbpf cannot parse this format, causing `bpf_program__attach_usdt()` to fail with -ENOENT (unrecognized register).
This patch fixes this by implementing the necessary changes:
- add correct handling for SIB-addressed arguments in `bpf_usdt_arg`.
- add adaptive support to `__bpf_usdt_arg_type` and `__bpf_usdt_arg_spec` to represent SIB addressing parameters.
Signed-off-by: Jiawei Zhao phoenix500526@163.com
tools/lib/bpf/usdt.bpf.h | 47 ++++++++++++++++++++++++++++++-- tools/lib/bpf/usdt.c | 58 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 2a7865c8e3fe..263168d57286 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -4,6 +4,7 @@ #define __USDT_BPF_H__
#include <linux/errno.h> +#include <asm/byteorder.h> #include "bpf_helpers.h" #include "bpf_tracing.h"
@@ -34,13 +35,34 @@ enum __bpf_usdt_arg_type { BPF_USDT_ARG_CONST, BPF_USDT_ARG_REG, BPF_USDT_ARG_REG_DEREF,
BPF_USDT_ARG_SIB,
};
+/*
- This struct layout is designed specifically to be backwards/forward
- compatible between libbpf versions for ARG_CONST, ARG_REG, and
- ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
- */
struct __bpf_usdt_arg_spec { /* u64 scalar interpreted depending on arg_type, see below */ __u64 val_off; /* arg location case, see bpf_usdt_arg() for details */
enum __bpf_usdt_arg_type arg_type;
enum __bpf_usdt_arg_type arg_type: 8;
this needs to be inside the #if/#elif/#else, I believe. When it previously was a 4 byte field, BPF_USDT_ARG_REG = 1 would be `0x01, 0x00, 0x00, 0x00` in memory on little endian and `0x00, 0x00, 0x00, 0x01` on big endian. So you need to reorder it such that on big endian it's the last field.
pw-bot: cr
+#if defined(__LITTLE_ENDIAN_BITFIELD)
I'm not sure whether compiler itself defines __LITTLE_ENDIAN_BITFIELD. Throughout libbpf we use #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, let's do that here as well?
/* index register offset within struct pt_regs (high 12 bits) */
__u16 idx_reg_off: 12,
/* scale factor for index register (1, 2, 4, or 8) (low 4 bits) */
scale: 4;
nit: don't do comma-separated bitfields. compiler will combine them as necessary, even if they are declared as separate fields. so just:
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ enum __bpf_usdt_arg_type arg_type: 8; __u16 idx_reg_off: 12; __u16 idx_reg_shift: 4; __u8 __reserved: 8; #else __u8 __reserved: 8; __u16 idx_reg_off: 12; __u16 idx_reg_shift: 4; enum __bpf_usdt_arg_type arg_type: 8; #endif
Note that we don't need to change order of idx_reg_off and idx_reg_shift, as they are new additions (and they don't have to be consistent between big and little endian)
+#elif defined(__BIG_ENDIAN_BITFIELD)
/* scale factor for index register (1, 2, 4, or 8) (high 4 bits) */
__u16 scale: 4,
/* index register offset within struct pt_regs (low 12 bits) */
idx_reg_off: 12;
+#else +#error "Please fix <asm/byteorder.h>" +#endif
let's drop the fix suggestion, isn't asm/byteorder.h kernel-specific header?.. I'm fine assuming only big or little endian system
/* reserved for future use, keeps reg_off offset stable */
__u8 reserved; /* offset of referenced register within struct pt_regs */ short reg_off; /* whether arg should be interpreted as signed value */
@@ -149,7 +171,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) { struct __bpf_usdt_spec *spec; struct __bpf_usdt_arg_spec *arg_spec;
unsigned long val;
unsigned long val, idx; int err, spec_id; *res = 0;
@@ -202,6 +224,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) return err; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ val >>= arg_spec->arg_bitshift; +#endif
break;
case BPF_USDT_ARG_SIB:
/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
* (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
* fetch the base register contents and the index register
* contents from pt_regs. Then we calculate the final address
* as base + (index * scale) + offset, and do a user-space
* probe read to fetch the argument value.
*/
err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
if (err)
return err;
err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
if (err)
return err;
err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx * arg_spec->scale) + arg_spec->val_off));
hm.. I thought we discussed recording the number of bits to shift by, no? It's not too big of a deal, we can afford 4 bits (instead of 2 that would be enough for bit shift), but any specific reason you prefer multiplication here?
if (err)
return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
val >>= arg_spec->arg_bitshift;
#endif break; default:
[...]
if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
reg_off = calc_pt_regs_off(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
idx_reg_off = calc_pt_regs_off(idx_reg_name);
if (idx_reg_off < 0)
return idx_reg_off;
/* validate scale factor and set fields directly */
if (scale != 1 && scale != 2 && scale != 4 && scale != 8) {
pr_warn("usdt: invalid SIB scale %d, expected 1,2,4,8; defaulting to 1\n", scale);
"defaulting to 1" is very confusing, why? (and please use spaces after comma)
return -EINVAL;
}
arg->idx_reg_off = idx_reg_off;
arg->scale = scale;
} else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off;
@@ -1306,6 +1353,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG;
/* register read has no memory offset */ arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name);
-- 2.43.0