On Sun, 2023-11-26 at 18:04 -0600, Daniel Xu wrote: [...]
Tbh I'm not sure. This test passes with preserve_static_offset because it suppresses preserve_access_index. In general clang translates bitfield access to a set of IR statements like:
  C:
    struct foo {
      unsigned _;
      unsigned a:1;
      ...
    };
    ... foo->a ...

  IR:
    %a        = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
    %bf.load  = load i8, ptr %a, align 4
    %bf.clear = and i8 %bf.load, 1
    %bf.cast  = zext i8 %bf.clear to i32
With preserve_static_offset the getelementptr+load pair is replaced by a single statement that is kept as-is until code generation, so the load with align 4 is preserved.
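For context, here is a minimal sketch (not from the patch set) of what marking a struct with clang's preserve_static_offset attribute looks like; the __has_attribute guard is only there because older clang releases do not know the attribute:

  /* Sketch only: with preserve_static_offset the member offset stays
   * statically computed (no CO-RE relocation), so the GEP+load collapse
   * into a single access that keeps its alignment through code generation. */
  #if defined(__clang__) && __has_attribute(preserve_static_offset)
  struct foo {
          unsigned _;
          unsigned a:1;
  } __attribute__((preserve_static_offset));
  #endif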
On the other hand, I'm not sure that clang guarantees that the loads or stores used for bitfield access will always be aligned in the way the verifier expects.
I think we should check if there are some clang knobs that prevent generation of unaligned memory access. I'll take a look.
Is there a reason to prefer fixing this in the compiler? I'm not opposed to it, but the downside of a compiler fix is that it takes years to propagate and sprinkles ifdefs into the code.
Would it be possible to have an analogue of BPF_CORE_READ_BITFIELD()?
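For reference, the read side already exists as BPF_CORE_READ_BITFIELD() in <bpf/bpf_core_read.h>. A rough usage sketch (the program below is made up for illustration and assumes struct erspan_metadata is visible via vmlinux.h, as it is in the selftest):

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_core_read.h>

  /* Sketch: read the erspan_md2 bitfields through CO-RE relocated
   * accesses instead of direct member access. */
  SEC("tc")
  int dump_md2(struct __sk_buff *skb)
  {
          struct erspan_metadata md = {};
          __u64 dir, hwid;

          if (bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)) < 0)
                  return 0;

          dir = BPF_CORE_READ_BITFIELD(&md.u.md2, dir);
          hwid = BPF_CORE_READ_BITFIELD(&md.u.md2, hwid);
          bpf_printk("dir %llu hwid %llu\n", dir, hwid);
          return 0;
  }

  char LICENSE[] SEC("license") = "GPL";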
Well, the contraption below passes verification and the tunnel selftest appears to work. I might have messed up some shifts in the macro, though.
Still, if clang were to pick an unlucky BYTE_{OFFSET,SIZE} for a particular field, the access might be unaligned.
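To illustrate the concern (this layout is made up, not taken from the thread):

  /* Packed layout: the bitfield's storage unit starts at byte offset 1, so
   * if the relocation picks BYTE_OFFSET == 1 with BYTE_SIZE == 2 or 4, the
   * resulting load/store lands on an odd address. */
  struct unlucky {
          unsigned char tag;
          unsigned int flag:1;
          unsigned int rest:31;
  } __attribute__((packed));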
---
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 3065a716544d..41cd913ac7ff 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -9,6 +9,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
 #include "bpf_kfuncs.h"
 #include "bpf_tracing_net.h"
@@ -144,6 +145,38 @@ int ip6gretap_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
+#define BPF_CORE_WRITE_BITFIELD(s, field, new_val) ({			\
+	void *p = (void *)s + __CORE_RELO(s, field, BYTE_OFFSET);	\
+	unsigned byte_size = __CORE_RELO(s, field, BYTE_SIZE);		\
+	unsigned lshift = __CORE_RELO(s, field, LSHIFT_U64);		\
+	unsigned rshift = __CORE_RELO(s, field, RSHIFT_U64);		\
+	unsigned bit_size = (rshift - lshift);				\
+	unsigned long long nval, val, hi, lo;				\
+									\
+	asm volatile("" : "=r"(p) : "0"(p));				\
+									\
+	switch (byte_size) {						\
+	case 1: val = *(unsigned char *)p; break;			\
+	case 2: val = *(unsigned short *)p; break;			\
+	case 4: val = *(unsigned int *)p; break;			\
+	case 8: val = *(unsigned long long *)p; break;			\
+	}								\
+	hi = val >> (bit_size + rshift);				\
+	hi <<= bit_size + rshift;					\
+	lo = val << (bit_size + lshift);				\
+	lo >>= bit_size + lshift;					\
+	nval = new_val;							\
+	nval <<= lshift;						\
+	nval >>= rshift;						\
+	val = hi | nval | lo;						\
+	switch (byte_size) {						\
+	case 1: *(unsigned char *)p = val; break;			\
+	case 2: *(unsigned short *)p = val; break;			\
+	case 4: *(unsigned int *)p = val; break;			\
+	case 8: *(unsigned long long *)p = val; break;			\
+	}								\
+})
+
 SEC("tc")
 int erspan_set_tunnel(struct __sk_buff *skb)
 {
@@ -173,9 +206,9 @@ int erspan_set_tunnel(struct __sk_buff *skb)
 	__u8 hwid = 7;
 
 	md.version = 2;
-	md.u.md2.dir = direction;
-	md.u.md2.hwid = hwid & 0xf;
-	md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+	BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
+	BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
+	BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
 #endif
 
 	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
@@ -214,8 +247,9 @@ int erspan_get_tunnel(struct __sk_buff *skb)
 		bpf_printk("\tindex %x\n", index);
 #else
 	bpf_printk("\tdirection %d hwid %x timestamp %u\n",
-		   md.u.md2.dir,
-		   (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+		   BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
+		   (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
+			BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
 		   bpf_ntohl(md.u.md2.timestamp));
 #endif
 
@@ -252,9 +286,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 	__u8 hwid = 17;
 
 	md.version = 2;
-	md.u.md2.dir = direction;
-	md.u.md2.hwid = hwid & 0xf;
-	md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+	BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
+	BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
+	BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
 #endif
 
 	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
@@ -294,8 +328,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
 		bpf_printk("\tindex %x\n", index);
 #else
 	bpf_printk("\tdirection %d hwid %x timestamp %u\n",
-		   md.u.md2.dir,
-		   (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+		   BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
+		   (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
+			BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
 		   bpf_ntohl(md.u.md2.timestamp));
 #endif