BPF_SK_LOOKUP users may want to have access to the ifindex of the skb which triggered the socket lookup. This may be useful for selectively applying programmable socket lookup logic to packets that arrive on a specific interface, or excluding packets from an interface.
Mark Pashmfouroush (2): bpf: Add ifindex to bpf_sk_lookup selftests/bpf: Add tests for accessing ifindex in bpf_sk_lookup
include/linux/filter.h | 7 ++-- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 7 ++++ net/ipv4/inet_hashtables.c | 8 ++--- net/ipv4/udp.c | 8 ++--- net/ipv6/inet6_hashtables.c | 8 ++--- net/ipv6/udp.c | 8 ++--- tools/include/uapi/linux/bpf.h | 1 + .../selftests/bpf/prog_tests/sk_lookup.c | 31 ++++++++++++++++++ .../selftests/bpf/progs/test_sk_lookup.c | 8 +++++ .../selftests/bpf/verifier/ctx_sk_lookup.c | 32 +++++++++++++++++++ 11 files changed, 101 insertions(+), 18 deletions(-)
It may be helpful to have access to the ifindex during bpf socket lookup. Add this to the bpf_sk_lookup API.
Signed-off-by: Mark Pashmfouroush markpash@cloudflare.com
diff --git a/include/linux/filter.h b/include/linux/filter.h index 47f80adbe744..54ffd8036be6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1370,6 +1370,7 @@ struct bpf_sk_lookup_kern { const struct in6_addr *daddr; } v6; struct sock *selected_sk; + u32 ifindex; bool no_reuseport; };
@@ -1432,7 +1433,7 @@ extern struct static_key_false bpf_sk_lookup_enabled; static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, - struct sock **psk) + const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; @@ -1448,6 +1449,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, .v4.daddr = daddr, .sport = sport, .dport = dport, + .ifindex = ifindex, }; u32 act;
@@ -1470,7 +1472,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, const __be16 sport, const struct in6_addr *daddr, const u16 dport, - struct sock **psk) + const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; @@ -1486,6 +1488,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, .v6.daddr = daddr, .sport = sport, .dport = dport, + .ifindex = ifindex, }; u32 act;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fc59d61937a..9bd3e8b8a659 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6262,6 +6262,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ + __u32 ifindex; /* Maps to skb->dev->ifindex */ };
/* diff --git a/net/core/filter.c b/net/core/filter.c index 4bace37a6a44..9514c6bbd117 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10491,6 +10491,7 @@ static bool sk_lookup_is_valid_access(int off, int size, case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): case bpf_ctx_range(struct bpf_sk_lookup, remote_port): case bpf_ctx_range(struct bpf_sk_lookup, local_port): + case bpf_ctx_range(struct bpf_sk_lookup, ifindex): bpf_ctx_record_field_size(info, sizeof(__u32)); return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
@@ -10580,6 +10581,12 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, bpf_target_off(struct bpf_sk_lookup_kern, dport, 2, target_size)); break; + + case offsetof(struct bpf_sk_lookup, ifindex): + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + ifindex, 4, target_size)); + break; }
return insn - insn_buf; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 80aeaf9e6e16..088bb6c27114 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -305,7 +305,7 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, - __be32 daddr, u16 hnum) + __be32 daddr, u16 hnum, const int dif) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -313,8 +313,8 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net, if (hashinfo != &tcp_hashinfo) return NULL; /* only TCP is supported */
- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, - saddr, sport, daddr, hnum, &sk); + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport, + daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk;
@@ -338,7 +338,7 @@ struct sock *__inet_lookup_listener(struct net *net, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { result = inet_lookup_run_bpf(net, hashinfo, skb, doff, - saddr, sport, daddr, hnum); + saddr, sport, daddr, hnum, dif); if (result) goto done; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2a7825a5b842..f4ddfa38449e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -459,7 +459,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, struct udp_table *udptable, struct sk_buff *skb, __be32 saddr, __be16 sport, - __be32 daddr, u16 hnum) + __be32 daddr, u16 hnum, const int dif) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -467,8 +467,8 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, if (udptable != &udp_table) return NULL; /* only UDP is supported */
- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, - saddr, sport, daddr, hnum, &sk); + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport, + daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk;
@@ -504,7 +504,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { sk = udp4_lookup_run_bpf(net, udptable, skb, - saddr, sport, daddr, hnum); + saddr, sport, daddr, hnum, dif); if (sk) { result = sk; goto done; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 55c290d55605..8d25cb5d124b 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -165,7 +165,7 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const u16 hnum) + const u16 hnum, const int dif) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -173,8 +173,8 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net, if (hashinfo != &tcp_hashinfo) return NULL; /* only TCP is supported */
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, - saddr, sport, daddr, hnum, &sk); + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport, + daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk;
@@ -198,7 +198,7 @@ struct sock *inet6_lookup_listener(struct net *net, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, - saddr, sport, daddr, hnum); + saddr, sport, daddr, hnum, dif); if (result) goto done; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e505bb007e9f..77ba0917b3ea 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -194,7 +194,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, - u16 hnum) + u16 hnum, const int dif) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -202,8 +202,8 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, if (udptable != &udp_table) return NULL; /* only UDP is supported */
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, - saddr, sport, daddr, hnum, &sk); + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport, + daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk;
@@ -239,7 +239,7 @@ struct sock *__udp6_lib_lookup(struct net *net, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { sk = udp6_lookup_run_bpf(net, udptable, skb, - saddr, sport, daddr, hnum); + saddr, sport, daddr, hnum, dif); if (sk) { result = sk; goto done; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6fc59d61937a..9bd3e8b8a659 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6262,6 +6262,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ + __u32 ifindex; /* Maps to skb->dev->ifindex */ };
/*
On Fri, Oct 15, 2021 at 4:24 AM Mark Pashmfouroush markpash@cloudflare.com wrote:
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fc59d61937a..9bd3e8b8a659 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6262,6 +6262,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */
__u32 ifindex; /* Maps to skb->dev->ifindex */
Is the comment accurate? The bpf_sk_lookup_kern ifindex is populated with inet_iif(skb). Which is skb->skb_iif at this point (I think). skb->dev->ifindex would typically mean destination or egress ifindex. In __sk_buff we have 'ifindex' and 'ingress_ifindex' to differentiate them. If it's really dev->ifindex than keeping 'ifindex' name here would be correct, but looking at how it's populated in inet/udp_lookup makes me wonder whether it should be named 'ingress_ifindex' instead and comment clarified.
If/when you resubmit please trim cc list to a minimum.
Alexei Starovoitov wrote:
On Fri, Oct 15, 2021 at 4:24 AM Mark Pashmfouroush markpash@cloudflare.com wrote:
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fc59d61937a..9bd3e8b8a659 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6262,6 +6262,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */
__u32 ifindex; /* Maps to skb->dev->ifindex */
Is the comment accurate? The bpf_sk_lookup_kern ifindex is populated with inet_iif(skb). Which is skb->skb_iif at this point (I think). skb->dev->ifindex would typically mean destination or egress ifindex. In __sk_buff we have 'ifindex' and 'ingress_ifindex' to differentiate them. If it's really dev->ifindex than keeping 'ifindex' name here would be correct, but looking at how it's populated in inet/udp_lookup makes me wonder whether it should be named 'ingress_ifindex' instead and comment clarified.
If/when you resubmit please trim cc list to a minimum.
At least in the tcp cases its coming from inet_iif which is either the rtable or skb->skb_iif. Agree would be nice to fixup the comment.
Thanks.
Mark Pashmfouroush wrote:
It may be helpful to have access to the ifindex during bpf socket lookup. Add this to the bpf_sk_lookup API.
Signed-off-by: Mark Pashmfouroush markpash@cloudflare.com
Would be nice to have more details on the 'use case' here. I don't know off-hand how it 'may be helpful'.
For the actual code though LGTM.
Acked-by: John Fastabend john.fastabend@gmail.com
A new field was added to the bpf_sk_lookup data that users can access. Add tests that validate that the new ifindex field contains the right data.
Signed-off-by: Mark Pashmfouroush markpash@cloudflare.com
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index aee41547e7f4..951e5dcf297e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -937,6 +937,37 @@ static void test_drop_on_lookup(struct test_sk_lookup *skel) .connect_to = { EXT_IP6, EXT_PORT }, .listen_at = { EXT_IP6, INT_PORT }, }, + /* The program will drop on success, meaning that the ifindex + * was 1. + */ + { + .desc = "TCP IPv4 drop on valid ifindex", + .lookup_prog = skel->progs.check_ifindex, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { EXT_IP4, EXT_PORT }, + }, + { + .desc = "TCP IPv6 drop on valid ifindex", + .lookup_prog = skel->progs.check_ifindex, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, EXT_PORT }, + }, + { + .desc = "UDP IPv4 drop on valid ifindex", + .lookup_prog = skel->progs.check_ifindex, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { EXT_IP4, EXT_PORT }, + }, + { + .desc = "UDP IPv6 drop on valid ifindex", + .lookup_prog = skel->progs.check_ifindex, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, EXT_PORT }, + }, }; const struct test *t;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 19d2465d9442..0f3283bfe3b6 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -84,6 +84,14 @@ int lookup_drop(struct bpf_sk_lookup *ctx) return SK_DROP; }
+SEC("sk_lookup") +int check_ifindex(struct bpf_sk_lookup *ctx) +{ + if (ctx->ifindex == 1) + return SK_DROP; + return SK_PASS; +} + SEC("sk_reuseport") int reuseport_pass(struct sk_reuseport_md *ctx) { diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c index d78627be060f..0b3088da1e89 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c @@ -229,6 +229,24 @@ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sk_lookup, local_port)),
+ /* 1-byte read from ifindex field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ifindex)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ifindex) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ifindex) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ifindex) + 3), + /* 2-byte read from ifindex field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ifindex)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ifindex) + 2), + /* 4-byte read from ifindex field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ifindex)), + /* 8-byte read from sk field */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sk_lookup, sk)), @@ -351,6 +369,20 @@ .expected_attach_type = BPF_SK_LOOKUP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "invalid 8-byte read from bpf_sk_lookup ifindex field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ifindex)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, /* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */ { "invalid 4-byte read from bpf_sk_lookup sk field",
Mark Pashmfouroush wrote:
A new field was added to the bpf_sk_lookup data that users can access. Add tests that validate that the new ifindex field contains the right data.
Signed-off-by: Mark Pashmfouroush markpash@cloudflare.com
LGTM.
Acked-by: John Fastabend john.fastabend@gmail.com
On Fri, 15 Oct 2021 at 12:24, Mark Pashmfouroush markpash@cloudflare.com wrote:
BPF_SK_LOOKUP users may want to have access to the ifindex of the skb which triggered the socket lookup. This may be useful for selectively applying programmable socket lookup logic to packets that arrive on a specific interface, or excluding packets from an interface.
For the series:
Reviewed-by: Lorenz Bauer lmb@cloudflare.com
linux-kselftest-mirror@lists.linaro.org