Here is a series from Geliang, adding mptcp_subflow bpf_iter support.
We are working on extending MPTCP with BPF, e.g. to control the path manager -- in charge of the creation, deletion, and announcements of subflows (paths) -- and the packet scheduler -- in charge of selecting which available path the next data will be sent to. These extensions need to iterate over the list of subflows attached to an MPTCP connection, and do some specific actions via some new kfunc that will be added later on.
This preparation work is split in different patches:
- Patch 1: register some "basic" MPTCP kfunc.
- Patch 2: add mptcp_subflow bpf_iter support. Note that previous versions of this single patch have already been shared to the BPF mailing list. The changelog has been kept with a comment, but the version number has been reset to avoid confusions.
- Patch 3: add kfunc to make sure the msk is valid
- Patch 4: add more MPTCP endpoints in the selftests, in order to create more than 2 subflows.
- Patch 5: add a very simple test validating mptcp_subflow bpf_iter support. This test could be written without the new bpf_iter, but it is there only to make sure this specific feature works as expected.
Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- Geliang Tang (5): bpf: Register mptcp common kfunc set bpf: Add mptcp_subflow bpf_iter bpf: Acquire and release mptcp socket selftests/bpf: More endpoints for endpoint_init selftests/bpf: Add mptcp_subflow bpf_iter subtest
net/mptcp/bpf.c | 104 ++++++++++++++++- tools/testing/selftests/bpf/bpf_experimental.h | 8 ++ tools/testing/selftests/bpf/prog_tests/mptcp.c | 129 ++++++++++++++++++++- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 10 ++ .../testing/selftests/bpf/progs/mptcp_bpf_iters.c | 64 ++++++++++ 5 files changed, 308 insertions(+), 7 deletions(-) --- base-commit: 141b4d6a8049cecdc8124f87e044b83a9e80730d change-id: 20241108-bpf-next-net-mptcp-bpf_iter-subflows-027f6d87770e
Best regards,
From: Geliang Tang tanggeliang@kylinos.cn
MPTCP helper mptcp_sk() is used to convert struct sock to mptcp_sock. Helpers mptcp_subflow_ctx() and mptcp_subflow_tcp_sock() are used to convert between struct mptcp_subflow_context and sock. They all will be used in MPTCP BPF programs too.
This patch defines corresponding wrappers of them, and put the wrappers into mptcp common kfunc set and register the set with the flag BPF_PROG_TYPE_UNSPEC to let them accessible to all types of BPF programs.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Reviewed-by: Matthieu Baerts (NGI0) matttbe@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- net/mptcp/bpf.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 8a16672b94e2384f5263e1432296cbca1236bb30..6f96a5927fd371f8ea92cbf96c875edef9272b98 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -29,8 +29,46 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { .set = &bpf_mptcp_fmodret_ids, };
+__bpf_kfunc_start_defs(); + +__bpf_kfunc static struct mptcp_sock *bpf_mptcp_sk(struct sock *sk) +{ + return mptcp_sk(sk); +} + +__bpf_kfunc static struct mptcp_subflow_context * +bpf_mptcp_subflow_ctx(const struct sock *sk) +{ + return mptcp_subflow_ctx(sk); +} + +__bpf_kfunc static struct sock * +bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) +{ + return mptcp_subflow_tcp_sock(subflow); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) +BTF_ID_FLAGS(func, bpf_mptcp_sk) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock) +BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) + +static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_mptcp_common_kfunc_ids, +}; + static int __init bpf_mptcp_kfunc_init(void) { - return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set); + int ret; + + ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, + &bpf_mptcp_common_kfunc_set); + + return ret; } late_initcall(bpf_mptcp_kfunc_init);
On 11/8/24 7:52 AM, Matthieu Baerts (NGI0) wrote:
From: Geliang Tang tanggeliang@kylinos.cn
MPTCP helper mptcp_sk() is used to convert struct sock to mptcp_sock. Helpers mptcp_subflow_ctx() and mptcp_subflow_tcp_sock() are used to convert between struct mptcp_subflow_context and sock. They all will be used in MPTCP BPF programs too.
This patch defines corresponding wrappers of them, and put the wrappers into mptcp common kfunc set and register the set with the flag BPF_PROG_TYPE_UNSPEC to let them accessible to all types of BPF programs.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Reviewed-by: Matthieu Baerts (NGI0) matttbe@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
net/mptcp/bpf.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 8a16672b94e2384f5263e1432296cbca1236bb30..6f96a5927fd371f8ea92cbf96c875edef9272b98 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -29,8 +29,46 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { .set = &bpf_mptcp_fmodret_ids, }; +__bpf_kfunc_start_defs();
+__bpf_kfunc static struct mptcp_sock *bpf_mptcp_sk(struct sock *sk) +{
- return mptcp_sk(sk);
+}
+__bpf_kfunc static struct mptcp_subflow_context * +bpf_mptcp_subflow_ctx(const struct sock *sk) +{
- return mptcp_subflow_ctx(sk);
This returns "struct mptcp_subflow_context *" without checking the sk is a mptcp subflow or not...
+}
+__bpf_kfunc static struct sock * +bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) +{
- return mptcp_subflow_tcp_sock(subflow);
...and then the "struct mptcp_subflow_context *" can be used by this kfunc here. Is it really safe?
+}
+__bpf_kfunc_end_defs();
+BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) +BTF_ID_FLAGS(func, bpf_mptcp_sk) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock)
All of them has no KF_TRUSTED_ARGS or KF_RCU, so the returned ptr is supposed to be read-only? Why are they needed and why bpf_rdonly_cast (aka the bpf_core_cast in libbpf) cannot be used?
pw-bot: cr
+BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids)
+static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = {
- .owner = THIS_MODULE,
- .set = &bpf_mptcp_common_kfunc_ids,
+};
- static int __init bpf_mptcp_kfunc_init(void) {
- return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
- int ret;
- ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
- ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC,
&bpf_mptcp_common_kfunc_set);
- return ret; } late_initcall(bpf_mptcp_kfunc_init);
Hi Martin,
Thanks for the review.
On Mon, Nov 11, 2024 at 04:25:52PM -0800, Martin KaFai Lau wrote:
On 11/8/24 7:52 AM, Matthieu Baerts (NGI0) wrote:
From: Geliang Tang tanggeliang@kylinos.cn
MPTCP helper mptcp_sk() is used to convert struct sock to mptcp_sock. Helpers mptcp_subflow_ctx() and mptcp_subflow_tcp_sock() are used to convert between struct mptcp_subflow_context and sock. They all will be used in MPTCP BPF programs too.
This patch defines corresponding wrappers of them, and put the wrappers into mptcp common kfunc set and register the set with the flag BPF_PROG_TYPE_UNSPEC to let them accessible to all types of BPF programs.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Reviewed-by: Matthieu Baerts (NGI0) matttbe@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
net/mptcp/bpf.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 8a16672b94e2384f5263e1432296cbca1236bb30..6f96a5927fd371f8ea92cbf96c875edef9272b98 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -29,8 +29,46 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { .set = &bpf_mptcp_fmodret_ids, }; +__bpf_kfunc_start_defs();
+__bpf_kfunc static struct mptcp_sock *bpf_mptcp_sk(struct sock *sk) +{
- return mptcp_sk(sk);
+}
+__bpf_kfunc static struct mptcp_subflow_context * +bpf_mptcp_subflow_ctx(const struct sock *sk) +{
- return mptcp_subflow_ctx(sk);
This returns "struct mptcp_subflow_context *" without checking the sk is a mptcp subflow or not...
I checked it in patch 5 in the bpf program. I'll move this check into bpf_mptcp_subflow_ctx() in v2.
+}
+__bpf_kfunc static struct sock * +bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) +{
- return mptcp_subflow_tcp_sock(subflow);
...and then the "struct mptcp_subflow_context *" can be used by this kfunc here. Is it really safe?
I'll add null-check for subflow here too in v2.
+}
+__bpf_kfunc_end_defs();
+BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) +BTF_ID_FLAGS(func, bpf_mptcp_sk) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock)
All of them has no KF_TRUSTED_ARGS or KF_RCU, so the returned ptr is
KF_TRUSTED_ARGS should be added for bpf_mptcp_sk() indeed, but not for bpf_mptcp_subflow_ctx() or bpf_mptcp_subflow_tcp_sock(), since the parameters 'subflow' or 'sk' of the latter two functions are not pointers which are passed as tracepoint or struct_ops callback arguments, but pointers returned from kfuncs. I'll add KF_RET_NULL flag for all of them.
supposed to be read-only? Why are they needed and why bpf_rdonly_cast (aka the bpf_core_cast in libbpf) cannot be used?
The returned ptrs will pass to kfuncs. If bpf_rdonly_cast() is used here, a "untrusted_ptr_" error occurs:
# ; msk = bpf_core_cast(sk, struct mptcp_sock); @ mptcp_bpf_iters.c:29 # 9: (18) r2 = 0x3f95 ; R2_w=16277 # 11: (85) call bpf_rdonly_cast#53914 ; R0_w=untrusted_ptr_mptcp_sock() # ; if (!msk || msk->pm.server_side || !msk->pm.subflows) @ mptcp_bpf_iters.c:30 # 12: (15) if r0 == 0x0 goto pc+50 ; R0_w=untrusted_ptr_mptcp_sock() # 13: (71) r1 = *(u8 *)(r0 +2785) ; R0_w=untrusted_ptr_mptcp_sock() R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) # 14: (56) if w1 != 0x0 goto pc+48 ; R1_w=0 # 15: (71) r1 = *(u8 *)(r0 +2794) ; R0_w=untrusted_ptr_mptcp_sock() R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) # 16: (16) if w1 == 0x0 goto pc+46 ; R1_w=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) # ; msk = bpf_mptcp_sock_acquire(msk); @ mptcp_bpf_iters.c:33 # 17: (bf) r1 = r0 ; R0_w=untrusted_ptr_mptcp_sock() R1_w=untrusted_ptr_mptcp_sock() # 18: (85) call bpf_mptcp_sock_acquire#24762 # arg#0 is untrusted_ptr_ expected ptr_ or socket # processed 18 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 # -- END PROG LOAD LOG --
And I don't know how to fix it. So I added a new kfunc wrapper bpf_mptcp_sk() instead.
Please give me some advice, thanks.
-Geliang
pw-bot: cr
+BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids)
+static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = {
- .owner = THIS_MODULE,
- .set = &bpf_mptcp_common_kfunc_ids,
+};
- static int __init bpf_mptcp_kfunc_init(void) {
- return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
- int ret;
- ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
- ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC,
&bpf_mptcp_common_kfunc_set);
- return ret; } late_initcall(bpf_mptcp_kfunc_init);
From: Geliang Tang tanggeliang@kylinos.cn
It's necessary to traverse all subflows on the conn_list of an MPTCP socket and then call kfunc to modify the fields of each subflow. In kernel space, mptcp_for_each_subflow() helper is used for this:
mptcp_for_each_subflow(msk, subflow) kfunc(subflow);
But in the MPTCP BPF program, this has not yet been implemented. As Martin suggested recently, this conn_list walking + modify-by-kfunc usage fits the bpf_iter use case.
So this patch adds a new bpf_iter type named "mptcp_subflow" to do this and implements its helpers bpf_iter_mptcp_subflow_new()/_next()/ _destroy(). And register these bpf_iter mptcp_subflow into mptcp common kfunc set. Then bpf_for_each() for mptcp_subflow can be used in BPF program like this:
bpf_for_each(mptcp_subflow, subflow, msk) kfunc(subflow);
Suggested-by: Martin KaFai Lau martin.lau@kernel.org Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- Notes: A few versions of this single patch have been previously posted to the BPF mailing list by Geliang, before continuing to the MPTCP mailing list only, with other patches of this series. The version of the whole series has been reset to 1, but here is the ChangeLog for this patch here: - v2: remove msk->pm.lock in _new() and _destroy() (Martin) drop DEFINE_BPF_ITER_FUNC, change opaque[3] to opaque[2] (Andrii) - v3: drop bpf_iter__mptcp_subflow - v4: if msk is NULL, initialize kit->msk to NULL in _new() and check it in _next() (Andrii) - v5: use list_is_last() instead of list_entry_is_head() add KF_ITER_NEW/NEXT/DESTROY flags add msk_owned_by_me in _new() - v6: add KF_TRUSTED_ARGS flag (Andrii, Martin) --- net/mptcp/bpf.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 6f96a5927fd371f8ea92cbf96c875edef9272b98..d107c2865e97e6ccffb9e0720dfbbd232b63a3b8 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -29,6 +29,15 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { .set = &bpf_mptcp_fmodret_ids, };
+struct bpf_iter_mptcp_subflow { + __u64 __opaque[2]; +} __aligned(8); + +struct bpf_iter_mptcp_subflow_kern { + struct mptcp_sock *msk; + struct list_head *pos; +} __aligned(8); + __bpf_kfunc_start_defs();
__bpf_kfunc static struct mptcp_sock *bpf_mptcp_sk(struct sock *sk) @@ -48,12 +57,48 @@ bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) return mptcp_subflow_tcp_sock(subflow); }
+__bpf_kfunc static int +bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, + struct mptcp_sock *msk) +{ + struct bpf_iter_mptcp_subflow_kern *kit = (void *)it; + + kit->msk = msk; + if (!msk) + return -EINVAL; + + msk_owned_by_me(msk); + + kit->pos = &msk->conn_list; + return 0; +} + +__bpf_kfunc static struct mptcp_subflow_context * +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it) +{ + struct bpf_iter_mptcp_subflow_kern *kit = (void *)it; + + if (!kit->msk || list_is_last(kit->pos, &kit->msk->conn_list)) + return NULL; + + kit->pos = kit->pos->next; + return list_entry(kit->pos, struct mptcp_subflow_context, node); +} + +__bpf_kfunc static void +bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) +{ +} + __bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_mptcp_sk) BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx) BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock) +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids)
static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = {
On 11/8/24 7:52 AM, Matthieu Baerts (NGI0) wrote:
From: Geliang Tang tanggeliang@kylinos.cn
It's necessary to traverse all subflows on the conn_list of an MPTCP socket and then call kfunc to modify the fields of each subflow. In kernel space, mptcp_for_each_subflow() helper is used for this:
mptcp_for_each_subflow(msk, subflow) kfunc(subflow);
But in the MPTCP BPF program, this has not yet been implemented. As Martin suggested recently, this conn_list walking + modify-by-kfunc usage fits the bpf_iter use case.
So this patch adds a new bpf_iter type named "mptcp_subflow" to do this and implements its helpers bpf_iter_mptcp_subflow_new()/_next()/ _destroy(). And register these bpf_iter mptcp_subflow into mptcp common kfunc set. Then bpf_for_each() for mptcp_subflow can be used in BPF program like this:
bpf_for_each(mptcp_subflow, subflow, msk) kfunc(subflow);
Suggested-by: Martin KaFai Lau martin.lau@kernel.org Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
Notes: A few versions of this single patch have been previously posted to the BPF mailing list by Geliang, before continuing to the MPTCP mailing list only, with other patches of this series. The version of the whole series has been reset to 1, but here is the ChangeLog for this patch here:
- v2: remove msk->pm.lock in _new() and _destroy() (Martin) drop DEFINE_BPF_ITER_FUNC, change opaque[3] to opaque[2] (Andrii)
- v3: drop bpf_iter__mptcp_subflow
- v4: if msk is NULL, initialize kit->msk to NULL in _new() and check it in _next() (Andrii)
- v5: use list_is_last() instead of list_entry_is_head() add KF_ITER_NEW/NEXT/DESTROY flags add msk_owned_by_me in _new()
- v6: add KF_TRUSTED_ARGS flag (Andrii, Martin)
net/mptcp/bpf.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 6f96a5927fd371f8ea92cbf96c875edef9272b98..d107c2865e97e6ccffb9e0720dfbbd232b63a3b8 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -29,6 +29,15 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { .set = &bpf_mptcp_fmodret_ids, }; +struct bpf_iter_mptcp_subflow {
- __u64 __opaque[2];
+} __aligned(8);
+struct bpf_iter_mptcp_subflow_kern {
- struct mptcp_sock *msk;
- struct list_head *pos;
+} __aligned(8);
- __bpf_kfunc_start_defs();
__bpf_kfunc static struct mptcp_sock *bpf_mptcp_sk(struct sock *sk) @@ -48,12 +57,48 @@ bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) return mptcp_subflow_tcp_sock(subflow); } +__bpf_kfunc static int +bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
struct mptcp_sock *msk)
+{
- struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
- kit->msk = msk;
- if (!msk)
return -EINVAL;
- msk_owned_by_me(msk);
I recalled in the earlier revision, a concern had already been brought up about needing lock held and using the subflow iter in tracing. This patch still has the subflow iter available to tracing [by register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC)]. How is it supposed to work? Adding msk_owned_by_me(msk) does not help. At best it will give a WARN which is not good and then keep going even msk is not locked.
Do you need to use subflow iter in tracing?
The commit message mentioned it needs to modify the subflow. I don't see how this modification could work in a tracing program also. It must be some non tracing hooks? What is the plan on this hook? Is it a bpf_struct_ops or something else?
If it needs to modify the subflow, does it need to take the lock of the subflow?
- kit->pos = &msk->conn_list;
- return 0;
+}
+__bpf_kfunc static struct mptcp_subflow_context * +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it) +{
- struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
- if (!kit->msk || list_is_last(kit->pos, &kit->msk->conn_list))
return NULL;
- kit->pos = kit->pos->next;
- return list_entry(kit->pos, struct mptcp_subflow_context, node);
+}
+__bpf_kfunc static void +bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) +{ +}
- __bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_mptcp_sk) BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx) BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock) +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = {
Hi Martin,
On Mon, Nov 11, 2024 at 04:50:49PM -0800, Martin KaFai Lau wrote:
On 11/8/24 7:52 AM, Matthieu Baerts (NGI0) wrote:
From: Geliang Tang tanggeliang@kylinos.cn
It's necessary to traverse all subflows on the conn_list of an MPTCP socket and then call kfunc to modify the fields of each subflow. In kernel space, mptcp_for_each_subflow() helper is used for this:
mptcp_for_each_subflow(msk, subflow) kfunc(subflow);
But in the MPTCP BPF program, this has not yet been implemented. As Martin suggested recently, this conn_list walking + modify-by-kfunc usage fits the bpf_iter use case.
So this patch adds a new bpf_iter type named "mptcp_subflow" to do this and implements its helpers bpf_iter_mptcp_subflow_new()/_next()/ _destroy(). And register these bpf_iter mptcp_subflow into mptcp common kfunc set. Then bpf_for_each() for mptcp_subflow can be used in BPF program like this:
bpf_for_each(mptcp_subflow, subflow, msk) kfunc(subflow);
Suggested-by: Martin KaFai Lau martin.lau@kernel.org Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
Notes: A few versions of this single patch have been previously posted to the BPF mailing list by Geliang, before continuing to the MPTCP mailing list only, with other patches of this series. The version of the whole series has been reset to 1, but here is the ChangeLog for this patch here:
- v2: remove msk->pm.lock in _new() and _destroy() (Martin) drop DEFINE_BPF_ITER_FUNC, change opaque[3] to opaque[2] (Andrii)
- v3: drop bpf_iter__mptcp_subflow
- v4: if msk is NULL, initialize kit->msk to NULL in _new() and check it in _next() (Andrii)
- v5: use list_is_last() instead of list_entry_is_head() add KF_ITER_NEW/NEXT/DESTROY flags add msk_owned_by_me in _new()
- v6: add KF_TRUSTED_ARGS flag (Andrii, Martin)
net/mptcp/bpf.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 6f96a5927fd371f8ea92cbf96c875edef9272b98..d107c2865e97e6ccffb9e0720dfbbd232b63a3b8 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -29,6 +29,15 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { .set = &bpf_mptcp_fmodret_ids, }; +struct bpf_iter_mptcp_subflow {
- __u64 __opaque[2];
+} __aligned(8);
+struct bpf_iter_mptcp_subflow_kern {
- struct mptcp_sock *msk;
- struct list_head *pos;
+} __aligned(8);
- __bpf_kfunc_start_defs(); __bpf_kfunc static struct mptcp_sock *bpf_mptcp_sk(struct sock *sk)
@@ -48,12 +57,48 @@ bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) return mptcp_subflow_tcp_sock(subflow); } +__bpf_kfunc static int +bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
struct mptcp_sock *msk)
+{
- struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
- kit->msk = msk;
- if (!msk)
return -EINVAL;
- msk_owned_by_me(msk);
I recalled in the earlier revision, a concern had already been brought up about needing lock held and using the subflow iter in tracing. This patch still has the subflow iter available to tracing [by register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC)]. How is it supposed to work? Adding msk_owned_by_me(msk) does not help. At best it will give a WARN which is not good and then keep going even msk is not locked.
I'll check lockdep_sock_is_held(msk) here in v2, and return NULL if msk socket is not locked.
Do you need to use subflow iter in tracing?
No.
The commit message mentioned it needs to modify the subflow. I don't see how this modification could work in a tracing program also. It must be some non tracing hooks? What is the plan on this hook? Is it a bpf_struct_ops or something else?
We only plan to use it in struct_ops (mptcp bpf path manager [1], and mptcp packet scheduler [2]) and cgroup sockopt (mptcp setsockopt [3]).
So I'll register this kfunc_set for BPF_PROG_TYPE_STRUCT_OPS and BPF_PROG_TYPE_CGROUP_SOCKOPT only in v2, not for BPF_PROG_TYPE_UNSPEC.
If it needs to modify the subflow, does it need to take the lock of the subflow?
We will call the following mptcp_subflow_set_scheduled() kfunc to set the scheduled field of a subflow:
void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled) { WRITE_ONCE(subflow->scheduled, scheduled); }
WRITE_ONCE is used here, and no additional lock of the subflow is used.
Thanks, -Geliang
[1] https://github.com/multipath-tcp/mptcp_net-next/issues/74 [2] https://github.com/multipath-tcp/mptcp_net-next/issues/75 [3] https://github.com/multipath-tcp/mptcp_net-next/issues/484
- kit->pos = &msk->conn_list;
- return 0;
+}
+__bpf_kfunc static struct mptcp_subflow_context * +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it) +{
- struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
- if (!kit->msk || list_is_last(kit->pos, &kit->msk->conn_list))
return NULL;
- kit->pos = kit->pos->next;
- return list_entry(kit->pos, struct mptcp_subflow_context, node);
+}
+__bpf_kfunc static void +bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) +{ +}
- __bpf_kfunc_end_defs(); BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_mptcp_sk) BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx) BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = {
From: Geliang Tang tanggeliang@kylinos.cn
The KF_TRUSTED_ARGS flag is used for bpf_iter_mptcp_subflow_new, it indicates that the all pointer arguments are valid. It's necessary to add a KF_ACQUIRE helper to get valid "msk".
This patch adds bpf_mptcp_sock_acquire() and bpf_mptcp_sock_release() helpers for this. Increase sk->sk_refcnt in _acquire() and decrease it in _release(). Register them with KF_ACQUIRE flag and KF_RELEASE flag.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- net/mptcp/bpf.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index d107c2865e97e6ccffb9e0720dfbbd232b63a3b8..5bd04548e846b4dc120dbc83725a604821fac772 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -90,6 +90,23 @@ bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) { }
+__bpf_kfunc static struct +mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + + if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) + return msk; + return NULL; +} + +__bpf_kfunc static void bpf_mptcp_sock_release(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + + WARN_ON_ONCE(!sk || !refcount_dec_not_one(&sk->sk_refcnt)); +} + __bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) @@ -99,6 +116,8 @@ BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_mptcp_sock_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_mptcp_sock_release, KF_RELEASE) BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids)
static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = {
From: Geliang Tang tanggeliang@kylinos.cn
This patch changes ADDR_2 from "10.0.1.2" to "10.0.2.1", and adds two more IPv4 test addresses ADDR_3 - ADDR_4, four IPv6 addresses ADDR6_1 - ADDR6_4. Add a new helper address_init() to initialize all these addresses.
Add a new parameter "endpoints" for endpoint_init() to control how many endpoints are used for the tests. This makes it more flexible. Update the parameters of endpoint_init() in test_subflow().
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 56 +++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index f8eb7f9d4fd20bbb7ee018728f7ae0f0a09d4d30..85f3d4119802a85c86cde7b74a0b857252bad8b8 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -14,7 +14,13 @@
#define NS_TEST "mptcp_ns" #define ADDR_1 "10.0.1.1" -#define ADDR_2 "10.0.1.2" +#define ADDR_2 "10.0.2.1" +#define ADDR_3 "10.0.3.1" +#define ADDR_4 "10.0.4.1" +#define ADDR6_1 "dead:beef:1::1" +#define ADDR6_2 "dead:beef:2::1" +#define ADDR6_3 "dead:beef:3::1" +#define ADDR6_4 "dead:beef:4::1" #define PORT_1 10001
#ifndef IPPROTO_MPTCP @@ -322,22 +328,60 @@ static void test_mptcpify(void) close(cgroup_fd); }
-static int endpoint_init(char *flags) +static int address_init(void) { SYS(fail, "ip -net %s link add veth1 type veth peer name veth2", NS_TEST); SYS(fail, "ip -net %s addr add %s/24 dev veth1", NS_TEST, ADDR_1); + SYS(fail, "ip -net %s addr add %s/64 dev veth1 nodad", NS_TEST, ADDR6_1); SYS(fail, "ip -net %s link set dev veth1 up", NS_TEST); SYS(fail, "ip -net %s addr add %s/24 dev veth2", NS_TEST, ADDR_2); + SYS(fail, "ip -net %s addr add %s/64 dev veth2 nodad", NS_TEST, ADDR6_2); SYS(fail, "ip -net %s link set dev veth2 up", NS_TEST); - if (SYS_NOFAIL("ip -net %s mptcp endpoint add %s %s", NS_TEST, ADDR_2, flags)) { + + SYS(fail, "ip -net %s link add veth3 type veth peer name veth4", NS_TEST); + SYS(fail, "ip -net %s addr add %s/24 dev veth3", NS_TEST, ADDR_3); + SYS(fail, "ip -net %s addr add %s/64 dev veth3 nodad", NS_TEST, ADDR6_3); + SYS(fail, "ip -net %s link set dev veth3 up", NS_TEST); + SYS(fail, "ip -net %s addr add %s/24 dev veth4", NS_TEST, ADDR_4); + SYS(fail, "ip -net %s addr add %s/64 dev veth4 nodad", NS_TEST, ADDR6_4); + SYS(fail, "ip -net %s link set dev veth4 up", NS_TEST); + + return 0; +fail: + return -1; +} + +static int endpoint_add(char *addr, char *flags) +{ + return SYS_NOFAIL("ip -net %s mptcp endpoint add %s %s", NS_TEST, addr, flags); +} + +static int endpoint_init(char *flags, u8 endpoints) +{ + int ret = -1; + + if (!endpoints || endpoints > 4) + goto fail; + + if (address_init()) + goto fail; + + if (SYS_NOFAIL("ip -net %s mptcp limits set add_addr_accepted 4 subflows 4", + NS_TEST)) { printf("'ip mptcp' not supported, skip this test.\n"); test__skip(); goto fail; }
- return 0; + if (endpoints > 1) + ret = endpoint_add(ADDR_2, flags); + if (endpoints > 2) + ret = ret ?: endpoint_add(ADDR_3, flags); + if (endpoints > 3) + ret = ret ?: endpoint_add(ADDR_4, flags); + fail: - return -1; + return ret; }
static void wait_for_new_subflows(int fd) @@ -423,7 +467,7 @@ static void test_subflow(void) if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_subflow")) goto skel_destroy;
- if (endpoint_init("subflow") < 0) + if (endpoint_init("subflow", 2) < 0) goto close_netns;
run_subflow();
From: Geliang Tang tanggeliang@kylinos.cn
This patch adds a "cgroup/getsockopt" program "iters_subflow" to test the newly added mptcp_subflow bpf_iter.
Export mptcp_subflow helpers bpf_iter_mptcp_subflow_new/_next/_destroy, bpf_mptcp_sock_acquire/_release and other helpers into bpf_experimental.h.
Use bpf_mptcp_sock_acquire() to acquire the msk, then use bpf_for_each() to walk the subflow list of this msk. From there, future MPTCP-specific kfunc can be called in the loop. Because they are not there yet, this test doesn't do anything very useful for the moment, but it focuses on validating the 'bpf_iter' part and the basic MPTCP kfunc. That's why it simply adds all subflow ids to local variable local_ids to make sure all subflows have been seen, then invoke the kfunc bpf_mptcp_subflow_tcp_sock() in the loop to pick a subsocket.
Out of the loop, use bpf_mptcp_subflow_ctx() to get the subflow context of the picked subsocket and do some verifications. Finally, assign local_ids to global variable ids so that the application can obtain this value, and release the msk.
A related subtest called test_iters_subflow is added to load and verify the newly added mptcp_subflow type bpf_iter example in test_mptcp. The endpoint_init() helper is used to add 3 new subflow endpoints. Then one byte of message is sent to trigger the creation of new subflows. getsockopt() is invoked once the subflows have been created to trigger the "cgroup/getsockopt" test program "iters_subflow". skel->bss->ids is then checked to make sure it equals 10, the sum of each subflow ID: we should have 4 subflows: 1 + 2 + 3 + 4 = 10. If that's the case, the bpf_iter loop did the job as expected.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Matthieu Baerts (NGI0) matttbe@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- tools/testing/selftests/bpf/bpf_experimental.h | 8 +++ tools/testing/selftests/bpf/prog_tests/mptcp.c | 73 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/mptcp_bpf.h | 10 +++ .../testing/selftests/bpf/progs/mptcp_bpf_iters.c | 64 +++++++++++++++++++ 4 files changed, 155 insertions(+)
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index b0668f29f7b394eb5294b6c9cade28fc1b17265a..08eaa431aafd758117f8e818410c4a3e7fd0b70c 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -575,6 +575,14 @@ extern int bpf_iter_css_new(struct bpf_iter_css *it, extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym; extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
+struct bpf_iter_mptcp_subflow; +extern int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, + struct mptcp_sock *msk) __weak __ksym; +extern struct mptcp_subflow_context * +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it) __weak __ksym; +extern void +bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) __weak __ksym; + extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 85f3d4119802a85c86cde7b74a0b857252bad8b8..f37574b5ef68d8f32f8002df317869dfdf1d4b2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -11,6 +11,7 @@ #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" +#include "mptcp_bpf_iters.skel.h"
#define NS_TEST "mptcp_ns" #define ADDR_1 "10.0.1.1" @@ -33,6 +34,9 @@ #ifndef MPTCP_INFO #define MPTCP_INFO 1 #endif +#ifndef TCP_IS_MPTCP +#define TCP_IS_MPTCP 43 /* Is MPTCP being used? */ +#endif #ifndef MPTCP_INFO_FLAG_FALLBACK #define MPTCP_INFO_FLAG_FALLBACK _BITUL(0) #endif @@ -480,6 +484,73 @@ static void test_subflow(void) close(cgroup_fd); }
+static void run_iters_subflow(void) +{ + int server_fd, client_fd; + int is_mptcp, err; + socklen_t len; + + server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); + if (!ASSERT_OK_FD(server_fd, "start_mptcp_server")) + return; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_OK_FD(client_fd, "connect_to_fd")) + goto close_server; + + send_byte(client_fd); + wait_for_new_subflows(client_fd); + + len = sizeof(is_mptcp); + /* mainly to trigger the BPF program */ + err = getsockopt(client_fd, SOL_TCP, TCP_IS_MPTCP, &is_mptcp, &len); + if (ASSERT_OK(err, "getsockopt(client_fd, TCP_IS_MPTCP)")) + ASSERT_EQ(is_mptcp, 1, "is_mptcp"); + + close(client_fd); +close_server: + close(server_fd); +} + +static void test_iters_subflow(void) +{ + struct mptcp_bpf_iters *skel; + struct netns_obj *netns; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/iters_subflow"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: iters_subflow")) + return; + + skel = mptcp_bpf_iters__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: iters_subflow")) + goto close_cgroup; + + skel->links.iters_subflow = bpf_program__attach_cgroup(skel->progs.iters_subflow, + cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.iters_subflow, "attach getsockopt")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: iters_subflow")) + goto skel_destroy; + + if (endpoint_init("subflow", 4) < 0) + goto close_netns; + + run_iters_subflow(); + + /* 1 + 2 + 3 + 4 = 10 */ + ASSERT_EQ(skel->bss->ids, 10, "subflow ids"); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_bpf_iters__destroy(skel); +close_cgroup: + close(cgroup_fd); +} + void test_mptcp(void) { if (test__start_subtest("base")) @@ -488,4 +559,6 @@ void test_mptcp(void) test_mptcpify(); if (test__start_subtest("subflow")) test_subflow(); + if (test__start_subtest("iters_subflow")) + test_iters_subflow(); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index 3b188ccdcc4041acb4f7ed38ae8ddf5a7305466a..3210736b1c349a989c5fb5c16dc21d03d6f9b5bf 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -39,4 +39,14 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) return subflow->tcp_sock; }
+/* ksym */ +extern struct mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) __ksym; +extern void bpf_mptcp_sock_release(struct mptcp_sock *msk) __ksym; + +extern struct mptcp_sock *bpf_mptcp_sk(struct sock *sk) __ksym; +extern struct mptcp_subflow_context * +bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym; +extern struct sock * +bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym; + #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c new file mode 100644 index 0000000000000000000000000000000000000000..1bede22a7e3d125ec357fb798059e7bbcfe0d227 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Kylin Software */ + +/* vmlinux.h, bpf_helpers.h and other 'define' */ +#include "bpf_tracing_net.h" +#include "mptcp_bpf.h" + +char _license[] SEC("license") = "GPL"; +int ids; + +#ifndef TCP_IS_MPTCP +#define TCP_IS_MPTCP 43 /* Is MPTCP being used? */ +#endif + +SEC("cgroup/getsockopt") +int iters_subflow(struct bpf_sockopt *ctx) +{ + struct mptcp_subflow_context *subflow; + struct bpf_sock *sk = ctx->sk; + struct sock *ssk = NULL; + struct mptcp_sock *msk; + int local_ids = 0; + + if (!sk || sk->protocol != IPPROTO_MPTCP || + ctx->level != SOL_TCP || ctx->optname != TCP_IS_MPTCP) + return 1; + + msk = bpf_mptcp_sk((struct sock *)sk); + if (msk->pm.server_side || !msk->pm.subflows) + return 1; + + msk = bpf_mptcp_sock_acquire(msk); + if (!msk) + return 1; + bpf_for_each(mptcp_subflow, subflow, msk) { + /* Here MPTCP-specific packet scheduler kfunc can be called: + * this test is not doing anything really useful, only to + * verify the iteration works. + */ + + local_ids += subflow->subflow_id; + + /* only to check the following kfunc works */ + ssk = bpf_mptcp_subflow_tcp_sock(subflow); + } + + if (!ssk) + goto out; + + /* assert: if not OK, something wrong on the kernel side */ + if (ssk->sk_dport != ((struct sock *)msk)->sk_dport) + goto out; + + /* only to check the following kfunc works */ + subflow = bpf_mptcp_subflow_ctx(ssk); + if (subflow->token != msk->token) + goto out; + + ids = local_ids; + +out: + bpf_mptcp_sock_release(msk); + return 1; +}
linux-kselftest-mirror@lists.linaro.org