In this series from Geliang, modifying MPTCP BPF selftests, we have:
- SIGINT support
- A new macro to reduce duplicated code
- A new MPTCP subflow BPF program setting socket options per subflow: it looks better to have this old test program in the BPF selftests to track regressions and to serve as example.
Note: Nicolas is no longer working for Tessares, but he did this work while working for them, and his email address is no longer available.
- A new MPTCP BPF subtest validating the new BPF program.
Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- Geliang Tang (3): selftests/bpf: Handle SIGINT when creating netns selftests/bpf: Add RUN_MPTCP_TEST macro selftests/bpf: Add mptcp subflow subtest
Nicolas Rybowski (1): selftests/bpf: Add mptcp subflow example
tools/testing/selftests/bpf/prog_tests/mptcp.c | 127 +++++++++++++++++++++- tools/testing/selftests/bpf/progs/mptcp_subflow.c | 70 ++++++++++++ 2 files changed, 193 insertions(+), 4 deletions(-) --- base-commit: 329a6720a3ebbc041983b267981ab2cac102de93 change-id: 20240506-upstream-bpf-next-20240506-mptcp-subflow-test-faef6654bfa3
Best regards,
From: Geliang Tang tanggeliang@kylinos.cn
It's necessary to delete netns during the MPTCP bpf tests interrupt, otherwise the next tests run will fail due to unable to create netns.
This patch adds a new SIGINT handle sig_int, and deletes NS_TEST in it.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 274d2e033e39..baf976a7a1dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -64,11 +64,18 @@ struct mptcp_storage { char ca_name[TCP_CA_NAME_MAX]; };
+static void sig_int(int sig) +{ + signal(sig, SIG_IGN); + SYS_NOFAIL("ip netns del %s", NS_TEST); +} + static struct nstoken *create_netns(void) { SYS(fail, "ip netns add %s", NS_TEST); SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+ signal(SIGINT, sig_int); return open_netns(NS_TEST); fail: return NULL;
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Geliang Tang tanggeliang@kylinos.cn
It's necessary to delete netns during the MPTCP bpf tests interrupt, otherwise the next tests run will fail due to unable to create netns.
This patch adds a new SIGINT handle sig_int, and deletes NS_TEST in it.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
tools/testing/selftests/bpf/prog_tests/mptcp.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 274d2e033e39..baf976a7a1dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -64,11 +64,18 @@ struct mptcp_storage { char ca_name[TCP_CA_NAME_MAX]; };
+static void sig_int(int sig) +{
signal(sig, SIG_IGN);
SYS_NOFAIL("ip netns del %s", NS_TEST);
+}
static struct nstoken *create_netns(void) { SYS(fail, "ip netns add %s", NS_TEST); SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
signal(SIGINT, sig_int); return open_netns(NS_TEST);
That's a drop in the bucket. ctrl-c of test_progs doesn't really work. Such clean up needs to be generic as part of network_helpers.c
Hi Alexei,
Thank you for the review!
On 07/05/2024 16:43, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Geliang Tang tanggeliang@kylinos.cn
It's necessary to delete netns during the MPTCP bpf tests interrupt, otherwise the next tests run will fail due to unable to create netns.
This patch adds a new SIGINT handle sig_int, and deletes NS_TEST in it.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
tools/testing/selftests/bpf/prog_tests/mptcp.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 274d2e033e39..baf976a7a1dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -64,11 +64,18 @@ struct mptcp_storage { char ca_name[TCP_CA_NAME_MAX]; };
+static void sig_int(int sig) +{
signal(sig, SIG_IGN);
SYS_NOFAIL("ip netns del %s", NS_TEST);
+}
static struct nstoken *create_netns(void) { SYS(fail, "ip netns add %s", NS_TEST); SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
signal(SIGINT, sig_int); return open_netns(NS_TEST);
That's a drop in the bucket. ctrl-c of test_progs doesn't really work. Such clean up needs to be generic as part of network_helpers.c
It makes sense. I can drop this patch and ask Geliang to add a similar 'create_netns()' helper in network_helpers.c creating the netns, and handling SIGINT. This helper will no longer be specific to MPTCP BPF selftests then.
Cheers, Matt
From: Geliang Tang tanggeliang@kylinos.cn
Each MPTCP subtest tests test__start_subtest(suffix), then invokes test_suffix(). It makes sense to add a new macro RUN_MPTCP_TEST to simpolify the code.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index baf976a7a1dd..9d1b255bb654 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -347,10 +347,14 @@ static void test_mptcpify(void) close(cgroup_fd); }
+#define RUN_MPTCP_TEST(suffix) \ +do { \ + if (test__start_subtest(#suffix)) \ + test_##suffix(); \ +} while (0) + void test_mptcp(void) { - if (test__start_subtest("base")) - test_base(); - if (test__start_subtest("mptcpify")) - test_mptcpify(); + RUN_MPTCP_TEST(base); + RUN_MPTCP_TEST(mptcpify); }
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Geliang Tang tanggeliang@kylinos.cn
Each MPTCP subtest tests test__start_subtest(suffix), then invokes test_suffix(). It makes sense to add a new macro RUN_MPTCP_TEST to simpolify the code.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
tools/testing/selftests/bpf/prog_tests/mptcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index baf976a7a1dd..9d1b255bb654 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -347,10 +347,14 @@ static void test_mptcpify(void) close(cgroup_fd); }
+#define RUN_MPTCP_TEST(suffix) \ +do { \
if (test__start_subtest(#suffix)) \
test_##suffix(); \
+} while (0)
Please no. Don't hide it behind macros.
void test_mptcp(void) {
if (test__start_subtest("base"))
test_base();
if (test__start_subtest("mptcpify"))
test_mptcpify();
RUN_MPTCP_TEST(base);
RUN_MPTCP_TEST(mptcpify);
}
-- 2.43.0
Hi Alexei,
Thank you for the review!
On 07/05/2024 16:44, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Geliang Tang tanggeliang@kylinos.cn
Each MPTCP subtest tests test__start_subtest(suffix), then invokes test_suffix(). It makes sense to add a new macro RUN_MPTCP_TEST to simpolify the code.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
tools/testing/selftests/bpf/prog_tests/mptcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index baf976a7a1dd..9d1b255bb654 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -347,10 +347,14 @@ static void test_mptcpify(void) close(cgroup_fd); }
+#define RUN_MPTCP_TEST(suffix) \ +do { \
if (test__start_subtest(#suffix)) \
test_##suffix(); \
+} while (0)
Please no. Don't hide it behind macros.
I understand, I'm personally not a big fan of hiding code being a macro too. This one saves only one line. Geliang added a few more tests in our tree [1], for a total of 9, so that's only saving 9 lines.
Related to that, if you don't mind, Geliang also added another macro -- MPTCP_SCHED_TEST -- for tests that are currently only in our tree [2] (not ready yet). We asked him to reduce the size of this macro to the minimum. We accepted it because it removed quite a lot of similar code with very small differences [3]. Do you think we should revert this modification too?
[1] https://github.com/multipath-tcp/mptcp_net-next/blob/4369d9cbd752e166961ac0d...
[2] https://github.com/multipath-tcp/mptcp_net-next/blob/4369d9cbd752e166961ac0d...
[3] https://lore.kernel.org/mptcp/cover.1713321357.git.tanggeliang@kylinos.cn/T/...
Cheers, Matt
On Tue, May 7, 2024 at 9:02 AM Matthieu Baerts matttbe@kernel.org wrote:
Hi Alexei,
Thank you for the review!
On 07/05/2024 16:44, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Geliang Tang tanggeliang@kylinos.cn
Each MPTCP subtest tests test__start_subtest(suffix), then invokes test_suffix(). It makes sense to add a new macro RUN_MPTCP_TEST to simpolify the code.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
tools/testing/selftests/bpf/prog_tests/mptcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index baf976a7a1dd..9d1b255bb654 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -347,10 +347,14 @@ static void test_mptcpify(void) close(cgroup_fd); }
+#define RUN_MPTCP_TEST(suffix) \ +do { \
if (test__start_subtest(#suffix)) \
test_##suffix(); \
+} while (0)
Please no. Don't hide it behind macros.
I understand, I'm personally not a big fan of hiding code being a macro too. This one saves only one line. Geliang added a few more tests in our tree [1], for a total of 9, so that's only saving 9 lines.
Related to that, if you don't mind, Geliang also added another macro -- MPTCP_SCHED_TEST -- for tests that are currently only in our tree [2] (not ready yet). We asked him to reduce the size of this macro to the minimum. We accepted it because it removed quite a lot of similar code with very small differences [3]. Do you think we should revert this modification too?
Yeah. Pls don't hide such things in macros. Refactor into helper function in normal C.
But, what do you mean "in your tree" ? That's your development tree and you plan to send all that properly as patches to bpf-next someday?
[1] https://github.com/multipath-tcp/mptcp_net-next/blob/4369d9cbd752e166961ac0d...
[2] https://github.com/multipath-tcp/mptcp_net-next/blob/4369d9cbd752e166961ac0d...
[3] https://lore.kernel.org/mptcp/cover.1713321357.git.tanggeliang@kylinos.cn/T/...
Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Alexei,
Thank you for your reply!
On 07/05/2024 22:51, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 9:02 AM Matthieu Baerts matttbe@kernel.org wrote:
Hi Alexei,
Thank you for the review!
On 07/05/2024 16:44, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Geliang Tang tanggeliang@kylinos.cn
Each MPTCP subtest tests test__start_subtest(suffix), then invokes test_suffix(). It makes sense to add a new macro RUN_MPTCP_TEST to simpolify the code.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
tools/testing/selftests/bpf/prog_tests/mptcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index baf976a7a1dd..9d1b255bb654 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -347,10 +347,14 @@ static void test_mptcpify(void) close(cgroup_fd); }
+#define RUN_MPTCP_TEST(suffix) \ +do { \
if (test__start_subtest(#suffix)) \
test_##suffix(); \
+} while (0)
Please no. Don't hide it behind macros.
I understand, I'm personally not a big fan of hiding code being a macro too. This one saves only one line. Geliang added a few more tests in our tree [1], for a total of 9, so that's only saving 9 lines.
Related to that, if you don't mind, Geliang also added another macro -- MPTCP_SCHED_TEST -- for tests that are currently only in our tree [2] (not ready yet). We asked him to reduce the size of this macro to the minimum. We accepted it because it removed quite a lot of similar code with very small differences [3]. Do you think we should revert this modification too?
Yeah. Pls don't hide such things in macros. Refactor into helper function in normal C.
Sure, we will revert that.
But, what do you mean "in your tree" ? That's your development tree and you plan to send all that properly as patches to bpf-next someday?
Yes, correct, we have some WIP patches in MPTCP development tree where we added a new bpf_struct_ops structure to implement new MPTCP packet schedulers in BPF. This work was paused for a while because we had to refine the packet scheduler API, but this task is now ongoing. Hopefully we will be able to send patches to bpf-next this "soon".
Cheers, Matt
On Tue, 2024-05-07 at 13:51 -0700, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 9:02 AM Matthieu Baerts matttbe@kernel.org wrote:
Hi Alexei,
Thank you for the review!
On 07/05/2024 16:44, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Geliang Tang tanggeliang@kylinos.cn
Each MPTCP subtest tests test__start_subtest(suffix), then invokes test_suffix(). It makes sense to add a new macro RUN_MPTCP_TEST to simpolify the code.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org
tools/testing/selftests/bpf/prog_tests/mptcp.c | 12 ++++++++--
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index baf976a7a1dd..9d1b255bb654 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -347,10 +347,14 @@ static void test_mptcpify(void) close(cgroup_fd); }
+#define RUN_MPTCP_TEST(suffix) \ +do { \ + if (test__start_subtest(#suffix)) \ + test_##suffix(); \ +} while (0)
Please no. Don't hide it behind macros.
I understand, I'm personally not a big fan of hiding code being a macro too. This one saves only one line. Geliang added a few more tests in our tree [1], for a total of 9, so that's only saving 9 lines.
Related to that, if you don't mind, Geliang also added another macro -- MPTCP_SCHED_TEST -- for tests that are currently only in our tree [2] (not ready yet). We asked him to reduce the size of this macro to the minimum. We accepted it because it removed quite a lot of similar code with very small differences [3]. Do you think we should revert this modification too?
Yeah. Pls don't hide such things in macros. Refactor into helper function in normal C.
I do agree to remove this RUN_MPTCP_TEST macro. But MPTCP_SCHED_TEST macro is different. I know this type of macro is unwelcome. But it's indeed a perfect place to use macro in MPTCP bpf sched tests.
From
''' static void test_first(void) { struct mptcp_bpf_first *skel;
skel = mptcp_bpf_first__open_and_load(); if (!ASSERT_OK_PTR(skel, "open_and_load: first")) return;
test_bpf_sched(skel->obj, "first", WITH_DATA, WITHOUT_DATA); mptcp_bpf_first__destroy(skel); }
static void test_bkup(void) { struct mptcp_bpf_bkup *skel;
skel = mptcp_bpf_bkup__open_and_load(); if (!ASSERT_OK_PTR(skel, "open_and_load: bkup")) return;
test_bpf_sched(skel->obj, "bkup", WITH_DATA, WITHOUT_DATA); mptcp_bpf_bkup__destroy(skel); }
static void test_rr(void) { struct mptcp_bpf_rr *skel;
skel = mptcp_bpf_rr__open_and_load(); if (!ASSERT_OK_PTR(skel, "open_and_load: rr")) return;
test_bpf_sched(skel->obj, "rr", WITH_DATA, WITH_DATA); mptcp_bpf_rr__destroy(skel); }
static void test_red(void) { struct mptcp_bpf_red *skel;
skel = mptcp_bpf_red__open_and_load(); if (!ASSERT_OK_PTR(skel, "open_and_load: red")) return;
test_bpf_sched(skel->obj, "red", WITH_DATA, WITH_DATA); mptcp_bpf_red__destroy(skel); }
static void test_burst(void) { struct mptcp_bpf_burst *skel;
skel = mptcp_bpf_burst__open_and_load(); if (!ASSERT_OK_PTR(skel, "open_and_load: burst")) return;
test_bpf_sched(skel->obj, "burst", WITH_DATA, WITH_DATA); mptcp_bpf_burst__destroy(skel); }
static void test_stale(void) { struct mptcp_bpf_stale *skel;
skel = mptcp_bpf_stale__open_and_load(); if (!ASSERT_OK_PTR(skel, "open_and_load: stale")) return;
test_bpf_sched(skel->obj, "stale", WITH_DATA, WITHOUT_DATA); mptcp_bpf_stale__destroy(skel); } '''
to
''' #define MPTCP_SCHED_TEST(sched, addr1, addr2) \ static void test_##sched(void) \ { \ struct mptcp_bpf_##sched *skel; \ \ skel = mptcp_bpf_##sched##__open_and_load(); \ if (!ASSERT_OK_PTR(skel, "open_and_load:" #sched)) \ return; \ \ test_bpf_sched(skel->obj, #sched, addr1, addr2); \ mptcp_bpf_##sched##__destroy(skel); \ }
MPTCP_SCHED_TEST(first, WITH_DATA, WITHOUT_DATA); MPTCP_SCHED_TEST(bkup, WITH_DATA, WITHOUT_DATA); MPTCP_SCHED_TEST(rr, WITH_DATA, WITH_DATA); MPTCP_SCHED_TEST(red, WITH_DATA, WITH_DATA); MPTCP_SCHED_TEST(burst, WITH_DATA, WITH_DATA); MPTCP_SCHED_TEST(stale, WITH_DATA, WITHOUT_DATA); '''
We can save so many code, and perfectly use BPF test skeleton template. It's small enough, and be difficult to refactor with a helper function in normal C.
Please reconsider whether to delete it, or at least keep it until the day it is officially sent to BPF mail list for review.
Thanks, -Geliang
But, what do you mean "in your tree" ? That's your development tree and you plan to send all that properly as patches to bpf-next someday?
[1] https://github.com/multipath-tcp/mptcp_net-next/blob/4369d9cbd752e166961ac0d...
[2] https://github.com/multipath-tcp/mptcp_net-next/blob/4369d9cbd752e166961ac0d...
[3] https://lore.kernel.org/mptcp/cover.1713321357.git.tanggeliang@kylinos.cn/T/...
Cheers, Matt -- Sponsored by the NGI0 Core fund.
From: Nicolas Rybowski nicolas.rybowski@tessares.net
Move Nicolas's patch into bpf selftests directory. This example added a test that was adding a different mark (SO_MARK) on each subflow, and changing the TCP CC only on the first subflow.
This example shows how it is possible to:
Identify the parent msk of an MPTCP subflow. Put different sockopt for each subflow of a same MPTCP connection.
Here especially, we implemented two different behaviours:
A socket mark (SOL_SOCKET SO_MARK) is put on each subflow of a same MPTCP connection. The order of creation of the current subflow defines its mark. The TCP CC algorithm of the very first subflow of an MPTCP connection is set to "reno".
The code comes from
commit 4d120186e4d6 ("bpf:examples: update mptcp_set_mark_kern.c")
in MPTCP repo https://github.com/multipath-tcp/mptcp_net-next (the "scripts" branch).
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/76 Co-developed-by: Geliang Tang tanggeliang@kylinos.cn Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Signed-off-by: Nicolas Rybowski nicolas.rybowski@tessares.net Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- tools/testing/selftests/bpf/progs/mptcp_subflow.c | 70 +++++++++++++++++++++++ 1 file changed, 70 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c new file mode 100644 index 000000000000..de9dbba37133 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Tessares SA. */ +/* Copyright (c) 2024, Kylin Software */ + +#include <sys/socket.h> // SOL_SOCKET, SO_MARK, ... +#include <linux/tcp.h> // TCP_CONGESTION +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SOL_TCP +#define SOL_TCP 6 +#endif + +#ifndef TCP_CA_NAME_MAX +#define TCP_CA_NAME_MAX 16 +#endif + +char cc[TCP_CA_NAME_MAX] = "reno"; + +/* Associate a subflow counter to each token */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 100); +} mptcp_sf SEC(".maps"); + +SEC("sockops") +int mptcp_subflow(struct bpf_sock_ops *skops) +{ + __u32 init = 1, key, mark, *cnt; + struct mptcp_sock *msk; + struct bpf_sock *sk; + int err; + + if (skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = skops->sk; + if (!sk) + return 1; + + msk = bpf_skc_to_mptcp_sock(sk); + if (!msk) + return 1; + + key = msk->token; + cnt = bpf_map_lookup_elem(&mptcp_sf, &key); + if (cnt) { + /* A new subflow is added to an existing MPTCP connection */ + __sync_fetch_and_add(cnt, 1); + mark = *cnt; + } else { + /* A new MPTCP connection is just initiated and this is its primary subflow */ + bpf_map_update_elem(&mptcp_sf, &key, &init, BPF_ANY); + mark = init; + } + + /* Set the mark of the subflow's socket based on appearance order */ + err = bpf_setsockopt(skops, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); + if (err < 0) + return 1; + if (mark == 1) + err = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, TCP_CA_NAME_MAX); + + return 1; +}
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Nicolas Rybowski nicolas.rybowski@tessares.net
Move Nicolas's patch into bpf selftests directory. This example added a test that was adding a different mark (SO_MARK) on each subflow, and changing the TCP CC only on the first subflow.
This example shows how it is possible to:
Identify the parent msk of an MPTCP subflow. Put different sockopt for each subflow of a same MPTCP connection.
Here especially, we implemented two different behaviours:
A socket mark (SOL_SOCKET SO_MARK) is put on each subflow of a same MPTCP connection. The order of creation of the current subflow defines its mark.
The TCP CC algorithm of the very first subflow of an MPTCP connection is set to "reno".
why? What does it test? That bpf_setsockopt() can actually do it? But the next patch doesn't check that it's reno.
It looks to me that dropping this "set to reno" part won't change the purpose of the rest of selftest.
pw-bot: cr
Hi Alexei,
Thank you for the review!
On 07/05/2024 16:49, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Nicolas Rybowski nicolas.rybowski@tessares.net
Move Nicolas's patch into bpf selftests directory. This example added a test that was adding a different mark (SO_MARK) on each subflow, and changing the TCP CC only on the first subflow.
This example shows how it is possible to:
Identify the parent msk of an MPTCP subflow. Put different sockopt for each subflow of a same MPTCP connection.
Here especially, we implemented two different behaviours:
A socket mark (SOL_SOCKET SO_MARK) is put on each subflow of a same MPTCP connection. The order of creation of the current subflow defines its mark.
The TCP CC algorithm of the very first subflow of an MPTCP connection is set to "reno".
why? What does it test? That bpf_setsockopt() can actually do it?
Correct.
Here is a bit of context: from the userspace, an application can do a setsockopt() on an MPTCP socket, and typically the same value will be set on all subflows (paths). If someone wants to have different values per subflow, the recommanded way is to use BPF.
We can indeed restrict this test to changing the MARK only. I think the CC has been modified just not to check one thing, but also to change something at the TCP level, because it is managed differently on MPTCP side -- but only when the userspace set something, or when new subflows are created. The result of this operation is easy to check with 'ss', and it was to show an exemple where this is set only on one subflow.
But the next patch doesn't check that it's reno.
No, I think it is checked: 'reno' is not hardcoded, but 'skel->data->cc' is used instead:
run_subflow(skel->data->cc);
It looks to me that dropping this "set to reno" part won't change the purpose of the rest of selftest.
Yes, up to you. If you still think it is better without it, we can remove the modification of the CC in patch 3/4, and the validation in patch 4/4.
pw-bot: cr
Cheers, Matt
On Tue, May 7, 2024 at 9:03 AM Matthieu Baerts matttbe@kernel.org wrote:
Hi Alexei,
Thank you for the review!
On 07/05/2024 16:49, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Nicolas Rybowski nicolas.rybowski@tessares.net
Move Nicolas's patch into bpf selftests directory. This example added a test that was adding a different mark (SO_MARK) on each subflow, and changing the TCP CC only on the first subflow.
This example shows how it is possible to:
Identify the parent msk of an MPTCP subflow. Put different sockopt for each subflow of a same MPTCP connection.
Here especially, we implemented two different behaviours:
A socket mark (SOL_SOCKET SO_MARK) is put on each subflow of a same MPTCP connection. The order of creation of the current subflow defines its mark.
The TCP CC algorithm of the very first subflow of an MPTCP connection is set to "reno".
why? What does it test? That bpf_setsockopt() can actually do it?
Correct.
Here is a bit of context: from the userspace, an application can do a setsockopt() on an MPTCP socket, and typically the same value will be set on all subflows (paths). If someone wants to have different values per subflow, the recommanded way is to use BPF.
We can indeed restrict this test to changing the MARK only. I think the CC has been modified just not to check one thing, but also to change something at the TCP level, because it is managed differently on MPTCP side -- but only when the userspace set something, or when new subflows are created. The result of this operation is easy to check with 'ss', and it was to show an exemple where this is set only on one subflow.
But the next patch doesn't check that it's reno.
No, I think it is checked: 'reno' is not hardcoded, but 'skel->data->cc' is used instead:
run_subflow(skel->data->cc);
It looks to me that dropping this "set to reno" part won't change the purpose of the rest of selftest.
Yes, up to you. If you still think it is better without it, we can remove the modification of the CC in patch 3/4, and the validation in patch 4/4.
The concern with picking reno is extra deps to CI and every developer. Currently in selftests/bpf/config we do: CONFIG_TCP_CONG_DCTCP=y CONFIG_TCP_CONG_BBR=y
I'd like to avoid adding reno there as well. Will bpf_setsockopt("dctcp") work?
Hi Alexei,
Thank you for your reply!
On 07/05/2024 22:54, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 9:03 AM Matthieu Baerts matttbe@kernel.org wrote:
Hi Alexei,
Thank you for the review!
On 07/05/2024 16:49, Alexei Starovoitov wrote:
On Tue, May 7, 2024 at 3:53 AM Matthieu Baerts (NGI0) matttbe@kernel.org wrote:
From: Nicolas Rybowski nicolas.rybowski@tessares.net
Move Nicolas's patch into bpf selftests directory. This example added a test that was adding a different mark (SO_MARK) on each subflow, and changing the TCP CC only on the first subflow.
This example shows how it is possible to:
Identify the parent msk of an MPTCP subflow. Put different sockopt for each subflow of a same MPTCP connection.
Here especially, we implemented two different behaviours:
A socket mark (SOL_SOCKET SO_MARK) is put on each subflow of a same MPTCP connection. The order of creation of the current subflow defines its mark.
The TCP CC algorithm of the very first subflow of an MPTCP connection is set to "reno".
why? What does it test? That bpf_setsockopt() can actually do it?
Correct.
Here is a bit of context: from the userspace, an application can do a setsockopt() on an MPTCP socket, and typically the same value will be set on all subflows (paths). If someone wants to have different values per subflow, the recommanded way is to use BPF.
We can indeed restrict this test to changing the MARK only. I think the CC has been modified just not to check one thing, but also to change something at the TCP level, because it is managed differently on MPTCP side -- but only when the userspace set something, or when new subflows are created. The result of this operation is easy to check with 'ss', and it was to show an exemple where this is set only on one subflow.
But the next patch doesn't check that it's reno.
No, I think it is checked: 'reno' is not hardcoded, but 'skel->data->cc' is used instead:
run_subflow(skel->data->cc);
It looks to me that dropping this "set to reno" part won't change the purpose of the rest of selftest.
Yes, up to you. If you still think it is better without it, we can remove the modification of the CC in patch 3/4, and the validation in patch 4/4.
The concern with picking reno is extra deps to CI and every developer. Currently in selftests/bpf/config we do: CONFIG_TCP_CONG_DCTCP=y CONFIG_TCP_CONG_BBR=y
I'd like to avoid adding reno there as well. Will bpf_setsockopt("dctcp") work?
We picked Reno because this is an inlined kernel module that is always built: there is no kernel config to set, no extra deps. Also, it is usually not used as default, mostly used as fallback, so the verification should not be an issue.
We can switch to DCTCP or BBR if you prefer, but I think it is "safer" with Reno, no?
Cheers, Matt
On Wed, May 8, 2024 at 12:36 AM Matthieu Baerts matttbe@kernel.org wrote:
The concern with picking reno is extra deps to CI and every developer. Currently in selftests/bpf/config we do: CONFIG_TCP_CONG_DCTCP=y CONFIG_TCP_CONG_BBR=y
I'd like to avoid adding reno there as well. Will bpf_setsockopt("dctcp") work?
We picked Reno because this is an inlined kernel module that is always built: there is no kernel config to set, no extra deps. Also, it is usually not used as default, mostly used as fallback, so the verification should not be an issue.
Ahh. didn't realize that it's builtin. Then sure. keep it as reno.
From: Geliang Tang tanggeliang@kylinos.cn
This patch adds a subtest named test_subflow to load and verify the newly added mptcp subflow example in test_mptcp. Add a helper endpoint_init() to add a new subflow endpoint. Add another helper ss_search() to verify the fwmark and congestion values set by mptcp_subflow prog using setsockopts.
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/76 Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Mat Martineau martineau@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 108 +++++++++++++++++++++++++ 1 file changed, 108 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 9d1b255bb654..b1f4b74efd2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -9,8 +9,12 @@ #include "network_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" +#include "mptcp_subflow.skel.h"
#define NS_TEST "mptcp_ns" +#define ADDR_1 "10.0.1.1" +#define ADDR_2 "10.0.1.2" +#define PORT_1 10001
#ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 @@ -347,6 +351,109 @@ static void test_mptcpify(void) close(cgroup_fd); }
+static int endpoint_init(char *flags) +{ + SYS(fail, "ip -net %s link add veth1 type veth peer name veth2", NS_TEST); + SYS(fail, "ip -net %s addr add %s/24 dev veth1", NS_TEST, ADDR_1); + SYS(fail, "ip -net %s link set dev veth1 up", NS_TEST); + SYS(fail, "ip -net %s addr add %s/24 dev veth2", NS_TEST, ADDR_2); + SYS(fail, "ip -net %s link set dev veth2 up", NS_TEST); + SYS(fail, "ip -net %s mptcp endpoint add %s %s", NS_TEST, ADDR_2, flags); + + return 0; +fail: + return -1; +} + +static int _ss_search(char *src, char *dst, char *port, char *keyword) +{ + char cmd[128]; + int n; + + n = snprintf(cmd, sizeof(cmd), + "ip netns exec %s ss -Menita src %s dst %s %s %d | grep -q '%s'", + NS_TEST, src, dst, port, PORT_1, keyword); + if (n < 0 || n >= sizeof(cmd)) + return -1; + + return system(cmd); +} + +static int ss_search(char *src, char *keyword) +{ + return _ss_search(src, ADDR_1, "dport", keyword); +} + +static void run_subflow(char *new) +{ + int server_fd, client_fd, err; + char cc[TCP_CA_NAME_MAX]; + socklen_t len = sizeof(cc); + + server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); + if (!ASSERT_GE(server_fd, 0, "start_mptcp_server")) + return; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect to fd")) + goto fail; + + err = getsockopt(server_fd, SOL_TCP, TCP_CONGESTION, cc, &len); + if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)")) + goto fail; + + send_byte(client_fd); + + ASSERT_OK(ss_search(ADDR_1, "fwmark:0x1"), "ss_search fwmark:0x1"); + ASSERT_OK(ss_search(ADDR_2, "fwmark:0x2"), "ss_search fwmark:0x2"); + ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc"); + ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc"); + + close(client_fd); +fail: + close(server_fd); +} + +static void test_subflow(void) +{ + int cgroup_fd, prog_fd, err; + struct mptcp_subflow *skel; + struct nstoken *nstoken; + + cgroup_fd = test__join_cgroup("/mptcp_subflow"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: mptcp_subflow")) + return; + + skel = mptcp_subflow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_subflow")) + goto close_cgroup; + + err = mptcp_subflow__attach(skel); + if (!ASSERT_OK(err, "skel_attach: mptcp_subflow")) + goto skel_destroy; + + prog_fd = bpf_program__fd(skel->progs.mptcp_subflow); + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); + if (!ASSERT_OK(err, "prog_attach")) + goto skel_destroy; + + nstoken = create_netns(); + if (!ASSERT_OK_PTR(nstoken, "create_netns: mptcp_subflow")) + goto skel_destroy; + + if (!ASSERT_OK(endpoint_init("subflow"), "endpoint_init")) + goto close_netns; + + run_subflow(skel->data->cc); + +close_netns: + cleanup_netns(nstoken); +skel_destroy: + mptcp_subflow__destroy(skel); +close_cgroup: + close(cgroup_fd); +} + #define RUN_MPTCP_TEST(suffix) \ do { \ if (test__start_subtest(#suffix)) \ @@ -357,4 +464,5 @@ void test_mptcp(void) { RUN_MPTCP_TEST(base); RUN_MPTCP_TEST(mptcpify); + RUN_MPTCP_TEST(subflow); }
linux-kselftest-mirror@lists.linaro.org