Patch 1 removes an unneeded address copy in subflow_syn_recv_sock().
Patch 2 simplifies subflow_syn_recv_sock() to postpone some actions and to avoid a bunch of conditionals.
Patch 3 stops reporting limits that are not taken into account when the userspace PM is used.
Patch 4 adds a new test to validate that the 'subflows' field reported by the kernel is correct. Such info can be retrieved via Netlink (e.g. with ss) or getsockopt(SOL_MPTCP, MPTCP_INFO).
Signed-off-by: Matthieu Baerts matthieu.baerts@tessares.net --- Geliang Tang (1): selftests: mptcp: add mptcp_info tests
Matthieu Baerts (1): mptcp: do not fill info not used by the PM in used
Paolo Abeni (2): mptcp: avoid unneeded address copy mptcp: simplify subflow_syn_recv_sock()
net/mptcp/sockopt.c | 20 +++++++---- net/mptcp/subflow.c | 43 +++++++--------------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 47 ++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 38 deletions(-) --- base-commit: 323fe43cf9aef79159ba8937218a3f076bf505af change-id: 20230324-upstream-net-next-20230324-misc-features-178b2b618414
Best regards,
From: Paolo Abeni pabeni@redhat.com
In the syn_recv fallback path, the msk is unused. We can skip setting the socket address.
Signed-off-by: Paolo Abeni pabeni@redhat.com Reviewed-by: Matthieu Baerts matthieu.baerts@tessares.net Signed-off-by: Matthieu Baerts matthieu.baerts@tessares.net --- net/mptcp/subflow.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index dadaf85db720..a11f4c525e01 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -821,8 +821,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto dispose_child; }
- if (new_msk) - mptcp_copy_inaddrs(new_msk, child); mptcp_subflow_drop_ctx(child); goto out; }
From: Paolo Abeni pabeni@redhat.com
Postpone the msk cloning to the child process creation so that we can avoid a bunch of conditionals.
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/61 Signed-off-by: Paolo Abeni pabeni@redhat.com Reviewed-by: Matthieu Baerts matthieu.baerts@tessares.net Signed-off-by: Matthieu Baerts matthieu.baerts@tessares.net --- net/mptcp/subflow.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a11f4c525e01..33dd27765116 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -696,14 +696,6 @@ static bool subflow_hmac_valid(const struct request_sock *req, return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN); }
-static void mptcp_force_close(struct sock *sk) -{ - /* the msk is not yet exposed to user-space, and refcount is 2 */ - inet_sk_state_store(sk, TCP_CLOSE); - sk_common_release(sk); - sock_put(sk); -} - static void subflow_ulp_fallback(struct sock *sk, struct mptcp_subflow_context *old_ctx) { @@ -755,7 +747,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_request_sock *subflow_req; struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; - struct sock *new_msk = NULL; struct mptcp_sock *owner; struct sock *child;
@@ -784,14 +775,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * options. */ mptcp_get_options(skb, &mp_opt); - if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) { + if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) fallback = true; - goto create_child; - }
- new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); - if (!new_msk) - fallback = true; } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) || @@ -820,21 +806,23 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; } - - mptcp_subflow_drop_ctx(child); - goto out; + goto fallback; }
/* ssk inherits options of listener sk */ ctx->setsockopt_seq = listener->setsockopt_seq;
if (ctx->mp_capable) { - owner = mptcp_sk(new_msk); + ctx->conn = mptcp_sk_clone(listener->conn, &mp_opt, req); + if (!ctx->conn) + goto fallback; + + owner = mptcp_sk(ctx->conn);
/* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ - inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED); + inet_sk_state_store(ctx->conn, TCP_ESTABLISHED);
/* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list @@ -844,11 +832,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* new mpc subflow takes ownership of the newly * created mptcp socket */ - mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; + owner->setsockopt_seq = ctx->setsockopt_seq; mptcp_pm_new_connection(owner, child, 1); mptcp_token_accept(subflow_req, owner); - ctx->conn = new_msk; - new_msk = NULL;
/* set msk addresses early to ensure mptcp_pm_get_local_id() * uses the correct data @@ -898,11 +884,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, } }
-out: - /* dispose of the left over mptcp master, if any */ - if (unlikely(new_msk)) - mptcp_force_close(new_msk); - /* check for expected invariant - should never trigger, just help * catching eariler subtle bugs */ @@ -920,6 +901,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* The last child reference will be released by the caller */ return child; + +fallback: + mptcp_subflow_drop_ctx(child); + return child; }
static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
Only the in-kernel PM uses the number of address and subflow limits allowed per connection.
It then makes more sense not to display such info when other PMs are used not to confuse the userspace by showing limits not being used.
While at it, we can get rid of the "val" variable and add indentations instead.
It would have been good to have done this modification directly in commit 3fd4c2a2d672 ("mptcp: bypass in-kernel PM restrictions for non-kernel PMs") but as we change a bit the behaviour, it is fine not to backport it to stable.
Acked-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Matthieu Baerts matthieu.baerts@tessares.net --- net/mptcp/sockopt.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 5cef4d3d21ac..b655cebda0f3 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -885,7 +885,6 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { u32 flags = 0; - u8 val;
memset(info, 0, sizeof(*info));
@@ -893,12 +892,19 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); - info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); - val = mptcp_pm_get_add_addr_signal_max(msk); - info->mptcpi_add_addr_signal_max = val; - val = mptcp_pm_get_add_addr_accept_max(msk); - info->mptcpi_add_addr_accepted_max = val; - info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); + + /* The following limits only make sense for the in-kernel PM */ + if (mptcp_pm_is_kernel(msk)) { + info->mptcpi_subflows_max = + mptcp_pm_get_subflows_max(msk); + info->mptcpi_add_addr_signal_max = + mptcp_pm_get_add_addr_signal_max(msk); + info->mptcpi_add_addr_accepted_max = + mptcp_pm_get_add_addr_accept_max(msk); + info->mptcpi_local_addr_max = + mptcp_pm_get_local_addr_max(msk); + } + if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) flags |= MPTCP_INFO_FLAG_FALLBACK; if (READ_ONCE(msk->can_ack))
Hello,
On 24/03/2023 18:11, Matthieu Baerts wrote:
Only the in-kernel PM uses the number of address and subflow limits allowed per connection.
It then makes more sense not to display such info when other PMs are used not to confuse the userspace by showing limits not being used.
While at it, we can get rid of the "val" variable and add indentations instead.
It would have been good to have done this modification directly in commit 3fd4c2a2d672 ("mptcp: bypass in-kernel PM restrictions for non-kernel PMs")
I'm sorry, I just noticed I picked the wrong SHA for this commit and my scripts only checked the ones mentioned in the "Fixes" tags. We should have this instead:
commit 4d25247d3ae4 ("mptcp: bypass in-kernel PM restrictions for non-kernel PMs")
I can send a v2 later to fix the SHA if there is no other comments.
Cheers, Matt
From: Geliang Tang geliang.tang@suse.com
This patch adds the mptcp_info fields tests in endpoint_tests(). Add a new function chk_mptcp_info() to check the given number of the given mptcp_info field.
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/330 Signed-off-by: Geliang Tang geliang.tang@suse.com Reviewed-by: Matthieu Baerts matthieu.baerts@tessares.net Signed-off-by: Matthieu Baerts matthieu.baerts@tessares.net --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 47 ++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 42e3bd1a05f5..fafd19ec7e1f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1719,6 +1719,46 @@ chk_subflow_nr() fi }
+chk_mptcp_info() +{ + local nr_info=$1 + local info + local cnt1 + local cnt2 + local dump_stats + + if [[ $nr_info = "subflows_"* ]]; then + info="subflows" + nr_info=${nr_info:9} + else + echo "[fail] unsupported argument: $nr_info" + fail_test + return 1 + fi + + printf "%-${nr_blank}s %-30s" " " "mptcp_info $info=$nr_info" + + cnt1=$(ss -N $ns1 -inmHM | grep "$info:" | + sed -n 's/.*('"$info"':)([[:digit:]]*).*$/\2/p;q') + [ -z "$cnt1" ] && cnt1=0 + cnt2=$(ss -N $ns2 -inmHM | grep "$info:" | + sed -n 's/.*('"$info"':)([[:digit:]]*).*$/\2/p;q') + [ -z "$cnt2" ] && cnt2=0 + if [ "$cnt1" != "$nr_info" ] || [ "$cnt2" != "$nr_info" ]; then + echo "[fail] got $cnt1:$cnt2 $info expected $nr_info" + fail_test + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -inmHM + ss -N $ns2 -inmHM + dump_stats + fi +} + chk_link_usage() { local ns=$1 @@ -3118,13 +3158,18 @@ endpoint_tests() run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null &
wait_mpj $ns2 + chk_subflow_nr needtitle "before delete" 2 + chk_mptcp_info subflows_1 + pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr needtitle "after delete" 1 + chk_subflow_nr "" "after delete" 1 + chk_mptcp_info subflows_0
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "" "after re-add" 2 + chk_mptcp_info subflows_1 kill_tests_wait fi }
linux-kselftest-mirror@lists.linaro.org