From: Paolo Abeni <pabeni@redhat.com>
[ Upstream commit d4e85922e3e7ef2071f91f65e61629b60f3a9cf4 ]
If the peer closes all the existing subflows for a given
mptcp socket and later the application closes it, the current
implementation lets it survive until the timewait timeout expires.

While the above is allowed by the protocol specification, it
consumes resources for almost no reason and additionally
causes sporadic self-test failures.

Let's move the mptcp socket to the TCP_CLOSE state when there are
no alive subflows at close time, so that the allocated resources
will be freed immediately.
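
For context, the fix uses inet_sk_state_store() rather than a plain
assignment because sk_state can be read locklessly. In mainline the
helper is roughly the following (shown for reference only, from
net/ipv4/af_inet.c):

	/* Lockless-safe state update: the release barrier pairs with
	 * the acquire in inet_sk_state_load() on the reader side.
	 */
	void inet_sk_state_store(struct sock *sk, int newstate)
	{
		trace_inet_sock_set_state(sk, sk->sk_state, newstate);
		smp_store_release(&sk->sk_state, newstate);
	}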
Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
---
Hi Greg, Sasha,

Here is one MPTCP patch backport that recently failed to apply to the
5.15 stable tree: it clears resources earlier when there is no more
reason to keep MPTCP sockets alive.

I had a simple conflict because in v5.15, the context is a bit different
when iterating over the different subflows in __mptcp_close(), but the
idea is still the same: in this loop, a counter needs to be incremented,
as shown in the resolved excerpt below.
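
Once the conflict is resolved, the relevant part of mptcp_close() in
5.15 reads roughly as follows (abbreviated; msk is mptcp_sk(sk),
obtained earlier in the function):

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast_nested(ssk);

		/* count subflows that have not reached TCP_CLOSE yet */
		subflows_alive += ssk->sk_state != TCP_CLOSE;

		sock_orphan(ssk);
		unlock_sock_fast(ssk, slow);
	}
	sock_orphan(sk);

	/* all the subflows are closed, only timeout can change the msk
	 * state, let's not keep resources busy for no reasons
	 */
	if (subflows_alive == 0)
		inet_sk_state_store(sk, TCP_CLOSE);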
---
net/mptcp/protocol.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 47f359dac247..5d05d85242bc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2726,6 +2726,7 @@ static void mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow;
bool do_cancel_work = false;
+ int subflows_alive = 0;
lock_sock(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
@@ -2747,11 +2748,19 @@ static void mptcp_close(struct sock *sk, long timeout)
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ subflows_alive += ssk->sk_state != TCP_CLOSE;
+
sock_orphan(ssk);
unlock_sock_fast(ssk, slow);
}
sock_orphan(sk);
+ /* all the subflows are closed, only timeout can change the msk
+ * state, let's not keep resources busy for no reasons
+ */
+ if (subflows_alive == 0)
+ inet_sk_state_store(sk, TCP_CLOSE);
+
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (sk->sk_state == TCP_CLOSE) {
---
base-commit: e2c1a934fd8e4288e7a32f4088ceaccf469eb74c
change-id: 20230214-upstream-stable-20230214-linux-5-15-94-rc1-mptcp-fixes-517feb25bd47
Best regards,
--
Matthieu Baerts <matthieu.baerts@tessares.net>
If REQ_NOWAIT is set, then do a non-blocking allocation if the operation
is a write and we need to insert a new page. Currently REQ_NOWAIT cannot
be set, as the queue isn't marked as supporting nowait; this change is in
preparation for allowing that.
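
For reference, the follow-up enabling change would presumably just mark
the queue as nowait-capable, e.g. something like this in brd_alloc()
(hypothetical sketch, not part of this patch; until such a flag is set,
the block layer rejects REQ_NOWAIT bios for the queue before they reach
the driver):

	/* Hypothetical follow-up: advertise nowait support so that
	 * REQ_NOWAIT bios are allowed to reach brd_submit_bio().
	 */
	blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);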
Cc: stable@vger.kernel.org # 5.10+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
drivers/block/brd.c | 39 ++++++++++++++++++++++-----------------
1 file changed, 22 insertions(+), 17 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 15a148d5aad9..1ddada0cdaca 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -80,26 +80,20 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
/*
* Insert a new page for a given sector, if one does not already exist.
*/
-static int brd_insert_page(struct brd_device *brd, sector_t sector)
+static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
pgoff_t idx;
struct page *page;
- gfp_t gfp_flags;
page = brd_lookup_page(brd, sector);
if (page)
return 0;
- /*
- * Must use NOIO because we don't want to recurse back into the
- * block or filesystem layers from page reclaim.
- */
- gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
- page = alloc_page(gfp_flags);
+ page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
if (!page)
return -ENOMEM;
- if (radix_tree_preload(GFP_NOIO)) {
+ if (radix_tree_preload(gfp)) {
__free_page(page);
return -ENOMEM;
}
@@ -167,19 +161,20 @@ static void brd_free_pages(struct brd_device *brd)
/*
* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
*/
-static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
+static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
+ gfp_t gfp)
{
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
size_t copy;
int ret;
copy = min_t(size_t, n, PAGE_SIZE - offset);
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
if (ret)
return ret;
if (copy < n) {
sector += copy >> SECTOR_SHIFT;
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
}
return ret;
}
@@ -254,20 +249,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
* Process a single bvec of a bio.
*/
static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, enum req_op op,
+ unsigned int len, unsigned int off, blk_opf_t opf,
sector_t sector)
{
void *mem;
int err = 0;
- if (op_is_write(op)) {
- err = copy_to_brd_setup(brd, sector, len);
+ if (op_is_write(opf)) {
+ /*
+ * Must use NOIO because we don't want to recurse back into the
+ * block or filesystem layers from page reclaim.
+ */
+ gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
+
+ err = copy_to_brd_setup(brd, sector, len, gfp);
if (err)
goto out;
}
mem = kmap_atomic(page);
- if (!op_is_write(op)) {
+ if (!op_is_write(opf)) {
copy_from_brd(mem + off, brd, sector, len);
flush_dcache_page(page);
} else {
@@ -296,8 +297,12 @@ static void brd_submit_bio(struct bio *bio)
(len & (SECTOR_SIZE - 1)));
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio_op(bio), sector);
+ bio->bi_opf, sector);
if (err) {
+ if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
+ bio_wouldblock_error(bio);
+ return;
+ }
bio_io_error(bio);
return;
}
--
2.39.1