5.10-stable review patch. If anyone has any objections, please let me know.
------------------
From: Kuniyuki Iwashima <kuniyu@google.com>

[ Upstream commit ae8f160e7eb24240a2a79fc4c815c6a0d4ee16cc ]

Netlink has this pattern in some places

  if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);

, which has the same problem fixed by commit 5a465a0da13e ("udp:
Fix multiple wraparounds of sk->sk_rmem_alloc.").

For example, if we set SO_RCVBUFFORCE to INT_MAX, the condition is
always false because both operands are signed int, so the overrun
check never fires.

Then, a single socket can eat as many skbs as possible until OOM
happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.
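
To make the failure mode concrete, here is a minimal userspace sketch
of the broken pattern (plain C11, not kernel code; rmem, rcvbuf, and
truesize only stand in for the socket fields):

  #include <limits.h>
  #include <stdio.h>

  int main(void)
  {
      int rcvbuf = INT_MAX;          /* SO_RCVBUFFORCE set to INT_MAX */
      unsigned int rmem = 0;         /* sk->sk_rmem_alloc; atomic_t wraps
                                      * like unsigned arithmetic */
      unsigned int truesize = 4096;  /* a typical skb->truesize */

      for (int i = 0; i < 600000; i++) {
          /* The signed comparison (int)rmem > INT_MAX can never be
           * true, so the overrun check never fires ...
           */
          if ((int)rmem > rcvbuf)
              break;                 /* never taken */
          rmem += truesize;          /* ... and the charge keeps growing */
      }

      /* Prints a negative value once the counter passes INT_MAX,
       * matching the negative Recv-Q shown below.
       */
      printf("rmem_alloc = %d\n", (int)rmem);
      return 0;
  }
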
Let's fix it by using atomic_add_return() and comparing the two variables as unsigned int.
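
Sketched in userspace C11 (charge_skb() and all names here are
illustrative, not the kernel API, and the exact bound differs
slightly per call site), the new charge path has this shape:

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  static atomic_uint rmem_alloc;     /* stands in for sk->sk_rmem_alloc */

  static bool charge_skb(unsigned int truesize, unsigned int rcvbuf)
  {
      /* Charge first, then test the post-add value as unsigned,
       * mirroring atomic_add_return() in the patch below.
       */
      unsigned int rmem = atomic_fetch_add(&rmem_alloc, truesize) + truesize;

      /* Accept the first skb unconditionally (rmem == truesize) so a
       * tiny rcvbuf cannot wedge the socket, then enforce the limit.
       */
      if (rmem == truesize || rmem <= rcvbuf)
          return true;

      /* Over the limit: roll the charge back and reject. */
      atomic_fetch_sub(&rmem_alloc, truesize);
      return false;
  }

  int main(void)
  {
      printf("%d\n", charge_skb(4096, 1024));  /* 1: first skb accepted */
      printf("%d\n", charge_skb(4096, 1024));  /* 0: over limit, rolled back */
      return 0;
  }
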
Before:
  [root@fedora ~]# ss -f netlink
  Recv-Q      Send-Q Local Address:Port Peer Address:Port
  -1668710080 0      rtnl:nl_wraparound/293   *

After:
  [root@fedora ~]# ss -f netlink
  Recv-Q     Send-Q Local Address:Port Peer Address:Port
  2147483072 0      rtnl:nl_wraparound/290   *
  ^
  `--- INT_MAX - 576

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Jason Baron <jbaron@akamai.com>
Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250704054824.1580222-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/netlink/af_netlink.c | 81 ++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 32 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4da043d9f2c7a..6f71b4c84d1d2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -379,7 +379,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
 	WARN_ON(skb->sk != NULL);
 	skb->sk = sk;
 	skb->destructor = netlink_skb_destructor;
-	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 	sk_mem_charge(sk, skb->truesize);
 }
@@ -1207,41 +1206,48 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		      long *timeo, struct sock *ssk)
 {
+	DECLARE_WAITQUEUE(wait, current);
 	struct netlink_sock *nlk;
+	unsigned int rmem;
 
 	nlk = nlk_sk(sk);
+	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
 
-	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
-		DECLARE_WAITQUEUE(wait, current);
-		if (!*timeo) {
-			if (!ssk || netlink_is_kernel(ssk))
-				netlink_overrun(sk);
-			sock_put(sk);
-			kfree_skb(skb);
-			return -EAGAIN;
-		}
-
-		__set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&nlk->wait, &wait);
+	if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
+	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
+		netlink_skb_set_owner_r(skb, sk);
+		return 0;
+	}
 
-		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
-		    !sock_flag(sk, SOCK_DEAD))
-			*timeo = schedule_timeout(*timeo);
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 
-		__set_current_state(TASK_RUNNING);
-		remove_wait_queue(&nlk->wait, &wait);
+	if (!*timeo) {
+		if (!ssk || netlink_is_kernel(ssk))
+			netlink_overrun(sk);
 		sock_put(sk);
+		kfree_skb(skb);
+		return -EAGAIN;
+	}
 
-		if (signal_pending(current)) {
-			kfree_skb(skb);
-			return sock_intr_errno(*timeo);
-		}
-		return 1;
+	__set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&nlk->wait, &wait);
+	rmem = atomic_read(&sk->sk_rmem_alloc);
+
+	if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
+	     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
+	    !sock_flag(sk, SOCK_DEAD))
+		*timeo = schedule_timeout(*timeo);
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&nlk->wait, &wait);
+	sock_put(sk);
+
+	if (signal_pending(current)) {
+		kfree_skb(skb);
+		return sock_intr_errno(*timeo);
 	}
-	netlink_skb_set_owner_r(skb, sk);
-	return 0;
+
+	return 1;
 }
 
 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
@@ -1301,6 +1307,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
 	ret = -ECONNREFUSED;
 	if (nlk->netlink_rcv != NULL) {
 		ret = skb->len;
+		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 		netlink_skb_set_owner_r(skb, sk);
 		NETLINK_CB(skb).sk = ssk;
 		netlink_deliver_tap_kernel(sk, ssk, skb);
@@ -1379,13 +1386,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
 
 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	unsigned int rmem, rcvbuf;
 
-	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+
+	if ((rmem != skb->truesize || rmem <= rcvbuf) &&
 	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
 		netlink_skb_set_owner_r(skb, sk);
 		__netlink_sendskb(sk, skb);
-		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+		return rmem > (rcvbuf >> 1);
 	}
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 	return -1;
 }
@@ -2202,6 +2215,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	struct module *module;
 	int err = -ENOBUFS;
 	int alloc_min_size;
+	unsigned int rmem;
 	int alloc_size;
 
 	if (!lock_taken)
@@ -2211,9 +2225,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 		goto errout_skb;
 	}
 
-	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-		goto errout_skb;
-
 	/* NLMSG_GOODSIZE is small to avoid high order allocations being
 	 * required, but it makes sense to _attempt_ a 16K bytes allocation
 	 * to reduce number of system calls on dump operations, if user
@@ -2236,6 +2247,12 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	if (!skb)
 		goto errout_skb;
 
+	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+	if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
+		atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+		goto errout_skb;
+	}
+
 	/* Trim skb to allocated size. User is expected to provide buffer as
 	 * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
 	 * netlink_recvmsg())). dump will pack as many smaller messages as