diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 0323a2b6c..2fccae0df 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -97,6 +97,7 @@ struct sk_psock {
 	struct sk_buff_head		ingress_skb;
 	struct list_head		ingress_msg;
 	spinlock_t			ingress_lock;
+	ssize_t				ingress_size;
 	unsigned long			state;
 	struct list_head		link;
 	spinlock_t			link_lock;
[ ... ]
@@ -321,6 +322,16 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+static inline ssize_t sk_psock_get_msg_size(struct sk_psock *psock)
+{
+	return READ_ONCE(psock->ingress_size);
+}
+
+static inline void sk_psock_inc_msg_size(struct sk_psock *psock, ssize_t diff)
+{
+	WRITE_ONCE(psock->ingress_size, READ_ONCE(psock->ingress_size) + diff);
+}
+
Can sk_psock_inc_msg_size() race with itself? READ_ONCE()/WRITE_ONCE() only prevent load/store tearing; the read-modify-write as a whole is not atomic, so every caller must be serialized by the same lock. Looking at the call sites:
- sk_psock_queue_msg() holds ingress_lock when calling this
- sk_psock_dequeue_msg() holds ingress_lock when calling this
- __sk_psock_purge_ingress_msg() holds ingress_lock when calling this
- __sk_msg_recvmsg() does NOT hold ingress_lock when calling this
The __sk_msg_recvmsg() path runs under the socket lock via tcp_bpf_recvmsg()->lock_sock(), but sk_psock_queue_msg() can be called from softirq context (network RX -> BPF verdict -> sk_psock_verdict_apply() -> sk_psock_skb_ingress_self() -> sk_psock_queue_msg()) without the socket lock.
This means concurrent updates from different lock contexts can race:
Thread A (softirq, ingress_lock): reads size=1000
Thread B (process, socket lock): reads size=1000, subtracts 50, writes 950
Thread A: adds 100 to stale 1000, writes 1100
Result: size=1100 but should be 1050 (lost the -50 update)
The ingress_size counter would drift from the actual queue size, breaking the FIONREAD accuracy that this patch aims to provide.
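One possible fix, as an untested sketch rather than a reviewed patch: make the counter an atomic_long_t, so the read-modify-write is atomic regardless of which lock the caller holds:

	atomic_long_t			ingress_size;	/* in struct sk_psock */

static inline ssize_t sk_psock_get_msg_size(struct sk_psock *psock)
{
	/* plain atomic read, no lock required */
	return atomic_long_read(&psock->ingress_size);
}

static inline void sk_psock_inc_msg_size(struct sk_psock *psock, ssize_t diff)
{
	/* atomic RMW: safe from ingress_lock and socket-lock callers alike */
	atomic_long_add(diff, &psock->ingress_size);
}

Alternatively, keep the plain ssize_t and take ingress_lock at the one call site that lacks it (see the sketch further down).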
@@ -329,6 +340,7 @@ static inline bool sk_psock_queue_msg(struct sk_psock *psock,
 	spin_lock_bh(&psock->ingress_lock);
 	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
 		list_add_tail(&msg->list, &psock->ingress_msg);
+		sk_psock_inc_msg_size(psock, msg->sg.size);
 		ret = true;
 	} else {
 		sk_msg_free(psock->sk, msg);
[ ... ]
@@ -345,18 +357,25 @@ static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
 	spin_lock_bh(&psock->ingress_lock);
 	msg = list_first_entry_or_null(&psock->ingress_msg,
 				       struct sk_msg, list);
-	if (msg)
+	if (msg) {
 		list_del(&msg->list);
+		sk_psock_inc_msg_size(psock, -msg->sg.size);
+	}
 	spin_unlock_bh(&psock->ingress_lock);
 	return msg;
 }
[ ... ]
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index d73e03f77..c959d52a6 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -455,6 +455,7 @@ int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg
 				atomic_sub(copy, &sk->sk_rmem_alloc);
 			}
 			msg_rx->sg.size -= copy;
+			sk_psock_inc_msg_size(psock, -copy);
This call to sk_psock_inc_msg_size() happens without holding ingress_lock, just the socket lock. Can this race with sk_psock_queue_msg() which holds ingress_lock but not the socket lock?
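If the counter stays a plain ssize_t, this site would need ingress_lock as well, e.g. (sketch only; assumes the _bh variant is as safe here as it is at the other call sites):

			msg_rx->sg.size -= copy;
			/* serialize the counter update against the softirq producer */
			spin_lock_bh(&psock->ingress_lock);
			sk_psock_inc_msg_size(psock, -copy);
			spin_unlock_bh(&psock->ingress_lock);

The atomic_long_t variant sketched above would avoid the extra locking here entirely.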
 			if (!sge->length) {
 				sk_msg_iter_var_next(i);

@@ -819,9 +820,11 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 		list_del(&msg->list);
 		if (!msg->skb)
 			atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
+		sk_psock_inc_msg_size(psock, -((ssize_t)msg->sg.size));
 		sk_msg_free(psock->sk, msg);
 		kfree(msg);
 	}
+	WARN_ON_ONCE(psock->ingress_size);
 }
[ ... ]
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/19669112811