When a call is released, rxrpc takes the spinlock and removes it from ->recvmsg_q in an effort to prevent racing recvmsg() invocations from seeing the same call. Now, rxrpc_recvmsg() only takes the spinlock when actually removing a call from the queue; it doesn't, however, take it in the lead up to that when it checks to see if the queue is empty. It *does* hold the socket lock, which prevents a recvmsg/recvmsg race - but this doesn't prevent sendmsg from ending the call because sendmsg() drops the socket lock and relies on the call->user_mutex.
Fix this by firstly removing the bit in rxrpc_release_call() that dequeues the released call and, instead, rely on recvmsg() to simply discard released calls (done in a preceding fix).
Secondly, rxrpc_notify_socket() is abandoned if the call is already marked as released rather than trying to be clever by setting both pointers in call->recvmsg_link to NULL to trick list_empty(). This isn't perfect and can still race, resulting in a released call on the queue, but recvmsg() will now clean that up.
Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells dhowells@redhat.com Reviewed-by: Jeffrey Altman jaltman@auristor.com cc: Marc Dionne marc.dionne@auristor.com cc: Junvyyang, Tencent Zhuque Lab zhuque@tencent.com cc: LePremierHomme kwqcheii@proton.me cc: Jakub Kicinski kuba@kernel.org cc: Paolo Abeni pabeni@redhat.com cc: "David S. Miller" davem@davemloft.net cc: Eric Dumazet edumazet@google.com cc: Simon Horman horms@kernel.org cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org cc: stable@vger.kernel.org --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/call_object.c | 28 ++++++++++++---------------- net/rxrpc/recvmsg.c | 4 ++++ 3 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e7dcfb1369b6..8e5a73eb5268 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -325,7 +325,6 @@ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ - EM(rxrpc_call_put_unnotify, "PUT unnotify") \ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ @@ -338,6 +337,7 @@ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \ EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 15067ff7b1f2..918f41d97a2f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -561,7 +561,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; - bool put = false, putu = false; + bool putu = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
@@ -573,23 +573,13 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_put_call_slot(call);
- /* Make sure we don't get any more notifications */ + /* Note that at this point, the call may still be on or may have been + * added back on to the socket receive queue. recvmsg() must discard + * released calls. The CALL_RELEASED flag should prevent further + * notifications. + */ spin_lock_irq(&rx->recvmsg_lock); - - if (!list_empty(&call->recvmsg_link)) { - _debug("unlinking once-pending call %p { e=%lx f=%lx }", - call, call->events, call->flags); - list_del(&call->recvmsg_link); - put = true; - } - - /* list_empty() must return false in rxrpc_notify_socket() */ - call->recvmsg_link.next = NULL; - call->recvmsg_link.prev = NULL; - spin_unlock_irq(&rx->recvmsg_lock); - if (put) - rxrpc_put_call(call, rxrpc_call_put_unnotify);
write_lock(&rx->call_lock);
@@ -638,6 +628,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) rxrpc_put_call(call, rxrpc_call_put_release_sock); }
+ while ((call = list_first_entry_or_null(&rx->recvmsg_q, + struct rxrpc_call, recvmsg_link))) { + list_del_init(&call->recvmsg_link); + rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q); + } + _leave(""); }
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6990e37697de..7fa7e77f6bb9 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -29,6 +29,10 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
if (!list_empty(&call->recvmsg_link)) return; + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_notify_released); + return; + }
rcu_read_lock();
On Wed, Jul 16, 2025 at 12:53:02PM +0100, David Howells wrote:
When a call is released, rxrpc takes the spinlock and removes it from ->recvmsg_q in an effort to prevent racing recvmsg() invocations from seeing the same call. Now, rxrpc_recvmsg() only takes the spinlock when actually removing a call from the queue; it doesn't, however, take it in the lead up to that when it checks to see if the queue is empty. It *does* hold the socket lock, which prevents a recvmsg/recvmsg race - but this doesn't prevent sendmsg from ending the call because sendmsg() drops the socket lock and relies on the call->user_mutex.
Fix this by firstly removing the bit in rxrpc_release_call() that dequeues the released call and, instead, rely on recvmsg() to simply discard released calls (done in a preceding fix).
Secondly, rxrpc_notify_socket() is abandoned if the call is already marked as released rather than trying to be clever by setting both pointers in call->recvmsg_link to NULL to trick list_empty(). This isn't perfect and can still race, resulting in a released call on the queue, but recvmsg() will now clean that up.
Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells dhowells@redhat.com Reviewed-by: Jeffrey Altman jaltman@auristor.com
...
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
...
@@ -638,6 +628,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) rxrpc_put_call(call, rxrpc_call_put_release_sock); }
- while ((call = list_first_entry_or_null(&rx->recvmsg_q,
struct rxrpc_call, recvmsg_link))) {
list_del_init(&call->recvmsg_link);
rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q);
- }
- _leave("");
}
Hi David,
I believe it is addressed in patch 5/5. But unfortunately this change breaks bisection.
.../call_object.c:634:24: error: use of undeclared identifier 'rxrpc_call_put_release_recvmsg_q' 634 | rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q); | ^
linux-stable-mirror@lists.linaro.org