Embryo socket is not queued in gc_candidates, so we can't drop a reference held by its oob_skb.
Let's say we create listener and embryo sockets, send the listener's fd to the embryo as OOB data, and close() them without recv()ing the OOB data.
There is a self-reference cycle like
listener -> embryo.oob_skb -> listener
, so this must be cleaned up by GC. Otherwise, the listener's refcnt is not released and sockets are leaked:
# unshare -n # cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 0 -1 NI 0 yes kernel ...
# python3
from array import array from socket import *
s = socket(AF_UNIX, SOCK_STREAM) s.bind('\0test\0') s.listen()
c = socket(AF_UNIX, SOCK_STREAM) c.connect(s.getsockname()) c.sendmsg([b'x'], [(SOL_SOCKET, SCM_RIGHTS, array('i', [s.fileno()]))], MSG_OOB)
1
quit()
# cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 3 -1 NI 0 yes kernel ... ^^^ 3 sockets still in use after FDs are close()d
Let's drop the embryo socket's oob_skb ref in scan_inflight().
This also fixes a racy access to oob_skb that commit 9841991a446c ("af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock.") fixed for the new Tarjan's algo-based GC.
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: Lei Lu llfamsec@gmail.com Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com --- This has no upstream commit because I replaced the entire GC in 6.10 and the new GC does not have this bug, and this fix is only applicable to the old GC (<= 6.9), thus for 5.15/6.1/6.6. --- --- net/unix/garbage.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2a758531e102..b3fbdf129944 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -102,13 +102,14 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), /* Process the descriptors of this socket */ int nfd = UNIXCB(skb).fp->count; struct file **fp = UNIXCB(skb).fp->fp; + struct unix_sock *u;
while (nfd--) { /* Get the socket the fd matches if it indeed does so */ struct sock *sk = unix_get_socket(*fp++);
if (sk) { - struct unix_sock *u = unix_sk(sk); + u = unix_sk(sk);
/* Ignore non-candidates, they could * have been added to the queues after @@ -122,6 +123,13 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), } } if (hit && hitlist != NULL) { +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + u = unix_sk(x); + if (u->oob_skb) { + WARN_ON_ONCE(skb_unref(u->oob_skb)); + u->oob_skb = NULL; + } +#endif __skb_unlink(skb, &x->sk_receive_queue); __skb_queue_tail(hitlist, skb); } @@ -299,17 +307,9 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) { + list_for_each_entry(u, &gc_candidates, link) scan_children(&u->sk, inc_inflight, &hitlist);
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - if (u->oob_skb) { - kfree_skb(u->oob_skb); - u->oob_skb = NULL; - } -#endif - } - /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */
[ Sasha's backport helper bot ]
Hi,
Summary of potential issues: ⚠️ Could not find matching upstream commit
No upstream commit was identified. Using temporary commit for testing.
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-5.15.y | Success | Success | | stable/linux-6.1.y | Success | Success | | stable/linux-6.6.y | Success | Success |
On Mon, Mar 03, 2025 at 07:01:49PM -0800, Kuniyuki Iwashima wrote:
Embryo socket is not queued in gc_candidates, so we can't drop a reference held by its oob_skb.
Let's say we create listener and embryo sockets, send the listener's fd to the embryo as OOB data, and close() them without recv()ing the OOB data.
There is a self-reference cycle like
listener -> embryo.oob_skb -> listener
, so this must be cleaned up by GC. Otherwise, the listener's refcnt is not released and sockets are leaked:
# unshare -n # cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 0 -1 NI 0 yes kernel ...
# python3
from array import array from socket import *
s = socket(AF_UNIX, SOCK_STREAM) s.bind('\0test\0') s.listen()
c = socket(AF_UNIX, SOCK_STREAM) c.connect(s.getsockname()) c.sendmsg([b'x'], [(SOL_SOCKET, SCM_RIGHTS, array('i', [s.fileno()]))], MSG_OOB)
1
quit()
# cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 3 -1 NI 0 yes kernel ... ^^^ 3 sockets still in use after FDs are close()d
Let's drop the embryo socket's oob_skb ref in scan_inflight().
This also fixes a racy access to oob_skb that commit 9841991a446c ("af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock.") fixed for the new Tarjan's algo-based GC.
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: Lei Lu llfamsec@gmail.com Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com
This has no upstream commit because I replaced the entire GC in 6.10 and the new GC does not have this bug, and this fix is only applicable to the old GC (<= 6.9), thus for 5.15/6.1/6.6.
You need to get the networking maintainers to review and agree that this is ok for us to take, as we really don't want to take "custom" stuff like thi s at all. Why not just take the commits that are in newer kernels instead?
thanks,
greg k-h
+Paolo
From: Greg Kroah-Hartman gregkh@linuxfoundation.org Date: Wed, 5 Mar 2025 15:08:26 +0100
On Mon, Mar 03, 2025 at 07:01:49PM -0800, Kuniyuki Iwashima wrote:
Embryo socket is not queued in gc_candidates, so we can't drop a reference held by its oob_skb.
Let's say we create listener and embryo sockets, send the listener's fd to the embryo as OOB data, and close() them without recv()ing the OOB data.
There is a self-reference cycle like
listener -> embryo.oob_skb -> listener
, so this must be cleaned up by GC. Otherwise, the listener's refcnt is not released and sockets are leaked:
# unshare -n # cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 0 -1 NI 0 yes kernel ...
# python3
from array import array from socket import *
s = socket(AF_UNIX, SOCK_STREAM) s.bind('\0test\0') s.listen()
c = socket(AF_UNIX, SOCK_STREAM) c.connect(s.getsockname()) c.sendmsg([b'x'], [(SOL_SOCKET, SCM_RIGHTS, array('i', [s.fileno()]))], MSG_OOB)
1
quit()
# cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 3 -1 NI 0 yes kernel ... ^^^ 3 sockets still in use after FDs are close()d
Let's drop the embryo socket's oob_skb ref in scan_inflight().
This also fixes a racy access to oob_skb that commit 9841991a446c ("af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock.") fixed for the new Tarjan's algo-based GC.
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: Lei Lu llfamsec@gmail.com Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com
This has no upstream commit because I replaced the entire GC in 6.10 and the new GC does not have this bug, and this fix is only applicable to the old GC (<= 6.9), thus for 5.15/6.1/6.6.
You need to get the networking maintainers to review and agree that this is ok for us to take, as we really don't want to take "custom" stuff like thi s at all.
Paolo, could you take a look at this patch ? https://lore.kernel.org/netdev/20250304030149.82265-1-kuniyu@amazon.com/
Why not just take the commits that are in newer kernels instead?
That will be about 20 patches that rewrite the most lines of net/unix/garbage.c and cannot be applied cleanly.
I think backporting these commits is overkill to fix a small bug that can be fixed with a much smaller diff.
927fa5b3e4f5 af_unix: Fix uninit-value in __unix_walk_scc() 041933a1ec7b af_unix: Fix garbage collection of embryos carrying OOB with SCM_RIGHTS 7172dc93d621 af_unix: Add dead flag to struct scm_fp_list. 1af2dface5d2 af_unix: Don't access successor in unix_del_edges() during GC. fd86344823b5 af_unix: Try not to hold unix_gc_lock during accept(). 118f457da9ed af_unix: Remove lock dance in unix_peek_fds(). 4090fa373f0e af_unix: Replace garbage collection algorithm. a15702d8b3aa af_unix: Detect dead SCC. bfdb01283ee8 af_unix: Assign a unique index to SCC. ad081928a8b0 af_unix: Avoid Tarjan's algorithm if unnecessary. 77e5593aebba af_unix: Skip GC if no cycle exists. ba31b4a4e101 af_unix: Save O(n) setup of Tarjan's algo. dcf70df2048d af_unix: Fix up unix_edge.successor for embryo socket. 3484f063172d af_unix: Detect Strongly Connected Components. 6ba76fd2848e af_unix: Iterate all vertices by DFS. 22c3c0c52d32 af_unix: Bulk update unix_tot_inflight/unix_inflight when queuing skb. 42f298c06b30 af_unix: Link struct unix_edge when queuing skb. 29b64e354029 af_unix: Allocate struct unix_edge for each inflight AF_UNIX fd. 1fbfdfaa5902 af_unix: Allocate struct unix_vertex for each inflight AF_UNIX fd.
On Wed, Mar 05, 2025 at 10:10:41AM -0800, Kuniyuki Iwashima wrote:
+Paolo
From: Greg Kroah-Hartman gregkh@linuxfoundation.org Date: Wed, 5 Mar 2025 15:08:26 +0100
On Mon, Mar 03, 2025 at 07:01:49PM -0800, Kuniyuki Iwashima wrote:
Embryo socket is not queued in gc_candidates, so we can't drop a reference held by its oob_skb.
Let's say we create listener and embryo sockets, send the listener's fd to the embryo as OOB data, and close() them without recv()ing the OOB data.
There is a self-reference cycle like
listener -> embryo.oob_skb -> listener
, so this must be cleaned up by GC. Otherwise, the listener's refcnt is not released and sockets are leaked:
# unshare -n # cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 0 -1 NI 0 yes kernel ...
# python3
from array import array from socket import *
s = socket(AF_UNIX, SOCK_STREAM) s.bind('\0test\0') s.listen()
c = socket(AF_UNIX, SOCK_STREAM) c.connect(s.getsockname()) c.sendmsg([b'x'], [(SOL_SOCKET, SCM_RIGHTS, array('i', [s.fileno()]))], MSG_OOB)
1
quit()
# cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 3 -1 NI 0 yes kernel ... ^^^ 3 sockets still in use after FDs are close()d
Let's drop the embryo socket's oob_skb ref in scan_inflight().
This also fixes a racy access to oob_skb that commit 9841991a446c ("af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock.") fixed for the new Tarjan's algo-based GC.
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: Lei Lu llfamsec@gmail.com Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com
This has no upstream commit because I replaced the entire GC in 6.10 and the new GC does not have this bug, and this fix is only applicable to the old GC (<= 6.9), thus for 5.15/6.1/6.6.
You need to get the networking maintainers to review and agree that this is ok for us to take, as we really don't want to take "custom" stuff like thi s at all.
Paolo, could you take a look at this patch ? https://lore.kernel.org/netdev/20250304030149.82265-1-kuniyu@amazon.com/
Why not just take the commits that are in newer kernels instead?
That will be about 20 patches that rewrite the most lines of net/unix/garbage.c and cannot be applied cleanly.
I think backporting these commits is overkill to fix a small bug that can be fixed with a much smaller diff.
927fa5b3e4f5 af_unix: Fix uninit-value in __unix_walk_scc() 041933a1ec7b af_unix: Fix garbage collection of embryos carrying OOB with SCM_RIGHTS 7172dc93d621 af_unix: Add dead flag to struct scm_fp_list. 1af2dface5d2 af_unix: Don't access successor in unix_del_edges() during GC. fd86344823b5 af_unix: Try not to hold unix_gc_lock during accept(). 118f457da9ed af_unix: Remove lock dance in unix_peek_fds(). 4090fa373f0e af_unix: Replace garbage collection algorithm. a15702d8b3aa af_unix: Detect dead SCC. bfdb01283ee8 af_unix: Assign a unique index to SCC. ad081928a8b0 af_unix: Avoid Tarjan's algorithm if unnecessary. 77e5593aebba af_unix: Skip GC if no cycle exists. ba31b4a4e101 af_unix: Save O(n) setup of Tarjan's algo. dcf70df2048d af_unix: Fix up unix_edge.successor for embryo socket. 3484f063172d af_unix: Detect Strongly Connected Components. 6ba76fd2848e af_unix: Iterate all vertices by DFS. 22c3c0c52d32 af_unix: Bulk update unix_tot_inflight/unix_inflight when queuing skb. 42f298c06b30 af_unix: Link struct unix_edge when queuing skb. 29b64e354029 af_unix: Allocate struct unix_edge for each inflight AF_UNIX fd. 1fbfdfaa5902 af_unix: Allocate struct unix_vertex for each inflight AF_UNIX fd.
Sure, but now all fixes made upstream after these changes will not apply to older kernels at all, making supporting this old one-off change harder and harder over time.
But I'll defer to the maintainers here as to what they want. Taking 20+ patches in a stable tree is trivial for us, not a problem at all.
thanks,
greg k-h
On Wed, 05 Mar 2025, Greg KH wrote:
On Wed, Mar 05, 2025 at 10:10:41AM -0800, Kuniyuki Iwashima wrote:
+Paolo
From: Greg Kroah-Hartman gregkh@linuxfoundation.org Date: Wed, 5 Mar 2025 15:08:26 +0100
On Mon, Mar 03, 2025 at 07:01:49PM -0800, Kuniyuki Iwashima wrote:
Embryo socket is not queued in gc_candidates, so we can't drop a reference held by its oob_skb.
Let's say we create listener and embryo sockets, send the listener's fd to the embryo as OOB data, and close() them without recv()ing the OOB data.
There is a self-reference cycle like
listener -> embryo.oob_skb -> listener
, so this must be cleaned up by GC. Otherwise, the listener's refcnt is not released and sockets are leaked:
# unshare -n # cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 0 -1 NI 0 yes kernel ...
# python3
> from array import array > from socket import * > > s = socket(AF_UNIX, SOCK_STREAM) > s.bind('\0test\0') > s.listen() > > c = socket(AF_UNIX, SOCK_STREAM) > c.connect(s.getsockname()) > c.sendmsg([b'x'], [(SOL_SOCKET, SCM_RIGHTS, array('i', [s.fileno()]))], MSG_OOB)
1
> quit()
# cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 3 -1 NI 0 yes kernel ... ^^^ 3 sockets still in use after FDs are close()d
Let's drop the embryo socket's oob_skb ref in scan_inflight().
This also fixes a racy access to oob_skb that commit 9841991a446c ("af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock.") fixed for the new Tarjan's algo-based GC.
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: Lei Lu llfamsec@gmail.com Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com
This has no upstream commit because I replaced the entire GC in 6.10 and the new GC does not have this bug, and this fix is only applicable to the old GC (<= 6.9), thus for 5.15/6.1/6.6.
You need to get the networking maintainers to review and agree that this is ok for us to take, as we really don't want to take "custom" stuff like thi s at all.
Paolo, could you take a look at this patch ? https://lore.kernel.org/netdev/20250304030149.82265-1-kuniyu@amazon.com/
Why not just take the commits that are in newer kernels instead?
That will be about 20 patches that rewrite the most lines of net/unix/garbage.c and cannot be applied cleanly.
I think backporting these commits is overkill to fix a small bug that can be fixed with a much smaller diff.
927fa5b3e4f5 af_unix: Fix uninit-value in __unix_walk_scc() 041933a1ec7b af_unix: Fix garbage collection of embryos carrying OOB with SCM_RIGHTS 7172dc93d621 af_unix: Add dead flag to struct scm_fp_list. 1af2dface5d2 af_unix: Don't access successor in unix_del_edges() during GC. fd86344823b5 af_unix: Try not to hold unix_gc_lock during accept(). 118f457da9ed af_unix: Remove lock dance in unix_peek_fds(). 4090fa373f0e af_unix: Replace garbage collection algorithm. a15702d8b3aa af_unix: Detect dead SCC. bfdb01283ee8 af_unix: Assign a unique index to SCC. ad081928a8b0 af_unix: Avoid Tarjan's algorithm if unnecessary. 77e5593aebba af_unix: Skip GC if no cycle exists. ba31b4a4e101 af_unix: Save O(n) setup of Tarjan's algo. dcf70df2048d af_unix: Fix up unix_edge.successor for embryo socket. 3484f063172d af_unix: Detect Strongly Connected Components. 6ba76fd2848e af_unix: Iterate all vertices by DFS. 22c3c0c52d32 af_unix: Bulk update unix_tot_inflight/unix_inflight when queuing skb. 42f298c06b30 af_unix: Link struct unix_edge when queuing skb. 29b64e354029 af_unix: Allocate struct unix_edge for each inflight AF_UNIX fd. 1fbfdfaa5902 af_unix: Allocate struct unix_vertex for each inflight AF_UNIX fd.
Sure, but now all fixes made upstream after these changes will not apply to older kernels at all, making supporting this old one-off change harder and harder over time.
But I'll defer to the maintainers here as to what they want. Taking 20+ patches in a stable tree is trivial for us, not a problem at all.
Since the general expectation is that branches will be maintained for a long time, typically around 4 years, it could be seen as shortsighted to apply a drive-by fix which is highly likely to cause merge conflicts going forward. In order to ensure ease of future maintenance it would be nicer to apply all of the updates above, which as Greg says, would be trivial from the perspective of Stable.
Kuniyuki, has the suggested stack above been fully tested?
On Wed, 05 Mar 2025, Kuniyuki Iwashima wrote:
+Paolo
From: Greg Kroah-Hartman gregkh@linuxfoundation.org Date: Wed, 5 Mar 2025 15:08:26 +0100
On Mon, Mar 03, 2025 at 07:01:49PM -0800, Kuniyuki Iwashima wrote:
Embryo socket is not queued in gc_candidates, so we can't drop a reference held by its oob_skb.
Let's say we create listener and embryo sockets, send the listener's fd to the embryo as OOB data, and close() them without recv()ing the OOB data.
There is a self-reference cycle like
listener -> embryo.oob_skb -> listener
, so this must be cleaned up by GC. Otherwise, the listener's refcnt is not released and sockets are leaked:
# unshare -n # cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 0 -1 NI 0 yes kernel ...
# python3
from array import array from socket import *
s = socket(AF_UNIX, SOCK_STREAM) s.bind('\0test\0') s.listen()
c = socket(AF_UNIX, SOCK_STREAM) c.connect(s.getsockname()) c.sendmsg([b'x'], [(SOL_SOCKET, SCM_RIGHTS, array('i', [s.fileno()]))], MSG_OOB)
1
quit()
# cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 3 -1 NI 0 yes kernel ... ^^^ 3 sockets still in use after FDs are close()d
Let's drop the embryo socket's oob_skb ref in scan_inflight().
This also fixes a racy access to oob_skb that commit 9841991a446c ("af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock.") fixed for the new Tarjan's algo-based GC.
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: Lei Lu llfamsec@gmail.com Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com
This has no upstream commit because I replaced the entire GC in 6.10 and the new GC does not have this bug, and this fix is only applicable to the old GC (<= 6.9), thus for 5.15/6.1/6.6.
You need to get the networking maintainers to review and agree that this is ok for us to take, as we really don't want to take "custom" stuff like thi s at all.
Paolo, could you take a look at this patch ? https://lore.kernel.org/netdev/20250304030149.82265-1-kuniyu@amazon.com/
Why not just take the commits that are in newer kernels instead?
That will be about 20 patches that rewrite the most lines of net/unix/garbage.c and cannot be applied cleanly.
I think backporting these commits is overkill to fix a small bug that can be fixed with a much smaller diff.
927fa5b3e4f5 af_unix: Fix uninit-value in __unix_walk_scc() 041933a1ec7b af_unix: Fix garbage collection of embryos carrying OOB with SCM_RIGHTS 7172dc93d621 af_unix: Add dead flag to struct scm_fp_list. 1af2dface5d2 af_unix: Don't access successor in unix_del_edges() during GC. fd86344823b5 af_unix: Try not to hold unix_gc_lock during accept(). 118f457da9ed af_unix: Remove lock dance in unix_peek_fds(). 4090fa373f0e af_unix: Replace garbage collection algorithm. a15702d8b3aa af_unix: Detect dead SCC. bfdb01283ee8 af_unix: Assign a unique index to SCC. ad081928a8b0 af_unix: Avoid Tarjan's algorithm if unnecessary. 77e5593aebba af_unix: Skip GC if no cycle exists. ba31b4a4e101 af_unix: Save O(n) setup of Tarjan's algo. dcf70df2048d af_unix: Fix up unix_edge.successor for embryo socket. 3484f063172d af_unix: Detect Strongly Connected Components. 6ba76fd2848e af_unix: Iterate all vertices by DFS. 22c3c0c52d32 af_unix: Bulk update unix_tot_inflight/unix_inflight when queuing skb. 42f298c06b30 af_unix: Link struct unix_edge when queuing skb. 29b64e354029 af_unix: Allocate struct unix_edge for each inflight AF_UNIX fd. 1fbfdfaa5902 af_unix: Allocate struct unix_vertex for each inflight AF_UNIX fd.
Okay, I've taken some time to apply a set to linux-6.6.y that looks similar to this [0]. Would someone please give me some advice on how to test it please? Are there some unit tests I could run to ensure that everything is working as expected?
Or even better; if someone could pull the request below and tell me whether it's correct or not.
Thank you.
[0] The following changes since commit 9c2dd8954dad0430e83ee55b985ba55070e50cf7:
Linux 6.6.90 (2025-05-09 09:44:08 +0200)
are available in the Git repository at:
https://github.com/lag-google/linux.git tb-b404256079-af_unix-uaf
for you to fetch changes up to dcddf5a33645b7e305ab91966ed3c6b319d7e28e:
af_unix: Fix uninit-value in __unix_walk_scc() (2025-05-15 15:58:08 +0100)
---------------------------------------------------------------- Kuniyuki Iwashima (24): af_unix: Return struct unix_sock from unix_get_socket(). af_unix: Run GC on only one CPU. af_unix: Try to run GC async. af_unix: Replace BUG_ON() with WARN_ON_ONCE(). af_unix: Remove io_uring code for GC. af_unix: Remove CONFIG_UNIX_SCM. af_unix: Allocate struct unix_vertex for each inflight AF_UNIX fd. af_unix: Allocate struct unix_edge for each inflight AF_UNIX fd. af_unix: Link struct unix_edge when queuing skb. af_unix: Bulk update unix_tot_inflight/unix_inflight when queuing skb. af_unix: Iterate all vertices by DFS. af_unix: Detect Strongly Connected Components. af_unix: Save listener for embryo socket. af_unix: Fix up unix_edge.successor for embryo socket. af_unix: Save O(n) setup of Tarjan's algo. af_unix: Skip GC if no cycle exists. af_unix: Avoid Tarjan's algorithm if unnecessary. af_unix: Assign a unique index to SCC. af_unix: Detect dead SCC. af_unix: Replace garbage collection algorithm. af_unix: Remove lock dance in unix_peek_fds(). af_unix: Try not to hold unix_gc_lock during accept(). af_unix: Don't access successor in unix_del_edges() during GC. af_unix: Add dead flag to struct scm_fp_list.
Michal Luczaj (1): af_unix: Fix garbage collection of embryos carrying OOB with SCM_RIGHTS
Shigeru Yoshida (1): af_unix: Fix uninit-value in __unix_walk_scc()
include/net/af_unix.h | 49 +++- include/net/scm.h | 11 + net/Makefile | 2 +- net/core/scm.c | 17 ++ net/unix/Kconfig | 5 - net/unix/Makefile | 2 - net/unix/af_unix.c | 120 +++++---- net/unix/garbage.c | 691 +++++++++++++++++++++++++++++++++++--------------- net/unix/scm.c | 161 ------------ net/unix/scm.h | 10 - 10 files changed, 617 insertions(+), 451 deletions(-) delete mode 100644 net/unix/scm.c delete mode 100644 net/unix/scm.h
From: Lee Jones lee@kernel.org Date: Fri, 16 May 2025 09:18:43 +0100
On Wed, 05 Mar 2025, Kuniyuki Iwashima wrote:
+Paolo
From: Greg Kroah-Hartman gregkh@linuxfoundation.org Date: Wed, 5 Mar 2025 15:08:26 +0100
On Mon, Mar 03, 2025 at 07:01:49PM -0800, Kuniyuki Iwashima wrote:
Embryo socket is not queued in gc_candidates, so we can't drop a reference held by its oob_skb.
Let's say we create listener and embryo sockets, send the listener's fd to the embryo as OOB data, and close() them without recv()ing the OOB data.
There is a self-reference cycle like
listener -> embryo.oob_skb -> listener
, so this must be cleaned up by GC. Otherwise, the listener's refcnt is not released and sockets are leaked:
# unshare -n # cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 0 -1 NI 0 yes kernel ...
# python3
> from array import array > from socket import * > > s = socket(AF_UNIX, SOCK_STREAM) > s.bind('\0test\0') > s.listen() > > c = socket(AF_UNIX, SOCK_STREAM) > c.connect(s.getsockname()) > c.sendmsg([b'x'], [(SOL_SOCKET, SCM_RIGHTS, array('i', [s.fileno()]))], MSG_OOB)
1
> quit()
# cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 3 -1 NI 0 yes kernel ... ^^^ 3 sockets still in use after FDs are close()d
Let's drop the embryo socket's oob_skb ref in scan_inflight().
This also fixes a racy access to oob_skb that commit 9841991a446c ("af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock.") fixed for the new Tarjan's algo-based GC.
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: Lei Lu llfamsec@gmail.com Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com
This has no upstream commit because I replaced the entire GC in 6.10 and the new GC does not have this bug, and this fix is only applicable to the old GC (<= 6.9), thus for 5.15/6.1/6.6.
You need to get the networking maintainers to review and agree that this is ok for us to take, as we really don't want to take "custom" stuff like thi s at all.
Paolo, could you take a look at this patch ? https://lore.kernel.org/netdev/20250304030149.82265-1-kuniyu@amazon.com/
Why not just take the commits that are in newer kernels instead?
That will be about 20 patches that rewrite the most lines of net/unix/garbage.c and cannot be applied cleanly.
I think backporting these commits is overkill to fix a small bug that can be fixed with a much smaller diff.
927fa5b3e4f5 af_unix: Fix uninit-value in __unix_walk_scc() 041933a1ec7b af_unix: Fix garbage collection of embryos carrying OOB with SCM_RIGHTS 7172dc93d621 af_unix: Add dead flag to struct scm_fp_list. 1af2dface5d2 af_unix: Don't access successor in unix_del_edges() during GC. fd86344823b5 af_unix: Try not to hold unix_gc_lock during accept(). 118f457da9ed af_unix: Remove lock dance in unix_peek_fds(). 4090fa373f0e af_unix: Replace garbage collection algorithm. a15702d8b3aa af_unix: Detect dead SCC. bfdb01283ee8 af_unix: Assign a unique index to SCC. ad081928a8b0 af_unix: Avoid Tarjan's algorithm if unnecessary. 77e5593aebba af_unix: Skip GC if no cycle exists. ba31b4a4e101 af_unix: Save O(n) setup of Tarjan's algo. dcf70df2048d af_unix: Fix up unix_edge.successor for embryo socket. 3484f063172d af_unix: Detect Strongly Connected Components. 6ba76fd2848e af_unix: Iterate all vertices by DFS. 22c3c0c52d32 af_unix: Bulk update unix_tot_inflight/unix_inflight when queuing skb. 42f298c06b30 af_unix: Link struct unix_edge when queuing skb. 29b64e354029 af_unix: Allocate struct unix_edge for each inflight AF_UNIX fd. 1fbfdfaa5902 af_unix: Allocate struct unix_vertex for each inflight AF_UNIX fd.
Okay, I've taken some time to apply a set to linux-6.6.y that looks similar to this [0]. Would someone please give me some advice on how to test it please? Are there some unit tests I could run to ensure that everything is working as expected?
Thanks for working on this! and sorry for the late response, I'm personally busy this month.
Could you run tools/testing/selftests/net/af_unix/scm_rights.c with KASAN and kmemleak ?
The test is added by
2a79651bf2fa selftest: af_unix: Add test case for backtrack after finalising SCC. e060e433e512 selftest: af_unix: Make SCM_RIGHTS into OOB data. 2aa0cff26ed5 selftest: af_unix: Test GC for SCM_RIGHTS.
The 2nd commit will cover the test case of the python script in the patch of this thread.
On Mon, 03 Mar 2025, Kuniyuki Iwashima wrote:
Embryo socket is not queued in gc_candidates, so we can't drop a reference held by its oob_skb.
Let's say we create listener and embryo sockets, send the listener's fd to the embryo as OOB data, and close() them without recv()ing the OOB data.
There is a self-reference cycle like
listener -> embryo.oob_skb -> listener
, so this must be cleaned up by GC. Otherwise, the listener's refcnt is not released and sockets are leaked:
# unshare -n # cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 0 -1 NI 0 yes kernel ...
# python3
from array import array from socket import *
s = socket(AF_UNIX, SOCK_STREAM) s.bind('\0test\0') s.listen()
c = socket(AF_UNIX, SOCK_STREAM) c.connect(s.getsockname()) c.sendmsg([b'x'], [(SOL_SOCKET, SCM_RIGHTS, array('i', [s.fileno()]))], MSG_OOB)
1
quit()
# cat /proc/net/protocols | grep UNIX-STREAM UNIX-STREAM 1024 3 -1 NI 0 yes kernel ... ^^^ 3 sockets still in use after FDs are close()d
Let's drop the embryo socket's oob_skb ref in scan_inflight().
This also fixes a racy access to oob_skb that commit 9841991a446c ("af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock.") fixed for the new Tarjan's algo-based GC.
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: Lei Lu llfamsec@gmail.com Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com
This has no upstream commit because I replaced the entire GC in 6.10 and the new GC does not have this bug, and this fix is only applicable to the old GC (<= 6.9), thus for 5.15/6.1/6.6.
net/unix/garbage.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-)
Tested-by: Lee Jones lee@kernel.org
diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2a758531e102..b3fbdf129944 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -102,13 +102,14 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), /* Process the descriptors of this socket */ int nfd = UNIXCB(skb).fp->count; struct file **fp = UNIXCB(skb).fp->fp;
struct unix_sock *u;
while (nfd--) { /* Get the socket the fd matches if it indeed does so */ struct sock *sk = unix_get_socket(*fp++); if (sk) {
struct unix_sock *u = unix_sk(sk);
u = unix_sk(sk);
/* Ignore non-candidates, they could * have been added to the queues after @@ -122,6 +123,13 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), } } if (hit && hitlist != NULL) { +#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
u = unix_sk(x);
if (u->oob_skb) {
WARN_ON_ONCE(skb_unref(u->oob_skb));
u->oob_skb = NULL;
}
+#endif __skb_unlink(skb, &x->sk_receive_queue); __skb_queue_tail(hitlist, skb); } @@ -299,17 +307,9 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link) {
- list_for_each_entry(u, &gc_candidates, link) scan_children(&u->sk, inc_inflight, &hitlist);
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (u->oob_skb) {
kfree_skb(u->oob_skb);
u->oob_skb = NULL;
}
-#endif
- }
- /* not_cycle_list contains those sockets which do not make up a
*/
- cycle. Restore these to the inflight list.
-- 2.39.5 (Apple Git-154)
linux-stable-mirror@lists.linaro.org