Link: https://www.cve.org/CVERecord/?id=CVE-2024-56658
5.15: https://lore.kernel.org/all/20250115091642.335047-1-kovalev@altlinux.org/
--- v1: https://lore.kernel.org/all/20250115091913.335173-1-kovalev@altlinux.org/
v2: https://lore.kernel.org/all/20250121192730.155559-1-kovalev@altlinux.org/
v3: (Suggested-by [1]: Hazem Mohamed Abuelfotoh abuehaze@amazon.com) Added a backport of commit 41467d2ff4df ("net: net_namespace: Optimize the code") as a prerequisite for 0f6ede9fbc74 ("net: defer final 'struct net' free in netns dismantle").
[1] https://lore.kernel.org/all/20250127134248.25731-1-abuehaze@amazon.com/
[PATCH v3 5.10 1/2] net: net_namespace: Optimize the code
[PATCH v3 5.10 2/2] net: defer final 'struct net' free in netns dismantle
From: Yajun Deng yajun.deng@linux.dev
commit 41467d2ff4dfe1837cbb0f45e2088e6e787580c6 upstream.
There is only one caller for ops_free(), so inline it. Separate net_drop_ns() and net_free(), so that net_free() can be called directly. Add a free_exit_list() helper function for freeing net_exit_list.
==================== v2: - v1 does not apply, rebase it. ====================
Signed-off-by: Yajun Deng yajun.deng@linux.dev Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Vasiliy Kovalev kovalev@altlinux.org --- net/core/net_namespace.c | 52 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 28 deletions(-)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6192a05ebcce2c..ef19a0eaa55aa3 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -113,7 +113,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) }
ng = net_alloc_generic(); - if (ng == NULL) + if (!ng) return -ENOMEM;
/* @@ -170,13 +170,6 @@ static int ops_init(const struct pernet_operations *ops, struct net *net) return err; }
-static void ops_free(const struct pernet_operations *ops, struct net *net) -{ - if (ops->id && ops->size) { - kfree(net_generic(net, *ops->id)); - } -} - static void ops_pre_exit_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { @@ -208,7 +201,7 @@ static void ops_free_list(const struct pernet_operations *ops, struct net *net; if (ops->size && ops->id) { list_for_each_entry(net, net_exit_list, exit_list) - ops_free(ops, net); + kfree(net_generic(net, *ops->id)); } }
@@ -454,15 +447,18 @@ static struct net *net_alloc(void)
static void net_free(struct net *net) { - kfree(rcu_access_pointer(net->gen)); - kmem_cache_free(net_cachep, net); + if (refcount_dec_and_test(&net->passive)) { + kfree(rcu_access_pointer(net->gen)); + kmem_cache_free(net_cachep, net); + } }
void net_drop_ns(void *p) { - struct net *ns = p; - if (ns && refcount_dec_and_test(&ns->passive)) - net_free(ns); + struct net *net = (struct net *)p; + + if (net) + net_free(net); }
struct net *copy_net_ns(unsigned long flags, @@ -502,7 +498,7 @@ struct net *copy_net_ns(unsigned long flags, key_remove_domain(net->key_domain); #endif put_user_ns(user_ns); - net_drop_ns(net); + net_free(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); @@ -636,7 +632,7 @@ static void cleanup_net(struct work_struct *work) key_remove_domain(net->key_domain); #endif put_user_ns(net->user_ns); - net_drop_ns(net); + net_free(net); } }
@@ -1160,6 +1156,14 @@ static int __init net_ns_init(void)
pure_initcall(net_ns_init);
+static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) +{ + ops_pre_exit_list(ops, net_exit_list); + synchronize_rcu(); + ops_exit_list(ops, net_exit_list); + ops_free_list(ops, net_exit_list); +} + #ifdef CONFIG_NET_NS static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) @@ -1185,10 +1189,7 @@ static int __register_pernet_operations(struct list_head *list, out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + free_exit_list(ops, &net_exit_list); return error; }
@@ -1201,10 +1202,8 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) /* See comment in __register_pernet_operations() */ for_each_net(net) list_add_tail(&net->exit_list, &net_exit_list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + + free_exit_list(ops, &net_exit_list); }
#else @@ -1227,10 +1226,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) } else { LIST_HEAD(net_exit_list); list_add(&init_net.exit_list, &net_exit_list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + free_exit_list(ops, &net_exit_list); } }
[ Sasha's backport helper bot ]
Hi,
The upstream commit SHA1 provided is correct: 41467d2ff4dfe1837cbb0f45e2088e6e787580c6
WARNING: Author mismatch between patch and upstream commit:
Backport author: Vasiliy Kovalev <kovalev@altlinux.org>
Commit author: Yajun Deng <yajun.deng@linux.dev>
Status in newer kernel trees:
6.13.y | Branch not found
6.12.y | Present (exact SHA1)
6.6.y | Present (exact SHA1)
6.1.y | Present (exact SHA1)
5.15.y | Present (exact SHA1)
5.10.y | Present (different SHA1: d33542e7aebc)
Note: The patch differs from the upstream commit: --- 1: 41467d2ff4dfe ! 1: 1bdbc1a4c6a3a net: net_namespace: Optimize the code @@ Metadata ## Commit message ## net: net_namespace: Optimize the code
+ commit 41467d2ff4dfe1837cbb0f45e2088e6e787580c6 upstream. + There is only one caller for ops_free(), so inline it. Separate net_drop_ns() and net_free(), so the net_free() can be called directly. @@ Commit message
Signed-off-by: Yajun Deng yajun.deng@linux.dev Signed-off-by: David S. Miller davem@davemloft.net + Signed-off-by: Vasiliy Kovalev kovalev@altlinux.org
## net/core/net_namespace.c ## @@ net/core/net_namespace.c: static int net_assign_generic(struct net *net, unsigned int id, void *data) @@ net/core/net_namespace.c: static struct net *net_alloc(void)
struct net *copy_net_ns(unsigned long flags, @@ net/core/net_namespace.c: struct net *copy_net_ns(unsigned long flags, - put_userns: key_remove_domain(net->key_domain); + #endif put_user_ns(user_ns); - net_drop_ns(net); + net_free(net); @@ net/core/net_namespace.c: struct net *copy_net_ns(unsigned long flags, dec_net_namespaces(ucounts); return ERR_PTR(rv); @@ net/core/net_namespace.c: static void cleanup_net(struct work_struct *work) - dec_net_namespaces(net->ucounts); key_remove_domain(net->key_domain); + #endif put_user_ns(net->user_ns); - net_drop_ns(net); + net_free(net); ---
Results of testing on various branches:
| Branch                    | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-5.10.y       | Success     | Success    |
From: Eric Dumazet edumazet@google.com
commit 0f6ede9fbc747e2553612271bce108f7517e7a45 upstream.
Ilya reported a slab-use-after-free in dst_destroy [1]
Issue is in xfrm6_net_init() and xfrm4_net_init() :
They copy xfrm[46]_dst_ops_template into net->xfrm.xfrm[46]_dst_ops.
But the net structure might be freed before all the dst callbacks are called. So when dst_destroy() later calls:
if (dst->ops->destroy) dst->ops->destroy(dst);
dst->ops points to the old net->xfrm.xfrm[46]_dst_ops, which has been freed.
See a relevant issue fixed in :
ac888d58869b ("net: do not delay dst_entries_add() in dst_release()")
A fix is to queue the 'struct net' to be freed after one more cleanup_net() round (and the existing rcu_barrier()).
[1]
BUG: KASAN: slab-use-after-free in dst_destroy (net/core/dst.c:112) Read of size 8 at addr ffff8882137ccab0 by task swapper/37/0 Dec 03 05:46:18 kernel: CPU: 37 UID: 0 PID: 0 Comm: swapper/37 Kdump: loaded Not tainted 6.12.0 #67 Hardware name: Red Hat KVM/RHEL, BIOS 1.16.1-1.el9 04/01/2014 Call Trace: <IRQ> dump_stack_lvl (lib/dump_stack.c:124) print_address_description.constprop.0 (mm/kasan/report.c:378) ? dst_destroy (net/core/dst.c:112) print_report (mm/kasan/report.c:489) ? dst_destroy (net/core/dst.c:112) ? kasan_addr_to_slab (mm/kasan/common.c:37) kasan_report (mm/kasan/report.c:603) ? dst_destroy (net/core/dst.c:112) ? rcu_do_batch (kernel/rcu/tree.c:2567) dst_destroy (net/core/dst.c:112) rcu_do_batch (kernel/rcu/tree.c:2567) ? __pfx_rcu_do_batch (kernel/rcu/tree.c:2491) ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4339 kernel/locking/lockdep.c:4406) rcu_core (kernel/rcu/tree.c:2825) handle_softirqs (kernel/softirq.c:554) __irq_exit_rcu (kernel/softirq.c:589 kernel/softirq.c:428 kernel/softirq.c:637) irq_exit_rcu (kernel/softirq.c:651) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1049 arch/x86/kernel/apic/apic.c:1049) </IRQ> <TASK> asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:702) RIP: 0010:default_idle (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/kernel/process.c:743) Code: 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 6e ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 0f 00 2d c7 c9 27 00 fb f4 <fa> c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 RSP: 0018:ffff888100d2fe00 EFLAGS: 00000246 RAX: 00000000001870ed RBX: 1ffff110201a5fc2 RCX: ffffffffb61a3e46 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffb3d4d123 RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed11c7e1835d R10: ffff888e3f0c1aeb R11: 0000000000000000 R12: 0000000000000000 R13: ffff888100d20000 R14: dffffc0000000000 R15: 0000000000000000 ? 
ct_kernel_exit.constprop.0 (kernel/context_tracking.c:148) ? cpuidle_idle_call (kernel/sched/idle.c:186) default_idle_call (./include/linux/cpuidle.h:143 kernel/sched/idle.c:118) cpuidle_idle_call (kernel/sched/idle.c:186) ? __pfx_cpuidle_idle_call (kernel/sched/idle.c:168) ? lock_release (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5848) ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4347 kernel/locking/lockdep.c:4406) ? tsc_verify_tsc_adjust (arch/x86/kernel/tsc_sync.c:59) do_idle (kernel/sched/idle.c:326) cpu_startup_entry (kernel/sched/idle.c:423 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:202 arch/x86/kernel/smpboot.c:282) ? __pfx_start_secondary (arch/x86/kernel/smpboot.c:232) ? soft_restart_cpu (arch/x86/kernel/head_64.S:452) common_startup_64 (arch/x86/kernel/head_64.S:414) </TASK> Dec 03 05:46:18 kernel: Allocated by task 12184: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) __kasan_slab_alloc (mm/kasan/common.c:319 mm/kasan/common.c:345) kmem_cache_alloc_noprof (mm/slub.c:4085 mm/slub.c:4134 mm/slub.c:4141) copy_net_ns (net/core/net_namespace.c:421 net/core/net_namespace.c:480) create_new_namespaces (kernel/nsproxy.c:110) unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) ksys_unshare (kernel/fork.c:3313) __x64_sys_unshare (kernel/fork.c:3382) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Dec 03 05:46:18 kernel: Freed by task 11: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) kasan_save_free_info (mm/kasan/generic.c:582) __kasan_slab_free (mm/kasan/common.c:271) kmem_cache_free (mm/slub.c:4579 mm/slub.c:4681) cleanup_net (net/core/net_namespace.c:456 net/core/net_namespace.c:446 net/core/net_namespace.c:647) process_one_work 
(kernel/workqueue.c:3229) worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391) kthread (kernel/kthread.c:389) ret_from_fork (arch/x86/kernel/process.c:147) ret_from_fork_asm (arch/x86/entry/entry_64.S:257) Dec 03 05:46:18 kernel: Last potentially related work creation: kasan_save_stack (mm/kasan/common.c:48) __kasan_record_aux_stack (mm/kasan/generic.c:541) insert_work (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 kernel/workqueue.c:788 kernel/workqueue.c:795 kernel/workqueue.c:2186) __queue_work (kernel/workqueue.c:2340) queue_work_on (kernel/workqueue.c:2391) xfrm_policy_insert (net/xfrm/xfrm_policy.c:1610) xfrm_add_policy (net/xfrm/xfrm_user.c:2116) xfrm_user_rcv_msg (net/xfrm/xfrm_user.c:3321) netlink_rcv_skb (net/netlink/af_netlink.c:2536) xfrm_netlink_rcv (net/xfrm/xfrm_user.c:3344) netlink_unicast (net/netlink/af_netlink.c:1316 net/netlink/af_netlink.c:1342) netlink_sendmsg (net/netlink/af_netlink.c:1886) sock_write_iter (net/socket.c:729 net/socket.c:744 net/socket.c:1165) vfs_write (fs/read_write.c:590 fs/read_write.c:683) ksys_write (fs/read_write.c:736) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Dec 03 05:46:18 kernel: Second to last potentially related work creation: kasan_save_stack (mm/kasan/common.c:48) __kasan_record_aux_stack (mm/kasan/generic.c:541) insert_work (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 kernel/workqueue.c:788 kernel/workqueue.c:795 kernel/workqueue.c:2186) __queue_work (kernel/workqueue.c:2340) queue_work_on (kernel/workqueue.c:2391) __xfrm_state_insert (./include/linux/workqueue.h:723 net/xfrm/xfrm_state.c:1150 net/xfrm/xfrm_state.c:1145 net/xfrm/xfrm_state.c:1513) xfrm_state_update (./include/linux/spinlock.h:396 net/xfrm/xfrm_state.c:1940) xfrm_add_sa (net/xfrm/xfrm_user.c:912) xfrm_user_rcv_msg (net/xfrm/xfrm_user.c:3321) 
netlink_rcv_skb (net/netlink/af_netlink.c:2536) xfrm_netlink_rcv (net/xfrm/xfrm_user.c:3344) netlink_unicast (net/netlink/af_netlink.c:1316 net/netlink/af_netlink.c:1342) netlink_sendmsg (net/netlink/af_netlink.c:1886) sock_write_iter (net/socket.c:729 net/socket.c:744 net/socket.c:1165) vfs_write (fs/read_write.c:590 fs/read_write.c:683) ksys_write (fs/read_write.c:736) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
Fixes: a8a572a6b5f2 ("xfrm: dst_entries_init() per-net dst_ops") Reported-by: Ilya Maximets i.maximets@ovn.org Closes: https://lore.kernel.org/netdev/CANn89iKKYDVpB=MtmfH7nyv2p=rJWSLedO5k7wSZgtY_... Signed-off-by: Eric Dumazet edumazet@google.com Acked-by: Paolo Abeni pabeni@redhat.com Reviewed-by: Kuniyuki Iwashima kuniyu@amazon.com Link: https://patch.msgid.link/20241204125455.3871859-1-edumazet@google.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Vasiliy Kovalev kovalev@altlinux.org --- include/net/net_namespace.h | 1 + net/core/net_namespace.c | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index eb0e7731f3b1c8..df95d553923937 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -80,6 +80,7 @@ struct net { * or to unregister pernet ops * (pernet_ops_rwsem write locked). */ + struct llist_node defer_free_list; struct llist_node cleanup_list; /* namespaces on death row */
#ifdef CONFIG_KEYS diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ef19a0eaa55aa3..8cadb4c860749c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -445,11 +445,28 @@ static struct net *net_alloc(void) goto out; }
+static LLIST_HEAD(defer_free_list); + +static void net_complete_free(void) +{ + struct llist_node *kill_list; + struct net *net, *next; + + /* Get the list of namespaces to free from last round. */ + kill_list = llist_del_all(&defer_free_list); + + llist_for_each_entry_safe(net, next, kill_list, defer_free_list) + kmem_cache_free(net_cachep, net); + +} + static void net_free(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); - kmem_cache_free(net_cachep, net); + + /* Wait for an extra rcu_barrier() before final free. */ + llist_add(&net->defer_free_list, &defer_free_list); } }
@@ -624,6 +641,8 @@ static void cleanup_net(struct work_struct *work) */ rcu_barrier();
+ net_complete_free(); + /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list);
[ Sasha's backport helper bot ]
Hi,
The upstream commit SHA1 provided is correct: 0f6ede9fbc747e2553612271bce108f7517e7a45
WARNING: Author mismatch between patch and upstream commit:
Backport author: Vasiliy Kovalev <kovalev@altlinux.org>
Commit author: Eric Dumazet <edumazet@google.com>
Status in newer kernel trees:
6.13.y | Branch not found
6.12.y | Present (different SHA1: 6610c7f8a8d4)
6.6.y | Present (different SHA1: b7a79e51297f)
6.1.y | Present (different SHA1: 3267b254dc0a)
5.15.y | Not found
5.10.y | Not found
Note: The patch differs from the upstream commit: --- Failed to apply patch cleanly, falling back to interdiff... ---
Results of testing on various branches:
| Branch                    | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-5.10.y       | Success     | Success    |
linux-stable-mirror@lists.linaro.org