6.12-stable review patch. If anyone has any objections, please let me know.
------------------
From: Omar Sandoval <osandov(a)fb.com>
commit 4da3768e1820cf15cced390242d8789aed34f54d upstream.
When re-injecting a soft interrupt from an INT3, INT0, or (select) INTn
instruction, discard the exception and retry the instruction if the code
stream is changed (e.g. by a different vCPU) between when the CPU
executes the instruction and when KVM decodes the instruction to get the
next RIP.
As effectively predicted by commit 6ef88d6e36c2 ("KVM: SVM: Re-inject
INT3/INTO instead of retrying the instruction"), failure to verify that
the correct INTn instruction was decoded can effectively clobber guest
state due to decoding the wrong instruction and thus specifying the
wrong next RIP.
The bug most often manifests as "Oops: int3" panics on static branch
checks in Linux guests. Enabling or disabling a static branch in Linux
uses the kernel's "text poke" code patching mechanism. To modify code
while other CPUs may be executing that code, Linux (temporarily)
replaces the first byte of the original instruction with an int3 (opcode
0xcc), then patches in the new code stream except for the first byte,
and finally replaces the int3 with the first byte of the new code
stream. If a CPU hits the int3, i.e. executes the code while it's being
modified, then the guest kernel must look up the RIP to determine how to
handle the #BP, e.g. by emulating the new instruction. If the RIP is
incorrect, then this lookup fails and the guest kernel panics.
The bug reproduces almost instantly by hacking the guest kernel to
repeatedly check a static branch[1] while running a drgn script[2] on
the host to constantly swap out the memory containing the guest's TSS.
[1]: https://gist.github.com/osandov/44d17c51c28c0ac998ea0334edf90b5a
[2]: https://gist.github.com/osandov/10e45e45afa29b11e0c7209247afc00b
Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction")
Cc: stable(a)vger.kernel.org
Co-developed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Link: https://patch.msgid.link/1cc6dcdf36e3add7ee7c8d90ad58414eeb6c3d34.176227876…
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/kvm_host.h | 9 +++++++++
arch/x86/kvm/svm/svm.c | 24 +++++++++++++-----------
arch/x86/kvm/x86.c | 21 +++++++++++++++++++++
3 files changed, 43 insertions(+), 11 deletions(-)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2059,6 +2059,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
* the gfn, i.e. retrying the instruction will hit a
* !PRESENT fault, which results in a new shadow page
* and sends KVM back to square one.
+ *
+ * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip
+ * an instruction if it could generate a given software
+ * interrupt, which must be encoded via
+ * EMULTYPE_SET_SOFT_INT_VECTOR().
*/
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
@@ -2069,6 +2074,10 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_PF (1 << 6)
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
+#define EMULTYPE_SKIP_SOFT_INT (1 << 9)
+
+#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16)
+#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff)
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -369,6 +369,7 @@ static void svm_set_interrupt_shadow(str
}
static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
+ int emul_type,
bool commit_side_effects)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -390,7 +391,7 @@ static int __svm_skip_emulated_instructi
if (unlikely(!commit_side_effects))
old_rflags = svm->vmcb->save.rflags;
- if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+ if (!kvm_emulate_instruction(vcpu, emul_type))
return 0;
if (unlikely(!commit_side_effects))
@@ -408,11 +409,13 @@ done:
static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
- return __svm_skip_emulated_instruction(vcpu, true);
+ return __svm_skip_emulated_instruction(vcpu, EMULTYPE_SKIP, true);
}
-static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
+static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu, u8 vector)
{
+ const int emul_type = EMULTYPE_SKIP | EMULTYPE_SKIP_SOFT_INT |
+ EMULTYPE_SET_SOFT_INT_VECTOR(vector);
unsigned long rip, old_rip = kvm_rip_read(vcpu);
struct vcpu_svm *svm = to_svm(vcpu);
@@ -428,7 +431,7 @@ static int svm_update_soft_interrupt_rip
* in use, the skip must not commit any side effects such as clearing
* the interrupt shadow or RFLAGS.RF.
*/
- if (!__svm_skip_emulated_instruction(vcpu, !nrips))
+ if (!__svm_skip_emulated_instruction(vcpu, emul_type, !nrips))
return -EIO;
rip = kvm_rip_read(vcpu);
@@ -464,7 +467,7 @@ static void svm_inject_exception(struct
kvm_deliver_exception_payload(vcpu, ex);
if (kvm_exception_is_soft(ex->vector) &&
- svm_update_soft_interrupt_rip(vcpu))
+ svm_update_soft_interrupt_rip(vcpu, ex->vector))
return;
svm->vmcb->control.event_inj = ex->vector
@@ -3743,11 +3746,12 @@ static bool svm_set_vnmi_pending(struct
static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
+ struct kvm_queued_interrupt *intr = &vcpu->arch.interrupt;
struct vcpu_svm *svm = to_svm(vcpu);
u32 type;
- if (vcpu->arch.interrupt.soft) {
- if (svm_update_soft_interrupt_rip(vcpu))
+ if (intr->soft) {
+ if (svm_update_soft_interrupt_rip(vcpu, intr->nr))
return;
type = SVM_EVTINJ_TYPE_SOFT;
@@ -3755,12 +3759,10 @@ static void svm_inject_irq(struct kvm_vc
type = SVM_EVTINJ_TYPE_INTR;
}
- trace_kvm_inj_virq(vcpu->arch.interrupt.nr,
- vcpu->arch.interrupt.soft, reinjected);
+ trace_kvm_inj_virq(intr->nr, intr->soft, reinjected);
++vcpu->stat.irq_injections;
- svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
- SVM_EVTINJ_VALID | type;
+ svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type;
}
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9055,6 +9055,23 @@ static bool is_vmware_backdoor_opcode(st
return false;
}
+static bool is_soft_int_instruction(struct x86_emulate_ctxt *ctxt,
+ int emulation_type)
+{
+ u8 vector = EMULTYPE_GET_SOFT_INT_VECTOR(emulation_type);
+
+ switch (ctxt->b) {
+ case 0xcc:
+ return vector == BP_VECTOR;
+ case 0xcd:
+ return vector == ctxt->src.val;
+ case 0xce:
+ return vector == OF_VECTOR;
+ default:
+ return false;
+ }
+}
+
/*
* Decode an instruction for emulation. The caller is responsible for handling
* code breakpoints. Note, manually detecting code breakpoints is unnecessary
@@ -9156,6 +9173,10 @@ int x86_emulate_instruction(struct kvm_v
* injecting single-step #DBs.
*/
if (emulation_type & EMULTYPE_SKIP) {
+ if (emulation_type & EMULTYPE_SKIP_SOFT_INT &&
+ !is_soft_int_instruction(ctxt, emulation_type))
+ return 0;
+
if (ctxt->mode != X86EMUL_MODE_PROT64)
ctxt->eip = (u32)ctxt->_eip;
else
6.12-stable review patch. If anyone has any objections, please let me know.
------------------
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
commit a51f025b5038abd3d22eed2ede4cd46793d89565 upstream.
Syzbot identified an issue [1] in pcl818_ai_cancel(), which stems from
the fact that in case of early device detach via pcl818_detach(),
subdevice dev->read_subdev may not have initialized its pointer to
&struct comedi_async as intended. Thus, any such dereferencing of
&s->async->cmd will lead to general protection fault and kernel crash.
Mitigate this problem by removing a call to pcl818_ai_cancel() from
pcl818_detach() altogether. This way, if the subdevice setups its
support for async commands, everything async-related will be
handled via subdevice's own ->cancel() function in
comedi_device_detach_locked() even before pcl818_detach(). If no
support for asynchronous commands is provided, there is no need
to cancel anything either.
[1] Syzbot crash:
Oops: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
CPU: 1 UID: 0 PID: 6050 Comm: syz.0.18 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025
RIP: 0010:pcl818_ai_cancel+0x69/0x3f0 drivers/comedi/drivers/pcl818.c:762
...
Call Trace:
<TASK>
pcl818_detach+0x66/0xd0 drivers/comedi/drivers/pcl818.c:1115
comedi_device_detach_locked+0x178/0x750 drivers/comedi/drivers.c:207
do_devconfig_ioctl drivers/comedi/comedi_fops.c:848 [inline]
comedi_unlocked_ioctl+0xcde/0x1020 drivers/comedi/comedi_fops.c:2178
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:597 [inline]
...
Reported-by: syzbot+fce5d9d5bd067d6fbe9b(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=fce5d9d5bd067d6fbe9b
Fixes: 00aba6e7b565 ("staging: comedi: pcl818: remove 'neverending_ai' from private data")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://patch.msgid.link/20251023141457.398685-1-n.zhandarovich@fintech.ru
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/comedi/drivers/pcl818.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
--- a/drivers/comedi/drivers/pcl818.c
+++ b/drivers/comedi/drivers/pcl818.c
@@ -1111,10 +1111,9 @@ static void pcl818_detach(struct comedi_
{
struct pcl818_private *devpriv = dev->private;
- if (devpriv) {
- pcl818_ai_cancel(dev, dev->read_subdev);
+ if (devpriv)
pcl818_reset(dev);
- }
+
pcl818_free_dma(dev);
comedi_legacy_detach(dev);
}
6.12-stable review patch. If anyone has any objections, please let me know.
------------------
From: Ye Bin <yebin10(a)huawei.com>
commit 986835bf4d11032bba4ab8414d18fce038c61bb4 upstream.
There's issue when file system corrupted:
------------[ cut here ]------------
kernel BUG at fs/jbd2/transaction.c:1289!
Oops: invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 5 UID: 0 PID: 2031 Comm: mkdir Not tainted 6.18.0-rc1-next
RIP: 0010:jbd2_journal_get_create_access+0x3b6/0x4d0
RSP: 0018:ffff888117aafa30 EFLAGS: 00010202
RAX: 0000000000000000 RBX: ffff88811a86b000 RCX: ffffffff89a63534
RDX: 1ffff110200ec602 RSI: 0000000000000004 RDI: ffff888100763010
RBP: ffff888100763000 R08: 0000000000000001 R09: ffff888100763028
R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000000
R13: ffff88812c432000 R14: ffff88812c608000 R15: ffff888120bfc000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f91d6970c99 CR3: 00000001159c4000 CR4: 00000000000006f0
Call Trace:
<TASK>
__ext4_journal_get_create_access+0x42/0x170
ext4_getblk+0x319/0x6f0
ext4_bread+0x11/0x100
ext4_append+0x1e6/0x4a0
ext4_init_new_dir+0x145/0x1d0
ext4_mkdir+0x326/0x920
vfs_mkdir+0x45c/0x740
do_mkdirat+0x234/0x2f0
__x64_sys_mkdir+0xd6/0x120
do_syscall_64+0x5f/0xfa0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The above issue occurs with us in errors=continue mode when accompanied by
storage failures. There have been many inconsistencies in the file system
data.
In the case of file system data inconsistency, for example, if the block
bitmap of a referenced block is not set, it can lead to the situation where
a block being committed is allocated and used again. As a result, the
following condition will not be satisfied then trigger BUG_ON. Of course,
it is entirely possible to construct a problematic image that can trigger
this BUG_ON through specific operations. In fact, I have constructed such
an image and easily reproduced this issue.
Therefore, J_ASSERT() holds true only under ideal conditions, but it may
not necessarily be satisfied in exceptional scenarios. Using J_ASSERT()
directly in abnormal situations would cause the system to crash, which is
clearly not what we want. So here we directly trigger a JBD abort instead
of immediately invoking BUG_ON.
Fixes: 470decc613ab ("[PATCH] jbd2: initial copy of files from jbd")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251025072657.307851-1-yebin(a)huaweicloud.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/jbd2/transaction.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1288,14 +1288,23 @@ int jbd2_journal_get_create_access(handl
* committing transaction's lists, but it HAS to be in Forget state in
* that case: the transaction must have deleted the buffer for it to be
* reused here.
+ * In the case of file system data inconsistency, for example, if the
+ * block bitmap of a referenced block is not set, it can lead to the
+ * situation where a block being committed is allocated and used again.
+ * As a result, the following condition will not be satisfied, so here
+ * we directly trigger a JBD abort instead of immediately invoking
+ * bugon.
*/
spin_lock(&jh->b_state_lock);
- J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
- jh->b_transaction == NULL ||
- (jh->b_transaction == journal->j_committing_transaction &&
- jh->b_jlist == BJ_Forget)));
+ if (!(jh->b_transaction == transaction || jh->b_transaction == NULL ||
+ (jh->b_transaction == journal->j_committing_transaction &&
+ jh->b_jlist == BJ_Forget)) || jh->b_next_transaction != NULL) {
+ err = -EROFS;
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_abort(journal, err);
+ goto out;
+ }
- J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
if (jh->b_transaction == NULL) {
6.17-stable review patch. If anyone has any objections, please let me know.
------------------
From: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
commit c14ecb555c3ee80eeb030a4e46d00e679537f03a upstream.
KCSAN reports:
BUG: KCSAN: data-race in do_raw_write_lock / do_raw_write_lock
write (marked) to 0xffff800009cf504c of 4 bytes by task 1102 on cpu 1:
do_raw_write_lock+0x120/0x204
_raw_write_lock_irq
do_exit
call_usermodehelper_exec_async
ret_from_fork
read to 0xffff800009cf504c of 4 bytes by task 1103 on cpu 0:
do_raw_write_lock+0x88/0x204
_raw_write_lock_irq
do_exit
call_usermodehelper_exec_async
ret_from_fork
value changed: 0xffffffff -> 0x00000001
Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 1103 Comm: kworker/u4:1 6.1.111
Commit 1a365e822372 ("locking/spinlock/debug: Fix various data races") has
adressed most of these races, but seems to be not consistent/not complete.
>From do_raw_write_lock() only debug_write_lock_after() part has been
converted to WRITE_ONCE(), but not debug_write_lock_before() part.
Do it now.
Fixes: 1a365e822372 ("locking/spinlock/debug: Fix various data races")
Reported-by: Adrian Freihofer <adrian.freihofer(a)siemens.com>
Signed-off-by: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
Signed-off-by: Boqun Feng <boqun.feng(a)gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Reviewed-by: Paul E. McKenney <paulmck(a)kernel.org>
Acked-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/locking/spinlock_debug.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -184,8 +184,8 @@ void do_raw_read_unlock(rwlock_t *lock)
static inline void debug_write_lock_before(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
- RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
- RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+ RWLOCK_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+ RWLOCK_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
lock, "cpu recursion");
}
6.12-stable review patch. If anyone has any objections, please let me know.
------------------
From: Bagas Sanjaya <bagasdotme(a)gmail.com>
commit ba2457109d5b47a90fe565b39524f7225fc23e60 upstream.
Sasha has also maintaining stable branch in conjunction with Greg
since cb5d21946d2a2f ("MAINTAINERS: Add Sasha as a stable branch
maintainer"). Mention him in 2.Process.rst.
Cc: stable(a)vger.kernel.org
Signed-off-by: Bagas Sanjaya <bagasdotme(a)gmail.com>
Reviewed-by: Randy Dunlap <rdunlap(a)infradead.org>
Acked-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Jonathan Corbet <corbet(a)lwn.net>
Message-ID: <20251022034336.22839-1-bagasdotme(a)gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
Documentation/process/2.Process.rst | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/Documentation/process/2.Process.rst
+++ b/Documentation/process/2.Process.rst
@@ -104,8 +104,10 @@ kernels go out with a handful of known r
of them are serious.
Once a stable release is made, its ongoing maintenance is passed off to the
-"stable team," currently Greg Kroah-Hartman. The stable team will release
-occasional updates to the stable release using the 5.x.y numbering scheme.
+"stable team," currently consists of Greg Kroah-Hartman and Sasha Levin. The
+stable team will release occasional updates to the stable release using the
+5.x.y numbering scheme.
+
To be considered for an update release, a patch must (1) fix a significant
bug, and (2) already be merged into the mainline for the next development
kernel. Kernels will typically receive stable updates for a little more
6.12-stable review patch. If anyone has any objections, please let me know.
------------------
From: Sabrina Dubroca <sd(a)queasysnail.net>
[ Upstream commit 42e42562c9cfcdacf000f1b42284a4fad24f8546 ]
While reverting commit f75a2804da39 ("xfrm: destroy xfrm_state
synchronously on net exit path"), I incorrectly changed
xfrm_state_flush's "proto" argument back to IPSEC_PROTO_ANY. This
reverts some of the changes in commit dbb2483b2a46 ("xfrm: clean up
xfrm protocol checks"), and leads to some states not being removed
when we exit the netns.
Pass 0 instead of IPSEC_PROTO_ANY from both xfrm_state_fini
xfrm6_tunnel_net_exit, so that xfrm_state_flush deletes all states.
Fixes: 2a198bbec691 ("Revert "xfrm: destroy xfrm_state synchronously on net exit path"")
Reported-by: syzbot+6641a61fe0e2e89ae8c5(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=6641a61fe0e2e89ae8c5
Tested-by: syzbot+6641a61fe0e2e89ae8c5(a)syzkaller.appspotmail.com
Signed-off-by: Sabrina Dubroca <sd(a)queasysnail.net>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
net/ipv6/xfrm6_tunnel.c | 2 +-
net/xfrm/xfrm_state.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 5120a763da0d9..0a0eeaed05910 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -334,7 +334,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
+ xfrm_state_flush(net, 0, false);
xfrm_flush_gc();
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3a79ebcbbc369..b9bac68364527 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -3214,7 +3214,7 @@ void xfrm_state_fini(struct net *net)
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
+ xfrm_state_flush(net, 0, false);
flush_work(&xfrm_state_gc_work);
WARN_ON(!list_empty(&net->xfrm.state_all));
--
2.51.0
6.17-stable review patch. If anyone has any objections, please let me know.
------------------
From: Qianchang Zhao <pioooooooooip(a)gmail.com>
commit 1fab1fa091f5aa97265648b53ea031deedd26235 upstream.
ipc_msg_send_request() waits for a generic netlink reply using an
ipc_msg_table_entry on the stack. The generic netlink handler
(handle_generic_event()/handle_response()) fills entry->response under
ipc_msg_table_lock, but ipc_msg_send_request() used to validate and free
entry->response without holding the same lock.
Under high concurrency this allows a race where handle_response() is
copying data into entry->response while ipc_msg_send_request() has just
freed it, leading to a slab-use-after-free reported by KASAN in
handle_generic_event():
BUG: KASAN: slab-use-after-free in handle_generic_event+0x3c4/0x5f0 [ksmbd]
Write of size 12 at addr ffff888198ee6e20 by task pool/109349
...
Freed by task:
kvfree
ipc_msg_send_request [ksmbd]
ksmbd_rpc_open -> ksmbd_session_rpc_open [ksmbd]
Fix by:
- Taking ipc_msg_table_lock in ipc_msg_send_request() while validating
entry->response, freeing it when invalid, and removing the entry from
ipc_msg_table.
- Returning the final entry->response pointer to the caller only after
the hash entry is removed under the lock.
- Returning NULL in the error path, preserving the original API
semantics.
This makes all accesses to entry->response consistent with
handle_response(), which already updates and fills the response buffer
under ipc_msg_table_lock, and closes the race that allowed the UAF.
Cc: stable(a)vger.kernel.org
Reported-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
Reported-by: Zhitong Liu <liuzhitong1993(a)gmail.com>
Signed-off-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/smb/server/transport_ipc.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -553,12 +553,16 @@ static void *ipc_msg_send_request(struct
up_write(&ipc_msg_table_lock);
ret = ipc_msg_send(msg);
- if (ret)
+ if (ret) {
+ down_write(&ipc_msg_table_lock);
goto out;
+ }
ret = wait_event_interruptible_timeout(entry.wait,
entry.response != NULL,
IPC_WAIT_TIMEOUT);
+
+ down_write(&ipc_msg_table_lock);
if (entry.response) {
ret = ipc_validate_msg(&entry);
if (ret) {
@@ -567,7 +571,6 @@ static void *ipc_msg_send_request(struct
}
}
out:
- down_write(&ipc_msg_table_lock);
hash_del(&entry.ipc_table_hlist);
up_write(&ipc_msg_table_lock);
return entry.response;