From: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
commit 58a039e679fe72bd0efa8b2abe669a7914bb4429 upstream.
Commit ea7e2d5e49c0 ("mm: call the security_mmap_file() LSM hook in
remap_file_pages()") fixed a security issue: it added an LSM check when
remapping file pages, so that LSMs have the opportunity to evaluate such
an action, as they do for other memory operations such as mmap() and
mprotect().
However, that commit called security_mmap_file() while holding
mmap_lock, whereas the other callers invoke it before taking the lock,
following commit 8b3ec6814c83 ("take security_mmap_file() outside of
->mmap_sem").
This caused a lock inversion with IMA, which takes mmap_lock and i_mutex
in the opposite order when the remap_file_pages() system call is
invoked.
Solve the issue by splitting the critical region in remap_file_pages()
into two regions: the first takes a read lock on mmap_lock, retrieves
the VMA and its associated file, and calculates the 'prot' and 'flags'
variables; the second takes a write lock on mmap_lock, checks that the
VMA flags and the VMA file are the same as the ones obtained in the
first critical region (otherwise the system call fails), and calls
do_mmap().
In between, after releasing the read lock and before taking the write
lock, call security_mmap_file(); doing the check without mmap_lock held
resolves the lock inversion.
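In outline, the reworked syscall body looks as follows (a condensed
sketch of the hunks below; the range checks against vma->vm_end and the
deprecation warning are trimmed):

        /* Phase 1: read lock -- snapshot the VMA state for the check. */
        if (mmap_read_lock_killable(mm))
                return -EINTR;
        vma = vma_lookup(mm, start);
        if (!vma || !(vma->vm_flags & VM_SHARED)) {
                mmap_read_unlock(mm);
                return -EINVAL;
        }
        prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
        prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
        prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
        flags &= MAP_NONBLOCK;
        flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
        if (vma->vm_flags & VM_LOCKED)
                flags |= MAP_LOCKED;
        vm_flags = vma->vm_flags;       /* saved for revalidation */
        file = get_file(vma->vm_file);  /* pin the file across unlock */
        mmap_read_unlock(mm);

        /* No mmap_lock held: safe for LSMs such as IMA taking i_mutex. */
        ret = security_mmap_file(file, prot, flags);
        if (ret) {
                fput(file);
                return ret;
        }

        /* Phase 2: write lock -- revalidate, then do the real work. */
        if (mmap_write_lock_killable(mm)) {
                fput(file);
                return -EINTR;
        }
        vma = vma_lookup(mm, start);
        if (vma && vma->vm_flags == vm_flags && vma->vm_file == file)
                ret = do_mmap(vma->vm_file, start, size, prot, flags,
                              pgoff, &populate, NULL);
        mmap_write_unlock(mm);
        fput(file);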
Link: https://lkml.kernel.org/r/20241018161415.3845146-1-roberto.sassu@huaweiclou…
Fixes: ea7e2d5e49c0 ("mm: call the security_mmap_file() LSM hook in remap_file_pages()")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reported-by: syzbot+1cd571a672400ef3a930@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-security-module/66f7b10e.050a0220.46d20.0036.…
Tested-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Paul Moore <paul@paul-moore.com>
Tested-by: syzbot+1cd571a672400ef3a930@syzkaller.appspotmail.com
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
Cc: Eric Snowberg <eric.snowberg@oracle.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Shu Han <ebpqwerty472123@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alexey Panov <apanov@astralinux.ru>
---
Tested with the syzkaller reproducers for
https://syzkaller.appspot.com/bug?extid=1cd571a672400ef3a930 and
https://syzkaller.appspot.com/bug?extid=b02bbe0ff80a09a08c1b:
the issue triggers on vanilla v6.1.y and no longer reproduces with this
patch applied.
mm/mmap.c | 69 +++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 52 insertions(+), 17 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index f93526b3df0c..5672d22be7e7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2974,6 +2974,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
unsigned long populate = 0;
unsigned long ret = -EINVAL;
struct file *file;
+ vm_flags_t vm_flags;
pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/mm/remap_file_pages.rst.\n",
current->comm, current->pid);
@@ -2990,12 +2991,60 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (pgoff + (size >> PAGE_SHIFT) < pgoff)
return ret;
- if (mmap_write_lock_killable(mm))
+ if (mmap_read_lock_killable(mm))
return -EINTR;
+ /*
+ * Look up VMA under read lock first so we can perform the security
+ * check without holding locks (which can be problematic). We reacquire
+ * a write lock later and check nothing changed underneath us.
+ */
vma = vma_lookup(mm, start);
- if (!vma || !(vma->vm_flags & VM_SHARED))
+ if (!vma || !(vma->vm_flags & VM_SHARED)) {
+ mmap_read_unlock(mm);
+ return -EINVAL;
+ }
+
+ prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+ prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+ prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+ flags &= MAP_NONBLOCK;
+ flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+ if (vma->vm_flags & VM_LOCKED)
+ flags |= MAP_LOCKED;
+
+ /* Save vm_flags used to calculate prot and flags, and recheck later. */
+ vm_flags = vma->vm_flags;
+ file = get_file(vma->vm_file);
+
+ mmap_read_unlock(mm);
+
+ /* Call outside mmap_lock to be consistent with other callers. */
+ ret = security_mmap_file(file, prot, flags);
+ if (ret) {
+ fput(file);
+ return ret;
+ }
+
+ ret = -EINVAL;
+
+ /* OK security check passed, take write lock + let it rip. */
+ if (mmap_write_lock_killable(mm)) {
+ fput(file);
+ return -EINTR;
+ }
+
+ vma = vma_lookup(mm, start);
+
+ if (!vma)
+ goto out;
+
+ /* Make sure things didn't change under us. */
+ if (vma->vm_flags != vm_flags)
+ goto out;
+ if (vma->vm_file != file)
goto out;
if (start + size > vma->vm_end) {
@@ -3023,25 +3072,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
goto out;
}
- prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
- prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
- prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
-
- flags &= MAP_NONBLOCK;
- flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
- if (vma->vm_flags & VM_LOCKED)
- flags |= MAP_LOCKED;
-
- file = get_file(vma->vm_file);
- ret = security_mmap_file(vma->vm_file, prot, flags);
- if (ret)
- goto out_fput;
ret = do_mmap(vma->vm_file, start, size,
prot, flags, pgoff, &populate, NULL);
-out_fput:
- fput(file);
out:
mmap_write_unlock(mm);
+ fput(file);
if (populate)
mm_populate(ret, populate);
if (!IS_ERR_VALUE(ret))
--
2.30.2
The patch titled
Subject: arm64: kernel: initialize missing kexec_buf->random field
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
arm64-kernel-initialize-missing-kexec_buf-random-field.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Yeoreum Yun <yeoreum.yun@arm.com>
Subject: arm64: kernel: initialize missing kexec_buf->random field
Date: Thu, 27 Nov 2025 18:26:44 +0000
Commit bf454ec31add ("kexec_file: allow to place kexec_buf randomly")
introduced the kexec_buf->random field to enable random placement of
kexec_buf.
However, this field was never properly initialized for kexec images that
do not need to be placed randomly, leading to the following UBSAN warning:
[ +0.364528] ------------[ cut here ]------------
[ +0.000019] UBSAN: invalid-load in ./include/linux/kexec.h:210:12
[ +0.000131] load of value 2 is not a valid value for type 'bool' (aka '_Bool')
[ +0.000003] CPU: 4 UID: 0 PID: 927 Comm: kexec Not tainted 6.18.0-rc7+ #3 PREEMPT(full)
[ +0.000002] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
[ +0.000000] Call trace:
[ +0.000001] show_stack+0x24/0x40 (C)
[ +0.000006] __dump_stack+0x28/0x48
[ +0.000002] dump_stack_lvl+0x7c/0xb0
[ +0.000002] dump_stack+0x18/0x34
[ +0.000001] ubsan_epilogue+0x10/0x50
[ +0.000002] __ubsan_handle_load_invalid_value+0xc8/0xd0
[ +0.000003] locate_mem_hole_callback+0x28c/0x2a0
[ +0.000003] kexec_locate_mem_hole+0xf4/0x2f0
[ +0.000001] kexec_add_buffer+0xa8/0x178
[ +0.000002] image_load+0xf0/0x258
[ +0.000001] __arm64_sys_kexec_file_load+0x510/0x718
[ +0.000002] invoke_syscall+0x68/0xe8
[ +0.000001] el0_svc_common+0xb0/0xf8
[ +0.000002] do_el0_svc+0x28/0x48
[ +0.000001] el0_svc+0x40/0xe8
[ +0.000002] el0t_64_sync_handler+0x84/0x140
[ +0.000002] el0t_64_sync+0x1bc/0x1c0
To address this, initialize the kexec_buf->random field properly.
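For illustration only, here is a minimal userspace program (hypothetical
struct and names, not kernel code) that triggers the same class of UBSAN
report when built with -fsanitize=bool:

        /* ubsan-bool-demo.c: gcc -fsanitize=bool ubsan-bool-demo.c && ./a.out */
        #include <stdio.h>
        #include <string.h>

        struct buf_desc {               /* stand-in for struct kexec_buf */
                unsigned long base;
                _Bool random;           /* never given a valid value */
        };

        int main(void)
        {
                struct buf_desc kbuf;
                /* Simulate indeterminate stack memory holding the value 2. */
                memset(&kbuf, 2, sizeof(kbuf));

                /* UBSAN reports "load of value 2 is not a valid value for
                 * type '_Bool'" at this load, matching the warning above. */
                if (kbuf.random)
                        puts("randomized placement");
                return 0;
        }

Note that the designated-initializer form in load_other_segments()
already zero-fills unnamed members; the explicit .random = false there
mainly documents the intent, while the store added to image_load(),
which fills kbuf in field by field, is what removes the load of an
indeterminate value.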
Link: https://lkml.kernel.org/r/20251127182644.1577592-1-yeoreum.yun@arm.com
Fixes: bf454ec31add ("kexec_file: allow to place kexec_buf randomly")
Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Coiby Xu <coxu@redhat.com>
Cc: levi.yun <yeoreum.yun@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Jan Pazdziora <jpazdziora@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
arch/arm64/kernel/kexec_image.c | 3 +++
arch/arm64/kernel/machine_kexec_file.c | 6 +++++-
2 files changed, 8 insertions(+), 1 deletion(-)
--- a/arch/arm64/kernel/kexec_image.c~arm64-kernel-initialize-missing-kexec_buf-random-field
+++ a/arch/arm64/kernel/kexec_image.c
@@ -76,6 +76,9 @@ static void *image_load(struct kimage *i
kbuf.buf_min = 0;
kbuf.buf_max = ULONG_MAX;
kbuf.top_down = false;
+#ifdef CONFIG_CRASH_DUMP
+ kbuf.random = false;
+#endif
kbuf.buffer = kernel;
kbuf.bufsz = kernel_len;
--- a/arch/arm64/kernel/machine_kexec_file.c~arm64-kernel-initialize-missing-kexec_buf-random-field
+++ a/arch/arm64/kernel/machine_kexec_file.c
@@ -94,7 +94,11 @@ int load_other_segments(struct kimage *i
char *initrd, unsigned long initrd_len,
char *cmdline)
{
- struct kexec_buf kbuf = {};
+ struct kexec_buf kbuf = {
+#ifdef CONFIG_CRASH_DUMP
+ .random = false,
+#endif
+ };
void *dtb = NULL;
unsigned long initrd_load_addr = 0, dtb_len,
orig_segments = image->nr_segments;
_
Patches currently in -mm which might be from yeoreum.yun@arm.com are
arm64-kernel-initialize-missing-kexec_buf-random-field.patch
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x ae155060247be8dcae3802a95bd1bdf93ab3215d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025112433-making-debatable-abfe@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae155060247be8dcae3802a95bd1bdf93ab3215d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 18 Nov 2025 08:20:24 +0100
Subject: [PATCH] mptcp: fix duplicate reset on fastclose
The CI reports sporadic failures of the fastclose self-tests. The root
cause is a duplicate reset that does not carry the relevant MPTCP
option.
In the failing scenario, the bad reset is received by the peer before
the fastclose one, preventing the reception of the latter.
Indeed, there is a window of opportunity at fastclose time for the
following race:
mptcp_do_fastclose
__mptcp_close_ssk
__tcp_close()
tcp_set_state() [1]
tcp_send_active_reset() [2]
After [1], the stack will send a plain reset in reply to in-flight data
reaching the now-closed port. Such a reset may race with [2].
Address the issue by explicitly sending a single reset on fastclose,
before moving the subflow to the close state.
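Condensed, the fixed mptcp_do_fastclose() loop in the diff below
becomes:

        /* Send the single MPTCP-level reset while the subflow is still
         * established, before any state change can provoke a plain TCP
         * reset toward the peer. */
        mptcp_for_each_subflow_safe(msk, subflow, tmp) {
                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

                lock_sock(ssk);
                if (!((1 << ssk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) {
                        subflow->send_fastclose = 1;
                        tcp_send_active_reset(ssk, ssk->sk_allocation,
                                              SK_RST_REASON_TCP_ABORT_ON_CLOSE);
                }
                release_sock(ssk);
        }
        /* __mptcp_close_ssk() later moves the subflow to TCP_CLOSE,
         * after the fastclose reset is already on the wire. */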
Fixes: d21f83485518 ("mptcp: use fastclose on more edge scenarios")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/596
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Geliang Tang <geliang@kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-6-806d37…
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c59246c1fde6..a70267a74e3c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2409,7 +2409,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
/* flags for __mptcp_close_ssk() */
#define MPTCP_CF_PUSH BIT(1)
-#define MPTCP_CF_FASTCLOSE BIT(2)
/* be sure to send a reset only if the caller asked for it, also
* clean completely the subflow status when the subflow reaches
@@ -2420,7 +2419,7 @@ static void __mptcp_subflow_disconnect(struct sock *ssk,
unsigned int flags)
{
if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
- (flags & MPTCP_CF_FASTCLOSE)) {
+ subflow->send_fastclose) {
/* The MPTCP code never wait on the subflow sockets, TCP-level
* disconnect should never fail
*/
@@ -2467,14 +2466,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
- /* be sure to force the tcp_close path
- * to generate the egress reset
- */
- ssk->sk_lingertime = 0;
- sock_set_flag(ssk, SOCK_LINGER);
- subflow->send_fastclose = 1;
- }
+ if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE)
+ tcp_set_state(ssk, TCP_CLOSE);
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
@@ -2779,9 +2772,26 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
mptcp_set_state(sk, TCP_CLOSE);
- mptcp_for_each_subflow_safe(msk, subflow, tmp)
- __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
- subflow, MPTCP_CF_FASTCLOSE);
+
+ /* Explicitly send the fastclose reset as needed */
+ if (__mptcp_check_fallback(msk))
+ return;
+
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+
+ /* Some subflow socket states don't allow/need a reset. */
+ if ((1 << ssk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ goto unlock;
+
+ subflow->send_fastclose = 1;
+ tcp_send_active_reset(ssk, ssk->sk_allocation,
+ SK_RST_REASON_TCP_ABORT_ON_CLOSE);
+unlock:
+ release_sock(ssk);
+ }
}
static void mptcp_worker(struct work_struct *work)