The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 6eb350a2233100a283f882c023e5ad426d0ed63b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101556-riches-vivacious-ee8f@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6eb350a2233100a283f882c023e5ad426d0ed63b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Wed, 13 Aug 2025 17:02:30 +0200
Subject: [PATCH] rseq: Protect event mask against membarrier IPI
rseq_need_restart() reads and clears task::rseq_event_mask with preemption
disabled to guard against the scheduler.
But membarrier() uses an IPI and sets the PREEMPT bit in the event mask
from the IPI, which leaves that RMW operation unprotected.
Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that.
Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Boqun Feng <boqun.feng(a)gmail.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: stable(a)vger.kernel.org
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index bc8af3eb5598..1fbeb61babeb 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -7,6 +7,12 @@
#include <linux/preempt.h>
#include <linux/sched.h>
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD irq
+#else
+# define RSEQ_EVENT_GUARD preempt
+#endif
+
/*
* Map the event mask on the user-space ABI enum rseq_cs_flags
* for direct mask checks.
@@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig,
static inline void rseq_signal_deliver(struct ksignal *ksig,
struct pt_regs *regs)
{
- preempt_disable();
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD)
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
rseq_handle_notify_resume(ksig, regs);
}
diff --git a/kernel/rseq.c b/kernel/rseq.c
index b7a1ec327e81..2452b7366b00 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -342,12 +342,12 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
/*
* Load and clear event mask atomically with respect to
- * scheduler preemption.
+ * scheduler preemption and membarrier IPIs.
*/
- preempt_disable();
- event_mask = t->rseq_event_mask;
- t->rseq_event_mask = 0;
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD) {
+ event_mask = t->rseq_event_mask;
+ t->rseq_event_mask = 0;
+ }
return !!event_mask;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6eb350a2233100a283f882c023e5ad426d0ed63b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101556-district-timid-1eda@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6eb350a2233100a283f882c023e5ad426d0ed63b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Wed, 13 Aug 2025 17:02:30 +0200
Subject: [PATCH] rseq: Protect event mask against membarrier IPI
rseq_need_restart() reads and clears task::rseq_event_mask with preemption
disabled to guard against the scheduler.
But membarrier() uses an IPI and sets the PREEMPT bit in the event mask
from the IPI, which leaves that RMW operation unprotected.
Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that.
Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Boqun Feng <boqun.feng(a)gmail.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: stable(a)vger.kernel.org
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index bc8af3eb5598..1fbeb61babeb 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -7,6 +7,12 @@
#include <linux/preempt.h>
#include <linux/sched.h>
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD irq
+#else
+# define RSEQ_EVENT_GUARD preempt
+#endif
+
/*
* Map the event mask on the user-space ABI enum rseq_cs_flags
* for direct mask checks.
@@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig,
static inline void rseq_signal_deliver(struct ksignal *ksig,
struct pt_regs *regs)
{
- preempt_disable();
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD)
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
rseq_handle_notify_resume(ksig, regs);
}
diff --git a/kernel/rseq.c b/kernel/rseq.c
index b7a1ec327e81..2452b7366b00 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -342,12 +342,12 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
/*
* Load and clear event mask atomically with respect to
- * scheduler preemption.
+ * scheduler preemption and membarrier IPIs.
*/
- preempt_disable();
- event_mask = t->rseq_event_mask;
- t->rseq_event_mask = 0;
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD) {
+ event_mask = t->rseq_event_mask;
+ t->rseq_event_mask = 0;
+ }
return !!event_mask;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5973a62efa34c80c9a4e5eac1fca6f6209b902af
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101551-unsealed-whisking-8514@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5973a62efa34c80c9a4e5eac1fca6f6209b902af Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Fri, 19 Sep 2025 14:27:51 -0700
Subject: [PATCH] arm64: map [_text, _stext) virtual address range
non-executable+read-only
Since the referenced fixes commit, the kernel's .text section is only
mapped starting from _stext; the region [_text, _stext) is omitted. As a
result, other vmalloc/vmap allocations may use the virtual addresses
nominally in the range [_text, _stext). This address reuse confuses
multiple things:
1. crash_prepare_elf64_headers() sets up a segment in /proc/vmcore
mapping the entire range [_text, _end) to
[__pa_symbol(_text), __pa_symbol(_end)). Reading an address in
[_text, _stext) from /proc/vmcore therefore gives the incorrect
result.
2. Tools doing symbolization (either by reading /proc/kallsyms or based
on the vmlinux ELF file) will incorrectly identify vmalloc/vmap
allocations in [_text, _stext) as kernel symbols.
In practice, both of these issues affect the drgn debugger.
Specifically, there were cases where the vmap IRQ stacks for some CPUs
were allocated in [_text, _stext). As a result, drgn could not get the
stack trace for a crash in an IRQ handler because the core dump
contained invalid data for the IRQ stack address. The stack addresses
were also symbolized as being in the _text symbol.
Fix this by bringing back the mapping of [_text, _stext), but now make
it non-executable and read-only. This prevents other allocations from
using it while still achieving the original goal of not mapping
unpredictable data as executable. Other than the changed protection,
this is effectively a revert of the fixes commit.
Fixes: e2a073dde921 ("arm64: omit [_text, _stext) from permanent kernel mapping")
Cc: stable(a)vger.kernel.org
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index e6d35eff1486..e8ddbde31a83 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -78,6 +78,12 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
twopass |= enable_scs;
prot = twopass ? data_prot : text_prot;
+ /*
+ * [_stext, _text) isn't executed after boot and contains some
+ * non-executable, unpredictable data, so map it non-executable.
+ */
+ map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
+ false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
!twopass, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 77c7926a4df6..23c05dc7a8f2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -214,7 +214,7 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
- kernel_code.start = __pa_symbol(_stext);
+ kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
@@ -280,7 +280,7 @@ u64 cpu_logical_map(unsigned int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- setup_initial_init_mm(_stext, _etext, _edata, _end);
+ setup_initial_init_mm(_text, _etext, _edata, _end);
*cmdline_p = boot_command_line;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 70c2ca813c18..524d34a0e921 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -279,7 +279,7 @@ void __init arm64_memblock_init(void)
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
- memblock_reserve(__pa_symbol(_stext), _end - _stext);
+ memblock_reserve(__pa_symbol(_text), _end - _text);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
/* the generic initrd code expects virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0ba1a15e7e74..10c258099581 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -965,8 +965,8 @@ void __init mark_linear_text_alias_ro(void)
/*
* Remove the write permissions from the linear alias of .text/.rodata
*/
- update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
- (unsigned long)__init_begin - (unsigned long)_stext,
+ update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
+ (unsigned long)__init_begin - (unsigned long)_text,
PAGE_KERNEL_RO);
}
@@ -1037,7 +1037,7 @@ static inline bool force_pte_mapping(void)
static void __init map_mem(pgd_t *pgdp)
{
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
- phys_addr_t kernel_start = __pa_symbol(_stext);
+ phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
phys_addr_t start, end;
phys_addr_t early_kfence_pool;
@@ -1086,7 +1086,7 @@ static void __init map_mem(pgd_t *pgdp)
}
/*
- * Map the linear alias of the [_stext, __init_begin) interval
+ * Map the linear alias of the [_text, __init_begin) interval
* as non-executable now, and remove the write permission in
* mark_linear_text_alias_ro() below (which will be called after
* alternative patching has completed). This makes the contents
@@ -1113,6 +1113,10 @@ void mark_rodata_ro(void)
WRITE_ONCE(rodata_is_rw, false);
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
+ /* mark the range between _text and _stext as read only. */
+ update_mapping_prot(__pa_symbol(_text), (unsigned long)_text,
+ (unsigned long)_stext - (unsigned long)_text,
+ PAGE_KERNEL_RO);
}
static void __init declare_vma(struct vm_struct *vma,
@@ -1183,7 +1187,7 @@ static void __init declare_kernel_vmas(void)
{
static struct vm_struct vmlinux_seg[KERNEL_SEGMENT_COUNT];
- declare_vma(&vmlinux_seg[0], _stext, _etext, VM_NO_GUARD);
+ declare_vma(&vmlinux_seg[0], _text, _etext, VM_NO_GUARD);
declare_vma(&vmlinux_seg[1], __start_rodata, __inittext_begin, VM_NO_GUARD);
declare_vma(&vmlinux_seg[2], __inittext_begin, __inittext_end, VM_NO_GUARD);
declare_vma(&vmlinux_seg[3], __initdata_begin, __initdata_end, VM_NO_GUARD);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5973a62efa34c80c9a4e5eac1fca6f6209b902af
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101549-slurp-darkened-955a@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5973a62efa34c80c9a4e5eac1fca6f6209b902af Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Fri, 19 Sep 2025 14:27:51 -0700
Subject: [PATCH] arm64: map [_text, _stext) virtual address range
non-executable+read-only
Since the referenced fixes commit, the kernel's .text section is only
mapped starting from _stext; the region [_text, _stext) is omitted. As a
result, other vmalloc/vmap allocations may use the virtual addresses
nominally in the range [_text, _stext). This address reuse confuses
multiple things:
1. crash_prepare_elf64_headers() sets up a segment in /proc/vmcore
mapping the entire range [_text, _end) to
[__pa_symbol(_text), __pa_symbol(_end)). Reading an address in
[_text, _stext) from /proc/vmcore therefore gives the incorrect
result.
2. Tools doing symbolization (either by reading /proc/kallsyms or based
on the vmlinux ELF file) will incorrectly identify vmalloc/vmap
allocations in [_text, _stext) as kernel symbols.
In practice, both of these issues affect the drgn debugger.
Specifically, there were cases where the vmap IRQ stacks for some CPUs
were allocated in [_text, _stext). As a result, drgn could not get the
stack trace for a crash in an IRQ handler because the core dump
contained invalid data for the IRQ stack address. The stack addresses
were also symbolized as being in the _text symbol.
Fix this by bringing back the mapping of [_text, _stext), but now make
it non-executable and read-only. This prevents other allocations from
using it while still achieving the original goal of not mapping
unpredictable data as executable. Other than the changed protection,
this is effectively a revert of the fixes commit.
Fixes: e2a073dde921 ("arm64: omit [_text, _stext) from permanent kernel mapping")
Cc: stable(a)vger.kernel.org
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index e6d35eff1486..e8ddbde31a83 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -78,6 +78,12 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
twopass |= enable_scs;
prot = twopass ? data_prot : text_prot;
+ /*
+ * [_stext, _text) isn't executed after boot and contains some
+ * non-executable, unpredictable data, so map it non-executable.
+ */
+ map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
+ false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
!twopass, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 77c7926a4df6..23c05dc7a8f2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -214,7 +214,7 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
- kernel_code.start = __pa_symbol(_stext);
+ kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
@@ -280,7 +280,7 @@ u64 cpu_logical_map(unsigned int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- setup_initial_init_mm(_stext, _etext, _edata, _end);
+ setup_initial_init_mm(_text, _etext, _edata, _end);
*cmdline_p = boot_command_line;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 70c2ca813c18..524d34a0e921 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -279,7 +279,7 @@ void __init arm64_memblock_init(void)
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
- memblock_reserve(__pa_symbol(_stext), _end - _stext);
+ memblock_reserve(__pa_symbol(_text), _end - _text);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
/* the generic initrd code expects virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0ba1a15e7e74..10c258099581 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -965,8 +965,8 @@ void __init mark_linear_text_alias_ro(void)
/*
* Remove the write permissions from the linear alias of .text/.rodata
*/
- update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
- (unsigned long)__init_begin - (unsigned long)_stext,
+ update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
+ (unsigned long)__init_begin - (unsigned long)_text,
PAGE_KERNEL_RO);
}
@@ -1037,7 +1037,7 @@ static inline bool force_pte_mapping(void)
static void __init map_mem(pgd_t *pgdp)
{
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
- phys_addr_t kernel_start = __pa_symbol(_stext);
+ phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
phys_addr_t start, end;
phys_addr_t early_kfence_pool;
@@ -1086,7 +1086,7 @@ static void __init map_mem(pgd_t *pgdp)
}
/*
- * Map the linear alias of the [_stext, __init_begin) interval
+ * Map the linear alias of the [_text, __init_begin) interval
* as non-executable now, and remove the write permission in
* mark_linear_text_alias_ro() below (which will be called after
* alternative patching has completed). This makes the contents
@@ -1113,6 +1113,10 @@ void mark_rodata_ro(void)
WRITE_ONCE(rodata_is_rw, false);
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
+ /* mark the range between _text and _stext as read only. */
+ update_mapping_prot(__pa_symbol(_text), (unsigned long)_text,
+ (unsigned long)_stext - (unsigned long)_text,
+ PAGE_KERNEL_RO);
}
static void __init declare_vma(struct vm_struct *vma,
@@ -1183,7 +1187,7 @@ static void __init declare_kernel_vmas(void)
{
static struct vm_struct vmlinux_seg[KERNEL_SEGMENT_COUNT];
- declare_vma(&vmlinux_seg[0], _stext, _etext, VM_NO_GUARD);
+ declare_vma(&vmlinux_seg[0], _text, _etext, VM_NO_GUARD);
declare_vma(&vmlinux_seg[1], __start_rodata, __inittext_begin, VM_NO_GUARD);
declare_vma(&vmlinux_seg[2], __inittext_begin, __inittext_end, VM_NO_GUARD);
declare_vma(&vmlinux_seg[3], __initdata_begin, __initdata_end, VM_NO_GUARD);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 5973a62efa34c80c9a4e5eac1fca6f6209b902af
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101548-stiffly-eagle-d9a5@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5973a62efa34c80c9a4e5eac1fca6f6209b902af Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Fri, 19 Sep 2025 14:27:51 -0700
Subject: [PATCH] arm64: map [_text, _stext) virtual address range
non-executable+read-only
Since the referenced fixes commit, the kernel's .text section is only
mapped starting from _stext; the region [_text, _stext) is omitted. As a
result, other vmalloc/vmap allocations may use the virtual addresses
nominally in the range [_text, _stext). This address reuse confuses
multiple things:
1. crash_prepare_elf64_headers() sets up a segment in /proc/vmcore
mapping the entire range [_text, _end) to
[__pa_symbol(_text), __pa_symbol(_end)). Reading an address in
[_text, _stext) from /proc/vmcore therefore gives the incorrect
result.
2. Tools doing symbolization (either by reading /proc/kallsyms or based
on the vmlinux ELF file) will incorrectly identify vmalloc/vmap
allocations in [_text, _stext) as kernel symbols.
In practice, both of these issues affect the drgn debugger.
Specifically, there were cases where the vmap IRQ stacks for some CPUs
were allocated in [_text, _stext). As a result, drgn could not get the
stack trace for a crash in an IRQ handler because the core dump
contained invalid data for the IRQ stack address. The stack addresses
were also symbolized as being in the _text symbol.
Fix this by bringing back the mapping of [_text, _stext), but now make
it non-executable and read-only. This prevents other allocations from
using it while still achieving the original goal of not mapping
unpredictable data as executable. Other than the changed protection,
this is effectively a revert of the fixes commit.
Fixes: e2a073dde921 ("arm64: omit [_text, _stext) from permanent kernel mapping")
Cc: stable(a)vger.kernel.org
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index e6d35eff1486..e8ddbde31a83 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -78,6 +78,12 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
twopass |= enable_scs;
prot = twopass ? data_prot : text_prot;
+ /*
+ * [_stext, _text) isn't executed after boot and contains some
+ * non-executable, unpredictable data, so map it non-executable.
+ */
+ map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
+ false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
!twopass, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 77c7926a4df6..23c05dc7a8f2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -214,7 +214,7 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
- kernel_code.start = __pa_symbol(_stext);
+ kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
@@ -280,7 +280,7 @@ u64 cpu_logical_map(unsigned int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- setup_initial_init_mm(_stext, _etext, _edata, _end);
+ setup_initial_init_mm(_text, _etext, _edata, _end);
*cmdline_p = boot_command_line;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 70c2ca813c18..524d34a0e921 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -279,7 +279,7 @@ void __init arm64_memblock_init(void)
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
- memblock_reserve(__pa_symbol(_stext), _end - _stext);
+ memblock_reserve(__pa_symbol(_text), _end - _text);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
/* the generic initrd code expects virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0ba1a15e7e74..10c258099581 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -965,8 +965,8 @@ void __init mark_linear_text_alias_ro(void)
/*
* Remove the write permissions from the linear alias of .text/.rodata
*/
- update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
- (unsigned long)__init_begin - (unsigned long)_stext,
+ update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
+ (unsigned long)__init_begin - (unsigned long)_text,
PAGE_KERNEL_RO);
}
@@ -1037,7 +1037,7 @@ static inline bool force_pte_mapping(void)
static void __init map_mem(pgd_t *pgdp)
{
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
- phys_addr_t kernel_start = __pa_symbol(_stext);
+ phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
phys_addr_t start, end;
phys_addr_t early_kfence_pool;
@@ -1086,7 +1086,7 @@ static void __init map_mem(pgd_t *pgdp)
}
/*
- * Map the linear alias of the [_stext, __init_begin) interval
+ * Map the linear alias of the [_text, __init_begin) interval
* as non-executable now, and remove the write permission in
* mark_linear_text_alias_ro() below (which will be called after
* alternative patching has completed). This makes the contents
@@ -1113,6 +1113,10 @@ void mark_rodata_ro(void)
WRITE_ONCE(rodata_is_rw, false);
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
+ /* mark the range between _text and _stext as read only. */
+ update_mapping_prot(__pa_symbol(_text), (unsigned long)_text,
+ (unsigned long)_stext - (unsigned long)_text,
+ PAGE_KERNEL_RO);
}
static void __init declare_vma(struct vm_struct *vma,
@@ -1183,7 +1187,7 @@ static void __init declare_kernel_vmas(void)
{
static struct vm_struct vmlinux_seg[KERNEL_SEGMENT_COUNT];
- declare_vma(&vmlinux_seg[0], _stext, _etext, VM_NO_GUARD);
+ declare_vma(&vmlinux_seg[0], _text, _etext, VM_NO_GUARD);
declare_vma(&vmlinux_seg[1], __start_rodata, __inittext_begin, VM_NO_GUARD);
declare_vma(&vmlinux_seg[2], __inittext_begin, __inittext_end, VM_NO_GUARD);
declare_vma(&vmlinux_seg[3], __initdata_begin, __initdata_end, VM_NO_GUARD);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 8e7e265d558e0257d6dacc78ec64aff4ba75f61e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101502-elf-bootie-19bf@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e7e265d558e0257d6dacc78ec64aff4ba75f61e Mon Sep 17 00:00:00 2001
From: Charalampos Mitrodimas <charmitro(a)posteo.net>
Date: Sat, 16 Aug 2025 14:14:37 +0000
Subject: [PATCH] debugfs: fix mount options not being applied
Mount options (uid, gid, mode) are silently ignored when debugfs is
mounted. This is a regression introduced during the conversion to the
new mount API.
When the mount API conversion was done, the parsed options were never
applied to the superblock when it was reused. As a result, the mount
options were ignored when debugfs was mounted.
Fix this by following the same pattern as the tracefs fix in commit
e4d32142d1de ("tracing: Fix tracefs mount options"). Call
debugfs_reconfigure() in debugfs_get_tree() to apply the mount options
to the superblock after it has been created or reused.
As an example, with the bug the "mode" mount option is ignored:
$ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test
$ mount | grep debugfs_test
debugfs on /tmp/debugfs_test type debugfs (rw,relatime)
$ ls -ld /tmp/debugfs_test
drwx------ 25 root root 0 Aug 4 14:16 /tmp/debugfs_test
With the fix applied, it works as expected:
$ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test
$ mount | grep debugfs_test
debugfs on /tmp/debugfs_test type debugfs (rw,relatime,mode=666)
$ ls -ld /tmp/debugfs_test
drw-rw-rw- 37 root root 0 Aug 2 17:28 /tmp/debugfs_test
Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220406
Cc: stable(a)vger.kernel.org
Reviewed-by: Eric Sandeen <sandeen(a)redhat.com>
Signed-off-by: Charalampos Mitrodimas <charmitro(a)posteo.net>
Link: https://lore.kernel.org/20250816-debugfs-mount-opts-v3-1-d271dad57b5b@poste…
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a0357b0cf362..c12d649df6a5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -183,6 +183,9 @@ static int debugfs_reconfigure(struct fs_context *fc)
struct debugfs_fs_info *sb_opts = sb->s_fs_info;
struct debugfs_fs_info *new_opts = fc->s_fs_info;
+ if (!new_opts)
+ return 0;
+
sync_filesystem(sb);
/* structure copy of new mount options to sb */
@@ -282,10 +285,16 @@ static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc)
static int debugfs_get_tree(struct fs_context *fc)
{
+ int err;
+
if (!(debugfs_allow & DEBUGFS_ALLOW_API))
return -EPERM;
- return get_tree_single(fc, debugfs_fill_super);
+ err = get_tree_single(fc, debugfs_fill_super);
+ if (err)
+ return err;
+
+ return debugfs_reconfigure(fc);
}
static void debugfs_free_fc(struct fs_context *fc)
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x 8e7e265d558e0257d6dacc78ec64aff4ba75f61e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101559-usher-corroding-5b5a@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e7e265d558e0257d6dacc78ec64aff4ba75f61e Mon Sep 17 00:00:00 2001
From: Charalampos Mitrodimas <charmitro(a)posteo.net>
Date: Sat, 16 Aug 2025 14:14:37 +0000
Subject: [PATCH] debugfs: fix mount options not being applied
Mount options (uid, gid, mode) are silently ignored when debugfs is
mounted. This is a regression introduced during the conversion to the
new mount API.
When the mount API conversion was done, the parsed options were never
applied to the superblock when it was reused. As a result, the mount
options were ignored when debugfs was mounted.
Fix this by following the same pattern as the tracefs fix in commit
e4d32142d1de ("tracing: Fix tracefs mount options"). Call
debugfs_reconfigure() in debugfs_get_tree() to apply the mount options
to the superblock after it has been created or reused.
As an example, with the bug the "mode" mount option is ignored:
$ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test
$ mount | grep debugfs_test
debugfs on /tmp/debugfs_test type debugfs (rw,relatime)
$ ls -ld /tmp/debugfs_test
drwx------ 25 root root 0 Aug 4 14:16 /tmp/debugfs_test
With the fix applied, it works as expected:
$ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test
$ mount | grep debugfs_test
debugfs on /tmp/debugfs_test type debugfs (rw,relatime,mode=666)
$ ls -ld /tmp/debugfs_test
drw-rw-rw- 37 root root 0 Aug 2 17:28 /tmp/debugfs_test
Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220406
Cc: stable(a)vger.kernel.org
Reviewed-by: Eric Sandeen <sandeen(a)redhat.com>
Signed-off-by: Charalampos Mitrodimas <charmitro(a)posteo.net>
Link: https://lore.kernel.org/20250816-debugfs-mount-opts-v3-1-d271dad57b5b@poste…
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a0357b0cf362..c12d649df6a5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -183,6 +183,9 @@ static int debugfs_reconfigure(struct fs_context *fc)
struct debugfs_fs_info *sb_opts = sb->s_fs_info;
struct debugfs_fs_info *new_opts = fc->s_fs_info;
+ if (!new_opts)
+ return 0;
+
sync_filesystem(sb);
/* structure copy of new mount options to sb */
@@ -282,10 +285,16 @@ static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc)
static int debugfs_get_tree(struct fs_context *fc)
{
+ int err;
+
if (!(debugfs_allow & DEBUGFS_ALLOW_API))
return -EPERM;
- return get_tree_single(fc, debugfs_fill_super);
+ err = get_tree_single(fc, debugfs_fill_super);
+ if (err)
+ return err;
+
+ return debugfs_reconfigure(fc);
}
static void debugfs_free_fc(struct fs_context *fc)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 72d271a7baa7062cb27e774ac37c5459c6d20e22
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101537-visor-thank-7800@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 72d271a7baa7062cb27e774ac37c5459c6d20e22 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar(a)cyphar.com>
Date: Thu, 7 Aug 2025 03:55:23 +1000
Subject: [PATCH] fscontext: do not consume log entries when returning
-EMSGSIZE
Userspace generally expects APIs that return -EMSGSIZE to allow for them
to adjust their buffer size and retry the operation. However, the
fscontext log would previously clear the message even in the -EMSGSIZE
case.
Given that it is very cheap for us to check whether the buffer is too
small before we remove the message from the ring buffer, let's just do
that instead. While we're at it, refactor some fscontext_read() into a
separate helper to make the ring buffer logic a bit easier to read.
Fixes: 007ec26cdc9f ("vfs: Implement logging through fs_context")
Cc: David Howells <dhowells(a)redhat.com>
Cc: stable(a)vger.kernel.org # v5.2+
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
Link: https://lore.kernel.org/20250807-fscontext-log-cleanups-v3-1-8d91d6242dc3@c…
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 1aaf4cb2afb2..f645c99204eb 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -18,50 +18,56 @@
#include "internal.h"
#include "mount.h"
+static inline const char *fetch_message_locked(struct fc_log *log, size_t len,
+ bool *need_free)
+{
+ const char *p;
+ int index;
+
+ if (unlikely(log->head == log->tail))
+ return ERR_PTR(-ENODATA);
+
+ index = log->tail & (ARRAY_SIZE(log->buffer) - 1);
+ p = log->buffer[index];
+ if (unlikely(strlen(p) > len))
+ return ERR_PTR(-EMSGSIZE);
+
+ log->buffer[index] = NULL;
+ *need_free = log->need_free & (1 << index);
+ log->need_free &= ~(1 << index);
+ log->tail++;
+
+ return p;
+}
+
/*
* Allow the user to read back any error, warning or informational messages.
+ * Only one message is returned for each read(2) call.
*/
static ssize_t fscontext_read(struct file *file,
char __user *_buf, size_t len, loff_t *pos)
{
struct fs_context *fc = file->private_data;
- struct fc_log *log = fc->log.log;
- unsigned int logsize = ARRAY_SIZE(log->buffer);
- ssize_t ret;
- char *p;
+ ssize_t err;
+ const char *p __free(kfree) = NULL, *message;
bool need_free;
- int index, n;
+ int n;
- ret = mutex_lock_interruptible(&fc->uapi_mutex);
- if (ret < 0)
- return ret;
-
- if (log->head == log->tail) {
- mutex_unlock(&fc->uapi_mutex);
- return -ENODATA;
- }
-
- index = log->tail & (logsize - 1);
- p = log->buffer[index];
- need_free = log->need_free & (1 << index);
- log->buffer[index] = NULL;
- log->need_free &= ~(1 << index);
- log->tail++;
+ err = mutex_lock_interruptible(&fc->uapi_mutex);
+ if (err < 0)
+ return err;
+ message = fetch_message_locked(fc->log.log, len, &need_free);
mutex_unlock(&fc->uapi_mutex);
+ if (IS_ERR(message))
+ return PTR_ERR(message);
- ret = -EMSGSIZE;
- n = strlen(p);
- if (n > len)
- goto err_free;
- ret = -EFAULT;
- if (copy_to_user(_buf, p, n) != 0)
- goto err_free;
- ret = n;
-
-err_free:
if (need_free)
- kfree(p);
- return ret;
+ p = message;
+
+ n = strlen(message);
+ if (copy_to_user(_buf, message, n))
+ return -EFAULT;
+ return n;
}
static int fscontext_release(struct inode *inode, struct file *file)