The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5cd474e57368f0957c343bb21e309cf82826b1ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090931-unmapped-return-e9dd@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
5cd474e57368 ("arm64: sdei: abort running SDEI handlers during crash")
dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()")
ac20ffbb0279 ("arm64: scs: use vmapped IRQ and SDEI shadow stacks")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5cd474e57368f0957c343bb21e309cf82826b1ef Mon Sep 17 00:00:00 2001
From: D Scott Phillips <scott(a)os.amperecomputing.com>
Date: Mon, 26 Jun 2023 17:29:39 -0700
Subject: [PATCH] arm64: sdei: abort running SDEI handlers during crash
Interrupts are blocked in SDEI context, per the SDEI spec: "The client
interrupts cannot preempt the event handler." If we crashed in the SDEI
handler-running context (as with ACPI's AGDI) then we need to clean up the
SDEI state before proceeding to the crash kernel so that the crash kernel
can have working interrupts.
Track the active SDEI handler per-cpu so that we can COMPLETE_AND_RESUME
the handler, discarding the interrupted context.
Fixes: f5df26961853 ("arm64: kernel: Add arch-specific SDEI entry code and CPU masking")
Signed-off-by: D Scott Phillips <scott(a)os.amperecomputing.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: James Morse <james.morse(a)arm.com>
Tested-by: Mihai Carabas <mihai.carabas(a)oracle.com>
Link: https://lore.kernel.org/r/20230627002939.2758-1-scott@os.amperecomputing.com
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 4292d9bafb9d..484cb6972e99 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -17,6 +17,9 @@
#include <asm/virt.h>
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
extern unsigned long sdei_exit_mode;
/* Software Delegated Exception entry point from firmware*/
@@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
unsigned long pc,
unsigned long pstate);
+/* Abort a running handler. Context is discarded. */
+void __sdei_handler_abort(void);
+
/*
* The above entry point does the minimum to call C code. This function does
* anything else, before calling the driver.
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a40e5e50fa55..6ad61de03d0a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -986,9 +986,13 @@ SYM_CODE_START(__sdei_asm_handler)
mov x19, x1
-#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ /* Store the registered-event for crash_smp_send_stop() */
ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
-#endif
+ cbnz w4, 1f
+ adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
+ b 2f
+1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
+2: str x19, [x5]
#ifdef CONFIG_VMAP_STACK
/*
@@ -1055,6 +1059,14 @@ SYM_CODE_START(__sdei_asm_handler)
ldr_l x2, sdei_exit_mode
+ /* Clear the registered-event seen by crash_smp_send_stop() */
+ ldrb w3, [x4, #SDEI_EVENT_PRIORITY]
+ cbnz w3, 1f
+ adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
+ b 2f
+1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
+2: str xzr, [x5]
+
alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
sdei_handler_exit exit_mode=x2
alternative_else_nop_endif
@@ -1065,4 +1077,15 @@ alternative_else_nop_endif
#endif
SYM_CODE_END(__sdei_asm_handler)
NOKPROBE(__sdei_asm_handler)
+
+SYM_CODE_START(__sdei_handler_abort)
+ mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
+ adr x1, 1f
+ ldr_l x2, sdei_exit_mode
+ sdei_handler_exit exit_mode=x2
+ // exit the handler and jump to the next instruction.
+ // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx.
+1: ret
+SYM_CODE_END(__sdei_handler_abort)
+NOKPROBE(__sdei_handler_abort)
#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 830be01af32d..255d12f881c2 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -47,6 +47,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
#endif
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
{
unsigned long *p;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index edd63894d61e..960b98b43506 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -1044,10 +1044,8 @@ void crash_smp_send_stop(void)
* If this cpu is the only one alive at this point in time, online or
* not, there are no stop messages to be sent around, so just back out.
*/
- if (num_other_online_cpus() == 0) {
- sdei_mask_local_cpu();
- return;
- }
+ if (num_other_online_cpus() == 0)
+ goto skip_ipi;
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
@@ -1066,7 +1064,9 @@ void crash_smp_send_stop(void)
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(&mask));
+skip_ipi:
sdei_mask_local_cpu();
+ sdei_handler_abort();
}
bool smp_crash_stop_failed(void)
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index f9040bd61081..285fe7ad490d 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1095,3 +1095,22 @@ int sdei_event_handler(struct pt_regs *regs,
return err;
}
NOKPROBE_SYMBOL(sdei_event_handler);
+
+void sdei_handler_abort(void)
+{
+ /*
+ * If the crash happened in an SDEI event handler then we need to
+ * finish the handler with the firmware so that we can have working
+ * interrupts in the crash kernel.
+ */
+ if (__this_cpu_read(sdei_active_critical_event)) {
+ pr_warn("still in SDEI critical event context, attempting to finish handler.\n");
+ __sdei_handler_abort();
+ __this_cpu_write(sdei_active_critical_event, NULL);
+ }
+ if (__this_cpu_read(sdei_active_normal_event)) {
+ pr_warn("still in SDEI normal event context, attempting to finish handler.\n");
+ __sdei_handler_abort();
+ __this_cpu_write(sdei_active_normal_event, NULL);
+ }
+}
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 14dc461b0e82..255701e1251b 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -47,10 +47,12 @@ int sdei_unregister_ghes(struct ghes *ghes);
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
void __init sdei_init(void);
+void sdei_handler_abort(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
static inline void sdei_init(void) { }
+static inline void sdei_handler_abort(void) { }
#endif /* CONFIG_ARM_SDE_INTERFACE */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1a721de8489fa559ff4471f73c58bb74ac5580d3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090947-apprehend-immobile-0fb5@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
1a721de8489f ("block: don't add or resize partition on the disk with GENHD_FL_NO_PART")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1a721de8489fa559ff4471f73c58bb74ac5580d3 Mon Sep 17 00:00:00 2001
From: Li Lingfeng <lilingfeng3(a)huawei.com>
Date: Thu, 31 Aug 2023 15:59:00 +0800
Subject: [PATCH] block: don't add or resize partition on the disk with
GENHD_FL_NO_PART
Commit a33df75c6328 ("block: use an xarray for disk->part_tbl") remove
disk_expand_part_tbl() in add_partition(), which means all kinds of
devices will support extended dynamic `dev_t`.
However, some devices with GENHD_FL_NO_PART are not expected to add or
resize partition.
Fix this by adding check of GENHD_FL_NO_PART before add or resize
partition.
Fixes: a33df75c6328 ("block: use an xarray for disk->part_tbl")
Signed-off-by: Li Lingfeng <lilingfeng3(a)huawei.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Link: https://lore.kernel.org/r/20230831075900.1725842-1-lilingfeng@huaweicloud.c…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/ioctl.c b/block/ioctl.c
index 648670ddb164..d5f5cd61efd7 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition p;
long long start, length;
+ if (disk->flags & GENHD_FL_NO_PART)
+ return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 6a5a148aaf14747570cc634f9cdfcb0393f5617f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090947-drainer-chatter-7ac5@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
6a5a148aaf14 ("bpf: fix bpf_probe_read_kernel prototype mismatch")
1f9a1ea821ff ("bpf: Support new sign-extension load insns")
06accc8779c1 ("bpf: add support for open-coded iterator loops")
215bf4962f6c ("bpf: add iterator kfuncs registration and validation logic")
a461f5adf177 ("bpf: generalize dynptr_get_spi to be usable for iters")
d0e1ac227945 ("bpf: move kfunc_call_arg_meta higher in the file")
653ae3a874ac ("bpf: clean up visit_insn()'s instruction processing")
98ddcf389d1b ("bpf: honor env->test_state_freq flag in is_state_visited()")
d54e0f6c1adf ("bpf: improve stack slot state printing")
0d80a619c113 ("bpf: allow ctx writes using BPF_ST_MEM instruction")
6fcd486b3a0a ("bpf: Refactor RCU enforcement in the verifier.")
20c09d92faee ("bpf: Introduce kptr_rcu.")
8d093b4e95a2 ("bpf: Mark cgroups and dfl_cgrp fields as trusted.")
66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr")
05421aecd4ed ("bpf: Add xdp dynptrs")
b5964b968ac6 ("bpf: Add skb dynptrs")
d96d937d7c5c ("bpf: Add __uninit kfunc annotation")
485ec51ef976 ("bpf: Refactor verifier dynptr into get_dynptr_arg_reg")
8357b366cbb0 ("bpf: Define no-ops for externally called bpf dynptr functions")
1d18feb2c915 ("bpf: Allow initializing dynptrs in kfuncs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6a5a148aaf14747570cc634f9cdfcb0393f5617f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Tue, 1 Aug 2023 13:13:58 +0200
Subject: [PATCH] bpf: fix bpf_probe_read_kernel prototype mismatch
bpf_probe_read_kernel() has a __weak definition in core.c and another
definition with an incompatible prototype in kernel/trace/bpf_trace.c,
when CONFIG_BPF_EVENTS is enabled.
Since the two are incompatible, there cannot be a shared declaration in
a header file, but the lack of a prototype causes a W=1 warning:
kernel/bpf/core.c:1638:12: error: no previous prototype for 'bpf_probe_read_kernel' [-Werror=missing-prototypes]
On 32-bit architectures, the local prototype
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
passes arguments in other registers as the one in bpf_trace.c
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
which uses 64-bit arguments in pairs of registers.
As both versions of the function are fairly simple and only really
differ in one line, just move them into a header file as an inline
function that does not add any overhead for the bpf_trace.c callers
and actually avoids a function call for the other one.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/ac25cb0f-b804-1649-3afb-1dc6138c2716@iogearbox.…
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Link: https://lore.kernel.org/r/20230801111449.185301-1-arnd@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index baccdec22f19..0f8f036d8bd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,12 +1650,6 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
-{
- memset(dst, 0, size);
- return -EFAULT;
-}
-
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -2066,8 +2060,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
- (const void *)(long) (SRC + insn->off)); \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
@@ -2082,7 +2076,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEMSX_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
(const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c92eb8c6ff08..83bde2475ae5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -223,17 +223,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 6a5a148aaf14747570cc634f9cdfcb0393f5617f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090946-wrath-compacter-d8a5@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
6a5a148aaf14 ("bpf: fix bpf_probe_read_kernel prototype mismatch")
1f9a1ea821ff ("bpf: Support new sign-extension load insns")
06accc8779c1 ("bpf: add support for open-coded iterator loops")
215bf4962f6c ("bpf: add iterator kfuncs registration and validation logic")
a461f5adf177 ("bpf: generalize dynptr_get_spi to be usable for iters")
d0e1ac227945 ("bpf: move kfunc_call_arg_meta higher in the file")
653ae3a874ac ("bpf: clean up visit_insn()'s instruction processing")
98ddcf389d1b ("bpf: honor env->test_state_freq flag in is_state_visited()")
d54e0f6c1adf ("bpf: improve stack slot state printing")
0d80a619c113 ("bpf: allow ctx writes using BPF_ST_MEM instruction")
6fcd486b3a0a ("bpf: Refactor RCU enforcement in the verifier.")
20c09d92faee ("bpf: Introduce kptr_rcu.")
8d093b4e95a2 ("bpf: Mark cgroups and dfl_cgrp fields as trusted.")
66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr")
05421aecd4ed ("bpf: Add xdp dynptrs")
b5964b968ac6 ("bpf: Add skb dynptrs")
d96d937d7c5c ("bpf: Add __uninit kfunc annotation")
485ec51ef976 ("bpf: Refactor verifier dynptr into get_dynptr_arg_reg")
8357b366cbb0 ("bpf: Define no-ops for externally called bpf dynptr functions")
1d18feb2c915 ("bpf: Allow initializing dynptrs in kfuncs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6a5a148aaf14747570cc634f9cdfcb0393f5617f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Tue, 1 Aug 2023 13:13:58 +0200
Subject: [PATCH] bpf: fix bpf_probe_read_kernel prototype mismatch
bpf_probe_read_kernel() has a __weak definition in core.c and another
definition with an incompatible prototype in kernel/trace/bpf_trace.c,
when CONFIG_BPF_EVENTS is enabled.
Since the two are incompatible, there cannot be a shared declaration in
a header file, but the lack of a prototype causes a W=1 warning:
kernel/bpf/core.c:1638:12: error: no previous prototype for 'bpf_probe_read_kernel' [-Werror=missing-prototypes]
On 32-bit architectures, the local prototype
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
passes arguments in other registers as the one in bpf_trace.c
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
which uses 64-bit arguments in pairs of registers.
As both versions of the function are fairly simple and only really
differ in one line, just move them into a header file as an inline
function that does not add any overhead for the bpf_trace.c callers
and actually avoids a function call for the other one.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/ac25cb0f-b804-1649-3afb-1dc6138c2716@iogearbox.…
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Link: https://lore.kernel.org/r/20230801111449.185301-1-arnd@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index baccdec22f19..0f8f036d8bd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,12 +1650,6 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
-{
- memset(dst, 0, size);
- return -EFAULT;
-}
-
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -2066,8 +2060,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
- (const void *)(long) (SRC + insn->off)); \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
@@ -2082,7 +2076,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEMSX_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
(const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c92eb8c6ff08..83bde2475ae5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -223,17 +223,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 6a5a148aaf14747570cc634f9cdfcb0393f5617f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090945-french-startling-3c46@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
6a5a148aaf14 ("bpf: fix bpf_probe_read_kernel prototype mismatch")
1f9a1ea821ff ("bpf: Support new sign-extension load insns")
06accc8779c1 ("bpf: add support for open-coded iterator loops")
215bf4962f6c ("bpf: add iterator kfuncs registration and validation logic")
a461f5adf177 ("bpf: generalize dynptr_get_spi to be usable for iters")
d0e1ac227945 ("bpf: move kfunc_call_arg_meta higher in the file")
653ae3a874ac ("bpf: clean up visit_insn()'s instruction processing")
98ddcf389d1b ("bpf: honor env->test_state_freq flag in is_state_visited()")
d54e0f6c1adf ("bpf: improve stack slot state printing")
0d80a619c113 ("bpf: allow ctx writes using BPF_ST_MEM instruction")
6fcd486b3a0a ("bpf: Refactor RCU enforcement in the verifier.")
20c09d92faee ("bpf: Introduce kptr_rcu.")
8d093b4e95a2 ("bpf: Mark cgroups and dfl_cgrp fields as trusted.")
66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr")
05421aecd4ed ("bpf: Add xdp dynptrs")
b5964b968ac6 ("bpf: Add skb dynptrs")
d96d937d7c5c ("bpf: Add __uninit kfunc annotation")
485ec51ef976 ("bpf: Refactor verifier dynptr into get_dynptr_arg_reg")
8357b366cbb0 ("bpf: Define no-ops for externally called bpf dynptr functions")
1d18feb2c915 ("bpf: Allow initializing dynptrs in kfuncs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6a5a148aaf14747570cc634f9cdfcb0393f5617f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Tue, 1 Aug 2023 13:13:58 +0200
Subject: [PATCH] bpf: fix bpf_probe_read_kernel prototype mismatch
bpf_probe_read_kernel() has a __weak definition in core.c and another
definition with an incompatible prototype in kernel/trace/bpf_trace.c,
when CONFIG_BPF_EVENTS is enabled.
Since the two are incompatible, there cannot be a shared declaration in
a header file, but the lack of a prototype causes a W=1 warning:
kernel/bpf/core.c:1638:12: error: no previous prototype for 'bpf_probe_read_kernel' [-Werror=missing-prototypes]
On 32-bit architectures, the local prototype
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
passes arguments in other registers as the one in bpf_trace.c
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
which uses 64-bit arguments in pairs of registers.
As both versions of the function are fairly simple and only really
differ in one line, just move them into a header file as an inline
function that does not add any overhead for the bpf_trace.c callers
and actually avoids a function call for the other one.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/ac25cb0f-b804-1649-3afb-1dc6138c2716@iogearbox.…
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Link: https://lore.kernel.org/r/20230801111449.185301-1-arnd@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index baccdec22f19..0f8f036d8bd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,12 +1650,6 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
-{
- memset(dst, 0, size);
- return -EFAULT;
-}
-
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -2066,8 +2060,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
- (const void *)(long) (SRC + insn->off)); \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
@@ -2082,7 +2076,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEMSX_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
(const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c92eb8c6ff08..83bde2475ae5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -223,17 +223,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 6a5a148aaf14747570cc634f9cdfcb0393f5617f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090944-pretender-manifesto-9755@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
6a5a148aaf14 ("bpf: fix bpf_probe_read_kernel prototype mismatch")
1f9a1ea821ff ("bpf: Support new sign-extension load insns")
06accc8779c1 ("bpf: add support for open-coded iterator loops")
215bf4962f6c ("bpf: add iterator kfuncs registration and validation logic")
a461f5adf177 ("bpf: generalize dynptr_get_spi to be usable for iters")
d0e1ac227945 ("bpf: move kfunc_call_arg_meta higher in the file")
653ae3a874ac ("bpf: clean up visit_insn()'s instruction processing")
98ddcf389d1b ("bpf: honor env->test_state_freq flag in is_state_visited()")
d54e0f6c1adf ("bpf: improve stack slot state printing")
0d80a619c113 ("bpf: allow ctx writes using BPF_ST_MEM instruction")
6fcd486b3a0a ("bpf: Refactor RCU enforcement in the verifier.")
20c09d92faee ("bpf: Introduce kptr_rcu.")
8d093b4e95a2 ("bpf: Mark cgroups and dfl_cgrp fields as trusted.")
66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr")
05421aecd4ed ("bpf: Add xdp dynptrs")
b5964b968ac6 ("bpf: Add skb dynptrs")
d96d937d7c5c ("bpf: Add __uninit kfunc annotation")
485ec51ef976 ("bpf: Refactor verifier dynptr into get_dynptr_arg_reg")
8357b366cbb0 ("bpf: Define no-ops for externally called bpf dynptr functions")
1d18feb2c915 ("bpf: Allow initializing dynptrs in kfuncs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6a5a148aaf14747570cc634f9cdfcb0393f5617f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Tue, 1 Aug 2023 13:13:58 +0200
Subject: [PATCH] bpf: fix bpf_probe_read_kernel prototype mismatch
bpf_probe_read_kernel() has a __weak definition in core.c and another
definition with an incompatible prototype in kernel/trace/bpf_trace.c,
when CONFIG_BPF_EVENTS is enabled.
Since the two are incompatible, there cannot be a shared declaration in
a header file, but the lack of a prototype causes a W=1 warning:
kernel/bpf/core.c:1638:12: error: no previous prototype for 'bpf_probe_read_kernel' [-Werror=missing-prototypes]
On 32-bit architectures, the local prototype
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
passes arguments in other registers as the one in bpf_trace.c
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
which uses 64-bit arguments in pairs of registers.
As both versions of the function are fairly simple and only really
differ in one line, just move them into a header file as an inline
function that does not add any overhead for the bpf_trace.c callers
and actually avoids a function call for the other one.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/ac25cb0f-b804-1649-3afb-1dc6138c2716@iogearbox.…
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Link: https://lore.kernel.org/r/20230801111449.185301-1-arnd@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index baccdec22f19..0f8f036d8bd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,12 +1650,6 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
-{
- memset(dst, 0, size);
- return -EFAULT;
-}
-
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -2066,8 +2060,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
- (const void *)(long) (SRC + insn->off)); \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
@@ -2082,7 +2076,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEMSX_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
(const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c92eb8c6ff08..83bde2475ae5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -223,17 +223,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6a5a148aaf14747570cc634f9cdfcb0393f5617f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090943-judiciary-transport-c1ac@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
6a5a148aaf14 ("bpf: fix bpf_probe_read_kernel prototype mismatch")
1f9a1ea821ff ("bpf: Support new sign-extension load insns")
06accc8779c1 ("bpf: add support for open-coded iterator loops")
215bf4962f6c ("bpf: add iterator kfuncs registration and validation logic")
a461f5adf177 ("bpf: generalize dynptr_get_spi to be usable for iters")
d0e1ac227945 ("bpf: move kfunc_call_arg_meta higher in the file")
653ae3a874ac ("bpf: clean up visit_insn()'s instruction processing")
98ddcf389d1b ("bpf: honor env->test_state_freq flag in is_state_visited()")
d54e0f6c1adf ("bpf: improve stack slot state printing")
0d80a619c113 ("bpf: allow ctx writes using BPF_ST_MEM instruction")
6fcd486b3a0a ("bpf: Refactor RCU enforcement in the verifier.")
20c09d92faee ("bpf: Introduce kptr_rcu.")
8d093b4e95a2 ("bpf: Mark cgroups and dfl_cgrp fields as trusted.")
66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr")
05421aecd4ed ("bpf: Add xdp dynptrs")
b5964b968ac6 ("bpf: Add skb dynptrs")
d96d937d7c5c ("bpf: Add __uninit kfunc annotation")
485ec51ef976 ("bpf: Refactor verifier dynptr into get_dynptr_arg_reg")
8357b366cbb0 ("bpf: Define no-ops for externally called bpf dynptr functions")
1d18feb2c915 ("bpf: Allow initializing dynptrs in kfuncs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6a5a148aaf14747570cc634f9cdfcb0393f5617f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Tue, 1 Aug 2023 13:13:58 +0200
Subject: [PATCH] bpf: fix bpf_probe_read_kernel prototype mismatch
bpf_probe_read_kernel() has a __weak definition in core.c and another
definition with an incompatible prototype in kernel/trace/bpf_trace.c,
when CONFIG_BPF_EVENTS is enabled.
Since the two are incompatible, there cannot be a shared declaration in
a header file, but the lack of a prototype causes a W=1 warning:
kernel/bpf/core.c:1638:12: error: no previous prototype for 'bpf_probe_read_kernel' [-Werror=missing-prototypes]
On 32-bit architectures, the local prototype
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
passes arguments in other registers as the one in bpf_trace.c
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
which uses 64-bit arguments in pairs of registers.
As both versions of the function are fairly simple and only really
differ in one line, just move them into a header file as an inline
function that does not add any overhead for the bpf_trace.c callers
and actually avoids a function call for the other one.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/ac25cb0f-b804-1649-3afb-1dc6138c2716@iogearbox.…
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Link: https://lore.kernel.org/r/20230801111449.185301-1-arnd@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index baccdec22f19..0f8f036d8bd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,12 +1650,6 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
-{
- memset(dst, 0, size);
- return -EFAULT;
-}
-
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -2066,8 +2060,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
- (const void *)(long) (SRC + insn->off)); \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
@@ -2082,7 +2076,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEMSX_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
(const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c92eb8c6ff08..83bde2475ae5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -223,17 +223,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6a5a148aaf14747570cc634f9cdfcb0393f5617f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090942-raging-premiere-af0c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
6a5a148aaf14 ("bpf: fix bpf_probe_read_kernel prototype mismatch")
1f9a1ea821ff ("bpf: Support new sign-extension load insns")
06accc8779c1 ("bpf: add support for open-coded iterator loops")
215bf4962f6c ("bpf: add iterator kfuncs registration and validation logic")
a461f5adf177 ("bpf: generalize dynptr_get_spi to be usable for iters")
d0e1ac227945 ("bpf: move kfunc_call_arg_meta higher in the file")
653ae3a874ac ("bpf: clean up visit_insn()'s instruction processing")
98ddcf389d1b ("bpf: honor env->test_state_freq flag in is_state_visited()")
d54e0f6c1adf ("bpf: improve stack slot state printing")
0d80a619c113 ("bpf: allow ctx writes using BPF_ST_MEM instruction")
6fcd486b3a0a ("bpf: Refactor RCU enforcement in the verifier.")
20c09d92faee ("bpf: Introduce kptr_rcu.")
8d093b4e95a2 ("bpf: Mark cgroups and dfl_cgrp fields as trusted.")
66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr")
05421aecd4ed ("bpf: Add xdp dynptrs")
b5964b968ac6 ("bpf: Add skb dynptrs")
d96d937d7c5c ("bpf: Add __uninit kfunc annotation")
485ec51ef976 ("bpf: Refactor verifier dynptr into get_dynptr_arg_reg")
8357b366cbb0 ("bpf: Define no-ops for externally called bpf dynptr functions")
1d18feb2c915 ("bpf: Allow initializing dynptrs in kfuncs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6a5a148aaf14747570cc634f9cdfcb0393f5617f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Tue, 1 Aug 2023 13:13:58 +0200
Subject: [PATCH] bpf: fix bpf_probe_read_kernel prototype mismatch
bpf_probe_read_kernel() has a __weak definition in core.c and another
definition with an incompatible prototype in kernel/trace/bpf_trace.c,
when CONFIG_BPF_EVENTS is enabled.
Since the two are incompatible, there cannot be a shared declaration in
a header file, but the lack of a prototype causes a W=1 warning:
kernel/bpf/core.c:1638:12: error: no previous prototype for 'bpf_probe_read_kernel' [-Werror=missing-prototypes]
On 32-bit architectures, the local prototype
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
passes arguments in other registers as the one in bpf_trace.c
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
which uses 64-bit arguments in pairs of registers.
As both versions of the function are fairly simple and only really
differ in one line, just move them into a header file as an inline
function that does not add any overhead for the bpf_trace.c callers
and actually avoids a function call for the other one.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/ac25cb0f-b804-1649-3afb-1dc6138c2716@iogearbox.…
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Link: https://lore.kernel.org/r/20230801111449.185301-1-arnd@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index baccdec22f19..0f8f036d8bd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,12 +1650,6 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
-{
- memset(dst, 0, size);
- return -EFAULT;
-}
-
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -2066,8 +2060,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
- (const void *)(long) (SRC + insn->off)); \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
@@ -2082,7 +2076,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEMSX_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
(const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c92eb8c6ff08..83bde2475ae5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -223,17 +223,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x 6a5a148aaf14747570cc634f9cdfcb0393f5617f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090941-predator-relocate-32e5@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
6a5a148aaf14 ("bpf: fix bpf_probe_read_kernel prototype mismatch")
1f9a1ea821ff ("bpf: Support new sign-extension load insns")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6a5a148aaf14747570cc634f9cdfcb0393f5617f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Tue, 1 Aug 2023 13:13:58 +0200
Subject: [PATCH] bpf: fix bpf_probe_read_kernel prototype mismatch
bpf_probe_read_kernel() has a __weak definition in core.c and another
definition with an incompatible prototype in kernel/trace/bpf_trace.c,
when CONFIG_BPF_EVENTS is enabled.
Since the two are incompatible, there cannot be a shared declaration in
a header file, but the lack of a prototype causes a W=1 warning:
kernel/bpf/core.c:1638:12: error: no previous prototype for 'bpf_probe_read_kernel' [-Werror=missing-prototypes]
On 32-bit architectures, the local prototype
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
passes arguments in other registers as the one in bpf_trace.c
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
which uses 64-bit arguments in pairs of registers.
As both versions of the function are fairly simple and only really
differ in one line, just move them into a header file as an inline
function that does not add any overhead for the bpf_trace.c callers
and actually avoids a function call for the other one.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/ac25cb0f-b804-1649-3afb-1dc6138c2716@iogearbox.…
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Link: https://lore.kernel.org/r/20230801111449.185301-1-arnd@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index baccdec22f19..0f8f036d8bd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,12 +1650,6 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
-{
- memset(dst, 0, size);
- return -EFAULT;
-}
-
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -2066,8 +2060,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
- (const void *)(long) (SRC + insn->off)); \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
@@ -2082,7 +2076,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEMSX_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
(const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c92eb8c6ff08..83bde2475ae5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -223,17 +223,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x 6a5a148aaf14747570cc634f9cdfcb0393f5617f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090940-recede-unicorn-1885@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
6a5a148aaf14 ("bpf: fix bpf_probe_read_kernel prototype mismatch")
1f9a1ea821ff ("bpf: Support new sign-extension load insns")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6a5a148aaf14747570cc634f9cdfcb0393f5617f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Tue, 1 Aug 2023 13:13:58 +0200
Subject: [PATCH] bpf: fix bpf_probe_read_kernel prototype mismatch
bpf_probe_read_kernel() has a __weak definition in core.c and another
definition with an incompatible prototype in kernel/trace/bpf_trace.c,
when CONFIG_BPF_EVENTS is enabled.
Since the two are incompatible, there cannot be a shared declaration in
a header file, but the lack of a prototype causes a W=1 warning:
kernel/bpf/core.c:1638:12: error: no previous prototype for 'bpf_probe_read_kernel' [-Werror=missing-prototypes]
On 32-bit architectures, the local prototype
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
passes arguments in other registers as the one in bpf_trace.c
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
which uses 64-bit arguments in pairs of registers.
As both versions of the function are fairly simple and only really
differ in one line, just move them into a header file as an inline
function that does not add any overhead for the bpf_trace.c callers
and actually avoids a function call for the other one.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/ac25cb0f-b804-1649-3afb-1dc6138c2716@iogearbox.…
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Link: https://lore.kernel.org/r/20230801111449.185301-1-arnd@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index baccdec22f19..0f8f036d8bd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,12 +1650,6 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
-{
- memset(dst, 0, size);
- return -EFAULT;
-}
-
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -2066,8 +2060,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
- (const void *)(long) (SRC + insn->off)); \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
@@ -2082,7 +2076,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT; \
LDX_PROBE_MEMSX_##SIZEOP: \
- bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
(const void *)(long) (SRC + insn->off)); \
DST = *((SIZE *)&DST); \
CONT;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c92eb8c6ff08..83bde2475ae5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -223,17 +223,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5905afc2c7bb713d52c7c7585565feecbb686b44
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090919-eastcoast-tameness-5f28@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
5905afc2c7bb ("block: fix pin count management when merging same-page segments")
403b6fb8dac1 ("block: convert bio_map_user_iov to use iov_iter_extract_pages")
f62e52d1276b ("iov_iter: Define flags to qualify page extraction.")
7ee4ccf57484 ("block: set FOLL_PCI_P2PDMA in bio_map_user_iov()")
5e3e3f2e15df ("block: set FOLL_PCI_P2PDMA in __bio_iov_iter_get_pages()")
d82076403cef ("iov_iter: introduce iov_iter_get_pages_[alloc_]flags()")
91e5adda5cf4 ("block/blk-map: Remove set but unused variable 'added'")
e88811bc43b9 ("block: use on-stack page vec for <= UIO_FASTIOV")
eba2d3d79829 ("get rid of non-advancing variants")
480cb846c27b ("block: convert to advancing variants of iov_iter_get_pages{,_alloc}()")
3cf42da327f2 ("iov_iter: saner helper for page array allocation")
8520008417c5 ("fold __pipe_get_pages() into pipe_get_pages()")
0aa4fc32f540 ("ITER_XARRAY: don't open-code DIV_ROUND_UP()")
451c0ba9475e ("unify the rest of iov_iter_get_pages()/iov_iter_get_pages_alloc() guts")
68fe506f3731 ("unify xarray_get_pages() and xarray_get_pages_alloc()")
acbdeb8320b0 ("unify pipe_get_pages() and pipe_get_pages_alloc()")
c81ce28df500 ("iov_iter_get_pages(): sanity-check arguments")
91329559eb07 ("iov_iter_get_pages_alloc(): lift freeing pages array on failure exits into wrapper")
12d426ab64a1 ("ITER_PIPE: fold data_start() and pipe_space_for_user() together")
10f525a8cd7a ("ITER_PIPE: cache the type of last buffer")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5905afc2c7bb713d52c7c7585565feecbb686b44 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Tue, 5 Sep 2023 14:47:31 +0200
Subject: [PATCH] block: fix pin count management when merging same-page
segments
There is no need to unpin the added page when adding it to the bio fails
as that is done by the loop below. Instead we want to unpin it when adding
a single page to the bio more than once as bio_release_pages will only
unpin it once.
Fixes: d1916c86ccdc ("block: move same page handling from __bio_add_pc_page to the callers")
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Link: https://lore.kernel.org/r/20230905124731.328255-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-map.c b/block/blk-map.c
index 44d74a30ddac..8584babf3ea0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -315,12 +315,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
n = bytes;
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
- max_sectors, &same_page)) {
- if (same_page)
- bio_release_page(bio, page);
+ max_sectors, &same_page))
break;
- }
+ if (same_page)
+ bio_release_page(bio, page);
bytes -= n;
offs = 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5905afc2c7bb713d52c7c7585565feecbb686b44
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090918-essence-exemplary-3e4c@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
5905afc2c7bb ("block: fix pin count management when merging same-page segments")
403b6fb8dac1 ("block: convert bio_map_user_iov to use iov_iter_extract_pages")
f62e52d1276b ("iov_iter: Define flags to qualify page extraction.")
7ee4ccf57484 ("block: set FOLL_PCI_P2PDMA in bio_map_user_iov()")
5e3e3f2e15df ("block: set FOLL_PCI_P2PDMA in __bio_iov_iter_get_pages()")
d82076403cef ("iov_iter: introduce iov_iter_get_pages_[alloc_]flags()")
91e5adda5cf4 ("block/blk-map: Remove set but unused variable 'added'")
e88811bc43b9 ("block: use on-stack page vec for <= UIO_FASTIOV")
eba2d3d79829 ("get rid of non-advancing variants")
480cb846c27b ("block: convert to advancing variants of iov_iter_get_pages{,_alloc}()")
3cf42da327f2 ("iov_iter: saner helper for page array allocation")
8520008417c5 ("fold __pipe_get_pages() into pipe_get_pages()")
0aa4fc32f540 ("ITER_XARRAY: don't open-code DIV_ROUND_UP()")
451c0ba9475e ("unify the rest of iov_iter_get_pages()/iov_iter_get_pages_alloc() guts")
68fe506f3731 ("unify xarray_get_pages() and xarray_get_pages_alloc()")
acbdeb8320b0 ("unify pipe_get_pages() and pipe_get_pages_alloc()")
c81ce28df500 ("iov_iter_get_pages(): sanity-check arguments")
91329559eb07 ("iov_iter_get_pages_alloc(): lift freeing pages array on failure exits into wrapper")
12d426ab64a1 ("ITER_PIPE: fold data_start() and pipe_space_for_user() together")
10f525a8cd7a ("ITER_PIPE: cache the type of last buffer")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5905afc2c7bb713d52c7c7585565feecbb686b44 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Tue, 5 Sep 2023 14:47:31 +0200
Subject: [PATCH] block: fix pin count management when merging same-page
segments
There is no need to unpin the added page when adding it to the bio fails
as that is done by the loop below. Instead we want to unpin it when adding
a single page to the bio more than once as bio_release_pages will only
unpin it once.
Fixes: d1916c86ccdc ("block: move same page handling from __bio_add_pc_page to the callers")
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Link: https://lore.kernel.org/r/20230905124731.328255-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-map.c b/block/blk-map.c
index 44d74a30ddac..8584babf3ea0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -315,12 +315,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
n = bytes;
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
- max_sectors, &same_page)) {
- if (same_page)
- bio_release_page(bio, page);
+ max_sectors, &same_page))
break;
- }
+ if (same_page)
+ bio_release_page(bio, page);
bytes -= n;
offs = 0;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5905afc2c7bb713d52c7c7585565feecbb686b44
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090916-icon-refute-e9fe@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
5905afc2c7bb ("block: fix pin count management when merging same-page segments")
403b6fb8dac1 ("block: convert bio_map_user_iov to use iov_iter_extract_pages")
f62e52d1276b ("iov_iter: Define flags to qualify page extraction.")
7ee4ccf57484 ("block: set FOLL_PCI_P2PDMA in bio_map_user_iov()")
5e3e3f2e15df ("block: set FOLL_PCI_P2PDMA in __bio_iov_iter_get_pages()")
d82076403cef ("iov_iter: introduce iov_iter_get_pages_[alloc_]flags()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5905afc2c7bb713d52c7c7585565feecbb686b44 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Tue, 5 Sep 2023 14:47:31 +0200
Subject: [PATCH] block: fix pin count management when merging same-page
segments
There is no need to unpin the added page when adding it to the bio fails
as that is done by the loop below. Instead we want to unpin it when adding
a single page to the bio more than once as bio_release_pages will only
unpin it once.
Fixes: d1916c86ccdc ("block: move same page handling from __bio_add_pc_page to the callers")
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Link: https://lore.kernel.org/r/20230905124731.328255-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-map.c b/block/blk-map.c
index 44d74a30ddac..8584babf3ea0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -315,12 +315,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
n = bytes;
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
- max_sectors, &same_page)) {
- if (same_page)
- bio_release_page(bio, page);
+ max_sectors, &same_page))
break;
- }
+ if (same_page)
+ bio_release_page(bio, page);
bytes -= n;
offs = 0;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5905afc2c7bb713d52c7c7585565feecbb686b44
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090917-doable-levers-3d4c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
5905afc2c7bb ("block: fix pin count management when merging same-page segments")
403b6fb8dac1 ("block: convert bio_map_user_iov to use iov_iter_extract_pages")
f62e52d1276b ("iov_iter: Define flags to qualify page extraction.")
7ee4ccf57484 ("block: set FOLL_PCI_P2PDMA in bio_map_user_iov()")
5e3e3f2e15df ("block: set FOLL_PCI_P2PDMA in __bio_iov_iter_get_pages()")
d82076403cef ("iov_iter: introduce iov_iter_get_pages_[alloc_]flags()")
91e5adda5cf4 ("block/blk-map: Remove set but unused variable 'added'")
e88811bc43b9 ("block: use on-stack page vec for <= UIO_FASTIOV")
eba2d3d79829 ("get rid of non-advancing variants")
480cb846c27b ("block: convert to advancing variants of iov_iter_get_pages{,_alloc}()")
3cf42da327f2 ("iov_iter: saner helper for page array allocation")
8520008417c5 ("fold __pipe_get_pages() into pipe_get_pages()")
0aa4fc32f540 ("ITER_XARRAY: don't open-code DIV_ROUND_UP()")
451c0ba9475e ("unify the rest of iov_iter_get_pages()/iov_iter_get_pages_alloc() guts")
68fe506f3731 ("unify xarray_get_pages() and xarray_get_pages_alloc()")
acbdeb8320b0 ("unify pipe_get_pages() and pipe_get_pages_alloc()")
c81ce28df500 ("iov_iter_get_pages(): sanity-check arguments")
91329559eb07 ("iov_iter_get_pages_alloc(): lift freeing pages array on failure exits into wrapper")
12d426ab64a1 ("ITER_PIPE: fold data_start() and pipe_space_for_user() together")
10f525a8cd7a ("ITER_PIPE: cache the type of last buffer")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5905afc2c7bb713d52c7c7585565feecbb686b44 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Tue, 5 Sep 2023 14:47:31 +0200
Subject: [PATCH] block: fix pin count management when merging same-page
segments
There is no need to unpin the added page when adding it to the bio fails
as that is done by the loop below. Instead we want to unpin it when adding
a single page to the bio more than once as bio_release_pages will only
unpin it once.
Fixes: d1916c86ccdc ("block: move same page handling from __bio_add_pc_page to the callers")
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Link: https://lore.kernel.org/r/20230905124731.328255-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-map.c b/block/blk-map.c
index 44d74a30ddac..8584babf3ea0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -315,12 +315,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
n = bytes;
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
- max_sectors, &same_page)) {
- if (same_page)
- bio_release_page(bio, page);
+ max_sectors, &same_page))
break;
- }
+ if (same_page)
+ bio_release_page(bio, page);
bytes -= n;
offs = 0;
}
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5905afc2c7bb713d52c7c7585565feecbb686b44
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090916-opal-democracy-f820@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
5905afc2c7bb ("block: fix pin count management when merging same-page segments")
403b6fb8dac1 ("block: convert bio_map_user_iov to use iov_iter_extract_pages")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5905afc2c7bb713d52c7c7585565feecbb686b44 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Tue, 5 Sep 2023 14:47:31 +0200
Subject: [PATCH] block: fix pin count management when merging same-page
segments
There is no need to unpin the added page when adding it to the bio fails
as that is done by the loop below. Instead we want to unpin it when adding
a single page to the bio more than once as bio_release_pages will only
unpin it once.
Fixes: d1916c86ccdc ("block: move same page handling from __bio_add_pc_page to the callers")
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Link: https://lore.kernel.org/r/20230905124731.328255-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-map.c b/block/blk-map.c
index 44d74a30ddac..8584babf3ea0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -315,12 +315,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
n = bytes;
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
- max_sectors, &same_page)) {
- if (same_page)
- bio_release_page(bio, page);
+ max_sectors, &same_page))
break;
- }
+ if (same_page)
+ bio_release_page(bio, page);
bytes -= n;
offs = 0;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x d74e481609808330b4625b3691cf01e1f56e255e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090938-ceremony-cinch-9f48@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
d74e48160980 ("media: venus: hfi_venus: Write to VIDC_CTRL_INIT after unmasking interrupts")
255385ca433c ("media: venus: hfi: Add a 6xx boot logic")
ff2a7013b3e6 ("media: venus: hfi,pm,firmware: Convert to block relative addressing")
7482a983dea3 ("media: venus: redesign clocks and pm domains control")
fd1ee315dcd4 ("media: venus: cache vb payload to be used by clock scaling")
219031a6e7df ("media: venus: fix build on 32bit environments")
c0e284ccfeda ("media: venus: Update clock scaling")
8dbebb2bd01e ("media: venus: Fix occasionally failures to suspend")
32f0a6ddc8c9 ("media: venus: Use on-chip interconnect API")
d42974e438fe ("media: venus: dec: populate properly timestamps and flags for capture buffers")
beac82904a87 ("media: venus: make decoder compliant with stateful codec API")
14ea00d65c65 ("media: venus: helpers: add three more helper functions")
1e485ee5a724 ("media: venus: helpers: export few helper functions")
a132459d4009 ("media: venus: core: fix max load for msm8996 and sdm845")
5792ae7c3dd4 ("media: venus: firmware: check fw size against DT memory region size")
87e25f4b2c3c ("media: venus: fix reported size of 0-length buffers")
df381dc8e475 ("media: venus: firmware: add no TZ boot and shutdown routine")
f9799fcce4bb ("media: venus: firmware: register separate platform_device for firmware loader")
a4cf7e3c069d ("media: venus: firmware: move load firmware in a separate function")
5df317c8786b ("media: venus: firmware: add routine to reset ARM9")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d74e481609808330b4625b3691cf01e1f56e255e Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Date: Tue, 30 May 2023 14:30:36 +0200
Subject: [PATCH] media: venus: hfi_venus: Write to VIDC_CTRL_INIT after
unmasking interrupts
The startup procedure shouldn't be started with interrupts masked, as that
may entail silent failures.
Kick off initialization only after the interrupts are unmasked.
Cc: stable(a)vger.kernel.org # v4.12+
Fixes: d96d3f30c0f2 ("[media] media: venus: hfi: add Venus HFI files")
Signed-off-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Stanimir Varbanov <stanimir.k.varbanov(a)gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
index 918a283bd890..5506a0d196ef 100644
--- a/drivers/media/platform/qcom/venus/hfi_venus.c
+++ b/drivers/media/platform/qcom/venus/hfi_venus.c
@@ -453,7 +453,6 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
void __iomem *wrapper_base = hdev->core->wrapper_base;
int ret = 0;
- writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
if (IS_V6(hdev->core)) {
mask_val = readl(wrapper_base + WRAPPER_INTR_MASK);
mask_val &= ~(WRAPPER_INTR_MASK_A2HWD_BASK_V6 |
@@ -464,6 +463,7 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
writel(mask_val, wrapper_base + WRAPPER_INTR_MASK);
writel(1, cpu_cs_base + CPU_CS_SCIACMDARG3);
+ writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
while (!ctrl_status && count < max_tries) {
ctrl_status = readl(cpu_cs_base + CPU_CS_SCIACMDARG0);
if ((ctrl_status & CPU_CS_SCIACMDARG0_ERROR_STATUS_MASK) == 4) {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x d74e481609808330b4625b3691cf01e1f56e255e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090937-endorphin-postage-0d99@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
d74e48160980 ("media: venus: hfi_venus: Write to VIDC_CTRL_INIT after unmasking interrupts")
255385ca433c ("media: venus: hfi: Add a 6xx boot logic")
ff2a7013b3e6 ("media: venus: hfi,pm,firmware: Convert to block relative addressing")
7482a983dea3 ("media: venus: redesign clocks and pm domains control")
fd1ee315dcd4 ("media: venus: cache vb payload to be used by clock scaling")
219031a6e7df ("media: venus: fix build on 32bit environments")
c0e284ccfeda ("media: venus: Update clock scaling")
8dbebb2bd01e ("media: venus: Fix occasionally failures to suspend")
32f0a6ddc8c9 ("media: venus: Use on-chip interconnect API")
d42974e438fe ("media: venus: dec: populate properly timestamps and flags for capture buffers")
beac82904a87 ("media: venus: make decoder compliant with stateful codec API")
14ea00d65c65 ("media: venus: helpers: add three more helper functions")
1e485ee5a724 ("media: venus: helpers: export few helper functions")
a132459d4009 ("media: venus: core: fix max load for msm8996 and sdm845")
5792ae7c3dd4 ("media: venus: firmware: check fw size against DT memory region size")
87e25f4b2c3c ("media: venus: fix reported size of 0-length buffers")
df381dc8e475 ("media: venus: firmware: add no TZ boot and shutdown routine")
f9799fcce4bb ("media: venus: firmware: register separate platform_device for firmware loader")
a4cf7e3c069d ("media: venus: firmware: move load firmware in a separate function")
5df317c8786b ("media: venus: firmware: add routine to reset ARM9")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d74e481609808330b4625b3691cf01e1f56e255e Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Date: Tue, 30 May 2023 14:30:36 +0200
Subject: [PATCH] media: venus: hfi_venus: Write to VIDC_CTRL_INIT after
unmasking interrupts
The startup procedure shouldn't be started with interrupts masked, as that
may entail silent failures.
Kick off initialization only after the interrupts are unmasked.
Cc: stable(a)vger.kernel.org # v4.12+
Fixes: d96d3f30c0f2 ("[media] media: venus: hfi: add Venus HFI files")
Signed-off-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Stanimir Varbanov <stanimir.k.varbanov(a)gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
index 918a283bd890..5506a0d196ef 100644
--- a/drivers/media/platform/qcom/venus/hfi_venus.c
+++ b/drivers/media/platform/qcom/venus/hfi_venus.c
@@ -453,7 +453,6 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
void __iomem *wrapper_base = hdev->core->wrapper_base;
int ret = 0;
- writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
if (IS_V6(hdev->core)) {
mask_val = readl(wrapper_base + WRAPPER_INTR_MASK);
mask_val &= ~(WRAPPER_INTR_MASK_A2HWD_BASK_V6 |
@@ -464,6 +463,7 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
writel(mask_val, wrapper_base + WRAPPER_INTR_MASK);
writel(1, cpu_cs_base + CPU_CS_SCIACMDARG3);
+ writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
while (!ctrl_status && count < max_tries) {
ctrl_status = readl(cpu_cs_base + CPU_CS_SCIACMDARG0);
if ((ctrl_status & CPU_CS_SCIACMDARG0_ERROR_STATUS_MASK) == 4) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x d74e481609808330b4625b3691cf01e1f56e255e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090937-manila-baggy-02be@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
d74e48160980 ("media: venus: hfi_venus: Write to VIDC_CTRL_INIT after unmasking interrupts")
255385ca433c ("media: venus: hfi: Add a 6xx boot logic")
ff2a7013b3e6 ("media: venus: hfi,pm,firmware: Convert to block relative addressing")
7482a983dea3 ("media: venus: redesign clocks and pm domains control")
fd1ee315dcd4 ("media: venus: cache vb payload to be used by clock scaling")
219031a6e7df ("media: venus: fix build on 32bit environments")
c0e284ccfeda ("media: venus: Update clock scaling")
8dbebb2bd01e ("media: venus: Fix occasionally failures to suspend")
32f0a6ddc8c9 ("media: venus: Use on-chip interconnect API")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d74e481609808330b4625b3691cf01e1f56e255e Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Date: Tue, 30 May 2023 14:30:36 +0200
Subject: [PATCH] media: venus: hfi_venus: Write to VIDC_CTRL_INIT after
unmasking interrupts
The startup procedure shouldn't be started with interrupts masked, as that
may entail silent failures.
Kick off initialization only after the interrupts are unmasked.
Cc: stable(a)vger.kernel.org # v4.12+
Fixes: d96d3f30c0f2 ("[media] media: venus: hfi: add Venus HFI files")
Signed-off-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Stanimir Varbanov <stanimir.k.varbanov(a)gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
index 918a283bd890..5506a0d196ef 100644
--- a/drivers/media/platform/qcom/venus/hfi_venus.c
+++ b/drivers/media/platform/qcom/venus/hfi_venus.c
@@ -453,7 +453,6 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
void __iomem *wrapper_base = hdev->core->wrapper_base;
int ret = 0;
- writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
if (IS_V6(hdev->core)) {
mask_val = readl(wrapper_base + WRAPPER_INTR_MASK);
mask_val &= ~(WRAPPER_INTR_MASK_A2HWD_BASK_V6 |
@@ -464,6 +463,7 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
writel(mask_val, wrapper_base + WRAPPER_INTR_MASK);
writel(1, cpu_cs_base + CPU_CS_SCIACMDARG3);
+ writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
while (!ctrl_status && count < max_tries) {
ctrl_status = readl(cpu_cs_base + CPU_CS_SCIACMDARG0);
if ((ctrl_status & CPU_CS_SCIACMDARG0_ERROR_STATUS_MASK) == 4) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d74e481609808330b4625b3691cf01e1f56e255e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090936-undrafted-majestic-da5d@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
d74e48160980 ("media: venus: hfi_venus: Write to VIDC_CTRL_INIT after unmasking interrupts")
255385ca433c ("media: venus: hfi: Add a 6xx boot logic")
ff2a7013b3e6 ("media: venus: hfi,pm,firmware: Convert to block relative addressing")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d74e481609808330b4625b3691cf01e1f56e255e Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Date: Tue, 30 May 2023 14:30:36 +0200
Subject: [PATCH] media: venus: hfi_venus: Write to VIDC_CTRL_INIT after
unmasking interrupts
The startup procedure shouldn't be started with interrupts masked, as that
may entail silent failures.
Kick off initialization only after the interrupts are unmasked.
Cc: stable(a)vger.kernel.org # v4.12+
Fixes: d96d3f30c0f2 ("[media] media: venus: hfi: add Venus HFI files")
Signed-off-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Stanimir Varbanov <stanimir.k.varbanov(a)gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
index 918a283bd890..5506a0d196ef 100644
--- a/drivers/media/platform/qcom/venus/hfi_venus.c
+++ b/drivers/media/platform/qcom/venus/hfi_venus.c
@@ -453,7 +453,6 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
void __iomem *wrapper_base = hdev->core->wrapper_base;
int ret = 0;
- writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
if (IS_V6(hdev->core)) {
mask_val = readl(wrapper_base + WRAPPER_INTR_MASK);
mask_val &= ~(WRAPPER_INTR_MASK_A2HWD_BASK_V6 |
@@ -464,6 +463,7 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
writel(mask_val, wrapper_base + WRAPPER_INTR_MASK);
writel(1, cpu_cs_base + CPU_CS_SCIACMDARG3);
+ writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
while (!ctrl_status && count < max_tries) {
ctrl_status = readl(cpu_cs_base + CPU_CS_SCIACMDARG0);
if ((ctrl_status & CPU_CS_SCIACMDARG0_ERROR_STATUS_MASK) == 4) {
stable-rc/linux-5.4.y build: 17 builds: 0 failed, 17 passed, 34 warnings (v5.4.256-220-gf804807a002f)
Full Build Summary: https://kernelci.org/build/stable-rc/branch/linux-5.4.y/kernel/v5.4.256-220…
Tree: stable-rc
Branch: linux-5.4.y
Git Describe: v5.4.256-220-gf804807a002f
Git Commit: f804807a002fa77179520b09face6d36f4876036
Git URL: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Built: 7 unique architectures
Warnings Detected:
arc:
arm64:
defconfig (gcc-10): 3 warnings
defconfig+arm64-chromebook (gcc-10): 4 warnings
arm:
imx_v6_v7_defconfig (gcc-10): 1 warning
omap2plus_defconfig (gcc-10): 1 warning
i386:
allnoconfig (gcc-10): 2 warnings
i386_defconfig (gcc-10): 3 warnings
tinyconfig (gcc-10): 2 warnings
mips:
riscv:
x86_64:
allnoconfig (gcc-10): 4 warnings
tinyconfig (gcc-10): 4 warnings
x86_64_defconfig (gcc-10): 5 warnings
x86_64_defconfig+x86-chromebook (gcc-10): 5 warnings
Warnings summary:
7 ld: warning: creating DT_TEXTREL in a PIE
7 fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
4 ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
4 arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
3 ld: arch/x86/boot/compressed/head_32.o: warning: relocation in read-only section `.head.text'
2 arch/x86/entry/entry_64.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.
2 arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x1c1: unsupported intra-function call
2 arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x151: unsupported intra-function call
2 arch/x86/entry/entry_64.S:1756: Warning: no instruction mnemonic suffix given and no register operands; using default for `sysret'
1 drivers/gpu/drm/mediatek/mtk_drm_gem.c:273:10: warning: returning ‘int’ from a function with return type ‘void *’ makes pointer from integer without a cast [-Wint-conversion]
Section mismatches summary:
1 WARNING: vmlinux.o(___ksymtab_gpl+vic_init_cascaded+0x0): Section mismatch in reference from the variable __ksymtab_vic_init_cascaded to the function .init.text:vic_init_cascaded()
================================================================================
Detailed per-defconfig build reports:
--------------------------------------------------------------------------------
32r2el_defconfig (mips, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
allnoconfig (i386, gcc-10) — PASS, 0 errors, 2 warnings, 0 section mismatches
Warnings:
ld: arch/x86/boot/compressed/head_32.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
allnoconfig (x86_64, gcc-10) — PASS, 0 errors, 4 warnings, 0 section mismatches
Warnings:
arch/x86/entry/entry_64.S:1756: Warning: no instruction mnemonic suffix given and no register operands; using default for `sysret'
arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x151: unsupported intra-function call
ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
defconfig (riscv, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
defconfig (arm64, gcc-10) — PASS, 0 errors, 3 warnings, 0 section mismatches
Warnings:
arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
--------------------------------------------------------------------------------
defconfig+arm64-chromebook (arm64, gcc-10) — PASS, 0 errors, 4 warnings, 0 section mismatches
Warnings:
arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
drivers/gpu/drm/mediatek/mtk_drm_gem.c:273:10: warning: returning ‘int’ from a function with return type ‘void *’ makes pointer from integer without a cast [-Wint-conversion]
--------------------------------------------------------------------------------
haps_hs_smp_defconfig (arc, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
i386_defconfig (i386, gcc-10) — PASS, 0 errors, 3 warnings, 0 section mismatches
Warnings:
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
ld: arch/x86/boot/compressed/head_32.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
imx_v6_v7_defconfig (arm, gcc-10) — PASS, 0 errors, 1 warning, 0 section mismatches
Warnings:
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
--------------------------------------------------------------------------------
multi_v5_defconfig (arm, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
Section mismatches:
WARNING: vmlinux.o(___ksymtab_gpl+vic_init_cascaded+0x0): Section mismatch in reference from the variable __ksymtab_vic_init_cascaded to the function .init.text:vic_init_cascaded()
--------------------------------------------------------------------------------
multi_v7_defconfig (arm, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
omap2plus_defconfig (arm, gcc-10) — PASS, 0 errors, 1 warning, 0 section mismatches
Warnings:
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
--------------------------------------------------------------------------------
tinyconfig (x86_64, gcc-10) — PASS, 0 errors, 4 warnings, 0 section mismatches
Warnings:
arch/x86/entry/entry_64.S:1756: Warning: no instruction mnemonic suffix given and no register operands; using default for `sysret'
arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x151: unsupported intra-function call
ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
tinyconfig (i386, gcc-10) — PASS, 0 errors, 2 warnings, 0 section mismatches
Warnings:
ld: arch/x86/boot/compressed/head_32.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
vexpress_defconfig (arm, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
x86_64_defconfig (x86_64, gcc-10) — PASS, 0 errors, 5 warnings, 0 section mismatches
Warnings:
arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x1c1: unsupported intra-function call
arch/x86/entry/entry_64.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
x86_64_defconfig+x86-chromebook (x86_64, gcc-10) — PASS, 0 errors, 5 warnings, 0 section mismatches
Warnings:
arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x1c1: unsupported intra-function call
arch/x86/entry/entry_64.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
---
For more info write to <info(a)kernelci.org>
stable-rc/linux-6.1.y build: 22 builds: 18 failed, 4 passed, 108 errors, 19 warnings (v6.1.52-547-g0480b8535974)
Full Build Summary: https://kernelci.org/build/stable-rc/branch/linux-6.1.y/kernel/v6.1.52-547-…
Tree: stable-rc
Branch: linux-6.1.y
Git Describe: v6.1.52-547-g0480b8535974
Git Commit: 0480b8535974b203450ef268b4f9277c0aff3544
Git URL: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Built: 7 unique architectures
Build Failures Detected:
arc:
haps_hs_smp_defconfig: (gcc-10) FAIL
arm64:
allnoconfig: (gcc-10) FAIL
defconfig: (gcc-10) FAIL
defconfig+arm64-chromebook: (gcc-10) FAIL
arm:
imx_v6_v7_defconfig: (gcc-10) FAIL
multi_v5_defconfig: (gcc-10) FAIL
multi_v7_defconfig: (gcc-10) FAIL
omap2plus_defconfig: (gcc-10) FAIL
vexpress_defconfig: (gcc-10) FAIL
i386:
allnoconfig: (gcc-10) FAIL
i386_defconfig: (gcc-10) FAIL
mips:
32r2el_defconfig: (gcc-10) FAIL
riscv:
allnoconfig: (gcc-10) FAIL
defconfig: (gcc-10) FAIL
rv32_defconfig: (gcc-10) FAIL
x86_64:
allnoconfig: (gcc-10) FAIL
x86_64_defconfig: (gcc-10) FAIL
x86_64_defconfig+x86-chromebook: (gcc-10) FAIL
Errors and Warnings Detected:
arc:
haps_hs_smp_defconfig (gcc-10): 6 errors, 1 warning
arm64:
allnoconfig (gcc-10): 6 errors, 1 warning
defconfig (gcc-10): 6 errors, 1 warning
defconfig+arm64-chromebook (gcc-10): 6 errors, 1 warning
arm:
imx_v6_v7_defconfig (gcc-10): 6 errors, 1 warning
multi_v5_defconfig (gcc-10): 6 errors, 1 warning
multi_v7_defconfig (gcc-10): 6 errors, 1 warning
omap2plus_defconfig (gcc-10): 6 errors, 1 warning
vexpress_defconfig (gcc-10): 6 errors, 1 warning
i386:
allnoconfig (gcc-10): 6 errors, 1 warning
i386_defconfig (gcc-10): 6 errors, 1 warning
mips:
32r2el_defconfig (gcc-10): 6 errors, 2 warnings
riscv:
allnoconfig (gcc-10): 6 errors, 1 warning
defconfig (gcc-10): 6 errors, 1 warning
rv32_defconfig (gcc-10): 6 errors, 1 warning
x86_64:
allnoconfig (gcc-10): 6 errors, 1 warning
x86_64_defconfig (gcc-10): 6 errors, 1 warning
x86_64_defconfig+x86-chromebook (gcc-10): 6 errors, 1 warning
Errors summary:
85 io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
18 io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
5 io_uring/io-wq.c:191:43: error: 'struct io_worker' has no member named 'wq'; did you mean 'wqe'?
Warnings summary:
15 cc1: some warnings being treated as errors
3 cc1: all warnings being treated as errors
1 arch/mips/boot/dts/img/boston.dts:128.19-178.5: Warning (pci_device_reg): /pci@14000000/pci2_root@0,0,0: PCI unit address format error, expected "0,0"
================================================================================
Detailed per-defconfig build reports:
--------------------------------------------------------------------------------
32r2el_defconfig (mips, gcc-10) — FAIL, 6 errors, 2 warnings, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
arch/mips/boot/dts/img/boston.dts:128.19-178.5: Warning (pci_device_reg): /pci@14000000/pci2_root@0,0,0: PCI unit address format error, expected "0,0"
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
allnoconfig (arm64, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
allnoconfig (x86_64, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
allnoconfig (riscv, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
allnoconfig (i386, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
defconfig (arm64, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
defconfig (riscv, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
defconfig+arm64-chromebook (arm64, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
haps_hs_smp_defconfig (arc, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: 'struct io_worker' has no member named 'wq'; did you mean 'wqe'?
io_uring/io-wq.c:191:43: error: 'struct io_worker' has no member named 'wq'; did you mean 'wqe'?
io_uring/io-wq.c:191:43: error: 'struct io_worker' has no member named 'wq'; did you mean 'wqe'?
io_uring/io-wq.c:191:43: error: 'struct io_worker' has no member named 'wq'; did you mean 'wqe'?
io_uring/io-wq.c:191:43: error: 'struct io_worker' has no member named 'wq'; did you mean 'wqe'?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
i386_defconfig (i386, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: all warnings being treated as errors
--------------------------------------------------------------------------------
imx_v6_v7_defconfig (arm, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
multi_v5_defconfig (arm, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
multi_v7_defconfig (arm, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
nommu_k210_defconfig (riscv, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
nommu_k210_sdcard_defconfig (riscv, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
omap2plus_defconfig (arm, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
rv32_defconfig (riscv, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
tinyconfig (i386, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
tinyconfig (x86_64, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
vexpress_defconfig (arm, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: some warnings being treated as errors
--------------------------------------------------------------------------------
x86_64_defconfig (x86_64, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: all warnings being treated as errors
--------------------------------------------------------------------------------
x86_64_defconfig+x86-chromebook (x86_64, gcc-10) — FAIL, 6 errors, 1 warning, 0 section mismatches
Errors:
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:191:43: error: ‘struct io_worker’ has no member named ‘wq’; did you mean ‘wqe’?
io_uring/io-wq.c:192:1: error: control reaches end of non-void function [-Werror=return-type]
Warnings:
cc1: all warnings being treated as errors
---
For more info write to <info(a)kernelci.org>
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 45500dc4e01c167ee063f3dcc22f51ced5b2b1e9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090909-unnatural-giddiness-7f7a@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
45500dc4e01c ("io_uring: break out of iowq iopoll on teardown")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 45500dc4e01c167ee063f3dcc22f51ced5b2b1e9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Thu, 7 Sep 2023 13:50:07 +0100
Subject: [PATCH] io_uring: break out of iowq iopoll on teardown
io-wq will retry iopoll even when it failed with -EAGAIN. If that
races with task exit, which sets TIF_NOTIFY_SIGNAL for all its workers,
such workers might potentially infinitely spin retrying iopoll again and
again and each time failing on some allocation / waiting / etc. Don't
keep spinning if io-wq is dying.
Fixes: 561fb04a6a225 ("io_uring: replace workqueue usage with io-wq")
Cc: stable(a)vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 62f345587df5..1ecc8c748768 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -174,6 +174,16 @@ static void io_worker_ref_put(struct io_wq *wq)
complete(&wq->worker_done);
}
+bool io_wq_worker_stopped(void)
+{
+ struct io_worker *worker = current->worker_private;
+
+ if (WARN_ON_ONCE(!io_wq_current_is_worker()))
+ return true;
+
+ return test_bit(IO_WQ_BIT_EXIT, &worker->wq->state);
+}
+
static void io_worker_cancel_cb(struct io_worker *worker)
{
struct io_wq_acct *acct = io_wq_get_acct(worker);
diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h
index 06d9ca90c577..2b2a6406dd8e 100644
--- a/io_uring/io-wq.h
+++ b/io_uring/io-wq.h
@@ -52,6 +52,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val);
int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
int io_wq_max_workers(struct io_wq *wq, int *new_count);
+bool io_wq_worker_stopped(void);
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0f0ba31c3850..58d8dd34a45f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1975,6 +1975,8 @@ void io_wq_submit_work(struct io_wq_work *work)
if (!needs_poll) {
if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
break;
+ if (io_wq_worker_stopped())
+ break;
cond_resched();
continue;
}
On Sat, 9 Sept 2023 at 18:16, <gregkh(a)linuxfoundation.org> wrote:
>
>
> This is a note to let you know that I've just added the patch titled
>
> arm64: dts: qcom: sdm845-db845c: Mark cont splash memory region as reserved
>
> to the 5.10-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> arm64-dts-qcom-sdm845-db845c-mark-cont-splash-memory-region-as-reserved.patch
> and it can be found in the queue-5.10 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
Please skip this patch for 5.10.y. We ran into a boot (mdss crash)
regression with this patch.
Regards,
Amit Pundir
>
> From 110e70fccce4f22b53986ae797d665ffb1950aa6 Mon Sep 17 00:00:00 2001
> From: Amit Pundir <amit.pundir(a)linaro.org>
> Date: Wed, 26 Jul 2023 18:57:19 +0530
> Subject: arm64: dts: qcom: sdm845-db845c: Mark cont splash memory region as reserved
>
> From: Amit Pundir <amit.pundir(a)linaro.org>
>
> commit 110e70fccce4f22b53986ae797d665ffb1950aa6 upstream.
>
> Adding a reserved memory region for the framebuffer memory
> (the splash memory region set up by the bootloader).
>
> It fixes a kernel panic (arm-smmu: Unhandled context fault
> at this particular memory region) reported on DB845c running
> v5.10.y.
>
> Cc: stable(a)vger.kernel.org # v5.10+
> Reviewed-by: Caleb Connolly <caleb.connolly(a)linaro.org>
> Signed-off-by: Amit Pundir <amit.pundir(a)linaro.org>
> Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
> Link: https://lore.kernel.org/r/20230726132719.2117369-2-amit.pundir@linaro.org
> Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
> ---
> arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
> +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
> @@ -85,6 +85,14 @@
> };
> };
>
> + reserved-memory {
> + /* Cont splash region set up by the bootloader */
> + cont_splash_mem: framebuffer@9d400000 {
> + reg = <0x0 0x9d400000 0x0 0x2400000>;
> + no-map;
> + };
> + };
> +
> lt9611_1v8: lt9611-vdd18-regulator {
> compatible = "regulator-fixed";
> regulator-name = "LT9611_1V8";
> @@ -482,6 +490,7 @@
> };
>
> &mdss {
> + memory-region = <&cont_splash_mem>;
> status = "okay";
> };
>
>
>
> Patches currently in stable-queue which might be from amit.pundir(a)linaro.org are
>
> queue-5.10/arm64-dts-qcom-sdm845-db845c-mark-cont-splash-memory-region-as-reserved.patch
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 45500dc4e01c167ee063f3dcc22f51ced5b2b1e9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090945-mural-humming-67c0@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
45500dc4e01c ("io_uring: break out of iowq iopoll on teardown")
ed29b0b4fd83 ("io_uring: move to separate directory")
e0deb6a025ae ("io_uring: avoid io-wq -EAGAIN looping for !IOPOLL")
1d5f5ea7cb7d ("io-wq: remove worker to owner tw dependency")
d01905db14eb ("io_uring: clean iowq submit work cancellation")
255657d23704 ("io_uring: clean io_wq_submit_work()'s main loop")
90fa02883f06 ("io_uring: implement async hybrid mode for pollable requests")
3b44b3712c5b ("io_uring: split logic of force_nonblock")
9882131cd9de ("io_uring: kill io_wq_current_is_worker() in iopoll")
9983028e7660 ("io_uring: optimise req->ctx reloads")
5e49c973fc39 ("io_uring: clean up io_import_iovec")
51aac424aef9 ("io_uring: optimise io_import_iovec nonblock passing")
c88598a92a58 ("io_uring: optimise read/write iov state storing")
538941e2681c ("io_uring: encapsulate rw state")
d886e185a128 ("io_uring: control ->async_data with a REQ_F flag")
30d51dd4ad20 ("io_uring: clean up buffer select")
ef05d9ebcc92 ("io_uring: kill off ->inflight_entry field")
6f33b0bc4ea4 ("io_uring: use slist for completion batching")
c450178d9be9 ("io_uring: dedup CQE flushing non-empty checks")
4c928904ff77 ("block: move CONFIG_BLOCK guard to top Makefile")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 45500dc4e01c167ee063f3dcc22f51ced5b2b1e9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Thu, 7 Sep 2023 13:50:07 +0100
Subject: [PATCH] io_uring: break out of iowq iopoll on teardown
io-wq will retry iopoll even when it failed with -EAGAIN. If that
races with task exit, which sets TIF_NOTIFY_SIGNAL for all its workers,
such workers might potentially infinitely spin retrying iopoll again and
again and each time failing on some allocation / waiting / etc. Don't
keep spinning if io-wq is dying.
Fixes: 561fb04a6a225 ("io_uring: replace workqueue usage with io-wq")
Cc: stable(a)vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 62f345587df5..1ecc8c748768 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -174,6 +174,16 @@ static void io_worker_ref_put(struct io_wq *wq)
complete(&wq->worker_done);
}
+bool io_wq_worker_stopped(void)
+{
+ struct io_worker *worker = current->worker_private;
+
+ if (WARN_ON_ONCE(!io_wq_current_is_worker()))
+ return true;
+
+ return test_bit(IO_WQ_BIT_EXIT, &worker->wq->state);
+}
+
static void io_worker_cancel_cb(struct io_worker *worker)
{
struct io_wq_acct *acct = io_wq_get_acct(worker);
diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h
index 06d9ca90c577..2b2a6406dd8e 100644
--- a/io_uring/io-wq.h
+++ b/io_uring/io-wq.h
@@ -52,6 +52,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val);
int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
int io_wq_max_workers(struct io_wq *wq, int *new_count);
+bool io_wq_worker_stopped(void);
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0f0ba31c3850..58d8dd34a45f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1975,6 +1975,8 @@ void io_wq_submit_work(struct io_wq_work *work)
if (!needs_poll) {
if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
break;
+ if (io_wq_worker_stopped())
+ break;
cond_resched();
continue;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 45500dc4e01c167ee063f3dcc22f51ced5b2b1e9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090946-waking-jokester-8c89@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
45500dc4e01c ("io_uring: break out of iowq iopoll on teardown")
ed29b0b4fd83 ("io_uring: move to separate directory")
e0deb6a025ae ("io_uring: avoid io-wq -EAGAIN looping for !IOPOLL")
1d5f5ea7cb7d ("io-wq: remove worker to owner tw dependency")
d01905db14eb ("io_uring: clean iowq submit work cancellation")
255657d23704 ("io_uring: clean io_wq_submit_work()'s main loop")
90fa02883f06 ("io_uring: implement async hybrid mode for pollable requests")
3b44b3712c5b ("io_uring: split logic of force_nonblock")
9882131cd9de ("io_uring: kill io_wq_current_is_worker() in iopoll")
9983028e7660 ("io_uring: optimise req->ctx reloads")
5e49c973fc39 ("io_uring: clean up io_import_iovec")
51aac424aef9 ("io_uring: optimise io_import_iovec nonblock passing")
c88598a92a58 ("io_uring: optimise read/write iov state storing")
538941e2681c ("io_uring: encapsulate rw state")
d886e185a128 ("io_uring: control ->async_data with a REQ_F flag")
30d51dd4ad20 ("io_uring: clean up buffer select")
ef05d9ebcc92 ("io_uring: kill off ->inflight_entry field")
6f33b0bc4ea4 ("io_uring: use slist for completion batching")
c450178d9be9 ("io_uring: dedup CQE flushing non-empty checks")
4c928904ff77 ("block: move CONFIG_BLOCK guard to top Makefile")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 45500dc4e01c167ee063f3dcc22f51ced5b2b1e9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Thu, 7 Sep 2023 13:50:07 +0100
Subject: [PATCH] io_uring: break out of iowq iopoll on teardown
io-wq will retry iopoll even when it failed with -EAGAIN. If that
races with task exit, which sets TIF_NOTIFY_SIGNAL for all its workers,
such workers might potentially infinitely spin retrying iopoll again and
again and each time failing on some allocation / waiting / etc. Don't
keep spinning if io-wq is dying.
Fixes: 561fb04a6a225 ("io_uring: replace workqueue usage with io-wq")
Cc: stable(a)vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 62f345587df5..1ecc8c748768 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -174,6 +174,16 @@ static void io_worker_ref_put(struct io_wq *wq)
complete(&wq->worker_done);
}
+bool io_wq_worker_stopped(void)
+{
+ struct io_worker *worker = current->worker_private;
+
+ if (WARN_ON_ONCE(!io_wq_current_is_worker()))
+ return true;
+
+ return test_bit(IO_WQ_BIT_EXIT, &worker->wq->state);
+}
+
static void io_worker_cancel_cb(struct io_worker *worker)
{
struct io_wq_acct *acct = io_wq_get_acct(worker);
diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h
index 06d9ca90c577..2b2a6406dd8e 100644
--- a/io_uring/io-wq.h
+++ b/io_uring/io-wq.h
@@ -52,6 +52,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val);
int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
int io_wq_max_workers(struct io_wq *wq, int *new_count);
+bool io_wq_worker_stopped(void);
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0f0ba31c3850..58d8dd34a45f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1975,6 +1975,8 @@ void io_wq_submit_work(struct io_wq_work *work)
if (!needs_poll) {
if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
break;
+ if (io_wq_worker_stopped())
+ break;
cond_resched();
continue;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1bfed23349716a7811645336a7ce42c4b8f250bc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090926-basics-violet-94a6@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
1bfed2334971 ("io_uring/net: don't overflow multishot accept")
d86eaed185e9 ("io_uring: cleanup io_aux_cqe() API")
ea97f6c8558e ("io_uring: add support for multishot timeouts")
a282967c848f ("io_uring: encapsulate task_work state")
13bfa6f15d0b ("io_uring: remove extra tw trylocks")
9d2789ac9d60 ("block/io_uring: pass in issue_flags for uring_cmd task_work handling")
6bb30855560e ("io_uring: if a linked request has REQ_F_FORCE_ASYNC then run it async")
f58680085478 ("io_uring: add reschedule point to handle_tw_list()")
50470fc5723a ("io_uring: return normal tw run linking optimisation")
31f084b7b028 ("io_uring: simplify fallback execution")
b0b7a7d24b66 ("io_uring: return back links tw run optimisation")
c3f4d39ee4bc ("io_uring: optimise deferred tw execution")
360173ab9e1a ("io_uring: move io_run_local_work_locked")
3e5655552a82 ("io_uring: mark io_run_local_work static")
2f413956cc8a ("io_uring: don't set TASK_RUNNING in local tw runner")
140102ae9a9f ("io_uring: move defer tw task checks")
1414d6298584 ("io_uring: kill io_run_task_work_ctx")
f36ba6cf1ab6 ("io_uring: don't iterate cq wait fast path")
0c4fe008c9cb ("io_uring: rearrange defer list checks")
6e5aedb9324a ("io_uring/poll: attempt request issue after racy poll wakeup")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1bfed23349716a7811645336a7ce42c4b8f250bc Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 11 Aug 2023 13:53:41 +0100
Subject: [PATCH] io_uring/net: don't overflow multishot accept
Don't allow overflowing multishot accept CQEs, we want to limit
the grows of the overflow list.
Cc: stable(a)vger.kernel.org
Fixes: 4e86a2c980137 ("io_uring: implement multishot mode for accept")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/7d0d749649244873772623dd7747966f516fe6e2.16917576…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index eb1f51ddcb23..1599493544a5 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1367,7 +1367,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
if (io_aux_cqe(req, issue_flags & IO_URING_F_COMPLETE_DEFER, ret,
- IORING_CQE_F_MORE, true))
+ IORING_CQE_F_MORE, false))
goto retry;
return -ECANCELED;
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1bfed23349716a7811645336a7ce42c4b8f250bc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090925-saline-prorate-1854@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
1bfed2334971 ("io_uring/net: don't overflow multishot accept")
d86eaed185e9 ("io_uring: cleanup io_aux_cqe() API")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1bfed23349716a7811645336a7ce42c4b8f250bc Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 11 Aug 2023 13:53:41 +0100
Subject: [PATCH] io_uring/net: don't overflow multishot accept
Don't allow overflowing multishot accept CQEs, we want to limit
the grows of the overflow list.
Cc: stable(a)vger.kernel.org
Fixes: 4e86a2c980137 ("io_uring: implement multishot mode for accept")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/7d0d749649244873772623dd7747966f516fe6e2.16917576…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index eb1f51ddcb23..1599493544a5 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1367,7 +1367,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
if (io_aux_cqe(req, issue_flags & IO_URING_F_COMPLETE_DEFER, ret,
- IORING_CQE_F_MORE, true))
+ IORING_CQE_F_MORE, false))
goto retry;
return -ECANCELED;
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x b2e74db55dd93d6db22a813c9a775b5dbf87c560
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090908-cursor-reshape-7963@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
b2e74db55dd9 ("io_uring/net: don't overflow multishot recv")
d86eaed185e9 ("io_uring: cleanup io_aux_cqe() API")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b2e74db55dd93d6db22a813c9a775b5dbf87c560 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 11 Aug 2023 13:53:42 +0100
Subject: [PATCH] io_uring/net: don't overflow multishot recv
Don't allow overflowing multishot recv CQEs, it might get out of
hand, hurt performance, and in the worst case scenario OOM the task.
Cc: stable(a)vger.kernel.org
Fixes: b3fdea6ecb55c ("io_uring: multishot recv")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/0b295634e8f1b71aa764c984608c22d85f88f75c.16917576…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index 1599493544a5..8c419c01a5db 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -642,7 +642,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
if (!mshot_finished) {
if (io_aux_cqe(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
- *ret, cflags | IORING_CQE_F_MORE, true)) {
+ *ret, cflags | IORING_CQE_F_MORE, false)) {
io_recv_prep_retry(req);
/* Known not-empty or unknown state, retry */
if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b2e74db55dd93d6db22a813c9a775b5dbf87c560
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090909-retool-handled-9615@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
b2e74db55dd9 ("io_uring/net: don't overflow multishot recv")
d86eaed185e9 ("io_uring: cleanup io_aux_cqe() API")
ea97f6c8558e ("io_uring: add support for multishot timeouts")
a282967c848f ("io_uring: encapsulate task_work state")
13bfa6f15d0b ("io_uring: remove extra tw trylocks")
9d2789ac9d60 ("block/io_uring: pass in issue_flags for uring_cmd task_work handling")
6bb30855560e ("io_uring: if a linked request has REQ_F_FORCE_ASYNC then run it async")
f58680085478 ("io_uring: add reschedule point to handle_tw_list()")
50470fc5723a ("io_uring: return normal tw run linking optimisation")
31f084b7b028 ("io_uring: simplify fallback execution")
b0b7a7d24b66 ("io_uring: return back links tw run optimisation")
c3f4d39ee4bc ("io_uring: optimise deferred tw execution")
360173ab9e1a ("io_uring: move io_run_local_work_locked")
3e5655552a82 ("io_uring: mark io_run_local_work static")
2f413956cc8a ("io_uring: don't set TASK_RUNNING in local tw runner")
140102ae9a9f ("io_uring: move defer tw task checks")
1414d6298584 ("io_uring: kill io_run_task_work_ctx")
f36ba6cf1ab6 ("io_uring: don't iterate cq wait fast path")
0c4fe008c9cb ("io_uring: rearrange defer list checks")
6e5aedb9324a ("io_uring/poll: attempt request issue after racy poll wakeup")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b2e74db55dd93d6db22a813c9a775b5dbf87c560 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 11 Aug 2023 13:53:42 +0100
Subject: [PATCH] io_uring/net: don't overflow multishot recv
Don't allow overflowing multishot recv CQEs, it might get out of
hand, hurt performance, and in the worst case scenario OOM the task.
Cc: stable(a)vger.kernel.org
Fixes: b3fdea6ecb55c ("io_uring: multishot recv")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/0b295634e8f1b71aa764c984608c22d85f88f75c.16917576…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index 1599493544a5..8c419c01a5db 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -642,7 +642,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
if (!mshot_finished) {
if (io_aux_cqe(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
- *ret, cflags | IORING_CQE_F_MORE, true)) {
+ *ret, cflags | IORING_CQE_F_MORE, false)) {
io_recv_prep_retry(req);
/* Known not-empty or unknown state, retry */
if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x dc314886cb3d0e4ab2858003e8de2917f8a3ccbd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090938-kilometer-unpinned-f831@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
dc314886cb3d ("io_uring: break iopolling on signal")
ed29b0b4fd83 ("io_uring: move to separate directory")
5ba3c874eb8a ("io_uring: make io_do_iopoll return number of reqs")
87a115fb715b ("io_uring: force_nonspin")
b688f11e86c9 ("io_uring: utilize the io batching infrastructure for more efficient polled IO")
5a72e899ceb4 ("block: add a struct io_comp_batch argument to fops->iopoll()")
013a7f954381 ("block: provide helpers for rq_list manipulation")
afd7de03c526 ("block: remove some blk_mq_hw_ctx debugfs entries")
3e08773c3841 ("block: switch polling to be bio based")
6ce913fe3eee ("block: rename REQ_HIPRI to REQ_POLLED")
d729cf9acb93 ("io_uring: don't sleep when polling for I/O")
ef99b2d37666 ("block: replace the spin argument to blk_iopoll with a flags argument")
28a1ae6b9dab ("blk-mq: remove blk_qc_t_valid")
efbabbe121f9 ("blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal")
c6699d6fe0ff ("blk-mq: factor out a "classic" poll helper")
f70299f0d58e ("blk-mq: factor out a blk_qc_to_hctx helper")
71fc3f5e2c00 ("block: don't try to poll multi-bio I/Os in __blkdev_direct_IO")
4c928904ff77 ("block: move CONFIG_BLOCK guard to top Makefile")
349302da8352 ("block: improve batched tag allocation")
0f38d7664615 ("blk-mq: cleanup blk_mq_submit_bio")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dc314886cb3d0e4ab2858003e8de2917f8a3ccbd Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 9 Aug 2023 16:20:21 +0100
Subject: [PATCH] io_uring: break iopolling on signal
Don't keep spinning iopoll with a signal set. It'll eventually return
back, e.g. by virtue of need_resched(), but it's not a nice user
experience.
Cc: stable(a)vger.kernel.org
Fixes: def596e9557c9 ("io_uring: support for IO polling")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/eeba551e82cad12af30c3220125eb6cb244cc94c.16915943…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d0888907527d..ad4ffd3a876f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1673,6 +1673,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
break;
nr_events += ret;
ret = 0;
+
+ if (task_sigpending(current))
+ return -EINTR;
} while (nr_events < min && !need_resched());
return ret;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x dc314886cb3d0e4ab2858003e8de2917f8a3ccbd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090938-retriever-jaws-74d8@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
dc314886cb3d ("io_uring: break iopolling on signal")
ed29b0b4fd83 ("io_uring: move to separate directory")
5ba3c874eb8a ("io_uring: make io_do_iopoll return number of reqs")
87a115fb715b ("io_uring: force_nonspin")
b688f11e86c9 ("io_uring: utilize the io batching infrastructure for more efficient polled IO")
5a72e899ceb4 ("block: add a struct io_comp_batch argument to fops->iopoll()")
013a7f954381 ("block: provide helpers for rq_list manipulation")
afd7de03c526 ("block: remove some blk_mq_hw_ctx debugfs entries")
3e08773c3841 ("block: switch polling to be bio based")
6ce913fe3eee ("block: rename REQ_HIPRI to REQ_POLLED")
d729cf9acb93 ("io_uring: don't sleep when polling for I/O")
ef99b2d37666 ("block: replace the spin argument to blk_iopoll with a flags argument")
28a1ae6b9dab ("blk-mq: remove blk_qc_t_valid")
efbabbe121f9 ("blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal")
c6699d6fe0ff ("blk-mq: factor out a "classic" poll helper")
f70299f0d58e ("blk-mq: factor out a blk_qc_to_hctx helper")
71fc3f5e2c00 ("block: don't try to poll multi-bio I/Os in __blkdev_direct_IO")
4c928904ff77 ("block: move CONFIG_BLOCK guard to top Makefile")
349302da8352 ("block: improve batched tag allocation")
0f38d7664615 ("blk-mq: cleanup blk_mq_submit_bio")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dc314886cb3d0e4ab2858003e8de2917f8a3ccbd Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 9 Aug 2023 16:20:21 +0100
Subject: [PATCH] io_uring: break iopolling on signal
Don't keep spinning iopoll with a signal set. It'll eventually return
back, e.g. by virtue of need_resched(), but it's not a nice user
experience.
Cc: stable(a)vger.kernel.org
Fixes: def596e9557c9 ("io_uring: support for IO polling")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/eeba551e82cad12af30c3220125eb6cb244cc94c.16915943…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d0888907527d..ad4ffd3a876f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1673,6 +1673,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
break;
nr_events += ret;
ret = 0;
+
+ if (task_sigpending(current))
+ return -EINTR;
} while (nr_events < min && !need_resched());
return ret;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 915d975b2ffa58a14bfcf16fafe00c41315949ff
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090947-ream-vagrantly-7b88@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
915d975b2ffa ("net: deal with integer overflows in kmalloc_reserve()")
559ae55cfc33 ("net: skbuff: remove special handling for SLOB")
880ce5f20033 ("net: avoid skb end_offset change in __skb_unclone_keeptruesize()")
0b34d68049b0 ("net: enable usercopy for skb_small_head_cache")
bf9f1baa279f ("net: add dedicated kmem_cache for typical/small skb->head")
5c0e820cbbbe ("net: factorize code in kmalloc_reserve()")
65998d2bf857 ("net: remove osize variable in __alloc_skb()")
115f1a5c42bd ("net: add SKB_HEAD_ALIGN() helper")
12d6c1d3a2ad ("skbuff: Proactively round up to kmalloc bucket size")
4727bab4e9bb ("net: skb: move skb_pp_recycle() to skbuff.c")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 915d975b2ffa58a14bfcf16fafe00c41315949ff Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Thu, 31 Aug 2023 18:37:50 +0000
Subject: [PATCH] net: deal with integer overflows in kmalloc_reserve()
Blamed commit changed:
ptr = kmalloc(size);
if (ptr)
size = ksize(ptr);
to:
size = kmalloc_size_roundup(size);
ptr = kmalloc(size);
This allowed various crash as reported by syzbot [1]
and Kyle Zeng.
Problem is that if @size is bigger than 0x80000001,
kmalloc_size_roundup(size) returns 2^32.
kmalloc_reserve() uses a 32bit variable (obj_size),
so 2^32 is truncated to 0.
kmalloc(0) returns ZERO_SIZE_PTR which is not handled by
skb allocations.
Following trace can be triggered if a netdev->mtu is set
close to 0x7fffffff
We might in the future limit netdev->mtu to more sensible
limit (like KMALLOC_MAX_SIZE).
This patch is based on a syzbot report, and also a report
and tentative fix from Kyle Zeng.
[1]
BUG: KASAN: user-memory-access in __build_skb_around net/core/skbuff.c:294 [inline]
BUG: KASAN: user-memory-access in __alloc_skb+0x3c4/0x6e8 net/core/skbuff.c:527
Write of size 32 at addr 00000000fffffd10 by task syz-executor.4/22554
CPU: 1 PID: 22554 Comm: syz-executor.4 Not tainted 6.1.39-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023
Call trace:
dump_backtrace+0x1c8/0x1f4 arch/arm64/kernel/stacktrace.c:279
show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:286
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x120/0x1a0 lib/dump_stack.c:106
print_report+0xe4/0x4b4 mm/kasan/report.c:398
kasan_report+0x150/0x1ac mm/kasan/report.c:495
kasan_check_range+0x264/0x2a4 mm/kasan/generic.c:189
memset+0x40/0x70 mm/kasan/shadow.c:44
__build_skb_around net/core/skbuff.c:294 [inline]
__alloc_skb+0x3c4/0x6e8 net/core/skbuff.c:527
alloc_skb include/linux/skbuff.h:1316 [inline]
igmpv3_newpack+0x104/0x1088 net/ipv4/igmp.c:359
add_grec+0x81c/0x1124 net/ipv4/igmp.c:534
igmpv3_send_cr net/ipv4/igmp.c:667 [inline]
igmp_ifc_timer_expire+0x1b0/0x1008 net/ipv4/igmp.c:810
call_timer_fn+0x1c0/0x9f0 kernel/time/timer.c:1474
expire_timers kernel/time/timer.c:1519 [inline]
__run_timers+0x54c/0x710 kernel/time/timer.c:1790
run_timer_softirq+0x28/0x4c kernel/time/timer.c:1803
_stext+0x380/0xfbc
____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:79
call_on_irq_stack+0x24/0x4c arch/arm64/kernel/entry.S:891
do_softirq_own_stack+0x20/0x2c arch/arm64/kernel/irq.c:84
invoke_softirq kernel/softirq.c:437 [inline]
__irq_exit_rcu+0x1c0/0x4cc kernel/softirq.c:683
irq_exit_rcu+0x14/0x78 kernel/softirq.c:695
el0_interrupt+0x7c/0x2e0 arch/arm64/kernel/entry-common.c:717
__el0_irq_handler_common+0x18/0x24 arch/arm64/kernel/entry-common.c:724
el0t_64_irq_handler+0x10/0x1c arch/arm64/kernel/entry-common.c:729
el0t_64_irq+0x1a0/0x1a4 arch/arm64/kernel/entry.S:584
Fixes: 12d6c1d3a2ad ("skbuff: Proactively round up to kmalloc bucket size")
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Reported-by: Kyle Zeng <zengyhkyle(a)gmail.com>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 17caf4ea67da..4eaf7ed0d1f4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -550,7 +550,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
bool *pfmemalloc)
{
bool ret_pfmemalloc = false;
- unsigned int obj_size;
+ size_t obj_size;
void *obj;
obj_size = SKB_HEAD_ALIGN(*size);
@@ -567,7 +567,13 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
goto out;
}
- *size = obj_size = kmalloc_size_roundup(obj_size);
+
+ obj_size = kmalloc_size_roundup(obj_size);
+ /* The following cast might truncate high-order bits of obj_size, this
+ * is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
+ */
+ *size = (unsigned int)obj_size;
+
/*
* Try a regular allocation, when that fails and we're not entitled
* to the reserves, fail.
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
To make handling BIG and LITTLE endian better the offset/len of dynamic
fields of the synthetic events was changed into a structure of:
struct trace_dynamic_info {
#ifdef CONFIG_CPU_BIG_ENDIAN
u16 offset;
u16 len;
#else
u16 len;
u16 offset;
#endif
};
to replace the manual changes of:
data_offset = offset & 0xffff;
data_offest = len << 16;
But if you look closely, the above is:
<len> << 16 | offset
Which in little endian would be in memory:
offset_lo offset_hi len_lo len_hi
and in big endian:
len_hi len_lo offset_hi offset_lo
Which if broken into a structure would be:
struct trace_dynamic_info {
#ifdef CONFIG_CPU_BIG_ENDIAN
u16 len;
u16 offset;
#else
u16 offset;
u16 len;
#endif
};
Which is the opposite of what was defined.
Fix this and just to be safe also add "__packed".
Link: https://lore.kernel.org/all/20230908154417.5172e343@gandalf.local.home/
Link: https://lore.kernel.org/linux-trace-kernel/20230908163929.2c25f3dc@gandalf.…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Fixes: ddeea494a16f3 ("tracing/synthetic: Use union instead of casts")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
include/linux/trace_events.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 12f875e9e69a..21ae37e49319 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -62,13 +62,13 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);
/* Used to find the offset and length of dynamic fields in trace events */
struct trace_dynamic_info {
#ifdef CONFIG_CPU_BIG_ENDIAN
- u16 offset;
u16 len;
+ u16 offset;
#else
- u16 len;
u16 offset;
+ u16 len;
#endif
-};
+} __packed;
/*
* The trace entry - the most basic unit of tracing. This is what
--
2.40.1
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The event inject files add events for a specific trace array. For an
instance, if the file is opened and the instance is deleted, reading or
writing to the file will cause a use after free.
Up the ref count of the trace_array when a event inject file is opened.
Link: https://lkml.kernel.org/r/20230907024804.292337868@goodmis.org
Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Zheng Yejian <zhengyejian1(a)huawei.com>
Fixes: 6c3edaf9fd6a ("tracing: Introduce trace event injection")
Tested-by: Linux Kernel Functional Testing <lkft(a)linaro.org>
Tested-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace_events_inject.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
index abe805d471eb..8650562bdaa9 100644
--- a/kernel/trace/trace_events_inject.c
+++ b/kernel/trace/trace_events_inject.c
@@ -328,7 +328,8 @@ event_inject_read(struct file *file, char __user *buf, size_t size,
}
const struct file_operations event_inject_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_file_tr,
.read = event_inject_read,
.write = event_inject_write,
+ .release = tracing_release_file_tr,
};
--
2.40.1
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The function tracefs_create_dir() was missing a lockdown check and was
called by the RV code. This gave an inconsistent behavior of this function
returning success while other tracefs functions failed. This caused the
inode being freed by the wrong kmem_cache.
Link: https://lkml.kernel.org/r/20230905182711.692687042@goodmis.org
Link: https://lore.kernel.org/all/202309050916.58201dc6-oliver.sang@intel.com/
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Ajay Kaher <akaher(a)vmware.com>
Cc: Ching-lin Yu <chinglinyu(a)google.com>
Fixes: bf8e602186ec4 ("tracing: Do not create tracefs files if tracefs lockdown is in effect")
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
fs/tracefs/inode.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index de5b72216b1a..3b8dd938b1c8 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -673,6 +673,9 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
*/
struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
{
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
return __create_dir(name, parent, &simple_dir_inode_operations);
}
--
2.40.1
From: Mark Brown <broonie(a)kernel.org>
[ Upstream commit c51592a95f360aabf2b8a5691c550e1749dc41eb ]
The sun8i thermal driver reads calibration data via the nvmem API at
startup, updating the device configuration and not referencing the data
again. Rather than explicitly freeing the nvmem data the driver relies
on devm_ to release it, even though the data is never referenced again.
The allocation is still tracked so it's not leaked but this is notable
when looking at the code and is a little wasteful so let's instead
explicitly free the nvmem after we're done with it.
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Acked-by: Jernej Skrabec <jernej.skrabec(a)gmail.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
Link: https://lore.kernel.org/r/20230719-thermal-sun8i-free-nvmem-v1-1-f553d5afef…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/thermal/sun8i_thermal.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index e053b06280172..6a0e809174731 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -285,7 +285,7 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev)
size_t callen;
int ret = 0;
- calcell = devm_nvmem_cell_get(dev, "calibration");
+ calcell = nvmem_cell_get(dev, "calibration");
if (IS_ERR(calcell)) {
if (PTR_ERR(calcell) == -EPROBE_DEFER)
return -EPROBE_DEFER;
@@ -315,6 +315,8 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev)
kfree(caldata);
out:
+ if (!IS_ERR(calcell))
+ nvmem_cell_put(calcell);
return ret;
}
--
2.40.1
From: John Ogness <john.ogness(a)linutronix.de>
[ Upstream commit 696ffaf50e1f8dbc66223ff614473f945f5fb8d8 ]
Printing to consoles can be deferred for several reasons:
- explicitly with printk_deferred()
- printk() in NMI context
- recursive printk() calls
The current implementation is not consistent. For printk_deferred(),
irq work is scheduled twice. For NMI und recursive, panic CPU
suppression and caller delays are not properly enforced.
Correct these inconsistencies by consolidating the deferred printing
code so that vprintk_deferred() is the top-level function for
deferred printing and vprintk_emit() will perform whichever irq_work
queueing is appropriate.
Also add kerneldoc for wake_up_klogd() and defer_console_output() to
clarify their differences and appropriate usage.
Signed-off-by: John Ogness <john.ogness(a)linutronix.de>
Reviewed-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Reviewed-by: Petr Mladek <pmladek(a)suse.com>
Signed-off-by: Petr Mladek <pmladek(a)suse.com>
Link: https://lore.kernel.org/r/20230717194607.145135-6-john.ogness@linutronix.de
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/printk/printk.c | 35 ++++++++++++++++++++++++++++-------
kernel/printk/printk_safe.c | 9 ++-------
2 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 8d856b7c2e5af..8b110b245d92c 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2269,7 +2269,11 @@ asmlinkage int vprintk_emit(int facility, int level,
preempt_enable();
}
- wake_up_klogd();
+ if (in_sched)
+ defer_console_output();
+ else
+ wake_up_klogd();
+
return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);
@@ -3277,11 +3281,33 @@ static void __wake_up_klogd(int val)
preempt_enable();
}
+/**
+ * wake_up_klogd - Wake kernel logging daemon
+ *
+ * Use this function when new records have been added to the ringbuffer
+ * and the console printing of those records has already occurred or is
+ * known to be handled by some other context. This function will only
+ * wake the logging daemon.
+ *
+ * Context: Any context.
+ */
void wake_up_klogd(void)
{
__wake_up_klogd(PRINTK_PENDING_WAKEUP);
}
+/**
+ * defer_console_output - Wake kernel logging daemon and trigger
+ * console printing in a deferred context
+ *
+ * Use this function when new records have been added to the ringbuffer,
+ * this context is responsible for console printing those records, but
+ * the current context is not allowed to perform the console printing.
+ * Trigger an irq_work context to perform the console printing. This
+ * function also wakes the logging daemon.
+ *
+ * Context: Any context.
+ */
void defer_console_output(void)
{
/*
@@ -3298,12 +3324,7 @@ void printk_trigger_flush(void)
int vprintk_deferred(const char *fmt, va_list args)
{
- int r;
-
- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
- defer_console_output();
-
- return r;
+ return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
}
int _printk_deferred(const char *fmt, ...)
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index ef0f9a2044da1..6d10927a07d83 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -38,13 +38,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
* Use the main logbuf even in NMI. But avoid calling console
* drivers that might have their own locks.
*/
- if (this_cpu_read(printk_context) || in_nmi()) {
- int len;
-
- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
- defer_console_output();
- return len;
- }
+ if (this_cpu_read(printk_context) || in_nmi())
+ return vprintk_deferred(fmt, args);
/* No obstacles. */
return vprintk_default(fmt, args);
--
2.40.1
From: John Ogness <john.ogness(a)linutronix.de>
[ Upstream commit 51a1d258e50e03a0216bf42b6af9ff34ec402ac1 ]
When in a panic situation, non-panic CPUs should avoid holding the
console lock so as not to contend with the panic CPU. This is already
implemented with abandon_console_lock_in_panic(), which is checked
after each printed line. However, non-panic CPUs should also avoid
trying to acquire the console lock during a panic.
Modify console_trylock() to fail and console_lock() to block() when
called from a non-panic CPU during a panic.
Signed-off-by: John Ogness <john.ogness(a)linutronix.de>
Reviewed-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Reviewed-by: Petr Mladek <pmladek(a)suse.com>
Signed-off-by: Petr Mladek <pmladek(a)suse.com>
Link: https://lore.kernel.org/r/20230717194607.145135-4-john.ogness@linutronix.de
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/printk/printk.c | 45 ++++++++++++++++++++++++------------------
1 file changed, 26 insertions(+), 19 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index e4f1e7478b521..4b9429f3fd6d8 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2552,6 +2552,25 @@ static int console_cpu_notify(unsigned int cpu)
return 0;
}
+/*
+ * Return true when this CPU should unlock console_sem without pushing all
+ * messages to the console. This reduces the chance that the console is
+ * locked when the panic CPU tries to use it.
+ */
+static bool abandon_console_lock_in_panic(void)
+{
+ if (!panic_in_progress())
+ return false;
+
+ /*
+ * We can use raw_smp_processor_id() here because it is impossible for
+ * the task to be migrated to the panic_cpu, or away from it. If
+ * panic_cpu has already been set, and we're not currently executing on
+ * that CPU, then we never will be.
+ */
+ return atomic_read(&panic_cpu) != raw_smp_processor_id();
+}
+
/**
* console_lock - lock the console system for exclusive use.
*
@@ -2564,6 +2583,10 @@ void console_lock(void)
{
might_sleep();
+ /* On panic, the console_lock must be left to the panic cpu. */
+ while (abandon_console_lock_in_panic())
+ msleep(1000);
+
down_console_sem();
if (console_suspended)
return;
@@ -2582,6 +2605,9 @@ EXPORT_SYMBOL(console_lock);
*/
int console_trylock(void)
{
+ /* On panic, the console_lock must be left to the panic cpu. */
+ if (abandon_console_lock_in_panic())
+ return 0;
if (down_trylock_console_sem())
return 0;
if (console_suspended) {
@@ -2600,25 +2626,6 @@ int is_console_locked(void)
}
EXPORT_SYMBOL(is_console_locked);
-/*
- * Return true when this CPU should unlock console_sem without pushing all
- * messages to the console. This reduces the chance that the console is
- * locked when the panic CPU tries to use it.
- */
-static bool abandon_console_lock_in_panic(void)
-{
- if (!panic_in_progress())
- return false;
-
- /*
- * We can use raw_smp_processor_id() here because it is impossible for
- * the task to be migrated to the panic_cpu, or away from it. If
- * panic_cpu has already been set, and we're not currently executing on
- * that CPU, then we never will be.
- */
- return atomic_read(&panic_cpu) != raw_smp_processor_id();
-}
-
/*
* Check if the given console is currently capable and allowed to print
* records.
--
2.40.1
From: John Ogness <john.ogness(a)linutronix.de>
[ Upstream commit 7b23a66db55ed0a55b020e913f0d6f6d52a1ad2c ]
A semaphore is not NMI-safe, even when using down_trylock(). Both
down_trylock() and up() are using internal spinlocks and up()
might even call wake_up_process().
In the panic() code path it gets even worse because the internal
spinlocks of the semaphore may have been taken by a CPU that has
been stopped.
To reduce the risk of deadlocks caused by the console semaphore in
the panic path, make the following changes:
- First check if any consoles have implemented the unblank()
callback. If not, then there is no reason to take the console
semaphore anyway. (This check is also useful for the non-panic
path since the locking/unlocking of the console lock can be
quite expensive due to console printing.)
- If the panic path is in NMI context, bail out without attempting
to take the console semaphore or calling any unblank() callbacks.
Bailing out is acceptable because console_unblank() would already
bail out if the console semaphore is contended. The alternative of
ignoring the console semaphore and calling the unblank() callbacks
anyway is a bad idea because these callbacks are also not NMI-safe.
If consoles with unblank() callbacks exist and console_unblank() is
called from a non-NMI panic context, it will still attempt a
down_trylock(). This could still result in a deadlock if one of the
stopped CPUs is holding the semaphore internal spinlock. But this
is a risk that the kernel has been (and continues to be) willing
to take.
Signed-off-by: John Ogness <john.ogness(a)linutronix.de>
Reviewed-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Reviewed-by: Petr Mladek <pmladek(a)suse.com>
Signed-off-by: Petr Mladek <pmladek(a)suse.com>
Link: https://lore.kernel.org/r/20230717194607.145135-3-john.ogness@linutronix.de
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/printk/printk.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 6a333adce3b33..653ad62ded417 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3045,9 +3045,27 @@ EXPORT_SYMBOL(console_conditional_schedule);
void console_unblank(void)
{
+ bool found_unblank = false;
struct console *c;
int cookie;
+ /*
+ * First check if there are any consoles implementing the unblank()
+ * callback. If not, there is no reason to continue and take the
+ * console lock, which in particular can be dangerous if
+ * @oops_in_progress is set.
+ */
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
+ if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) {
+ found_unblank = true;
+ break;
+ }
+ }
+ console_srcu_read_unlock(cookie);
+ if (!found_unblank)
+ return;
+
/*
* Stop console printing because the unblank() callback may
* assume the console is not within its write() callback.
@@ -3056,6 +3074,16 @@ void console_unblank(void)
* In that case, attempt a trylock as best-effort.
*/
if (oops_in_progress) {
+ /* Semaphores are not NMI-safe. */
+ if (in_nmi())
+ return;
+
+ /*
+ * Attempting to trylock the console lock can deadlock
+ * if another CPU was stopped while modifying the
+ * semaphore. "Hope and pray" that this is not the
+ * current situation.
+ */
if (down_trylock_console_sem() != 0)
return;
} else
--
2.40.1
When upreving llvm I realised that kexec stopped working on my test
platform.
The reason seems to be that due to PGO there are multiple .text sections
on the purgatory, and kexec does not supports that.
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
---
Changes in v7:
- Fix $SUBJECT of riscv patch
- Rename PGO as Profile-guided optimization
- Link to v6: https://lore.kernel.org/r/20230321-kexec_clang16-v6-0-a2255e81ab45@chromium…
Changes in v6:
- Replace linker script with Makefile rule. Thanks Nick
- Link to v5: https://lore.kernel.org/r/20230321-kexec_clang16-v5-0-5563bf7c4173@chromium…
Changes in v5:
- Add warning when multiple text sections are found. Thanks Simon!
- Add Fixes tag.
- Link to v4: https://lore.kernel.org/r/20230321-kexec_clang16-v4-0-1340518f98e9@chromium…
Changes in v4:
- Add Cc: stable
- Add linker script for x86
- Add a warning when the kernel image has overlapping sections.
- Link to v3: https://lore.kernel.org/r/20230321-kexec_clang16-v3-0-5f016c8d0e87@chromium…
Changes in v3:
- Fix initial value. Thanks Ross!
- Link to v2: https://lore.kernel.org/r/20230321-kexec_clang16-v2-0-d10e5d517869@chromium…
Changes in v2:
- Fix if condition. Thanks Steven!.
- Update Philipp email. Thanks Baoquan.
- Link to v1: https://lore.kernel.org/r/20230321-kexec_clang16-v1-0-a768fc2c7c4d@chromium…
---
Ricardo Ribalda (4):
kexec: Support purgatories with .text.hot sections
x86/purgatory: Remove PGO flags
powerpc/purgatory: Remove PGO flags
riscv/purgatory: Remove PGO flags
arch/powerpc/purgatory/Makefile | 5 +++++
arch/riscv/purgatory/Makefile | 5 +++++
arch/x86/purgatory/Makefile | 5 +++++
kernel/kexec_file.c | 14 +++++++++++++-
4 files changed, 28 insertions(+), 1 deletion(-)
---
base-commit: 58390c8ce1bddb6c623f62e7ed36383e7fa5c02f
change-id: 20230321-kexec_clang16-4510c23d129c
Best regards,
--
Ricardo Ribalda Delgado <ribalda(a)chromium.org>
file_remove_privs might call into notify_change(), which
requires to hold an exclusive lock.
Fixes: e9adabb9712e ("btrfs: use shared lock for direct writes within EOF")
Cc: Christoph Hellwig <hch(a)infradead.org>
Cc: Goldwyn Rodrigues <rgoldwyn(a)suse.com>
Cc: Miklos Szeredi <miklos(a)szeredi.hu>
Cc: Dharmendra Singh <dsingh(a)ddn.com>
Cc: David Sterba <dsterba(a)suse.com>
Cc: linux-btrfs(a)vger.kernel.org
Cc: linux-fsdevel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Bernd Schubert <bschubert(a)ddn.com>
---
fs/btrfs/file.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fd03e689a6be..c4b304a2948e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1466,8 +1466,12 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_flags & IOCB_NOWAIT)
ilock_flags |= BTRFS_ILOCK_TRY;
- /* If the write DIO is within EOF, use a shared lock */
- if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode))
+ /* If the write DIO is within EOF, use a shared lock and also only
+ * if security bits will likely not be dropped. Either will need
+ * to be rechecked after the lock was acquired.
+ */
+ if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) &&
+ IS_NOSEC(inode))
ilock_flags |= BTRFS_ILOCK_SHARED;
relock:
@@ -1475,6 +1479,12 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (err < 0)
return err;
+ if (ilock_flags & BTRFS_ILOCK_SHARED && !IS_NOSEC(inode)) {
+ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
+ ilock_flags &= ~BTRFS_ILOCK_SHARED;
+ goto relock;
+ }
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
--
2.39.2
From: Jiri Pirko <jiri(a)nvidia.com>
[ Upstream commit 633d76ad01ad0321a1ace3e5cc4fed06753d7ac4 ]
The checks in question were introduced by:
commit 6b4db2e528f6 ("devlink: Fix use-after-free after a failed reload").
That fixed an issue of reload with mlxsw driver.
Back then, that was a valid fix, because there was a limitation
in place that prevented drivers from registering/unregistering params
when devlink instance was registered.
It was possible to do the fix differently by changing drivers to
register/unregister params in appropriate places making sure the ops
operate only on memory which is allocated and initialized. But that,
as a dependency, would require to remove the limitation mentioned above.
Eventually, this limitation was lifted by:
commit 1d18bb1a4ddd ("devlink: allow registering parameters after the instance")
Also, the alternative fix (which also fixed another issue) was done by:
commit 74cbc3c03c82 ("mlxsw: spectrum_acl_tcam: Move devlink param to TCAM code").
Therefore, the checks are no longer relevant. Each driver should make
sure to have the params registered only when the memory the ops
are working with is allocated and initialized.
So remove the checks.
Signed-off-by: Jiri Pirko <jiri(a)nvidia.com>
Reviewed-by: Ido Schimmel <idosch(a)nvidia.com>
Reviewed-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
net/core/devlink.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b4d7a7f749c18..db76c55e1a6d7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4413,7 +4413,7 @@ static int devlink_param_get(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->get || devlink->reload_failed)
+ if (!param->get)
return -EOPNOTSUPP;
return param->get(devlink, param->id, ctx);
}
@@ -4422,7 +4422,7 @@ static int devlink_param_set(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->set || devlink->reload_failed)
+ if (!param->set)
return -EOPNOTSUPP;
return param->set(devlink, param->id, ctx);
}
--
2.40.1
Hi,
Would you be interested in acquiring AWS re:Invent Attendees Data List 2023?
List contains: Company Name, Contact Name, First Name, Middle Name,
Last Name, Title, Address, Street, City, Zip code, State, Country,
Telephone, Email address and more,
No of Contacts: - 40,777
Cost: $1,977
Kind Regards,
Kimberly Taylor
Marketing Coordinator
From: Jiri Pirko <jiri(a)nvidia.com>
[ Upstream commit 633d76ad01ad0321a1ace3e5cc4fed06753d7ac4 ]
The checks in question were introduced by:
commit 6b4db2e528f6 ("devlink: Fix use-after-free after a failed reload").
That fixed an issue of reload with mlxsw driver.
Back then, that was a valid fix, because there was a limitation
in place that prevented drivers from registering/unregistering params
when devlink instance was registered.
It was possible to do the fix differently by changing drivers to
register/unregister params in appropriate places making sure the ops
operate only on memory which is allocated and initialized. But that,
as a dependency, would require to remove the limitation mentioned above.
Eventually, this limitation was lifted by:
commit 1d18bb1a4ddd ("devlink: allow registering parameters after the instance")
Also, the alternative fix (which also fixed another issue) was done by:
commit 74cbc3c03c82 ("mlxsw: spectrum_acl_tcam: Move devlink param to TCAM code").
Therefore, the checks are no longer relevant. Each driver should make
sure to have the params registered only when the memory the ops
are working with is allocated and initialized.
So remove the checks.
Signed-off-by: Jiri Pirko <jiri(a)nvidia.com>
Reviewed-by: Ido Schimmel <idosch(a)nvidia.com>
Reviewed-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
net/core/devlink.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b4dabe5d89f72..5bd6330ab4275 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2953,7 +2953,7 @@ static int devlink_param_get(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->get || devlink->reload_failed)
+ if (!param->get)
return -EOPNOTSUPP;
return param->get(devlink, param->id, ctx);
}
@@ -2962,7 +2962,7 @@ static int devlink_param_set(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->set || devlink->reload_failed)
+ if (!param->set)
return -EOPNOTSUPP;
return param->set(devlink, param->id, ctx);
}
--
2.40.1
From: Tuo Li <islituo(a)gmail.com>
[ Upstream commit 2e63972a2de14482d0eae1a03a73e379f1c3f44c ]
The variable crtc->state->event is often protected by the lock
crtc->dev->event_lock when is accessed. However, it is accessed as a
condition of an if statement in exynos_drm_crtc_atomic_disable() without
holding the lock:
if (crtc->state->event && !crtc->state->active)
However, if crtc->state->event is changed to NULL by another thread right
after the conditions of the if statement is checked to be true, a
null-pointer dereference can occur in drm_crtc_send_vblank_event():
e->pipe = pipe;
To fix this possible null-pointer dereference caused by data race, the
spin lock coverage is extended to protect the if statement as well as the
function call to drm_crtc_send_vblank_event().
Reported-by: BassCheck <bass(a)buaa.edu.cn>
Link: https://sites.google.com/view/basscheck/home
Signed-off-by: Tuo Li <islituo(a)gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Added relevant link.
Signed-off-by: Inki Dae <inki.dae(a)samsung.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/exynos/exynos_drm_crtc.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index 2696289ecc78f..b3e23ace5869c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -43,13 +43,12 @@ static void exynos_drm_crtc_atomic_disable(struct drm_crtc *crtc,
if (exynos_crtc->ops->disable)
exynos_crtc->ops->disable(exynos_crtc);
+ spin_lock_irq(&crtc->dev->event_lock);
if (crtc->state->event && !crtc->state->active) {
- spin_lock_irq(&crtc->dev->event_lock);
drm_crtc_send_vblank_event(crtc, crtc->state->event);
- spin_unlock_irq(&crtc->dev->event_lock);
-
crtc->state->event = NULL;
}
+ spin_unlock_irq(&crtc->dev->event_lock);
}
static int exynos_crtc_atomic_check(struct drm_crtc *crtc,
--
2.40.1
From: Tuo Li <islituo(a)gmail.com>
[ Upstream commit 2e63972a2de14482d0eae1a03a73e379f1c3f44c ]
The variable crtc->state->event is often protected by the lock
crtc->dev->event_lock when is accessed. However, it is accessed as a
condition of an if statement in exynos_drm_crtc_atomic_disable() without
holding the lock:
if (crtc->state->event && !crtc->state->active)
However, if crtc->state->event is changed to NULL by another thread right
after the conditions of the if statement is checked to be true, a
null-pointer dereference can occur in drm_crtc_send_vblank_event():
e->pipe = pipe;
To fix this possible null-pointer dereference caused by data race, the
spin lock coverage is extended to protect the if statement as well as the
function call to drm_crtc_send_vblank_event().
Reported-by: BassCheck <bass(a)buaa.edu.cn>
Link: https://sites.google.com/view/basscheck/home
Signed-off-by: Tuo Li <islituo(a)gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Added relevant link.
Signed-off-by: Inki Dae <inki.dae(a)samsung.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/exynos/exynos_drm_crtc.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index 77ce78986408b..c10eea6db9a80 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -39,13 +39,12 @@ static void exynos_drm_crtc_atomic_disable(struct drm_crtc *crtc,
if (exynos_crtc->ops->disable)
exynos_crtc->ops->disable(exynos_crtc);
+ spin_lock_irq(&crtc->dev->event_lock);
if (crtc->state->event && !crtc->state->active) {
- spin_lock_irq(&crtc->dev->event_lock);
drm_crtc_send_vblank_event(crtc, crtc->state->event);
- spin_unlock_irq(&crtc->dev->event_lock);
-
crtc->state->event = NULL;
}
+ spin_unlock_irq(&crtc->dev->event_lock);
}
static int exynos_crtc_atomic_check(struct drm_crtc *crtc,
--
2.40.1
From: Marek Vasut <marex(a)denx.de>
[ Upstream commit 362fa8f6e6a05089872809f4465bab9d011d05b3 ]
This bridge seems to need the HSE packet, otherwise the image is
shifted up and corrupted at the bottom. This makes the bridge
work with Samsung DSIM on i.MX8MM and i.MX8MP.
Signed-off-by: Marek Vasut <marex(a)denx.de>
Reviewed-by: Sam Ravnborg <sam(a)ravnborg.org>
Signed-off-by: Robert Foss <rfoss(a)kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230615201902.566182-3-marex…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/bridge/tc358762.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/bridge/tc358762.c b/drivers/gpu/drm/bridge/tc358762.c
index 1bfdfc6affafe..21c57d3435687 100644
--- a/drivers/gpu/drm/bridge/tc358762.c
+++ b/drivers/gpu/drm/bridge/tc358762.c
@@ -224,7 +224,7 @@ static int tc358762_probe(struct mipi_dsi_device *dsi)
dsi->lanes = 1;
dsi->format = MIPI_DSI_FMT_RGB888;
dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
- MIPI_DSI_MODE_LPM;
+ MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_VIDEO_HSE;
ret = tc358762_parse_dt(ctx);
if (ret < 0)
--
2.40.1
From: Dongliang Mu <dzm91(a)hust.edu.cn>
[ Upstream commit 061115fbfb2ce5870c9a004d68dc63138c07c782 ]
Smatch reports:
ath_pci_probe() warn: argument 4 to %lx specifier is cast from pointer
ath_ahb_probe() warn: argument 4 to %lx specifier is cast from pointer
Fix it by modifying %lx to %p in the printk format string.
Note that with this change, the pointer address will be printed as a
hashed value by default. This is appropriate because the kernel
should not leak kernel pointers to user space in an informational
message. If someone wants to see the real address for debugging
purposes, this can be achieved with the no_hash_pointers kernel option.
Signed-off-by: Dongliang Mu <dzm91(a)hust.edu.cn>
Acked-by: Toke Høiland-Jørgensen <toke(a)toke.dk>
Signed-off-by: Kalle Valo <quic_kvalo(a)quicinc.com>
Link: https://lore.kernel.org/r/20230723040403.296723-1-dzm91@hust.edu.cn
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/wireless/ath/ath9k/ahb.c | 4 ++--
drivers/net/wireless/ath/ath9k/pci.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/wireless/ath/ath9k/ahb.c b/drivers/net/wireless/ath/ath9k/ahb.c
index 63019c3de034d..26023e3b4b9df 100644
--- a/drivers/net/wireless/ath/ath9k/ahb.c
+++ b/drivers/net/wireless/ath/ath9k/ahb.c
@@ -136,8 +136,8 @@ static int ath_ahb_probe(struct platform_device *pdev)
ah = sc->sc_ah;
ath9k_hw_name(ah, hw_name, sizeof(hw_name));
- wiphy_info(hw->wiphy, "%s mem=0x%lx, irq=%d\n",
- hw_name, (unsigned long)mem, irq);
+ wiphy_info(hw->wiphy, "%s mem=0x%p, irq=%d\n",
+ hw_name, mem, irq);
return 0;
diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
index 92b2dd396436a..cb3318bd3cad2 100644
--- a/drivers/net/wireless/ath/ath9k/pci.c
+++ b/drivers/net/wireless/ath/ath9k/pci.c
@@ -993,8 +993,8 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
sc->sc_ah->msi_reg = 0;
ath9k_hw_name(sc->sc_ah, hw_name, sizeof(hw_name));
- wiphy_info(hw->wiphy, "%s mem=0x%lx, irq=%d\n",
- hw_name, (unsigned long)sc->mem, pdev->irq);
+ wiphy_info(hw->wiphy, "%s mem=0x%p, irq=%d\n",
+ hw_name, sc->mem, pdev->irq);
return 0;
--
2.40.1
From: Jiri Pirko <jiri(a)nvidia.com>
[ Upstream commit 633d76ad01ad0321a1ace3e5cc4fed06753d7ac4 ]
The checks in question were introduced by:
commit 6b4db2e528f6 ("devlink: Fix use-after-free after a failed reload").
That fixed an issue of reload with mlxsw driver.
Back then, that was a valid fix, because there was a limitation
in place that prevented drivers from registering/unregistering params
when devlink instance was registered.
It was possible to do the fix differently by changing drivers to
register/unregister params in appropriate places making sure the ops
operate only on memory which is allocated and initialized. But that,
as a dependency, would require to remove the limitation mentioned above.
Eventually, this limitation was lifted by:
commit 1d18bb1a4ddd ("devlink: allow registering parameters after the instance")
Also, the alternative fix (which also fixed another issue) was done by:
commit 74cbc3c03c82 ("mlxsw: spectrum_acl_tcam: Move devlink param to TCAM code").
Therefore, the checks are no longer relevant. Each driver should make
sure to have the params registered only when the memory the ops
are working with is allocated and initialized.
So remove the checks.
Signed-off-by: Jiri Pirko <jiri(a)nvidia.com>
Reviewed-by: Ido Schimmel <idosch(a)nvidia.com>
Reviewed-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
net/devlink/leftover.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 790e61b2a9404..fa4705e509e3c 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -3982,7 +3982,7 @@ static int devlink_param_get(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->get || devlink->reload_failed)
+ if (!param->get)
return -EOPNOTSUPP;
return param->get(devlink, param->id, ctx);
}
@@ -3991,7 +3991,7 @@ static int devlink_param_set(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->set || devlink->reload_failed)
+ if (!param->set)
return -EOPNOTSUPP;
return param->set(devlink, param->id, ctx);
}
--
2.40.1
From: Abhishek Mainkar <abmainkar(a)nvidia.com>
[ Upstream commit 3a21ffdbc825e0919db9da0e27ee5ff2cc8a863e ]
ACPICA commit 90310989a0790032f5a0140741ff09b545af4bc5
According to the ACPI specification 19.6.134, no argument is required to be passed for ASL Timer instruction. For taking care of no argument, AML_NO_OPERAND_RESOLVE flag is added to ASL Timer instruction opcode.
When ASL timer instruction interpreted by ACPI interpreter, getting error. After adding AML_NO_OPERAND_RESOLVE flag to ASL Timer instruction opcode, issue is not observed.
=============================================================
UBSAN: array-index-out-of-bounds in acpica/dswexec.c:401:12 index -1 is out of range for type 'union acpi_operand_object *[9]'
CPU: 37 PID: 1678 Comm: cat Not tainted
6.0.0-dev-th500-6.0.y-1+bcf8c46459e407-generic-64k
HW name: NVIDIA BIOS v1.1.1-d7acbfc-dirty 12/19/2022 Call trace:
dump_backtrace+0xe0/0x130
show_stack+0x20/0x60
dump_stack_lvl+0x68/0x84
dump_stack+0x18/0x34
ubsan_epilogue+0x10/0x50
__ubsan_handle_out_of_bounds+0x80/0x90
acpi_ds_exec_end_op+0x1bc/0x6d8
acpi_ps_parse_loop+0x57c/0x618
acpi_ps_parse_aml+0x1e0/0x4b4
acpi_ps_execute_method+0x24c/0x2b8
acpi_ns_evaluate+0x3a8/0x4bc
acpi_evaluate_object+0x15c/0x37c
acpi_evaluate_integer+0x54/0x15c
show_power+0x8c/0x12c [acpi_power_meter]
Link: https://github.com/acpica/acpica/commit/90310989
Signed-off-by: Abhishek Mainkar <abmainkar(a)nvidia.com>
Signed-off-by: Bob Moore <robert.moore(a)intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/acpi/acpica/psopcode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index a402ad772a1e5..c561d35d441bb 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -637,7 +637,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
/* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
- AML_FLAGS_EXEC_0A_0T_1R),
+ AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
/* ACPI 5.0 opcodes */
--
2.40.1
From: Abhishek Mainkar <abmainkar(a)nvidia.com>
[ Upstream commit 3a21ffdbc825e0919db9da0e27ee5ff2cc8a863e ]
ACPICA commit 90310989a0790032f5a0140741ff09b545af4bc5
According to the ACPI specification 19.6.134, no argument is required to be passed for ASL Timer instruction. For taking care of no argument, AML_NO_OPERAND_RESOLVE flag is added to ASL Timer instruction opcode.
When ASL timer instruction interpreted by ACPI interpreter, getting error. After adding AML_NO_OPERAND_RESOLVE flag to ASL Timer instruction opcode, issue is not observed.
=============================================================
UBSAN: array-index-out-of-bounds in acpica/dswexec.c:401:12 index -1 is out of range for type 'union acpi_operand_object *[9]'
CPU: 37 PID: 1678 Comm: cat Not tainted
6.0.0-dev-th500-6.0.y-1+bcf8c46459e407-generic-64k
HW name: NVIDIA BIOS v1.1.1-d7acbfc-dirty 12/19/2022 Call trace:
dump_backtrace+0xe0/0x130
show_stack+0x20/0x60
dump_stack_lvl+0x68/0x84
dump_stack+0x18/0x34
ubsan_epilogue+0x10/0x50
__ubsan_handle_out_of_bounds+0x80/0x90
acpi_ds_exec_end_op+0x1bc/0x6d8
acpi_ps_parse_loop+0x57c/0x618
acpi_ps_parse_aml+0x1e0/0x4b4
acpi_ps_execute_method+0x24c/0x2b8
acpi_ns_evaluate+0x3a8/0x4bc
acpi_evaluate_object+0x15c/0x37c
acpi_evaluate_integer+0x54/0x15c
show_power+0x8c/0x12c [acpi_power_meter]
Link: https://github.com/acpica/acpica/commit/90310989
Signed-off-by: Abhishek Mainkar <abmainkar(a)nvidia.com>
Signed-off-by: Bob Moore <robert.moore(a)intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/acpi/acpica/psopcode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 8d7dc98bad17b..ca01e02af9cba 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -603,7 +603,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
/* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
- AML_FLAGS_EXEC_0A_0T_1R),
+ AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
/* ACPI 5.0 opcodes */
--
2.40.1
From: Abhishek Mainkar <abmainkar(a)nvidia.com>
[ Upstream commit 3a21ffdbc825e0919db9da0e27ee5ff2cc8a863e ]
ACPICA commit 90310989a0790032f5a0140741ff09b545af4bc5
According to the ACPI specification 19.6.134, no argument is required to be passed for ASL Timer instruction. For taking care of no argument, AML_NO_OPERAND_RESOLVE flag is added to ASL Timer instruction opcode.
When ASL timer instruction interpreted by ACPI interpreter, getting error. After adding AML_NO_OPERAND_RESOLVE flag to ASL Timer instruction opcode, issue is not observed.
=============================================================
UBSAN: array-index-out-of-bounds in acpica/dswexec.c:401:12 index -1 is out of range for type 'union acpi_operand_object *[9]'
CPU: 37 PID: 1678 Comm: cat Not tainted
6.0.0-dev-th500-6.0.y-1+bcf8c46459e407-generic-64k
HW name: NVIDIA BIOS v1.1.1-d7acbfc-dirty 12/19/2022 Call trace:
dump_backtrace+0xe0/0x130
show_stack+0x20/0x60
dump_stack_lvl+0x68/0x84
dump_stack+0x18/0x34
ubsan_epilogue+0x10/0x50
__ubsan_handle_out_of_bounds+0x80/0x90
acpi_ds_exec_end_op+0x1bc/0x6d8
acpi_ps_parse_loop+0x57c/0x618
acpi_ps_parse_aml+0x1e0/0x4b4
acpi_ps_execute_method+0x24c/0x2b8
acpi_ns_evaluate+0x3a8/0x4bc
acpi_evaluate_object+0x15c/0x37c
acpi_evaluate_integer+0x54/0x15c
show_power+0x8c/0x12c [acpi_power_meter]
Link: https://github.com/acpica/acpica/commit/90310989
Signed-off-by: Abhishek Mainkar <abmainkar(a)nvidia.com>
Signed-off-by: Bob Moore <robert.moore(a)intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/acpi/acpica/psopcode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 43775c5ce17c5..2f9b226ec4f63 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -603,7 +603,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
/* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
- AML_FLAGS_EXEC_0A_0T_1R),
+ AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
/* ACPI 5.0 opcodes */
--
2.40.1
From: Abhishek Mainkar <abmainkar(a)nvidia.com>
[ Upstream commit 3a21ffdbc825e0919db9da0e27ee5ff2cc8a863e ]
ACPICA commit 90310989a0790032f5a0140741ff09b545af4bc5
According to the ACPI specification 19.6.134, no argument is required to be passed for ASL Timer instruction. For taking care of no argument, AML_NO_OPERAND_RESOLVE flag is added to ASL Timer instruction opcode.
When ASL timer instruction interpreted by ACPI interpreter, getting error. After adding AML_NO_OPERAND_RESOLVE flag to ASL Timer instruction opcode, issue is not observed.
=============================================================
UBSAN: array-index-out-of-bounds in acpica/dswexec.c:401:12 index -1 is out of range for type 'union acpi_operand_object *[9]'
CPU: 37 PID: 1678 Comm: cat Not tainted
6.0.0-dev-th500-6.0.y-1+bcf8c46459e407-generic-64k
HW name: NVIDIA BIOS v1.1.1-d7acbfc-dirty 12/19/2022 Call trace:
dump_backtrace+0xe0/0x130
show_stack+0x20/0x60
dump_stack_lvl+0x68/0x84
dump_stack+0x18/0x34
ubsan_epilogue+0x10/0x50
__ubsan_handle_out_of_bounds+0x80/0x90
acpi_ds_exec_end_op+0x1bc/0x6d8
acpi_ps_parse_loop+0x57c/0x618
acpi_ps_parse_aml+0x1e0/0x4b4
acpi_ps_execute_method+0x24c/0x2b8
acpi_ns_evaluate+0x3a8/0x4bc
acpi_evaluate_object+0x15c/0x37c
acpi_evaluate_integer+0x54/0x15c
show_power+0x8c/0x12c [acpi_power_meter]
Link: https://github.com/acpica/acpica/commit/90310989
Signed-off-by: Abhishek Mainkar <abmainkar(a)nvidia.com>
Signed-off-by: Bob Moore <robert.moore(a)intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/acpi/acpica/psopcode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 3e80eb1a5f35c..590326c200a28 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -603,7 +603,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
/* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
- AML_FLAGS_EXEC_0A_0T_1R),
+ AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
/* ACPI 5.0 opcodes */
--
2.40.1
From: Abhishek Mainkar <abmainkar(a)nvidia.com>
[ Upstream commit 3a21ffdbc825e0919db9da0e27ee5ff2cc8a863e ]
ACPICA commit 90310989a0790032f5a0140741ff09b545af4bc5
According to the ACPI specification 19.6.134, no argument is required to be passed for ASL Timer instruction. For taking care of no argument, AML_NO_OPERAND_RESOLVE flag is added to ASL Timer instruction opcode.
When ASL timer instruction interpreted by ACPI interpreter, getting error. After adding AML_NO_OPERAND_RESOLVE flag to ASL Timer instruction opcode, issue is not observed.
=============================================================
UBSAN: array-index-out-of-bounds in acpica/dswexec.c:401:12 index -1 is out of range for type 'union acpi_operand_object *[9]'
CPU: 37 PID: 1678 Comm: cat Not tainted
6.0.0-dev-th500-6.0.y-1+bcf8c46459e407-generic-64k
HW name: NVIDIA BIOS v1.1.1-d7acbfc-dirty 12/19/2022 Call trace:
dump_backtrace+0xe0/0x130
show_stack+0x20/0x60
dump_stack_lvl+0x68/0x84
dump_stack+0x18/0x34
ubsan_epilogue+0x10/0x50
__ubsan_handle_out_of_bounds+0x80/0x90
acpi_ds_exec_end_op+0x1bc/0x6d8
acpi_ps_parse_loop+0x57c/0x618
acpi_ps_parse_aml+0x1e0/0x4b4
acpi_ps_execute_method+0x24c/0x2b8
acpi_ns_evaluate+0x3a8/0x4bc
acpi_evaluate_object+0x15c/0x37c
acpi_evaluate_integer+0x54/0x15c
show_power+0x8c/0x12c [acpi_power_meter]
Link: https://github.com/acpica/acpica/commit/90310989
Signed-off-by: Abhishek Mainkar <abmainkar(a)nvidia.com>
Signed-off-by: Bob Moore <robert.moore(a)intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/acpi/acpica/psopcode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index bef69e87a0a29..8c34c0ffb1d93 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -603,7 +603,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
/* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
- AML_FLAGS_EXEC_0A_0T_1R),
+ AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
/* ACPI 5.0 opcodes */
--
2.40.1
From: Abhishek Mainkar <abmainkar(a)nvidia.com>
[ Upstream commit 3a21ffdbc825e0919db9da0e27ee5ff2cc8a863e ]
ACPICA commit 90310989a0790032f5a0140741ff09b545af4bc5
According to the ACPI specification 19.6.134, no argument is required to be passed for ASL Timer instruction. For taking care of no argument, AML_NO_OPERAND_RESOLVE flag is added to ASL Timer instruction opcode.
When ASL timer instruction interpreted by ACPI interpreter, getting error. After adding AML_NO_OPERAND_RESOLVE flag to ASL Timer instruction opcode, issue is not observed.
=============================================================
UBSAN: array-index-out-of-bounds in acpica/dswexec.c:401:12 index -1 is out of range for type 'union acpi_operand_object *[9]'
CPU: 37 PID: 1678 Comm: cat Not tainted
6.0.0-dev-th500-6.0.y-1+bcf8c46459e407-generic-64k
HW name: NVIDIA BIOS v1.1.1-d7acbfc-dirty 12/19/2022 Call trace:
dump_backtrace+0xe0/0x130
show_stack+0x20/0x60
dump_stack_lvl+0x68/0x84
dump_stack+0x18/0x34
ubsan_epilogue+0x10/0x50
__ubsan_handle_out_of_bounds+0x80/0x90
acpi_ds_exec_end_op+0x1bc/0x6d8
acpi_ps_parse_loop+0x57c/0x618
acpi_ps_parse_aml+0x1e0/0x4b4
acpi_ps_execute_method+0x24c/0x2b8
acpi_ns_evaluate+0x3a8/0x4bc
acpi_evaluate_object+0x15c/0x37c
acpi_evaluate_integer+0x54/0x15c
show_power+0x8c/0x12c [acpi_power_meter]
Link: https://github.com/acpica/acpica/commit/90310989
Signed-off-by: Abhishek Mainkar <abmainkar(a)nvidia.com>
Signed-off-by: Bob Moore <robert.moore(a)intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/acpi/acpica/psopcode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 09029fe545f14..39e31030e5f49 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -603,7 +603,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
/* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
- AML_FLAGS_EXEC_0A_0T_1R),
+ AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
/* ACPI 5.0 opcodes */
--
2.40.1
From: Abhishek Mainkar <abmainkar(a)nvidia.com>
[ Upstream commit 3a21ffdbc825e0919db9da0e27ee5ff2cc8a863e ]
ACPICA commit 90310989a0790032f5a0140741ff09b545af4bc5
According to the ACPI specification 19.6.134, no argument is required to be passed for ASL Timer instruction. For taking care of no argument, AML_NO_OPERAND_RESOLVE flag is added to ASL Timer instruction opcode.
When ASL timer instruction interpreted by ACPI interpreter, getting error. After adding AML_NO_OPERAND_RESOLVE flag to ASL Timer instruction opcode, issue is not observed.
=============================================================
UBSAN: array-index-out-of-bounds in acpica/dswexec.c:401:12 index -1 is out of range for type 'union acpi_operand_object *[9]'
CPU: 37 PID: 1678 Comm: cat Not tainted
6.0.0-dev-th500-6.0.y-1+bcf8c46459e407-generic-64k
HW name: NVIDIA BIOS v1.1.1-d7acbfc-dirty 12/19/2022 Call trace:
dump_backtrace+0xe0/0x130
show_stack+0x20/0x60
dump_stack_lvl+0x68/0x84
dump_stack+0x18/0x34
ubsan_epilogue+0x10/0x50
__ubsan_handle_out_of_bounds+0x80/0x90
acpi_ds_exec_end_op+0x1bc/0x6d8
acpi_ps_parse_loop+0x57c/0x618
acpi_ps_parse_aml+0x1e0/0x4b4
acpi_ps_execute_method+0x24c/0x2b8
acpi_ns_evaluate+0x3a8/0x4bc
acpi_evaluate_object+0x15c/0x37c
acpi_evaluate_integer+0x54/0x15c
show_power+0x8c/0x12c [acpi_power_meter]
Link: https://github.com/acpica/acpica/commit/90310989
Signed-off-by: Abhishek Mainkar <abmainkar(a)nvidia.com>
Signed-off-by: Bob Moore <robert.moore(a)intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/acpi/acpica/psopcode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 09029fe545f14..39e31030e5f49 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -603,7 +603,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
/* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
- AML_FLAGS_EXEC_0A_0T_1R),
+ AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
/* ACPI 5.0 opcodes */
--
2.40.1
From: Roger Pau Monne <roger.pau(a)citrix.com>
The Processor _PDC buffer bits notify ACPI of the OS capabilities, and
so ACPI can adjust the return of other Processor methods taking the OS
capabilities into account.
When Linux is running as a Xen dom0, it's the hypervisor the entity
in charge of processor power management, and hence Xen needs to make
sure the capabilities reported in the _PDC buffer match the
capabilities of the driver in Xen.
Introduce a small helper to sanitize the buffer when running as Xen
dom0.
When Xen supports HWP, this serves as the equivalent of commit
a21211672c9a ("ACPI / processor: Request native thermal interrupt
handling via _OSC") to avoid SMM crashes. Xen will set bit 12 in the
_PDC bits and the _PDC call will apply it.
[ jandryuk: Mention Xen HWP's need. Move local variables ]
Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Jason Andryuk <jandryuk(a)gmail.com>
---
v2:
Move local variables in acpi_processor_eval_pdc() to reuse in both conditions.
---
arch/x86/include/asm/xen/hypervisor.h | 6 ++++++
arch/x86/xen/enlighten.c | 19 +++++++++++++++++++
drivers/acpi/processor_pdc.c | 22 ++++++++++++++++------
3 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 5fc35f889cd1..0f88a7e450d3 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -63,4 +63,10 @@ void __init xen_pvh_init(struct boot_params *boot_params);
void __init mem_map_via_hcall(struct boot_params *boot_params_p);
#endif
+#ifdef CONFIG_XEN_DOM0
+void xen_sanitize_pdc(uint32_t *buf);
+#else
+static inline void xen_sanitize_pdc(uint32_t *buf) { BUG(); }
+#endif
+
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b8db2148c07d..9f7fc11330a3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -346,3 +346,22 @@ void xen_arch_unregister_cpu(int num)
}
EXPORT_SYMBOL(xen_arch_unregister_cpu);
#endif
+
+#ifdef CONFIG_XEN_DOM0
+void xen_sanitize_pdc(uint32_t *buf)
+{
+ struct xen_platform_op op = {
+ .cmd = XENPF_set_processor_pminfo,
+ .interface_version = XENPF_INTERFACE_VERSION,
+ .u.set_pminfo.id = -1,
+ .u.set_pminfo.type = XEN_PM_PDC,
+ };
+ int ret;
+
+ set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
+ ret = HYPERVISOR_platform_op(&op);
+ if (ret)
+ pr_info("sanitize of _PDC buffer bits from Xen failed: %d\n",
+ ret);
+}
+#endif
diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 18fb04523f93..9393dd4a3158 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -122,6 +122,11 @@ static acpi_status
acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in)
{
acpi_status status = AE_OK;
+ union acpi_object *obj;
+ u32 *buffer = NULL;
+
+ obj = pdc_in->pointer;
+ buffer = (u32 *)(obj->buffer.pointer);
if (boot_option_idle_override == IDLE_NOMWAIT) {
/*
@@ -129,14 +134,19 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in)
* mode will be disabled in the parameter of _PDC object.
* Of course C1_FFH access mode will also be disabled.
*/
- union acpi_object *obj;
- u32 *buffer = NULL;
-
- obj = pdc_in->pointer;
- buffer = (u32 *)(obj->buffer.pointer);
buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH);
-
}
+
+ if (xen_initial_domain()) {
+ /*
+ * When Linux is running as Xen dom0, the hypervisor is the
+ * entity in charge of the processor power management, and so
+ * Xen needs to check the OS capabilities reported in the _PDC
+ * buffer matches what the hypervisor driver supports.
+ */
+ xen_sanitize_pdc(buffer);
+ }
+
status = acpi_evaluate_object(handle, "_PDC", pdc_in, NULL);
if (ACPI_FAILURE(status))
--
2.41.0
Hi Greg and Sasha,
Please consider applying the following commits to 6.1 (they all picked
cleanly for me):
630ae80ea1dd ("tools lib subcmd: Add install target")
77dce6890a2a ("tools lib subcmd: Make install_headers clearer")
5d890591db6b ("tools lib subcmd: Add dependency test to install_headers")
0e43662e61f2 ("tools/resolve_btfids: Use pkg-config to locate libelf")
af03299d8536 ("tools/resolve_btfids: Install subcmd headers")
13e07691a16f ("tools/resolve_btfids: Alter how HOSTCC is forced")
56a2df7615fa ("tools/resolve_btfids: Compile resolve_btfids as host program")
e0975ab92f24 ("tools/resolve_btfids: Tidy HOST_OVERRIDES")
2531ba0e4ae6 ("tools/resolve_btfids: Pass HOSTCFLAGS as EXTRA_CFLAGS to prepare targets")
edd75c802855 ("tools/resolve_btfids: Fix setting HOSTCFLAGS")
The most critical change is 13e07691a16f, which resolves a missing
EXTRA_CFLAGS to the libsubcmd build. Without that EXTRA_CFLAGS, the
Android hermetic toolchain kernel build fails on host distributions
using glibc 2.38 and newer. The majority of those commits are strictly
needed due to dependency/fixes requirements, the few that are not still
seem to be worth bringing in for ease of backporting the rest and do not
appear to cause any problems.
I proposed another solution downstream, which may be more palatable if
people have concerns about this list of changes and the risk of
regressions, but Ian seemed to have some concerns on that thread around
that path and suggested this series of backports instead:
https://android-review.googlesource.com/c/kernel/common/+/2745896
While the number of patches seems large, the final changes are pretty
well self-contained.
Cheers,
Nathan
On Thu, Sep 07, 2023 at 02:17:16PM +0000, Maximilian Heyne wrote:
> With the recent removal of vm_dev from devres its memory is only freed
> via the callback virtio_mmio_release_dev. However, this only takes
> effect after device_add is called by register_virtio_device. Until then
> it's an unmanaged resource and must be explicitly freed on error exit.
>
> This bug was discovered and resolved using Coverity Static Analysis
> Security Testing (SAST) by Synopsys, Inc.
>
> Cc: <stable(a)vger.kernel.org>
> Fixes: 55c91fedd03d ("virtio-mmio: don't break lifecycle of vm_dev")
> Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
Reviewed-by: Catalin Marinas <catalin.marinas(a)arm.com>
Tested-by: Catalin Marinas <catalin.marinas(a)arm.com>
Thanks.
--
Catalin
http://linux-ax25.org has been down for nearly a year. Its official
replacement is https://linux-ax25.in-berlin.de.
Update the documentation to point there instead. And acknowledge that
while the linux-hams list isn't entirely dead, it isn't what most would
call 'active'. Remove that word.
Link: https://marc.info/?m=166792551600315
Cc: stable(a)vger.kernel.org
Signed-off-by: Peter Lafreniere <peter(a)n8pjl.ca>
---
Documentation/networking/ax25.rst | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst
index f060cfb1445a..605e72c6c877 100644
--- a/Documentation/networking/ax25.rst
+++ b/Documentation/networking/ax25.rst
@@ -7,9 +7,9 @@ AX.25
To use the amateur radio protocols within Linux you will need to get a
suitable copy of the AX.25 Utilities. More detailed information about
AX.25, NET/ROM and ROSE, associated programs and utilities can be
-found on http://www.linux-ax25.org.
+found on https://linux-ax25.in-berlin.de.
-There is an active mailing list for discussing Linux amateur radio matters
+There is a mailing list for discussing Linux amateur radio matters
called linux-hams(a)vger.kernel.org. To subscribe to it, send a message to
majordomo(a)vger.kernel.org with the words "subscribe linux-hams" in the body
of the message, the subject field is ignored. You don't need to be
--
2.42.0
This is the start of the stable review cycle for the 6.5.2 release.
There are 34 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Sep 2023 18:29:29 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.5.2-rc1.…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.5.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.5.2-rc1
Mario Limonciello <mario.limonciello(a)amd.com>
pinctrl: amd: Don't show `Invalid config param` errors
Marco Felsch <m.felsch(a)pengutronix.de>
usb: typec: tcpci: clear the fault status bit
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
Brian Foster <bfoster(a)redhat.com>
tracing: Zero the pipe cpumask on alloc to avoid spurious -EBUSY
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
dt-bindings: sc16is7xx: Add property to change GPIO function
Badhri Jagan Sridharan <badhri(a)google.com>
tcpm: Avoid soft reset when partner does not support get_status
Juerg Haefliger <juerg.haefliger(a)canonical.com>
fsi: master-ast-cf: Add MODULE_FIRMWARE macro
Wang Ming <machel(a)vivo.com>
firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: fix bug when first setting GPIO direction
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: fix broken port 0 uart init
Johan Hovold <johan+linaro(a)kernel.org>
serial: qcom-geni: fix opp vote on shutdown
Sven Eckelmann <sven(a)narfation.org>
wifi: ath11k: Cleanup mac80211 references on failure during tx_complete
Sven Eckelmann <sven(a)narfation.org>
wifi: ath11k: Don't drop tx_status when peer cannot be found
Sascha Hauer <s.hauer(a)pengutronix.de>
wifi: rtw88: usb: kill and free rx urbs on probe failure
Deren Wu <deren.wu(a)mediatek.com>
wifi: mt76: mt7921: fix skb leak by txs missing in AMSDU
Deren Wu <deren.wu(a)mediatek.com>
wifi: mt76: mt7921: do not support one stream on secondary antenna only
Nam Cao <namcaov(a)gmail.com>
staging: rtl8712: fix race condition
Aaron Armstrong Skomra <aaron.skomra(a)wacom.com>
HID: wacom: remove the battery when the EKR is off
Xu Yang <xu.yang_2(a)nxp.com>
usb: chipidea: imx: improve logic if samsung,picophy-* parameter is 0
Luke Lu <luke.lu(a)libre.computer>
usb: dwc3: meson-g12a: do post init to fix broken usb after resumption
Takashi Iwai <tiwai(a)suse.de>
ALSA: usb-audio: Fix init call orders for UAC1
Slark Xiao <slark_xiao(a)163.com>
USB: serial: option: add FOXCONN T99W368/T99W373 product
Martin Kohn <m.kohn(a)welotec.com>
USB: serial: option: add Quectel EM05G variant (0x030e)
Christoph Hellwig <hch(a)lst.de>
modules: only allow symbol_get of EXPORT_SYMBOL_GPL modules
Christoph Hellwig <hch(a)lst.de>
rtc: ds1685: use EXPORT_SYMBOL_GPL for ds1685_rtc_poweroff
Christoph Hellwig <hch(a)lst.de>
net: enetc: use EXPORT_SYMBOL_GPL for enetc_phc_index
Christoph Hellwig <hch(a)lst.de>
mmc: au1xmmc: force non-modular build and remove symbol_get usage
Arnd Bergmann <arnd(a)arndb.de>
ARM: pxa: remove use of symbol_get()
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: reduce descriptor size if remaining bytes is less than request size
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: replace one-element array with flex-array member in struct smb2_ea_info
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: fix slub overflow in ksmbd_decode_ntlmssp_auth_blob()
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: fix wrong DataOffset validation of create context
Gao Xiang <xiang(a)kernel.org>
erofs: ensure that the post-EOF tails are all zeroed
Lang Yu <Lang.Yu(a)amd.com>
drm/amdgpu: correct vmhub index in GMC v10/11
-------------
Diffstat:
.../devicetree/bindings/serial/nxp,sc16is7xx.txt | 46 ++++++++++++++++++++++
Makefile | 4 +-
arch/arm/mach-pxa/sharpsl_pm.c | 2 -
arch/arm/mach-pxa/spitz.c | 14 +------
arch/mips/alchemy/devboards/db1000.c | 8 +---
arch/mips/alchemy/devboards/db1200.c | 19 +--------
arch/mips/alchemy/devboards/db1300.c | 10 +----
drivers/firmware/stratix10-svc.c | 2 +-
drivers/fsi/fsi-master-ast-cf.c | 1 +
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 +-
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 4 +-
drivers/hid/wacom.h | 1 +
drivers/hid/wacom_sys.c | 25 ++++++++++--
drivers/hid/wacom_wac.c | 1 +
drivers/hid/wacom_wac.h | 1 +
drivers/mmc/host/Kconfig | 5 ++-
drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2 +-
drivers/net/wireless/ath/ath11k/dp_tx.c | 10 ++---
.../net/wireless/mediatek/mt76/mt76_connac_mac.c | 7 +++-
drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 +-
drivers/net/wireless/realtek/rtw88/usb.c | 5 ++-
drivers/pinctrl/pinctrl-amd.c | 4 +-
drivers/rtc/rtc-ds1685.c | 2 +-
drivers/staging/rtl8712/os_intfs.c | 1 +
drivers/staging/rtl8712/usb_intf.c | 1 -
drivers/tty/serial/qcom_geni_serial.c | 5 +++
drivers/tty/serial/sc16is7xx.c | 17 +++++++-
drivers/usb/chipidea/ci_hdrc_imx.c | 10 +++--
drivers/usb/chipidea/usbmisc_imx.c | 6 ++-
drivers/usb/dwc3/dwc3-meson-g12a.c | 6 +++
drivers/usb/serial/option.c | 7 ++++
drivers/usb/typec/tcpm/tcpci.c | 4 ++
drivers/usb/typec/tcpm/tcpm.c | 7 ++++
fs/erofs/zdata.c | 2 +
fs/nilfs2/alloc.c | 3 +-
fs/nilfs2/inode.c | 7 +++-
fs/smb/server/auth.c | 3 ++
fs/smb/server/oplock.c | 2 +-
fs/smb/server/smb2pdu.c | 2 +-
fs/smb/server/smb2pdu.h | 2 +-
fs/smb/server/transport_rdma.c | 25 ++++++++----
include/linux/usb/tcpci.h | 1 +
kernel/module/main.c | 14 +++++--
kernel/trace/trace.c | 4 +-
sound/usb/stream.c | 11 +++++-
45 files changed, 220 insertions(+), 99 deletions(-)
From: Duoming Zhou <duoming(a)zju.edu.cn>
The watchdog_timer can schedule tx_timeout_task and watchdog_work
can also arm watchdog_timer. The process is shown below:
----------- timer schedules work ------------
cyttsp4_watchdog_timer() //timer handler
schedule_work(&cd->watchdog_work)
----------- work arms timer ------------
cyttsp4_watchdog_work() //workqueue callback function
cyttsp4_start_wd_timer()
mod_timer(&cd->watchdog_timer, ...)
Although del_timer_sync() and cancel_work_sync() are called in
cyttsp4_remove(), the timer and workqueue could still be rearmed.
As a result, the possible use after free bugs could happen. The
process is shown below:
(cleanup routine) | (timer and workqueue routine)
cyttsp4_remove() | cyttsp4_watchdog_timer() //timer
cyttsp4_stop_wd_timer() | schedule_work()
del_timer_sync() |
| cyttsp4_watchdog_work() //worker
| cyttsp4_start_wd_timer()
| mod_timer()
cancel_work_sync() |
| cyttsp4_watchdog_timer() //timer
| schedule_work()
del_timer_sync() |
kfree(cd) //FREE |
| cyttsp4_watchdog_work() // reschedule!
| cd-> //USE
This patch changes del_timer_sync() to timer_shutdown_sync(),
which could prevent rearming of the timer from the workqueue.
Cc: stable(a)vger.kernel.org
Fixes: CVE-2023-4134
Fixes: 17fb1563d69b ("Input: cyttsp4 - add core driver for Cypress TMA4XX touchscreen devices")
Signed-off-by: Duoming Zhou <duoming(a)zju.edu.cn>
Link: https://lore.kernel.org/r/20230421082919.8471-1-duoming@zju.edu.cn
Signed-off-by: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
Signed-off-by: Denis Efremov (Oracle) <efremov(a)linux.com>
---
I've only added Cc: stable and Fixes tag.
drivers/input/touchscreen/cyttsp4_core.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c
index dccbcb942fe5..f999265896f4 100644
--- a/drivers/input/touchscreen/cyttsp4_core.c
+++ b/drivers/input/touchscreen/cyttsp4_core.c
@@ -1263,9 +1263,8 @@ static void cyttsp4_stop_wd_timer(struct cyttsp4 *cd)
* Ensure we wait until the watchdog timer
* running on a different CPU finishes
*/
- del_timer_sync(&cd->watchdog_timer);
+ timer_shutdown_sync(&cd->watchdog_timer);
cancel_work_sync(&cd->watchdog_work);
- del_timer_sync(&cd->watchdog_timer);
}
static void cyttsp4_watchdog_timer(struct timer_list *t)
--
2.42.0
From: "Ritesh Harjani (IBM)" <ritesh.list(a)gmail.com>
[ Upstream commit eee2d2e6ea5550118170dbd5bb1316ceb38455fb ]
folio_next_index() returns an unsigned long value which left shifted
by PAGE_SHIFT could possibly cause an overflow on 32-bit system. Instead
use folio_pos(folio) + folio_size(folio), which does this correctly.
Suggested-by: Matthew Wilcox <willy(a)infradead.org>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list(a)gmail.com>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/iomap/buffered-io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index aa8967cca1a31..4dc4bbc4be10a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -932,7 +932,7 @@ static int iomap_write_delalloc_scan(struct inode *inode,
* the end of this data range, not the end of the folio.
*/
*punch_start_byte = min_t(loff_t, end_byte,
- folio_next_index(folio) << PAGE_SHIFT);
+ folio_pos(folio) + folio_size(folio));
}
/* move offset to start of next folio in range */
--
2.40.1
From: "Ritesh Harjani (IBM)" <ritesh.list(a)gmail.com>
[ Upstream commit eee2d2e6ea5550118170dbd5bb1316ceb38455fb ]
folio_next_index() returns an unsigned long value which left shifted
by PAGE_SHIFT could possibly cause an overflow on 32-bit system. Instead
use folio_pos(folio) + folio_size(folio), which does this correctly.
Suggested-by: Matthew Wilcox <willy(a)infradead.org>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list(a)gmail.com>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/iomap/buffered-io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 063133ec77f49..5e5bffa384976 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -929,7 +929,7 @@ static int iomap_write_delalloc_scan(struct inode *inode,
* the end of this data range, not the end of the folio.
*/
*punch_start_byte = min_t(loff_t, end_byte,
- folio_next_index(folio) << PAGE_SHIFT);
+ folio_pos(folio) + folio_size(folio));
}
/* move offset to start of next folio in range */
--
2.40.1
commit 0bdf399 upstream.
This fix applies to all stable kernel versions 4.19+.
BPF programs that run on connect can rewrite the connect address. For
the connect system call this isn't a problem, because a copy of the address
is made when it is moved into kernel space. However, kernel_connect
simply passes through the address it is given, so the caller may observe
its address value unexpectedly change.
A practical example where this is problematic is where NFS is combined
with a system such as Cilium which implements BPF-based load balancing.
A common pattern in software-defined storage systems is to have an NFS
mount that connects to a persistent virtual IP which in turn maps to an
ephemeral server IP. This is usually done to achieve high availability:
if your server goes down you can quickly spin up a replacement and remap
the virtual IP to that endpoint. With BPF-based load balancing, mounts
will forget the virtual IP address when the address rewrite occurs
because a pointer to the only copy of that address is passed down the
stack. Server failover then breaks, because clients have forgotten the
virtual IP address. Reconnects fail and mounts remain broken. This patch
was tested by setting up a scenario like this and ensuring that NFS
reconnects worked after applying the patch.
Signed-off-by: Jordan Rife <jrife(a)google.com>
---
net/socket.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/net/socket.c b/net/socket.c
index ce70c01eb2f3e..db9d908198f21 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3468,7 +3468,11 @@ EXPORT_SYMBOL(kernel_accept);
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags)
{
- return sock->ops->connect(sock, addr, addrlen, flags);
+ struct sockaddr_storage address;
+
+ memcpy(&address, addr, addrlen);
+
+ return sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, flags);
}
EXPORT_SYMBOL(kernel_connect);
--
2.42.0.283.g2d96d420d3-goog
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
[ Upstream commit ccbe77f7e45dfb4420f7f531b650c00c6e9c7507 ]
Syzkaller reports a memory leak:
BUG: memory leak
unreferenced object 0xffff88810b279e00 (size 96):
comm "syz-executor399", pid 3631, jiffies 4294964921 (age 23.870s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 08 9e 27 0b 81 88 ff ff ..........'.....
08 9e 27 0b 81 88 ff ff 00 00 00 00 00 00 00 00 ..'.............
backtrace:
[<ffffffff814cfc90>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
[<ffffffff81bb75ca>] kmalloc include/linux/slab.h:576 [inline]
[<ffffffff81bb75ca>] autofs_wait+0x3fa/0x9a0 fs/autofs/waitq.c:378
[<ffffffff81bb88a7>] autofs_do_expire_multi+0xa7/0x3e0 fs/autofs/expire.c:593
[<ffffffff81bb8c33>] autofs_expire_multi+0x53/0x80 fs/autofs/expire.c:619
[<ffffffff81bb6972>] autofs_root_ioctl_unlocked+0x322/0x3b0 fs/autofs/root.c:897
[<ffffffff81bb6a95>] autofs_root_ioctl+0x25/0x30 fs/autofs/root.c:910
[<ffffffff81602a9c>] vfs_ioctl fs/ioctl.c:51 [inline]
[<ffffffff81602a9c>] __do_sys_ioctl fs/ioctl.c:870 [inline]
[<ffffffff81602a9c>] __se_sys_ioctl fs/ioctl.c:856 [inline]
[<ffffffff81602a9c>] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856
[<ffffffff84608225>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
[<ffffffff84608225>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
[<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
autofs_wait_queue structs should be freed if their wait_ctr becomes zero.
Otherwise they will be lost.
In this case an AUTOFS_IOC_EXPIRE_MULTI ioctl is done, then a new
waitqueue struct is allocated in autofs_wait(), its initial wait_ctr
equals 2. After that wait_event_killable() is interrupted (it returns
-ERESTARTSYS), so that 'wq->name.name == NULL' condition may be not
satisfied. Actually, this condition can be satisfied when
autofs_wait_release() or autofs_catatonic_mode() is called and, what is
also important, wait_ctr is decremented in those places. Upon the exit of
autofs_wait(), wait_ctr is decremented to 1. Then the unmounting process
begins: kill_sb calls autofs_catatonic_mode(), which should have freed the
waitqueues, but it only decrements its usage counter to zero which is not
a correct behaviour.
edit:imk
This description is of course not correct. The umount performed as a result
of an expire is a umount of a mount that has been automounted, it's not the
autofs mount itself. They happen independently, usually after everything
mounted within the autofs file system has been expired away. If everything
hasn't been expired away the automount daemon can still exit leaving mounts
in place. But expires done in both cases will result in a notification that
calls autofs_wait_release() with a result status. The problem case is the
summary execution of of the automount daemon. In this case any waiting
processes won't be woken up until either they are terminated or the mount
is umounted.
end edit: imk
So in catatonic mode we should free waitqueues which counter becomes zero.
edit: imk
Initially I was concerned that the calling of autofs_wait_release() and
autofs_catatonic_mode() was not mutually exclusive but that can't be the
case (obviously) because the queue entry (or entries) is removed from the
list when either of these two functions are called. Consequently the wait
entry will be freed by only one of these functions or by the woken process
in autofs_wait() depending on the order of the calls.
end edit: imk
Reported-by: syzbot+5e53f70e69ff0c0a1c0c(a)syzkaller.appspotmail.com
Suggested-by: Takeshi Misawa <jeliantsurux(a)gmail.com>
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Signed-off-by: Alexey Khoroshilov <khoroshilov(a)ispras.ru>
Signed-off-by: Ian Kent <raven(a)themaw.net>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: autofs(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Message-Id: <169112719161.7590.6700123246297365841.stgit(a)donald.themaw.net>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/autofs4/waitq.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 961a12dc6dc81..5863532675e3c 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -42,8 +42,9 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
wq->status = -ENOENT; /* Magic is gone - report failure */
kfree(wq->name.name);
wq->name.name = NULL;
- wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
+ if (!--wq->wait_ctr)
+ kfree(wq);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
--
2.40.1
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
[ Upstream commit ccbe77f7e45dfb4420f7f531b650c00c6e9c7507 ]
Syzkaller reports a memory leak:
BUG: memory leak
unreferenced object 0xffff88810b279e00 (size 96):
comm "syz-executor399", pid 3631, jiffies 4294964921 (age 23.870s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 08 9e 27 0b 81 88 ff ff ..........'.....
08 9e 27 0b 81 88 ff ff 00 00 00 00 00 00 00 00 ..'.............
backtrace:
[<ffffffff814cfc90>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
[<ffffffff81bb75ca>] kmalloc include/linux/slab.h:576 [inline]
[<ffffffff81bb75ca>] autofs_wait+0x3fa/0x9a0 fs/autofs/waitq.c:378
[<ffffffff81bb88a7>] autofs_do_expire_multi+0xa7/0x3e0 fs/autofs/expire.c:593
[<ffffffff81bb8c33>] autofs_expire_multi+0x53/0x80 fs/autofs/expire.c:619
[<ffffffff81bb6972>] autofs_root_ioctl_unlocked+0x322/0x3b0 fs/autofs/root.c:897
[<ffffffff81bb6a95>] autofs_root_ioctl+0x25/0x30 fs/autofs/root.c:910
[<ffffffff81602a9c>] vfs_ioctl fs/ioctl.c:51 [inline]
[<ffffffff81602a9c>] __do_sys_ioctl fs/ioctl.c:870 [inline]
[<ffffffff81602a9c>] __se_sys_ioctl fs/ioctl.c:856 [inline]
[<ffffffff81602a9c>] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856
[<ffffffff84608225>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
[<ffffffff84608225>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
[<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
autofs_wait_queue structs should be freed if their wait_ctr becomes zero.
Otherwise they will be lost.
In this case an AUTOFS_IOC_EXPIRE_MULTI ioctl is done, then a new
waitqueue struct is allocated in autofs_wait(), its initial wait_ctr
equals 2. After that wait_event_killable() is interrupted (it returns
-ERESTARTSYS), so that 'wq->name.name == NULL' condition may be not
satisfied. Actually, this condition can be satisfied when
autofs_wait_release() or autofs_catatonic_mode() is called and, what is
also important, wait_ctr is decremented in those places. Upon the exit of
autofs_wait(), wait_ctr is decremented to 1. Then the unmounting process
begins: kill_sb calls autofs_catatonic_mode(), which should have freed the
waitqueues, but it only decrements its usage counter to zero which is not
a correct behaviour.
edit:imk
This description is of course not correct. The umount performed as a result
of an expire is a umount of a mount that has been automounted, it's not the
autofs mount itself. They happen independently, usually after everything
mounted within the autofs file system has been expired away. If everything
hasn't been expired away the automount daemon can still exit leaving mounts
in place. But expires done in both cases will result in a notification that
calls autofs_wait_release() with a result status. The problem case is the
summary execution of of the automount daemon. In this case any waiting
processes won't be woken up until either they are terminated or the mount
is umounted.
end edit: imk
So in catatonic mode we should free waitqueues which counter becomes zero.
edit: imk
Initially I was concerned that the calling of autofs_wait_release() and
autofs_catatonic_mode() was not mutually exclusive but that can't be the
case (obviously) because the queue entry (or entries) is removed from the
list when either of these two functions are called. Consequently the wait
entry will be freed by only one of these functions or by the woken process
in autofs_wait() depending on the order of the calls.
end edit: imk
Reported-by: syzbot+5e53f70e69ff0c0a1c0c(a)syzkaller.appspotmail.com
Suggested-by: Takeshi Misawa <jeliantsurux(a)gmail.com>
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Signed-off-by: Alexey Khoroshilov <khoroshilov(a)ispras.ru>
Signed-off-by: Ian Kent <raven(a)themaw.net>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: autofs(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Message-Id: <169112719161.7590.6700123246297365841.stgit(a)donald.themaw.net>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/autofs/waitq.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index f6385c6ef0a56..44ba0cd4ebc4f 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -35,8 +35,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
wq->status = -ENOENT; /* Magic is gone - report failure */
kfree(wq->name.name);
wq->name.name = NULL;
- wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
+ if (!--wq->wait_ctr)
+ kfree(wq);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
--
2.40.1
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
[ Upstream commit ccbe77f7e45dfb4420f7f531b650c00c6e9c7507 ]
Syzkaller reports a memory leak:
BUG: memory leak
unreferenced object 0xffff88810b279e00 (size 96):
comm "syz-executor399", pid 3631, jiffies 4294964921 (age 23.870s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 08 9e 27 0b 81 88 ff ff ..........'.....
08 9e 27 0b 81 88 ff ff 00 00 00 00 00 00 00 00 ..'.............
backtrace:
[<ffffffff814cfc90>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
[<ffffffff81bb75ca>] kmalloc include/linux/slab.h:576 [inline]
[<ffffffff81bb75ca>] autofs_wait+0x3fa/0x9a0 fs/autofs/waitq.c:378
[<ffffffff81bb88a7>] autofs_do_expire_multi+0xa7/0x3e0 fs/autofs/expire.c:593
[<ffffffff81bb8c33>] autofs_expire_multi+0x53/0x80 fs/autofs/expire.c:619
[<ffffffff81bb6972>] autofs_root_ioctl_unlocked+0x322/0x3b0 fs/autofs/root.c:897
[<ffffffff81bb6a95>] autofs_root_ioctl+0x25/0x30 fs/autofs/root.c:910
[<ffffffff81602a9c>] vfs_ioctl fs/ioctl.c:51 [inline]
[<ffffffff81602a9c>] __do_sys_ioctl fs/ioctl.c:870 [inline]
[<ffffffff81602a9c>] __se_sys_ioctl fs/ioctl.c:856 [inline]
[<ffffffff81602a9c>] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856
[<ffffffff84608225>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
[<ffffffff84608225>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
[<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
autofs_wait_queue structs should be freed if their wait_ctr becomes zero.
Otherwise they will be lost.
In this case an AUTOFS_IOC_EXPIRE_MULTI ioctl is done, then a new
waitqueue struct is allocated in autofs_wait(), its initial wait_ctr
equals 2. After that wait_event_killable() is interrupted (it returns
-ERESTARTSYS), so that 'wq->name.name == NULL' condition may be not
satisfied. Actually, this condition can be satisfied when
autofs_wait_release() or autofs_catatonic_mode() is called and, what is
also important, wait_ctr is decremented in those places. Upon the exit of
autofs_wait(), wait_ctr is decremented to 1. Then the unmounting process
begins: kill_sb calls autofs_catatonic_mode(), which should have freed the
waitqueues, but it only decrements its usage counter to zero which is not
a correct behaviour.
edit:imk
This description is of course not correct. The umount performed as a result
of an expire is a umount of a mount that has been automounted, it's not the
autofs mount itself. They happen independently, usually after everything
mounted within the autofs file system has been expired away. If everything
hasn't been expired away the automount daemon can still exit leaving mounts
in place. But expires done in both cases will result in a notification that
calls autofs_wait_release() with a result status. The problem case is the
summary execution of of the automount daemon. In this case any waiting
processes won't be woken up until either they are terminated or the mount
is umounted.
end edit: imk
So in catatonic mode we should free waitqueues which counter becomes zero.
edit: imk
Initially I was concerned that the calling of autofs_wait_release() and
autofs_catatonic_mode() was not mutually exclusive but that can't be the
case (obviously) because the queue entry (or entries) is removed from the
list when either of these two functions are called. Consequently the wait
entry will be freed by only one of these functions or by the woken process
in autofs_wait() depending on the order of the calls.
end edit: imk
Reported-by: syzbot+5e53f70e69ff0c0a1c0c(a)syzkaller.appspotmail.com
Suggested-by: Takeshi Misawa <jeliantsurux(a)gmail.com>
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Signed-off-by: Alexey Khoroshilov <khoroshilov(a)ispras.ru>
Signed-off-by: Ian Kent <raven(a)themaw.net>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: autofs(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Message-Id: <169112719161.7590.6700123246297365841.stgit(a)donald.themaw.net>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/autofs/waitq.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index b04c528b19d34..1230bdf329898 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -32,8 +32,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
wq->status = -ENOENT; /* Magic is gone - report failure */
kfree(wq->name.name);
wq->name.name = NULL;
- wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
+ if (!--wq->wait_ctr)
+ kfree(wq);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
--
2.40.1
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
[ Upstream commit ccbe77f7e45dfb4420f7f531b650c00c6e9c7507 ]
Syzkaller reports a memory leak:
BUG: memory leak
unreferenced object 0xffff88810b279e00 (size 96):
comm "syz-executor399", pid 3631, jiffies 4294964921 (age 23.870s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 08 9e 27 0b 81 88 ff ff ..........'.....
08 9e 27 0b 81 88 ff ff 00 00 00 00 00 00 00 00 ..'.............
backtrace:
[<ffffffff814cfc90>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
[<ffffffff81bb75ca>] kmalloc include/linux/slab.h:576 [inline]
[<ffffffff81bb75ca>] autofs_wait+0x3fa/0x9a0 fs/autofs/waitq.c:378
[<ffffffff81bb88a7>] autofs_do_expire_multi+0xa7/0x3e0 fs/autofs/expire.c:593
[<ffffffff81bb8c33>] autofs_expire_multi+0x53/0x80 fs/autofs/expire.c:619
[<ffffffff81bb6972>] autofs_root_ioctl_unlocked+0x322/0x3b0 fs/autofs/root.c:897
[<ffffffff81bb6a95>] autofs_root_ioctl+0x25/0x30 fs/autofs/root.c:910
[<ffffffff81602a9c>] vfs_ioctl fs/ioctl.c:51 [inline]
[<ffffffff81602a9c>] __do_sys_ioctl fs/ioctl.c:870 [inline]
[<ffffffff81602a9c>] __se_sys_ioctl fs/ioctl.c:856 [inline]
[<ffffffff81602a9c>] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856
[<ffffffff84608225>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
[<ffffffff84608225>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
[<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
autofs_wait_queue structs should be freed if their wait_ctr becomes zero.
Otherwise they will be lost.
In this case an AUTOFS_IOC_EXPIRE_MULTI ioctl is done, then a new
waitqueue struct is allocated in autofs_wait(), its initial wait_ctr
equals 2. After that wait_event_killable() is interrupted (it returns
-ERESTARTSYS), so that 'wq->name.name == NULL' condition may be not
satisfied. Actually, this condition can be satisfied when
autofs_wait_release() or autofs_catatonic_mode() is called and, what is
also important, wait_ctr is decremented in those places. Upon the exit of
autofs_wait(), wait_ctr is decremented to 1. Then the unmounting process
begins: kill_sb calls autofs_catatonic_mode(), which should have freed the
waitqueues, but it only decrements its usage counter to zero which is not
a correct behaviour.
edit:imk
This description is of course not correct. The umount performed as a result
of an expire is a umount of a mount that has been automounted, it's not the
autofs mount itself. They happen independently, usually after everything
mounted within the autofs file system has been expired away. If everything
hasn't been expired away the automount daemon can still exit leaving mounts
in place. But expires done in both cases will result in a notification that
calls autofs_wait_release() with a result status. The problem case is the
summary execution of of the automount daemon. In this case any waiting
processes won't be woken up until either they are terminated or the mount
is umounted.
end edit: imk
So in catatonic mode we should free waitqueues which counter becomes zero.
edit: imk
Initially I was concerned that the calling of autofs_wait_release() and
autofs_catatonic_mode() was not mutually exclusive but that can't be the
case (obviously) because the queue entry (or entries) is removed from the
list when either of these two functions are called. Consequently the wait
entry will be freed by only one of these functions or by the woken process
in autofs_wait() depending on the order of the calls.
end edit: imk
Reported-by: syzbot+5e53f70e69ff0c0a1c0c(a)syzkaller.appspotmail.com
Suggested-by: Takeshi Misawa <jeliantsurux(a)gmail.com>
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Signed-off-by: Alexey Khoroshilov <khoroshilov(a)ispras.ru>
Signed-off-by: Ian Kent <raven(a)themaw.net>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: autofs(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Message-Id: <169112719161.7590.6700123246297365841.stgit(a)donald.themaw.net>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/autofs/waitq.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 5ced859dac539..dd479198310e8 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -32,8 +32,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
wq->status = -ENOENT; /* Magic is gone - report failure */
kfree(wq->name.name);
wq->name.name = NULL;
- wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
+ if (!--wq->wait_ctr)
+ kfree(wq);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
--
2.40.1
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
[ Upstream commit ccbe77f7e45dfb4420f7f531b650c00c6e9c7507 ]
Syzkaller reports a memory leak:
BUG: memory leak
unreferenced object 0xffff88810b279e00 (size 96):
comm "syz-executor399", pid 3631, jiffies 4294964921 (age 23.870s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 08 9e 27 0b 81 88 ff ff ..........'.....
08 9e 27 0b 81 88 ff ff 00 00 00 00 00 00 00 00 ..'.............
backtrace:
[<ffffffff814cfc90>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
[<ffffffff81bb75ca>] kmalloc include/linux/slab.h:576 [inline]
[<ffffffff81bb75ca>] autofs_wait+0x3fa/0x9a0 fs/autofs/waitq.c:378
[<ffffffff81bb88a7>] autofs_do_expire_multi+0xa7/0x3e0 fs/autofs/expire.c:593
[<ffffffff81bb8c33>] autofs_expire_multi+0x53/0x80 fs/autofs/expire.c:619
[<ffffffff81bb6972>] autofs_root_ioctl_unlocked+0x322/0x3b0 fs/autofs/root.c:897
[<ffffffff81bb6a95>] autofs_root_ioctl+0x25/0x30 fs/autofs/root.c:910
[<ffffffff81602a9c>] vfs_ioctl fs/ioctl.c:51 [inline]
[<ffffffff81602a9c>] __do_sys_ioctl fs/ioctl.c:870 [inline]
[<ffffffff81602a9c>] __se_sys_ioctl fs/ioctl.c:856 [inline]
[<ffffffff81602a9c>] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856
[<ffffffff84608225>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
[<ffffffff84608225>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
[<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
autofs_wait_queue structs should be freed if their wait_ctr becomes zero.
Otherwise they will be lost.
In this case an AUTOFS_IOC_EXPIRE_MULTI ioctl is done, then a new
waitqueue struct is allocated in autofs_wait(), its initial wait_ctr
equals 2. After that wait_event_killable() is interrupted (it returns
-ERESTARTSYS), so that 'wq->name.name == NULL' condition may be not
satisfied. Actually, this condition can be satisfied when
autofs_wait_release() or autofs_catatonic_mode() is called and, what is
also important, wait_ctr is decremented in those places. Upon the exit of
autofs_wait(), wait_ctr is decremented to 1. Then the unmounting process
begins: kill_sb calls autofs_catatonic_mode(), which should have freed the
waitqueues, but it only decrements its usage counter to zero which is not
a correct behaviour.
edit:imk
This description is of course not correct. The umount performed as a result
of an expire is a umount of a mount that has been automounted, it's not the
autofs mount itself. They happen independently, usually after everything
mounted within the autofs file system has been expired away. If everything
hasn't been expired away the automount daemon can still exit leaving mounts
in place. But expires done in both cases will result in a notification that
calls autofs_wait_release() with a result status. The problem case is the
summary execution of of the automount daemon. In this case any waiting
processes won't be woken up until either they are terminated or the mount
is umounted.
end edit: imk
So in catatonic mode we should free waitqueues which counter becomes zero.
edit: imk
Initially I was concerned that the calling of autofs_wait_release() and
autofs_catatonic_mode() was not mutually exclusive but that can't be the
case (obviously) because the queue entry (or entries) is removed from the
list when either of these two functions are called. Consequently the wait
entry will be freed by only one of these functions or by the woken process
in autofs_wait() depending on the order of the calls.
end edit: imk
Reported-by: syzbot+5e53f70e69ff0c0a1c0c(a)syzkaller.appspotmail.com
Suggested-by: Takeshi Misawa <jeliantsurux(a)gmail.com>
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Signed-off-by: Alexey Khoroshilov <khoroshilov(a)ispras.ru>
Signed-off-by: Ian Kent <raven(a)themaw.net>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: autofs(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Message-Id: <169112719161.7590.6700123246297365841.stgit(a)donald.themaw.net>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/autofs/waitq.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 54c1f8b8b0757..efdc76732faed 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -32,8 +32,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
wq->status = -ENOENT; /* Magic is gone - report failure */
kfree(wq->name.name - wq->offset);
wq->name.name = NULL;
- wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
+ if (!--wq->wait_ctr)
+ kfree(wq);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
--
2.40.1
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x 9876cfe8ec1cb3c88de31f4d58d57b0e7e22bcc4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090700-zippy-raven-214b@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
9876cfe8ec1c ("memfd: replace ratcheting feature from vm.memfd_noexec with hierarchy")
202e14222fad ("memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2")
badbbcd76545 ("selftests/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
72de25913022 ("mm/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
3efd33b75358 ("kernel: pid_namespace: remove unused set_memfd_noexec_scope()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9876cfe8ec1cb3c88de31f4d58d57b0e7e22bcc4 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar(a)cyphar.com>
Date: Mon, 14 Aug 2023 18:41:00 +1000
Subject: [PATCH] memfd: replace ratcheting feature from vm.memfd_noexec with
hierarchy
This sysctl has the very unusual behaviour of not allowing any user (even
CAP_SYS_ADMIN) to reduce the restriction setting, meaning that if you were
to set this sysctl to a more restrictive option in the host pidns you
would need to reboot your machine in order to reset it.
The justification given in [1] is that this is a security feature and thus
it should not be possible to disable. Aside from the fact that we have
plenty of security-related sysctls that can be disabled after being
enabled (fs.protected_symlinks for instance), the protection provided by
the sysctl is to stop users from being able to create a binary and then
execute it. A user with CAP_SYS_ADMIN can trivially do this without
memfd_create(2):
% cat mount-memfd.c
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/mount.h>
#define SHELLCODE "#!/bin/echo this file was executed from this totally private tmpfs:"
int main(void)
{
int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
assert(fsfd >= 0);
assert(!fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 2));
int dfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
assert(dfd >= 0);
int execfd = openat(dfd, "exe", O_CREAT | O_RDWR | O_CLOEXEC, 0782);
assert(execfd >= 0);
assert(write(execfd, SHELLCODE, strlen(SHELLCODE)) == strlen(SHELLCODE));
assert(!close(execfd));
char *execpath = NULL;
char *argv[] = { "bad-exe", NULL }, *envp[] = { NULL };
execfd = openat(dfd, "exe", O_PATH | O_CLOEXEC);
assert(execfd >= 0);
assert(asprintf(&execpath, "/proc/self/fd/%d", execfd) > 0);
assert(!execve(execpath, argv, envp));
}
% ./mount-memfd
this file was executed from this totally private tmpfs: /proc/self/fd/5
%
Given that it is possible for CAP_SYS_ADMIN users to create executable
binaries without memfd_create(2) and without touching the host filesystem
(not to mention the many other things a CAP_SYS_ADMIN process would be
able to do that would be equivalent or worse), it seems strange to cause a
fair amount of headache to admins when there doesn't appear to be an
actual security benefit to blocking this. There appear to be concerns
about confused-deputy-esque attacks[2] but a confused deputy that can
write to arbitrary sysctls is a bigger security issue than executable
memfds.
/* New API */
The primary requirement from the original author appears to be more based
on the need to be able to restrict an entire system in a hierarchical
manner[3], such that child namespaces cannot re-enable executable memfds.
So, implement that behaviour explicitly -- the vm.memfd_noexec scope is
evaluated up the pidns tree to &init_pid_ns and you have the most
restrictive value applied to you. The new lower limit you can set
vm.memfd_noexec is whatever limit applies to your parent.
Note that a pidns will inherit a copy of the parent pidns's effective
vm.memfd_noexec setting at unshare() time. This matches the existing
behaviour, and it also ensures that a pidns will never have its
vm.memfd_noexec setting *lowered* behind its back (but it will be raised
if the parent raises theirs).
/* Backwards Compatibility */
As the previous version of the sysctl didn't allow you to lower the
setting at all, there are no backwards compatibility issues with this
aspect of the change.
However it should be noted that now that the setting is completely
hierarchical. Previously, a cloned pidns would just copy the current
pidns setting, meaning that if the parent's vm.memfd_noexec was changed it
wouldn't propoagate to existing pid namespaces. Now, the restriction
applies recursively. This is a uAPI change, however:
* The sysctl is very new, having been merged in 6.3.
* Several aspects of the sysctl were broken up until this patchset and
the other patchset by Jeff Xu last month.
And thus it seems incredibly unlikely that any real users would run into
this issue. In the worst case, if this causes userspace isues we could
make it so that modifying the setting follows the hierarchical rules but
the restriction checking uses the cached copy.
[1]: https://lore.kernel.org/CABi2SkWnAgHK1i6iqSqPMYuNEhtHBkO8jUuCvmG3RmUB5TKHJw…
[2]: https://lore.kernel.org/CALmYWFs_dNCzw_pW1yRAo4bGCPEtykroEQaowNULp7svwMLjOg…
[3]: https://lore.kernel.org/CALmYWFuahdUF7cT4cm7_TGLqPanuHXJ-hVSfZt7vpTnc18DPrw…
Link: https://lkml.kernel.org/r/20230814-memfd-vm-noexec-uapi-fixes-v2-4-7ff9e3e1…
Fixes: 105ff5339f49 ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Dominique Martinet <asmadeus(a)codewreck.org>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Jeff Xu <jeffxu(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 53974d79d98e..f9f9931e02d6 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -39,7 +39,6 @@ struct pid_namespace {
int reboot; /* group exit code if this pidns was rebooted */
struct ns_common ns;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
- /* sysctl for vm.memfd_noexec */
int memfd_noexec_scope;
#endif
} __randomize_layout;
@@ -56,6 +55,23 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
return ns;
}
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ int scope = MEMFD_NOEXEC_SCOPE_EXEC;
+
+ for (; ns; ns = ns->parent)
+ scope = max(scope, READ_ONCE(ns->memfd_noexec_scope));
+
+ return scope;
+}
+#else
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ return 0;
+}
+#endif
+
extern struct pid_namespace *copy_pid_ns(unsigned long flags,
struct user_namespace *user_ns, struct pid_namespace *ns);
extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
@@ -70,6 +86,11 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
return ns;
}
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ return 0;
+}
+
static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
struct user_namespace *user_ns, struct pid_namespace *ns)
{
diff --git a/kernel/pid.c b/kernel/pid.c
index 6a1d23a11026..fee14a4486a3 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -83,6 +83,9 @@ struct pid_namespace init_pid_ns = {
#ifdef CONFIG_PID_NS
.ns.ops = &pidns_operations,
#endif
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
+ .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
+#endif
};
EXPORT_SYMBOL_GPL(init_pid_ns);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 0bf44afe04dd..619972c78774 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -110,9 +110,9 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
ns->pid_allocated = PIDNS_ADDING;
-
- initialize_memfd_noexec_scope(ns);
-
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
+ ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
+#endif
return ns;
out_free_idr:
diff --git a/kernel/pid_sysctl.h b/kernel/pid_sysctl.h
index b26e027fc9cd..2ee41a3a1dfd 100644
--- a/kernel/pid_sysctl.h
+++ b/kernel/pid_sysctl.h
@@ -5,33 +5,30 @@
#include <linux/pid_namespace.h>
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
-static inline void initialize_memfd_noexec_scope(struct pid_namespace *ns)
-{
- ns->memfd_noexec_scope =
- task_active_pid_ns(current)->memfd_noexec_scope;
-}
-
static int pid_mfd_noexec_dointvec_minmax(struct ctl_table *table,
int write, void *buf, size_t *lenp, loff_t *ppos)
{
struct pid_namespace *ns = task_active_pid_ns(current);
struct ctl_table table_copy;
+ int err, scope, parent_scope;
if (write && !ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
table_copy = *table;
- if (ns != &init_pid_ns)
- table_copy.data = &ns->memfd_noexec_scope;
- /*
- * set minimum to current value, the effect is only bigger
- * value is accepted.
- */
- if (*(int *)table_copy.data > *(int *)table_copy.extra1)
- table_copy.extra1 = table_copy.data;
+ /* You cannot set a lower enforcement value than your parent. */
+ parent_scope = pidns_memfd_noexec_scope(ns->parent);
+ /* Equivalent to pidns_memfd_noexec_scope(ns). */
+ scope = max(READ_ONCE(ns->memfd_noexec_scope), parent_scope);
+
+ table_copy.data = &scope;
+ table_copy.extra1 = &parent_scope;
- return proc_dointvec_minmax(&table_copy, write, buf, lenp, ppos);
+ err = proc_dointvec_minmax(&table_copy, write, buf, lenp, ppos);
+ if (!err && write)
+ WRITE_ONCE(ns->memfd_noexec_scope, scope);
+ return err;
}
static struct ctl_table pid_ns_ctl_table_vm[] = {
@@ -51,7 +48,6 @@ static inline void register_pid_ns_sysctl_table_vm(void)
register_sysctl("vm", pid_ns_ctl_table_vm);
}
#else
-static inline void initialize_memfd_noexec_scope(struct pid_namespace *ns) {}
static inline void register_pid_ns_sysctl_table_vm(void) {}
#endif
diff --git a/mm/memfd.c b/mm/memfd.c
index aa46521057ab..1cad1904fc26 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -271,7 +271,8 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
static int check_sysctl_memfd_noexec(unsigned int *flags)
{
#ifdef CONFIG_SYSCTL
- int sysctl = task_active_pid_ns(current)->memfd_noexec_scope;
+ struct pid_namespace *ns = task_active_pid_ns(current);
+ int sysctl = pidns_memfd_noexec_scope(ns);
if (!(*flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
if (sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL)
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x 9876cfe8ec1cb3c88de31f4d58d57b0e7e22bcc4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090753-unashamed-carnival-1476@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
9876cfe8ec1c ("memfd: replace ratcheting feature from vm.memfd_noexec with hierarchy")
202e14222fad ("memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2")
badbbcd76545 ("selftests/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
72de25913022 ("mm/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9876cfe8ec1cb3c88de31f4d58d57b0e7e22bcc4 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar(a)cyphar.com>
Date: Mon, 14 Aug 2023 18:41:00 +1000
Subject: [PATCH] memfd: replace ratcheting feature from vm.memfd_noexec with
hierarchy
This sysctl has the very unusual behaviour of not allowing any user (even
CAP_SYS_ADMIN) to reduce the restriction setting, meaning that if you were
to set this sysctl to a more restrictive option in the host pidns you
would need to reboot your machine in order to reset it.
The justification given in [1] is that this is a security feature and thus
it should not be possible to disable. Aside from the fact that we have
plenty of security-related sysctls that can be disabled after being
enabled (fs.protected_symlinks for instance), the protection provided by
the sysctl is to stop users from being able to create a binary and then
execute it. A user with CAP_SYS_ADMIN can trivially do this without
memfd_create(2):
% cat mount-memfd.c
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/mount.h>
#define SHELLCODE "#!/bin/echo this file was executed from this totally private tmpfs:"
int main(void)
{
int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
assert(fsfd >= 0);
assert(!fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 2));
int dfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
assert(dfd >= 0);
int execfd = openat(dfd, "exe", O_CREAT | O_RDWR | O_CLOEXEC, 0782);
assert(execfd >= 0);
assert(write(execfd, SHELLCODE, strlen(SHELLCODE)) == strlen(SHELLCODE));
assert(!close(execfd));
char *execpath = NULL;
char *argv[] = { "bad-exe", NULL }, *envp[] = { NULL };
execfd = openat(dfd, "exe", O_PATH | O_CLOEXEC);
assert(execfd >= 0);
assert(asprintf(&execpath, "/proc/self/fd/%d", execfd) > 0);
assert(!execve(execpath, argv, envp));
}
% ./mount-memfd
this file was executed from this totally private tmpfs: /proc/self/fd/5
%
Given that it is possible for CAP_SYS_ADMIN users to create executable
binaries without memfd_create(2) and without touching the host filesystem
(not to mention the many other things a CAP_SYS_ADMIN process would be
able to do that would be equivalent or worse), it seems strange to cause a
fair amount of headache to admins when there doesn't appear to be an
actual security benefit to blocking this. There appear to be concerns
about confused-deputy-esque attacks[2] but a confused deputy that can
write to arbitrary sysctls is a bigger security issue than executable
memfds.
/* New API */
The primary requirement from the original author appears to be more based
on the need to be able to restrict an entire system in a hierarchical
manner[3], such that child namespaces cannot re-enable executable memfds.
So, implement that behaviour explicitly -- the vm.memfd_noexec scope is
evaluated up the pidns tree to &init_pid_ns and you have the most
restrictive value applied to you. The new lower limit you can set
vm.memfd_noexec is whatever limit applies to your parent.
Note that a pidns will inherit a copy of the parent pidns's effective
vm.memfd_noexec setting at unshare() time. This matches the existing
behaviour, and it also ensures that a pidns will never have its
vm.memfd_noexec setting *lowered* behind its back (but it will be raised
if the parent raises theirs).
/* Backwards Compatibility */
As the previous version of the sysctl didn't allow you to lower the
setting at all, there are no backwards compatibility issues with this
aspect of the change.
However it should be noted that now that the setting is completely
hierarchical. Previously, a cloned pidns would just copy the current
pidns setting, meaning that if the parent's vm.memfd_noexec was changed it
wouldn't propoagate to existing pid namespaces. Now, the restriction
applies recursively. This is a uAPI change, however:
* The sysctl is very new, having been merged in 6.3.
* Several aspects of the sysctl were broken up until this patchset and
the other patchset by Jeff Xu last month.
And thus it seems incredibly unlikely that any real users would run into
this issue. In the worst case, if this causes userspace isues we could
make it so that modifying the setting follows the hierarchical rules but
the restriction checking uses the cached copy.
[1]: https://lore.kernel.org/CABi2SkWnAgHK1i6iqSqPMYuNEhtHBkO8jUuCvmG3RmUB5TKHJw…
[2]: https://lore.kernel.org/CALmYWFs_dNCzw_pW1yRAo4bGCPEtykroEQaowNULp7svwMLjOg…
[3]: https://lore.kernel.org/CALmYWFuahdUF7cT4cm7_TGLqPanuHXJ-hVSfZt7vpTnc18DPrw…
Link: https://lkml.kernel.org/r/20230814-memfd-vm-noexec-uapi-fixes-v2-4-7ff9e3e1…
Fixes: 105ff5339f49 ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Dominique Martinet <asmadeus(a)codewreck.org>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Jeff Xu <jeffxu(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 53974d79d98e..f9f9931e02d6 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -39,7 +39,6 @@ struct pid_namespace {
int reboot; /* group exit code if this pidns was rebooted */
struct ns_common ns;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
- /* sysctl for vm.memfd_noexec */
int memfd_noexec_scope;
#endif
} __randomize_layout;
@@ -56,6 +55,23 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
return ns;
}
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ int scope = MEMFD_NOEXEC_SCOPE_EXEC;
+
+ for (; ns; ns = ns->parent)
+ scope = max(scope, READ_ONCE(ns->memfd_noexec_scope));
+
+ return scope;
+}
+#else
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ return 0;
+}
+#endif
+
extern struct pid_namespace *copy_pid_ns(unsigned long flags,
struct user_namespace *user_ns, struct pid_namespace *ns);
extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
@@ -70,6 +86,11 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
return ns;
}
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ return 0;
+}
+
static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
struct user_namespace *user_ns, struct pid_namespace *ns)
{
diff --git a/kernel/pid.c b/kernel/pid.c
index 6a1d23a11026..fee14a4486a3 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -83,6 +83,9 @@ struct pid_namespace init_pid_ns = {
#ifdef CONFIG_PID_NS
.ns.ops = &pidns_operations,
#endif
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
+ .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
+#endif
};
EXPORT_SYMBOL_GPL(init_pid_ns);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 0bf44afe04dd..619972c78774 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -110,9 +110,9 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
ns->pid_allocated = PIDNS_ADDING;
-
- initialize_memfd_noexec_scope(ns);
-
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
+ ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
+#endif
return ns;
out_free_idr:
diff --git a/kernel/pid_sysctl.h b/kernel/pid_sysctl.h
index b26e027fc9cd..2ee41a3a1dfd 100644
--- a/kernel/pid_sysctl.h
+++ b/kernel/pid_sysctl.h
@@ -5,33 +5,30 @@
#include <linux/pid_namespace.h>
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
-static inline void initialize_memfd_noexec_scope(struct pid_namespace *ns)
-{
- ns->memfd_noexec_scope =
- task_active_pid_ns(current)->memfd_noexec_scope;
-}
-
static int pid_mfd_noexec_dointvec_minmax(struct ctl_table *table,
int write, void *buf, size_t *lenp, loff_t *ppos)
{
struct pid_namespace *ns = task_active_pid_ns(current);
struct ctl_table table_copy;
+ int err, scope, parent_scope;
if (write && !ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
table_copy = *table;
- if (ns != &init_pid_ns)
- table_copy.data = &ns->memfd_noexec_scope;
- /*
- * set minimum to current value, the effect is only bigger
- * value is accepted.
- */
- if (*(int *)table_copy.data > *(int *)table_copy.extra1)
- table_copy.extra1 = table_copy.data;
+ /* You cannot set a lower enforcement value than your parent. */
+ parent_scope = pidns_memfd_noexec_scope(ns->parent);
+ /* Equivalent to pidns_memfd_noexec_scope(ns). */
+ scope = max(READ_ONCE(ns->memfd_noexec_scope), parent_scope);
+
+ table_copy.data = &scope;
+ table_copy.extra1 = &parent_scope;
- return proc_dointvec_minmax(&table_copy, write, buf, lenp, ppos);
+ err = proc_dointvec_minmax(&table_copy, write, buf, lenp, ppos);
+ if (!err && write)
+ WRITE_ONCE(ns->memfd_noexec_scope, scope);
+ return err;
}
static struct ctl_table pid_ns_ctl_table_vm[] = {
@@ -51,7 +48,6 @@ static inline void register_pid_ns_sysctl_table_vm(void)
register_sysctl("vm", pid_ns_ctl_table_vm);
}
#else
-static inline void initialize_memfd_noexec_scope(struct pid_namespace *ns) {}
static inline void register_pid_ns_sysctl_table_vm(void) {}
#endif
diff --git a/mm/memfd.c b/mm/memfd.c
index aa46521057ab..1cad1904fc26 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -271,7 +271,8 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
static int check_sysctl_memfd_noexec(unsigned int *flags)
{
#ifdef CONFIG_SYSCTL
- int sysctl = task_active_pid_ns(current)->memfd_noexec_scope;
+ struct pid_namespace *ns = task_active_pid_ns(current);
+ int sysctl = pidns_memfd_noexec_scope(ns);
if (!(*flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
if (sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL)
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x 202e14222fadb246dfdf182e67de1518e86a1e20
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090747-scant-detest-aeed@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
202e14222fad ("memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2")
badbbcd76545 ("selftests/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
72de25913022 ("mm/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 202e14222fadb246dfdf182e67de1518e86a1e20 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar(a)cyphar.com>
Date: Mon, 14 Aug 2023 18:40:58 +1000
Subject: [PATCH] memfd: do not -EACCES old memfd_create() users with
vm.memfd_noexec=2
Given the difficulty of auditing all of userspace to figure out whether
every memfd_create() user has switched to passing MFD_EXEC and
MFD_NOEXEC_SEAL flags, it seems far less distruptive to make it possible
for older programs that don't make use of executable memfds to run under
vm.memfd_noexec=2. Otherwise, a small dependency change can result in
spurious errors. For programs that don't use executable memfds, passing
MFD_NOEXEC_SEAL is functionally a no-op and thus having the same
In addition, every failure under vm.memfd_noexec=2 needs to print to the
kernel log so that userspace can figure out where the error came from.
The concerns about pr_warn_ratelimited() spam that caused the switch to
pr_warn_once()[1,2] do not apply to the vm.memfd_noexec=2 case.
This is a user-visible API change, but as it allows programs to do
something that would be blocked before, and the sysctl itself was broken
and recently released, it seems unlikely this will cause any issues.
[1]: https://lore.kernel.org/Y5yS8wCnuYGLHMj4@x1n/
[2]: https://lore.kernel.org/202212161233.85C9783FB@keescook/
Link: https://lkml.kernel.org/r/20230814-memfd-vm-noexec-uapi-fixes-v2-2-7ff9e3e1…
Fixes: 105ff5339f49 ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Dominique Martinet <asmadeus(a)codewreck.org>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Jeff Xu <jeffxu(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index c758809d5bcf..53974d79d98e 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -17,18 +17,10 @@
struct fs_pin;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
-/*
- * sysctl for vm.memfd_noexec
- * 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL
- * acts like MFD_EXEC was set.
- * 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL
- * acts like MFD_NOEXEC_SEAL was set.
- * 2: memfd_create() without MFD_NOEXEC_SEAL will be
- * rejected.
- */
-#define MEMFD_NOEXEC_SCOPE_EXEC 0
-#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1
-#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2
+/* modes for vm.memfd_noexec sysctl */
+#define MEMFD_NOEXEC_SCOPE_EXEC 0 /* MFD_EXEC implied if unset */
+#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 /* MFD_NOEXEC_SEAL implied if unset */
+#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 /* same as 1, except MFD_EXEC rejected */
#endif
struct pid_namespace {
diff --git a/mm/memfd.c b/mm/memfd.c
index 0bdbd2335af7..d65485c762de 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -271,30 +271,22 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
static int check_sysctl_memfd_noexec(unsigned int *flags)
{
#ifdef CONFIG_SYSCTL
- char comm[TASK_COMM_LEN];
- int sysctl = MEMFD_NOEXEC_SCOPE_EXEC;
- struct pid_namespace *ns;
-
- ns = task_active_pid_ns(current);
- if (ns)
- sysctl = ns->memfd_noexec_scope;
+ int sysctl = task_active_pid_ns(current)->memfd_noexec_scope;
if (!(*flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- if (sysctl == MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL)
+ if (sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL)
*flags |= MFD_NOEXEC_SEAL;
else
*flags |= MFD_EXEC;
}
- if (*flags & MFD_EXEC && sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED) {
- pr_warn_once(
- "memfd_create(): MFD_NOEXEC_SEAL is enforced, pid=%d '%s'\n",
- task_pid_nr(current), get_task_comm(comm, current));
-
+ if (!(*flags & MFD_NOEXEC_SEAL) && sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED) {
+ pr_err_ratelimited(
+ "%s[%d]: memfd_create() requires MFD_NOEXEC_SEAL with vm.memfd_noexec=%d\n",
+ current->comm, task_pid_nr(current), sysctl);
return -EACCES;
}
#endif
-
return 0;
}
@@ -302,7 +294,6 @@ SYSCALL_DEFINE2(memfd_create,
const char __user *, uname,
unsigned int, flags)
{
- char comm[TASK_COMM_LEN];
unsigned int *file_seals;
struct file *file;
int fd, error;
@@ -325,12 +316,13 @@ SYSCALL_DEFINE2(memfd_create,
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
pr_warn_once(
- "memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL, pid=%d '%s'\n",
- task_pid_nr(current), get_task_comm(comm, current));
+ "%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
+ current->comm, task_pid_nr(current));
}
- if (check_sysctl_memfd_noexec(&flags) < 0)
- return -EACCES;
+ error = check_sysctl_memfd_noexec(&flags);
+ if (error < 0)
+ return error;
/* length includes terminating zero */
len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 8eb49204f9ea..8b7390ad81d1 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -1145,11 +1145,23 @@ static void test_sysctl_child(void)
printf("%s sysctl 2\n", memfd_str);
sysctl_assert_write("2");
- mfd_fail_new("kern_memfd_sysctl_2",
- MFD_CLOEXEC | MFD_ALLOW_SEALING);
- mfd_fail_new("kern_memfd_sysctl_2_MFD_EXEC",
- MFD_CLOEXEC | MFD_EXEC);
- fd = mfd_assert_new("", 0, MFD_NOEXEC_SEAL);
+ mfd_fail_new("kern_memfd_sysctl_2_exec",
+ MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_noexec_seal",
+ mfd_def_size,
+ MFD_NOEXEC_SEAL | MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
close(fd);
sysctl_fail_write("0");
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x 202e14222fadb246dfdf182e67de1518e86a1e20
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090745-depict-kinswoman-9083@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
202e14222fad ("memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2")
badbbcd76545 ("selftests/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
72de25913022 ("mm/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 202e14222fadb246dfdf182e67de1518e86a1e20 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar(a)cyphar.com>
Date: Mon, 14 Aug 2023 18:40:58 +1000
Subject: [PATCH] memfd: do not -EACCES old memfd_create() users with
vm.memfd_noexec=2
Given the difficulty of auditing all of userspace to figure out whether
every memfd_create() user has switched to passing MFD_EXEC and
MFD_NOEXEC_SEAL flags, it seems far less distruptive to make it possible
for older programs that don't make use of executable memfds to run under
vm.memfd_noexec=2. Otherwise, a small dependency change can result in
spurious errors. For programs that don't use executable memfds, passing
MFD_NOEXEC_SEAL is functionally a no-op and thus having the same
In addition, every failure under vm.memfd_noexec=2 needs to print to the
kernel log so that userspace can figure out where the error came from.
The concerns about pr_warn_ratelimited() spam that caused the switch to
pr_warn_once()[1,2] do not apply to the vm.memfd_noexec=2 case.
This is a user-visible API change, but as it allows programs to do
something that would be blocked before, and the sysctl itself was broken
and recently released, it seems unlikely this will cause any issues.
[1]: https://lore.kernel.org/Y5yS8wCnuYGLHMj4@x1n/
[2]: https://lore.kernel.org/202212161233.85C9783FB@keescook/
Link: https://lkml.kernel.org/r/20230814-memfd-vm-noexec-uapi-fixes-v2-2-7ff9e3e1…
Fixes: 105ff5339f49 ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Dominique Martinet <asmadeus(a)codewreck.org>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Jeff Xu <jeffxu(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index c758809d5bcf..53974d79d98e 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -17,18 +17,10 @@
struct fs_pin;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
-/*
- * sysctl for vm.memfd_noexec
- * 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL
- * acts like MFD_EXEC was set.
- * 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL
- * acts like MFD_NOEXEC_SEAL was set.
- * 2: memfd_create() without MFD_NOEXEC_SEAL will be
- * rejected.
- */
-#define MEMFD_NOEXEC_SCOPE_EXEC 0
-#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1
-#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2
+/* modes for vm.memfd_noexec sysctl */
+#define MEMFD_NOEXEC_SCOPE_EXEC 0 /* MFD_EXEC implied if unset */
+#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 /* MFD_NOEXEC_SEAL implied if unset */
+#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 /* same as 1, except MFD_EXEC rejected */
#endif
struct pid_namespace {
diff --git a/mm/memfd.c b/mm/memfd.c
index 0bdbd2335af7..d65485c762de 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -271,30 +271,22 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
static int check_sysctl_memfd_noexec(unsigned int *flags)
{
#ifdef CONFIG_SYSCTL
- char comm[TASK_COMM_LEN];
- int sysctl = MEMFD_NOEXEC_SCOPE_EXEC;
- struct pid_namespace *ns;
-
- ns = task_active_pid_ns(current);
- if (ns)
- sysctl = ns->memfd_noexec_scope;
+ int sysctl = task_active_pid_ns(current)->memfd_noexec_scope;
if (!(*flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- if (sysctl == MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL)
+ if (sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL)
*flags |= MFD_NOEXEC_SEAL;
else
*flags |= MFD_EXEC;
}
- if (*flags & MFD_EXEC && sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED) {
- pr_warn_once(
- "memfd_create(): MFD_NOEXEC_SEAL is enforced, pid=%d '%s'\n",
- task_pid_nr(current), get_task_comm(comm, current));
-
+ if (!(*flags & MFD_NOEXEC_SEAL) && sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED) {
+ pr_err_ratelimited(
+ "%s[%d]: memfd_create() requires MFD_NOEXEC_SEAL with vm.memfd_noexec=%d\n",
+ current->comm, task_pid_nr(current), sysctl);
return -EACCES;
}
#endif
-
return 0;
}
@@ -302,7 +294,6 @@ SYSCALL_DEFINE2(memfd_create,
const char __user *, uname,
unsigned int, flags)
{
- char comm[TASK_COMM_LEN];
unsigned int *file_seals;
struct file *file;
int fd, error;
@@ -325,12 +316,13 @@ SYSCALL_DEFINE2(memfd_create,
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
pr_warn_once(
- "memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL, pid=%d '%s'\n",
- task_pid_nr(current), get_task_comm(comm, current));
+ "%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
+ current->comm, task_pid_nr(current));
}
- if (check_sysctl_memfd_noexec(&flags) < 0)
- return -EACCES;
+ error = check_sysctl_memfd_noexec(&flags);
+ if (error < 0)
+ return error;
/* length includes terminating zero */
len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 8eb49204f9ea..8b7390ad81d1 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -1145,11 +1145,23 @@ static void test_sysctl_child(void)
printf("%s sysctl 2\n", memfd_str);
sysctl_assert_write("2");
- mfd_fail_new("kern_memfd_sysctl_2",
- MFD_CLOEXEC | MFD_ALLOW_SEALING);
- mfd_fail_new("kern_memfd_sysctl_2_MFD_EXEC",
- MFD_CLOEXEC | MFD_EXEC);
- fd = mfd_assert_new("", 0, MFD_NOEXEC_SEAL);
+ mfd_fail_new("kern_memfd_sysctl_2_exec",
+ MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_noexec_seal",
+ mfd_def_size,
+ MFD_NOEXEC_SEAL | MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
close(fd);
sysctl_fail_write("0");
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x 434ed3350f57c03a9654fe0619755cc137a58935
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090742-velocity-perjury-fa80@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags")
202e14222fad ("memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2")
badbbcd76545 ("selftests/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
72de25913022 ("mm/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 434ed3350f57c03a9654fe0619755cc137a58935 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar(a)cyphar.com>
Date: Mon, 14 Aug 2023 18:40:59 +1000
Subject: [PATCH] memfd: improve userspace warnings for missing exec-related
flags
In order to incentivise userspace to switch to passing MFD_EXEC and
MFD_NOEXEC_SEAL, we need to provide a warning on each attempt to call
memfd_create() without the new flags. pr_warn_once() is not useful
because on most systems the one warning is burned up during the boot
process (on my system, systemd does this within the first second of boot)
and thus userspace will in practice never see the warnings to push them to
switch to the new flags.
The original patchset[1] used pr_warn_ratelimited(), however there were
concerns about the degree of spam in the kernel log[2,3]. The resulting
inability to detect every case was flagged as an issue at the time[4].
While we could come up with an alternative rate-limiting scheme such as
only outputting the message if vm.memfd_noexec has been modified, or only
outputting the message once for a given task, these alternatives have
downsides that don't make sense given how low-stakes a single kernel
warning message is. Switching to pr_info_ratelimited() instead should be
fine -- it's possible some monitoring tool will be unhappy with a stream
of warning-level messages but there's already plenty of info-level message
spam in dmesg.
[1]: https://lore.kernel.org/20221215001205.51969-4-jeffxu@google.com/
[2]: https://lore.kernel.org/202212161233.85C9783FB@keescook/
[3]: https://lore.kernel.org/Y5yS8wCnuYGLHMj4@x1n/
[4]: https://lore.kernel.org/f185bb42-b29c-977e-312e-3349eea15383@linuxfoundatio…
Link: https://lkml.kernel.org/r/20230814-memfd-vm-noexec-uapi-fixes-v2-3-7ff9e3e1…
Fixes: 105ff5339f49 ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Dominique Martinet <asmadeus(a)codewreck.org>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/memfd.c b/mm/memfd.c
index d65485c762de..aa46521057ab 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -315,7 +315,7 @@ SYSCALL_DEFINE2(memfd_create,
return -EINVAL;
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- pr_warn_once(
+ pr_info_ratelimited(
"%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
current->comm, task_pid_nr(current));
}
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x 434ed3350f57c03a9654fe0619755cc137a58935
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090740-subfloor-legal-0e0a@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags")
202e14222fad ("memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2")
badbbcd76545 ("selftests/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
72de25913022 ("mm/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 434ed3350f57c03a9654fe0619755cc137a58935 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar(a)cyphar.com>
Date: Mon, 14 Aug 2023 18:40:59 +1000
Subject: [PATCH] memfd: improve userspace warnings for missing exec-related
flags
In order to incentivise userspace to switch to passing MFD_EXEC and
MFD_NOEXEC_SEAL, we need to provide a warning on each attempt to call
memfd_create() without the new flags. pr_warn_once() is not useful
because on most systems the one warning is burned up during the boot
process (on my system, systemd does this within the first second of boot)
and thus userspace will in practice never see the warnings to push them to
switch to the new flags.
The original patchset[1] used pr_warn_ratelimited(), however there were
concerns about the degree of spam in the kernel log[2,3]. The resulting
inability to detect every case was flagged as an issue at the time[4].
While we could come up with an alternative rate-limiting scheme such as
only outputting the message if vm.memfd_noexec has been modified, or only
outputting the message once for a given task, these alternatives have
downsides that don't make sense given how low-stakes a single kernel
warning message is. Switching to pr_info_ratelimited() instead should be
fine -- it's possible some monitoring tool will be unhappy with a stream
of warning-level messages but there's already plenty of info-level message
spam in dmesg.
[1]: https://lore.kernel.org/20221215001205.51969-4-jeffxu@google.com/
[2]: https://lore.kernel.org/202212161233.85C9783FB@keescook/
[3]: https://lore.kernel.org/Y5yS8wCnuYGLHMj4@x1n/
[4]: https://lore.kernel.org/f185bb42-b29c-977e-312e-3349eea15383@linuxfoundatio…
Link: https://lkml.kernel.org/r/20230814-memfd-vm-noexec-uapi-fixes-v2-3-7ff9e3e1…
Fixes: 105ff5339f49 ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Dominique Martinet <asmadeus(a)codewreck.org>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/memfd.c b/mm/memfd.c
index d65485c762de..aa46521057ab 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -315,7 +315,7 @@ SYSCALL_DEFINE2(memfd_create,
return -EINVAL;
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- pr_warn_once(
+ pr_info_ratelimited(
"%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
current->comm, task_pid_nr(current));
}
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x 2562d67b1bdf91c7395b0225d60fdeb26b4bc5a0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090735-lyrically-fabulous-bbb1@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
2562d67b1bdf ("revert "memfd: improve userspace warnings for missing exec-related flags".")
434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags")
202e14222fad ("memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2")
badbbcd76545 ("selftests/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
72de25913022 ("mm/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2562d67b1bdf91c7395b0225d60fdeb26b4bc5a0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm(a)linux-foundation.org>
Date: Sat, 2 Sep 2023 15:59:31 -0700
Subject: [PATCH] revert "memfd: improve userspace warnings for missing
exec-related flags".
This warning is telling userspace developers to pass MFD_EXEC and
MFD_NOEXEC_SEAL to memfd_create(). Commit 434ed3350f57 ("memfd: improve
userspace warnings for missing exec-related flags") made the warning more
frequent and visible in the hope that this would accelerate the fixing of
errant userspace.
But the overall effect is to generate far too much dmesg noise.
Fixes: 434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags")
Reported-by: Damian Tometzki <dtometzki(a)fedoraproject.org>
Closes: https://lkml.kernel.org/r/ZPFzCSIgZ4QuHsSC@fedora.fritz.box
Cc: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Jeff Xu <jeffxu(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/memfd.c b/mm/memfd.c
index 1cad1904fc26..2dba2cb6f0d0 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -316,7 +316,7 @@ SYSCALL_DEFINE2(memfd_create,
return -EINVAL;
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- pr_info_ratelimited(
+ pr_warn_once(
"%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
current->comm, task_pid_nr(current));
}
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x 2562d67b1bdf91c7395b0225d60fdeb26b4bc5a0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023090734-sudoku-catalyze-ef01@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
2562d67b1bdf ("revert "memfd: improve userspace warnings for missing exec-related flags".")
434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags")
202e14222fad ("memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2")
badbbcd76545 ("selftests/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
72de25913022 ("mm/memfd: sysctl: fix MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2562d67b1bdf91c7395b0225d60fdeb26b4bc5a0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm(a)linux-foundation.org>
Date: Sat, 2 Sep 2023 15:59:31 -0700
Subject: [PATCH] revert "memfd: improve userspace warnings for missing
exec-related flags".
This warning is telling userspace developers to pass MFD_EXEC and
MFD_NOEXEC_SEAL to memfd_create(). Commit 434ed3350f57 ("memfd: improve
userspace warnings for missing exec-related flags") made the warning more
frequent and visible in the hope that this would accelerate the fixing of
errant userspace.
But the overall effect is to generate far too much dmesg noise.
Fixes: 434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags")
Reported-by: Damian Tometzki <dtometzki(a)fedoraproject.org>
Closes: https://lkml.kernel.org/r/ZPFzCSIgZ4QuHsSC@fedora.fritz.box
Cc: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Jeff Xu <jeffxu(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/memfd.c b/mm/memfd.c
index 1cad1904fc26..2dba2cb6f0d0 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -316,7 +316,7 @@ SYSCALL_DEFINE2(memfd_create,
return -EINVAL;
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- pr_info_ratelimited(
+ pr_warn_once(
"%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
current->comm, task_pid_nr(current));
}
Tegra audio graph card has many DAI links which connects internal
AHUB modules and external audio codecs. Since these are DPCM links,
hw_params() call in the machine driver happens for each connected
BE link and PLLA is updated every time. This is not really needed
for all links as only I/O link DAIs derive respective clocks from
PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock
updates to DAIs over I/O links.
This found to be fixing a DMIC clock discrepancy which is suspected
to happen because of back to back quick PLLA and PLLA_OUT0 rate
updates. This was observed on Jetson TX2 platform where DMIC clock
ended up with unexpected value.
Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sameer Pujar <spujar(a)nvidia.com>
---
sound/soc/tegra/tegra_audio_graph_card.c | 30 ++++++++++++++----------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c
index 1f2c5018bf5a..4737e776d383 100644
--- a/sound/soc/tegra/tegra_audio_graph_card.c
+++ b/sound/soc/tegra/tegra_audio_graph_card.c
@@ -10,6 +10,7 @@
#include <linux/platform_device.h>
#include <sound/graph_card.h>
#include <sound/pcm_params.h>
+#include <sound/soc-dai.h>
#define MAX_PLLA_OUT0_DIV 128
@@ -44,6 +45,21 @@ struct tegra_audio_cdata {
unsigned int plla_out0_rates[NUM_RATE_TYPE];
};
+static bool need_clk_update(struct snd_soc_dai *dai)
+{
+ if (snd_soc_dai_is_dummy(dai) ||
+ !dai->driver->ops ||
+ !dai->driver->name)
+ return false;
+
+ if (strstr(dai->driver->name, "I2S") ||
+ strstr(dai->driver->name, "DMIC") ||
+ strstr(dai->driver->name, "DSPK"))
+ return true;
+
+ return false;
+}
+
/* Setup PLL clock as per the given sample rate */
static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *params)
@@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream,
struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
int err;
- /*
- * This gets called for each DAI link (FE or BE) when DPCM is used.
- * We may not want to update PLLA rate for each call. So PLLA update
- * must be restricted to external I/O links (I2S, DMIC or DSPK) since
- * they actually depend on it. I/O modules update their clocks in
- * hw_param() of their respective component driver and PLLA rate
- * update here helps them to derive appropriate rates.
- *
- * TODO: When more HW accelerators get added (like sample rate
- * converter, volume gain controller etc., which don't really
- * depend on PLLA) we need a better way to filter here.
- */
- if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) {
+ if (need_clk_update(cpu_dai)) {
err = tegra_audio_graph_update_pll(substream, params);
if (err)
return err;
--
2.17.1
From: Vladislav Efanov <VEfanov(a)ispras.ru>
commit 1e0d4adf17e7ef03281d7b16555e7c1508c8ed2d upstream
Bits, which are related to Bitmap Descriptor logical blocks,
are not reset when buffer headers are allocated for them. As the
result, these logical blocks can be treated as free and
be used for other blocks.This can cause usage of one buffer header
for several types of data. UDF issues WARNING in this situation:
WARNING: CPU: 0 PID: 2703 at fs/udf/inode.c:2014
__udf_add_aext+0x685/0x7d0 fs/udf/inode.c:2014
RIP: 0010:__udf_add_aext+0x685/0x7d0 fs/udf/inode.c:2014
Call Trace:
udf_setup_indirect_aext+0x573/0x880 fs/udf/inode.c:1980
udf_add_aext+0x208/0x2e0 fs/udf/inode.c:2067
udf_insert_aext fs/udf/inode.c:2233 [inline]
udf_update_extents fs/udf/inode.c:1181 [inline]
inode_getblk+0x1981/0x3b70 fs/udf/inode.c:885
Found by Linux Verification Center (linuxtesting.org) with syzkaller.
[JK: Somewhat cleaned up the boundary checks]
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Vladislav Efanov <VEfanov(a)ispras.ru>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
Syzkaller reports this problem in 5.10 stable release. The problem has
been fixed by the following patch which can be cleanly applied to the
5.10/5.15/6.1 branches.
fs/udf/balloc.c | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 8e597db4d971..ef50fd263315 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -36,18 +36,41 @@ static int read_block_bitmap(struct super_block *sb,
unsigned long bitmap_nr)
{
struct buffer_head *bh = NULL;
- int retval = 0;
+ int i;
+ int max_bits, off, count;
struct kernel_lb_addr loc;
loc.logicalBlockNum = bitmap->s_extPosition;
loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block));
+ bitmap->s_block_bitmap[bitmap_nr] = bh;
if (!bh)
- retval = -EIO;
+ return -EIO;
- bitmap->s_block_bitmap[bitmap_nr] = bh;
- return retval;
+ /* Check consistency of Space Bitmap buffer. */
+ max_bits = sb->s_blocksize * 8;
+ if (!bitmap_nr) {
+ off = sizeof(struct spaceBitmapDesc) << 3;
+ count = min(max_bits - off, bitmap->s_nr_groups);
+ } else {
+ /*
+ * Rough check if bitmap number is too big to have any bitmap
+ * blocks reserved.
+ */
+ if (bitmap_nr >
+ (bitmap->s_nr_groups >> (sb->s_blocksize_bits + 3)) + 2)
+ return 0;
+ off = 0;
+ count = bitmap->s_nr_groups - bitmap_nr * max_bits +
+ (sizeof(struct spaceBitmapDesc) << 3);
+ count = min(count, max_bits);
+ }
+
+ for (i = 0; i < count; i++)
+ if (udf_test_bit(i + off, bh->b_data))
+ return -EFSCORRUPTED;
+ return 0;
}
static int __load_block_bitmap(struct super_block *sb,
--
2.34.1
Hi,
Could you please apply commit b51ba4fe2e13 ("powerpc/32s: Fix assembler
warning about r0") to kernels 4.14/4.19/5.4/5.9 so that we avoid having
the related warning.
Thanks
Christophe
Hi,
Could you please apply commit 98ecc6768e8f ("powerpc/32: Include
.branch_lt in data section") to kernels 4.14 and 4.19 so that we avoid
having the related warnings.
Thanks
Christophe
Hi,
The following patch fixes a regression reported by Michael Larabel on an
Acer Phoenix laptop where there is a black screen in GNOME with kernel 6.5.
It's marked CC to stable, but I checked the stable queue and didn't see
it so I wanted to make sure it wasn't missed.
a7c0cad0dc06 ("drm/amd/display: ensure async flips are only accepted for
fast updates")
Reported-by: Michael Larabel <Michael(a)MichaelLarabel.com>
Thanks!
Tegra audio graph card has many DAI links which connects internal
AHUB modules and external audio codecs. Since these are DPCM links,
hw_params() call in the machine driver happens for each connected
BE link and PLLA is updated every time. This is not really needed
for all links as only I/O link DAIs derive respective clocks from
PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock
updates to DAIs over I/O links.
Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sameer Pujar <spujar(a)nvidia.com>
---
sound/soc/tegra/tegra_audio_graph_card.c | 30 ++++++++++++++----------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c
index 1f2c5018bf5a..4737e776d383 100644
--- a/sound/soc/tegra/tegra_audio_graph_card.c
+++ b/sound/soc/tegra/tegra_audio_graph_card.c
@@ -10,6 +10,7 @@
#include <linux/platform_device.h>
#include <sound/graph_card.h>
#include <sound/pcm_params.h>
+#include <sound/soc-dai.h>
#define MAX_PLLA_OUT0_DIV 128
@@ -44,6 +45,21 @@ struct tegra_audio_cdata {
unsigned int plla_out0_rates[NUM_RATE_TYPE];
};
+static bool need_clk_update(struct snd_soc_dai *dai)
+{
+ if (snd_soc_dai_is_dummy(dai) ||
+ !dai->driver->ops ||
+ !dai->driver->name)
+ return false;
+
+ if (strstr(dai->driver->name, "I2S") ||
+ strstr(dai->driver->name, "DMIC") ||
+ strstr(dai->driver->name, "DSPK"))
+ return true;
+
+ return false;
+}
+
/* Setup PLL clock as per the given sample rate */
static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *params)
@@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream,
struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
int err;
- /*
- * This gets called for each DAI link (FE or BE) when DPCM is used.
- * We may not want to update PLLA rate for each call. So PLLA update
- * must be restricted to external I/O links (I2S, DMIC or DSPK) since
- * they actually depend on it. I/O modules update their clocks in
- * hw_param() of their respective component driver and PLLA rate
- * update here helps them to derive appropriate rates.
- *
- * TODO: When more HW accelerators get added (like sample rate
- * converter, volume gain controller etc., which don't really
- * depend on PLLA) we need a better way to filter here.
- */
- if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) {
+ if (need_clk_update(cpu_dai)) {
err = tegra_audio_graph_update_pll(substream, params);
if (err)
return err;
--
2.17.1
This commit has no direct upstream equivalent.
After commit d48016d74836 ("mm,ima,kexec,of: use memblock_free_late from
ima_free_kexec_buffer") in 5.15, there is a modpost warning for certain
configurations:
WARNING: modpost: vmlinux.o(.text+0xb14064): Section mismatch in reference from the function ima_free_kexec_buffer() to the function .init.text:__memblock_free_late()
The function ima_free_kexec_buffer() references
the function __init __memblock_free_late().
This is often because ima_free_kexec_buffer lacks a __init
annotation or the annotation of __memblock_free_late is wrong.
In mainline, there is no issue because ima_free_kexec_buffer() is marked
as __init, which was done as part of commit b69a2afd5afc ("x86/kexec:
Carry forward IMA measurement log on kexec") in 6.0, which is not
suitable for stable.
Mark ima_free_kexec_buffer() and its single caller
ima_load_kexec_buffer() as __init in 5.15, as ima_load_kexec_buffer() is
only called from ima_init(), which is __init, clearing up the warning.
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
drivers/of/kexec.c | 2 +-
include/linux/of.h | 2 +-
security/integrity/ima/ima.h | 2 +-
security/integrity/ima/ima_kexec.c | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
index 3a07cc58e7d7..d10fd54415c2 100644
--- a/drivers/of/kexec.c
+++ b/drivers/of/kexec.c
@@ -165,7 +165,7 @@ int ima_get_kexec_buffer(void **addr, size_t *size)
/**
* ima_free_kexec_buffer - free memory used by the IMA buffer
*/
-int ima_free_kexec_buffer(void)
+int __init ima_free_kexec_buffer(void)
{
int ret;
unsigned long addr;
diff --git a/include/linux/of.h b/include/linux/of.h
index 140671cb746a..6f15e8b0f9d1 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -574,7 +574,7 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
unsigned long initrd_len,
const char *cmdline, size_t extra_fdt_size);
int ima_get_kexec_buffer(void **addr, size_t *size);
-int ima_free_kexec_buffer(void);
+int __init ima_free_kexec_buffer(void);
#else /* CONFIG_OF */
static inline void of_core_init(void)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index be965a8715e4..0afe413dda68 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -122,7 +122,7 @@ struct ima_kexec_hdr {
extern const int read_idmap[];
#ifdef CONFIG_HAVE_IMA_KEXEC
-void ima_load_kexec_buffer(void);
+void __init ima_load_kexec_buffer(void);
#else
static inline void ima_load_kexec_buffer(void) {}
#endif /* CONFIG_HAVE_IMA_KEXEC */
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index f799cc278a9a..f3b10851bbbf 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -137,7 +137,7 @@ void ima_add_kexec_buffer(struct kimage *image)
/*
* Restore the measurement list from the previous kernel.
*/
-void ima_load_kexec_buffer(void)
+void __init ima_load_kexec_buffer(void)
{
void *kexec_buffer = NULL;
size_t kexec_buffer_size = 0;
---
base-commit: 8f790700c974345ab78054e109beddd84539f319
change-id: 20230905-5-15-of-kexec-modpost-warning-6a6b48f30ebf
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
Hi,
As part of the mitigation for the iTLB multihit vulnerability, KVM creates
a worker thread in KVM_CREATE_VM ioctl(). This thread calls
cgroup_attach_task_all() which takes cgroup_threadgroup_rwsem for writing
which may incur 100ms+ latency since upstream commit
6a010a49b63ac8465851a79185d8deff966f8e1a.
However, if the CPU is not vulnerable to iTLB multihit one could just
disable the mitigation (and the worker thread creation) with the
newly added KVM module parameter nx_huge_pages=never. This avoids the issue
altogether.
While there's an alternative solution for this issue already supported
in 6.1-stable (ie. cgroup's favordynmods), disabling the mitigation in
KVM is probably preferable if the workload is not impacted by dynamic
cgroup operations since one doesn't need to decide between the trade-off
in using favordynmods, the thread creation code path is avoided at
KVM_CREATE_VM and you avoid creating a thread which does nothing.
Tests performed:
* Measured KVM_CREATE_VM latency and confirmed it goes down to less than 1ms
* We've been performing latency measurements internally w/ this parameter
for some weeks now
Christophe JAILLET (1):
KVM: x86/mmu: Use kstrtobool() instead of strtobool()
Sean Christopherson (1):
KVM: x86/mmu: Add "never" option to allow sticky disabling of
nx_huge_pages
arch/x86/kvm/mmu/mmu.c | 42 +++++++++++++++++++++++++++++++++++++-----
1 file changed, 37 insertions(+), 5 deletions(-)
--
2.40.1
From: Joey Gouly <joey.gouly(a)arm.com>
commit 387d828adffcf1eb949f3141079c479793c59aac upstream.
Import the latest version of the Arm Optimized Routines strncmp function based
on the upstream code of string/aarch64/strncmp.S at commit 189dfefe37d5 from:
https://github.com/ARM-software/optimized-routines
This latest version includes MTE support.
Note that for simplicity Arm have chosen to contribute this code to Linux under
GPLv2 rather than the original MIT OR Apache-2.0 WITH LLVM-exception license.
Arm is the sole copyright holder for this code.
Signed-off-by: Joey Gouly <joey.gouly(a)arm.com>
Cc: Robin Murphy <robin.murphy(a)arm.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Acked-by: Mark Rutland <mark.rutland(a)arm.com>
Acked-by: Catalin Marinas <catalin.marinas(a)arm.com>
Link: https://lore.kernel.org/r/20220301101435.19327-3-joey.gouly@arm.com
(cherry picked from commit 387d828adffcf1eb949f3141079c479793c59aac)
Cc: <stable(a)vger.kernel.org> # 5.15.y only
Fixes: 020b199bc70d ("arm64: Import latest version of Cortex Strings' strncmp")
Reported-by: John Hsu <John.Hsu(a)mediatek.com>
Link: https://lore.kernel.org/all/e9f30f7d5b7d72a3521da31ab2002b49a26f542e.camel@…
Signed-off-by: Will Deacon <will(a)kernel.org>
---
This is a clean cherry-pick of the latest MTE-safe strncmp()
implementation for arm64 which landed in v5.18 and somewhat accidentally
fixed an out-of-bounds read introduced in v5.14.
An alternative would be to disable the optimised code altogether, but
given that this is self-contained and applies cleanly, I'd favour being
consistent with more recent kernels.
arch/arm64/lib/strncmp.S | 244 +++++++++++++++++++++++----------------
1 file changed, 146 insertions(+), 98 deletions(-)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index e42bcfcd37e6..a4884b97e9a8 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,9 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (c) 2013-2021, Arm Limited.
+ * Copyright (c) 2013-2022, Arm Limited.
*
* Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/st…
+ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/st…
*/
#include <linux/linkage.h>
@@ -11,14 +11,14 @@
/* Assumptions:
*
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64.
+ * MTE compatible.
*/
#define L(label) .L ## label
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
/* Parameters and result. */
#define src1 x0
@@ -39,10 +39,24 @@
#define tmp3 x10
#define zeroones x11
#define pos x12
-#define limit_wd x13
-#define mask x14
-#define endloop x15
+#define mask x13
+#define endloop x14
#define count mask
+#define offset pos
+#define neg_offset x15
+
+/* Define endian dependent shift operations.
+ On big-endian early bytes are at MSB and on little-endian LSB.
+ LS_FW means shifting towards early bytes.
+ LS_BK means shifting towards later bytes.
+ */
+#ifdef __AARCH64EB__
+#define LS_FW lsl
+#define LS_BK lsr
+#else
+#define LS_FW lsr
+#define LS_BK lsl
+#endif
SYM_FUNC_START_WEAK_PI(strncmp)
cbz limit, L(ret0)
@@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp)
and count, src1, #7
b.ne L(misaligned8)
cbnz count, L(mutual_align)
- /* Calculate the number of full and partial words -1. */
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
@@ -64,30 +75,45 @@ L(loop_aligned):
ldr data1, [src1], #8
ldr data2, [src2], #8
L(start_realigned):
- subs limit_wd, limit_wd, #1
+ subs limit, limit, #8
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, pl /* Last Dword or differences. */
+ csinv endloop, diff, xzr, hi /* Last Dword or differences. */
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
ccmp endloop, #0, #0, eq
b.eq L(loop_aligned)
/* End of main loop */
- /* Not reached the limit, must have found the end or a diff. */
- tbz limit_wd, #63, L(not_limit)
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq L(not_limit)
-
- lsl limit, limit, #3 /* Bits -> bytes. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
+L(full_check):
+#ifndef __AARCH64EB__
+ orr syndrome, diff, has_nul
+ add limit, limit, 8 /* Rewind limit to before last subs. */
+L(syndrome_check):
+ /* Limit was reached. Check if the NUL byte or the difference
+ is before the limit. */
+ rev syndrome, syndrome
+ rev data1, data1
+ clz pos, syndrome
+ rev data2, data2
+ lsl data1, data1, pos
+ cmp limit, pos, lsr #3
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ csel result, result, xzr, hi
+ ret
#else
- lsl mask, mask, limit
-#endif
+ /* Not reached the limit, must have found the end or a diff. */
+ tbz limit, #63, L(not_limit)
+ add tmp1, limit, 8
+ cbz limit, L(not_limit)
+
+ lsl limit, tmp1, #3 /* Bits -> bytes. */
+ mov mask, #~0
+ lsr mask, mask, limit
bic data1, data1, mask
bic data2, data2, mask
@@ -95,25 +121,6 @@ L(start_realigned):
orr has_nul, has_nul, mask
L(not_limit):
- orr syndrome, diff, has_nul
-
-#ifndef __AARCH64EB__
- rev syndrome, syndrome
- rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, syndrome
- rev data2, data2
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#else
/* For big-endian we cannot use the trick with the syndrome value
as carry-propagation can corrupt the upper bits if the trailing
bytes in the string contain 0x01. */
@@ -134,10 +141,11 @@ L(not_limit):
rev has_nul, has_nul
orr syndrome, diff, has_nul
clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
+ /* The most-significant-non-zero bit of the syndrome marks either the
+ first bit that is different, or the top bit of the first zero byte.
Shifting left now will bring the critical information into the
top bits. */
+L(end_quick):
lsl data1, data1, pos
lsl data2, data2, pos
/* But we need to zero-extend (char is unsigned) the value and then
@@ -159,22 +167,12 @@ L(mutual_align):
neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
ldr data2, [src2], #8
mov tmp2, #~0
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */
-#endif
- and tmp3, limit_wd, #7
- lsr limit_wd, limit_wd, #3
- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
- add limit, limit, count
- add tmp3, tmp3, count
+ LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */
+ /* Adjust the limit and ensure it doesn't overflow. */
+ adds limit, limit, count
+ csinv limit, limit, xzr, lo
orr data1, data1, tmp2
orr data2, data2, tmp2
- add limit_wd, limit_wd, tmp3, lsr #3
b L(start_realigned)
.p2align 4
@@ -197,13 +195,11 @@ L(done):
/* Align the SRC1 to a dword by doing a bytewise compare and then do
the dword loop. */
L(try_misaligned_words):
- lsr limit_wd, limit, #3
- cbz count, L(do_misaligned)
+ cbz count, L(src1_aligned)
neg count, count
and count, count, #7
sub limit, limit, count
- lsr limit_wd, limit, #3
L(page_end_loop):
ldrb data1w, [src1], #1
@@ -214,48 +210,100 @@ L(page_end_loop):
subs count, count, #1
b.hi L(page_end_loop)
-L(do_misaligned):
- /* Prepare ourselves for the next page crossing. Unlike the aligned
- loop, we fetch 1 less dword because we risk crossing bounds on
- SRC2. */
- mov count, #8
- subs limit_wd, limit_wd, #1
- b.lo L(done_loop)
+ /* The following diagram explains the comparison of misaligned strings.
+ The bytes are shown in natural order. For little-endian, it is
+ reversed in the registers. The "x" bytes are before the string.
+ The "|" separates data that is loaded at one time.
+ src1 | a a a a a a a a | b b b c c c c c | . . .
+ src2 | x x x x x a a a a a a a a b b b | c c c c c . . .
+
+ After shifting in each step, the data looks like this:
+ STEP_A STEP_B STEP_C
+ data1 a a a a a a a a b b b c c c c c b b b c c c c c
+ data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c
+
+ The bytes with "0" are eliminated from the syndrome via mask.
+
+ Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
+ time from SRC2. The comparison happens in 3 steps. After each step
+ the loop can exit, or read from SRC1 or SRC2. */
+L(src1_aligned):
+ /* Calculate offset from 8 byte alignment to string start in bits. No
+ need to mask offset since shifts are ignoring upper bits. */
+ lsl offset, src2, #3
+ bic src2, src2, #0xf
+ mov mask, -1
+ neg neg_offset, offset
+ ldr data1, [src1], #8
+ ldp tmp1, tmp2, [src2], #16
+ LS_BK mask, mask, neg_offset
+ and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */
+ /* Skip the first compare if data in tmp1 is irrelevant. */
+ tbnz offset, 6, L(misaligned_mid_loop)
+
L(loop_misaligned):
- and tmp2, src2, #0xff8
- eor tmp2, tmp2, #0xff8
- cbz tmp2, L(page_end_loop)
+ /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
+ LS_FW data2, tmp1, offset
+ LS_BK tmp1, tmp2, neg_offset
+ subs limit, limit, #8
+ orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/
+ sub has_nul, data1, zeroones
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr tmp3, data1, #REP8_7f
+ csinv endloop, diff, xzr, hi /* If limit, set to all ones. */
+ bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */
+ orr tmp3, endloop, has_nul
+ cbnz tmp3, L(full_check)
ldr data1, [src1], #8
- ldr data2, [src2], #8
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp diff, #0, #0, eq
- b.ne L(not_limit)
- subs limit_wd, limit_wd, #1
- b.pl L(loop_misaligned)
+L(misaligned_mid_loop):
+ /* STEP_B: Compare first part of data1 to second part of tmp2. */
+ LS_FW data2, tmp2, offset
+#ifdef __AARCH64EB__
+ /* For big-endian we do a byte reverse to avoid carry-propagation
+ problem described above. This way we can reuse the has_nul in the
+ next step and also use syndrome value trick at the end. */
+ rev tmp3, data1
+ #define data1_fixed tmp3
+#else
+ #define data1_fixed data1
+#endif
+ sub has_nul, data1_fixed, zeroones
+ orr tmp3, data1_fixed, #REP8_7f
+ eor diff, data2, data1 /* Non-zero if differences found. */
+ bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */
+#ifdef __AARCH64EB__
+ rev has_nul, has_nul
+#endif
+ cmp limit, neg_offset, lsr #3
+ orr syndrome, diff, has_nul
+ bic syndrome, syndrome, mask /* Ignore later bytes. */
+ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
+ cbnz tmp3, L(syndrome_check)
-L(done_loop):
- /* We found a difference or a NULL before the limit was reached. */
- and limit, limit, #7
- cbz limit, L(not_limit)
- /* Read the last word. */
- sub src1, src1, 8
- sub src2, src2, 8
- ldr data1, [src1, limit]
- ldr data2, [src2, limit]
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp diff, #0, #0, eq
- b.ne L(not_limit)
+ /* STEP_C: Compare second part of data1 to first part of tmp1. */
+ ldp tmp1, tmp2, [src2], #16
+ cmp limit, #8
+ LS_BK data2, tmp1, neg_offset
+ eor diff, data2, data1 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ and syndrome, syndrome, mask /* Ignore earlier bytes. */
+ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
+ cbnz tmp3, L(syndrome_check)
+
+ ldr data1, [src1], #8
+ sub limit, limit, #8
+ b L(loop_misaligned)
+
+#ifdef __AARCH64EB__
+L(syndrome_check):
+ clz pos, syndrome
+ cmp pos, limit, lsl #3
+ b.lo L(end_quick)
+#endif
L(ret0):
mov result, #0
ret
-
SYM_FUNC_END_PI(strncmp)
EXPORT_SYMBOL_NOHWKASAN(strncmp)
--
2.42.0.283.g2d96d420d3-goog
Hi stable team, JFYI, the recently mainline commit 8f7f35e5aa6f21 ("tpm:
Enable hwrng only for Pluton on AMD CPUs") from Jarkko contains a stable
tag, but it might be worth picking up rather sooner than later, as it
fixes a regression that seems to annoy quite a few users of 6.1.y, 6.4.y
and 6.5; that's why at least Fedora is already working on picking the
fix up ahead of the stable-tree.
Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.
There is a missing backport in the stables 6.1.x and 5.15.x that
combined with a backported patch as a dependency in the QAT driver
causes a kernel crash at boot under certain conditions.
In 6.1/5.15, the function pkcs1pad_create() in rsa-pkcs1pad.c [1] sets the
reqsize of its akcipher_instance using the value in the akcipher_alg of
the selected akcipher implementation. This assumes that the reqsize
field has been set for the akcipher implementation when the akcipher_alg
has been instantiated. The reqsize field is then used to allocate to
allocate memory for pkcs1pad requests.
In commit 80e62ad58db0 ("crypto: qat - Use helper to set reqsize"), the
reqsize for the rsa implementation in the QAT driver is moved from being
set in the akcipher_alg to being set when the tfm is initialized. This
means that the implementation of rsa-pkcs1pad won’t allocate any space
for the akcipher request when using the QAT driver.
This issue occurs only when CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not
set. When the crypto self-test is run, the correct value of the reqsize
is stored in the akcipher_alg in the qat driver by the first call to
akcipher_set_reqsize() and then when pkcs1pad_create() is executed, it
finds the correct value.
Options:
1. Cherry-pick 5b11d1a360ea ("crypto: rsa-pkcs1pad - Use helper to set
reqsize") to both 6.1.x and 5.15.x trees.
2. Revert upstream commit 80e62ad58db0 ("crypto: qat - Use helper
to set reqsize").
In 6.1 revert da1729e6619c414f34ce679247721603ebb957dc
In 5.15 revert 3894f5880f968f81c6f3ed37d96bdea01441a8b7
Option #1 is preferred as the same problem might be impacting other
akcipher implementations besides QAT. Option #2 is just specific to the
QAT driver.
@Herbert, can you have a quick look in case I missed something? I tried
both options in 6.1.51 and they appear to resolve the problem.
Thanks,
[1] https://elixir.bootlin.com/linux/v6.1.51/source/crypto/rsa-pkcs1pad.c#L673
--
Giovanni
From: Doug Smythies <dsmythies(a)telus.net>
commit d51847acb018d83186e4af67bc93f9a00a8644f7 upstream.
This fix applies to all stable kernel versions 5.18+.
The intel_pstate CPU frequency scaling driver does not
use policy->cur and it is 0.
When the CPU frequency is outdated arch_freq_get_on_cpu()
will default to the nominal clock frequency when its call to
cpufreq_quick_getpolicy_cur returns the never updated 0.
Thus, the listed frequency might be outside of currently
set limits. Some users are complaining about the high
reported frequency, albeit stale, when their system is
idle and/or it is above the reduced maximum they have set.
This patch will maintain policy_cur for the intel_pstate
driver at the current minimum CPU frequency.
Reported-by: Yang Jie <yang.jie(a)linux.intel.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217597
Signed-off-by: Doug Smythies <dsmythies(a)telus.net>
[ rjw: White space damage fixes and comment adjustment ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Keyon Jie <yang.jie(a)linux.intel.com>
---
drivers/cpufreq/intel_pstate.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d51f90f55c05..fbe3a4098743 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2574,6 +2574,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
intel_pstate_clear_update_util_hook(policy->cpu);
intel_pstate_hwp_set(policy->cpu);
}
+ /*
+ * policy->cur is never updated with the intel_pstate driver, but it
+ * is used as a stale frequency value. So, keep it within limits.
+ */
+ policy->cur = policy->min;
mutex_unlock(&intel_pstate_limits_lock);
--
2.34.1
Stable Group,
Please apply commit 4a032827daa8 ("of: property: Simplify of_link_to_phandle()")
to the 6.1.y stable branch. It was originally part of a series that
was only partially applied to 6.1. Being partially applied left 6.1.y
in a state where a bunch of peripherals were deferred indefinitely on
the am3517-evm.
wl12xx_buf platform: supplier 48002000.scm not ready
wl12xx_vmmc2 platform: supplier wl12xx_buf not ready
48050000.dss platform: supplier display@0 not ready
48064800.ehci platform: supplier hsusb1_phy not ready
backlight platform: supplier 48002000.scm not ready
display@0 platform: supplier backlight not ready
dmtimer-pwm@11 platform: supplier 48002000.scm not ready
hsusb1_phy platform: supplier 48002000.scm not ready
gpio-leds platform: supplier 48002000.scm not ready
480b4000.mmc platform: supplier wl12xx_vmmc2 not ready
With the above commit applied, it appears to address most of the
deferred peripherals.
Fixes: eaf9b5612a47 ("driver core: fw_devlink: Don't purge child
fwnode's consumer links")
Signed-off-by: Adam Ford <aford173(a)gmail.com>
Dzień dobry,
możemy zaproponować Państwu laptopy, komputery stacjonarne, monitory, drukarki (fabrycznie nowy sprzęt) i inne rozwiązania sprzętowe w znacznie niższej cenie i możliwością bezpłatnej konfiguracji według potrzeb użytkowników (Ram, dysk, modem WWAN).
Zapewniamy różne formy finansowania – leasing, najem długoterminowy czy odroczony termin płatności. Gwarantujemy szybką reakcję na zapotrzebowanie i profesjonalny serwis posprzedażowy.
Chcieliby Państwo sprawdzić co możemy zaoferować?
Z pozdrowieniami
Adam Halbert
The MAC address is stored at offset 0x107 in the EEPROM, like correctly
stated in the comment. Add a two bytes reserved field right before the
MAC address to shift it from offset 0x105 to 0x107.
With this the MAC address returned from my RTL8723du wifi stick can be
correctly decoded as "Shenzhen Four Seas Global Link Network Technology
Co., Ltd."
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
Reported-by: Yanik Fuchs <Yanik.fuchs(a)mbv.ch>
Cc: stable(a)vger.kernel.org
---
drivers/net/wireless/realtek/rtw88/rtw8723d.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 3642a2c7f80c9..2434e2480cbe2 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -46,6 +46,7 @@ struct rtw8723du_efuse {
u8 vender_id[2]; /* 0x100 */
u8 product_id[2]; /* 0x102 */
u8 usb_option; /* 0x104 */
+ u8 res5[2]; /* 0x105 */
u8 mac_addr[ETH_ALEN]; /* 0x107 */
};
--
2.39.2
Commit 254986e324ad ("drm/radeon: Use the drm suballocation manager implementation.")
made the fence wait in amdgpu_sa_bo_new() interruptible but there is no
code to handle an interrupt. This caused the kernel to randomly explode
in high-VRAM-pressure situations so make it uninterruptible again.
Fixes: 254986e324ad ("drm/radeon: Use the drm suballocation manager implementation.")
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2769
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
CC: stable(a)vger.kernel.org # 6.4+
CC: Simon Pilkington <simonp.git(a)gmail.com>
---
drivers/gpu/drm/radeon/radeon_sa.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index c87a57c9c592..22dd8b445685 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -123,7 +123,7 @@ int radeon_sa_bo_new(struct radeon_sa_manager *sa_manager,
unsigned int size, unsigned int align)
{
struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
- GFP_KERNEL, true, align);
+ GFP_KERNEL, false, align);
if (IS_ERR(sa)) {
*sa_bo = NULL;
--
2.41.0
Tony et al
,
I am trying to run the 6.1.y branch on an AM3517-EVM.
There are two GPT that throw an error:
ti-sysc: probe of 48318000.target-module failed with error -16
ti-sysc: probe of 49032000.target-module failed with error -16
I did some minor investigation and found sysc_check_active_timer() is
returning the busy condition.
I tracked this back a bit further and found that if I revert commit
a12315d6d270 ("bus: ti-sysc: Make omap3 gpt12 quirk handling SoC
specific"), this error condition goes away.
It almost looks to me like sysc_check_active_timer is defaulting to
-EBUSY when the SoC is not 3430, but the sysc_soc_match[] doesn't
appear to match to AM3517.
I think the proper solution is to treat the AM35* as 3430. Do you
agree with that approach?
If so, I'll submit a patch with a fixes tag. I am also wondering how
far back I should mark the fixes tag.
adam
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The event inject files add events for a specific trace array. For an
instance, if the file is opened and the instance is deleted, reading or
writing to the file will cause a use after free.
Up the ref count of the trace_array when a event inject file is opened.
Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/
Cc: stable(a)vger.kernel.org
Fixes: 6c3edaf9fd6a ("tracing: Introduce trace event injection")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace_events_inject.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
index abe805d471eb..8650562bdaa9 100644
--- a/kernel/trace/trace_events_inject.c
+++ b/kernel/trace/trace_events_inject.c
@@ -328,7 +328,8 @@ event_inject_read(struct file *file, char __user *buf, size_t size,
}
const struct file_operations event_inject_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_file_tr,
.read = event_inject_read,
.write = event_inject_write,
+ .release = tracing_release_file_tr,
};
--
2.40.1
Greetings,
I saw your profile and decided that you could cooperate with me in this
proposition. My client Mr. William who died as a result of a heart-related
condition. I contacted you to assist in getting the fund left behind as the
next-of-kin
Kindly indicate your interest.
Best Regards,
Johnson Martins.
From: Xiubo Li <xiubli(a)redhat.com>
When truncating the inode the MDS will acquire the xlock for the
ifile Locker, which will revoke the 'Frwsxl' caps from the clients.
But when the client just releases and flushes the 'Fw' caps to MDS,
for exmaple, and once the MDS receives the caps flushing msg it
just thought the revocation has finished. Then the MDS will continue
truncating the inode and then issued the truncate notification to
all the clients. While just before the clients receives the cap
flushing ack they receive the truncation notification, the clients
will detecte that the 'issued | dirty' is still holding the 'Fw'
caps.
Cc: stable(a)vger.kernel.org
URL: https://tracker.ceph.com/issues/56693
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
fs/ceph/inode.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ea6f966dacd5..8017b9e5864f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -769,9 +769,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
ci->i_truncate_seq = truncate_seq;
/* the MDS should have revoked these caps */
- WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL |
- CEPH_CAP_FILE_RD |
- CEPH_CAP_FILE_WR |
+ WARN_ON_ONCE(issued & (CEPH_CAP_FILE_RD |
CEPH_CAP_FILE_LAZYIO));
/*
* If we hold relevant caps, or in the case where we're
--
2.41.0
The quilt patch titled
Subject: mm/vmalloc: add a safer version of find_vm_area() for debug
has been removed from the -mm tree. Its filename was
mm-vmalloc-add-a-safer-version-of-find_vm_area-for-debug.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Joel Fernandes (Google)" <joel(a)joelfernandes.org>
Subject: mm/vmalloc: add a safer version of find_vm_area() for debug
Date: Mon, 4 Sep 2023 18:08:04 +0000
It is unsafe to dump vmalloc area information when trying to do so from
some contexts. Add a safer trylock version of the same function to do a
best-effort VMA finding and use it from vmalloc_dump_obj().
[applied test robot feedback on unused function fix.]
[applied Uladzislau feedback on locking.]
Link: https://lkml.kernel.org/r/20230904180806.1002832-1-joel@joelfernandes.org
Fixes: 98f180837a89 ("mm: Make mem_dump_obj() handle vmalloc() memory")
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Reviewed-by: Uladzislau Rezki (Sony) <urezki(a)gmail.com>
Reported-by: Zhen Lei <thunder.leizhen(a)huaweicloud.com>
Cc: Paul E. McKenney <paulmck(a)kernel.org>
Cc: Zqiang <qiang.zhang1211(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmalloc.c | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
--- a/mm/vmalloc.c~mm-vmalloc-add-a-safer-version-of-find_vm_area-for-debug
+++ a/mm/vmalloc.c
@@ -4278,14 +4278,32 @@ void pcpu_free_vm_areas(struct vm_struct
#ifdef CONFIG_PRINTK
bool vmalloc_dump_obj(void *object)
{
- struct vm_struct *vm;
void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+ const void *caller;
+ struct vm_struct *vm;
+ struct vmap_area *va;
+ unsigned long addr;
+ unsigned int nr_pages;
+
+ if (!spin_trylock(&vmap_area_lock))
+ return false;
+ va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
+ if (!va) {
+ spin_unlock(&vmap_area_lock);
+ return false;
+ }
- vm = find_vm_area(objp);
- if (!vm)
+ vm = va->vm;
+ if (!vm) {
+ spin_unlock(&vmap_area_lock);
return false;
+ }
+ addr = (unsigned long)vm->addr;
+ caller = vm->caller;
+ nr_pages = vm->nr_pages;
+ spin_unlock(&vmap_area_lock);
pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
- vm->nr_pages, (unsigned long)vm->addr, vm->caller);
+ nr_pages, addr, caller);
return true;
}
#endif
_
Patches currently in -mm which might be from joel(a)joelfernandes.org are
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 3d7d72a34e05b23e21bafc8bfb861e73c86b31f3
Gitweb: https://git.kernel.org/tip/3d7d72a34e05b23e21bafc8bfb861e73c86b31f3
Author: Jack Wang <jinpu.wang(a)ionos.com>
AuthorDate: Wed, 06 Sep 2023 15:17:12 +02:00
Committer: Ingo Molnar <mingo(a)kernel.org>
CommitterDate: Wed, 06 Sep 2023 23:55:09 +02:00
x86/sgx: Break up long non-preemptible delays in sgx_vepc_release()
On large enclaves we hit the softlockup warning with following call trace:
xa_erase()
sgx_vepc_release()
__fput()
task_work_run()
do_exit()
The latency issue is similar to the one fixed in:
8795359e35bc ("x86/sgx: Silence softlockup detection when releasing large enclaves")
The test system has 64GB of enclave memory, and all is assigned to a single VM.
Release of 'vepc' takes a longer time and causes long latencies, which triggers
the softlockup warning.
Add cond_resched() to give other tasks a chance to run and reduce
latencies, which also avoids the softlockup detector.
[ mingo: Rewrote the changelog. ]
Fixes: 540745ddbc70 ("x86/sgx: Introduce virtual EPC for use by KVM guests")
Reported-by: Yu Zhang <yu.zhang(a)ionos.com>
Signed-off-by: Jack Wang <jinpu.wang(a)ionos.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Tested-by: Yu Zhang <yu.zhang(a)ionos.com>
Reviewed-by: Jarkko Sakkinen <jarkko(a)kernel.org>
Reviewed-by: Kai Huang <kai.huang(a)intel.com>
Acked-by: Haitao Huang <haitao.huang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/kernel/cpu/sgx/virt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index c3e37ea..7aaa365 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -204,6 +204,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
continue;
xa_erase(&vepc->page_array, index);
+ cond_resched();
}
/*
@@ -222,6 +223,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
list_add_tail(&epc_page->list, &secs_pages);
xa_erase(&vepc->page_array, index);
+ cond_resched();
}
/*
@@ -243,6 +245,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
if (sgx_vepc_free_page(epc_page))
list_add_tail(&epc_page->list, &secs_pages);
+ cond_resched();
}
if (!list_empty(&secs_pages))
We hit softlocup with following call trace:
? asm_sysvec_apic_timer_interrupt+0x16/0x20
xa_erase+0x21/0xb0
? sgx_free_epc_page+0x20/0x50
sgx_vepc_release+0x75/0x220
__fput+0x89/0x250
task_work_run+0x59/0x90
do_exit+0x337/0x9a0
Similar like commit 8795359e35bc ("x86/sgx: Silence softlockup detection
when releasing large enclaves"). The test system has 64GB of enclave memory,
and all assigned to a single VM. Release vepc take longer time and triggers
the softlockup warning.
Add cond_resched() to give other tasks a chance to run and placate
the softlockup detector.
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Haitao Huang <haitao.huang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Cc: x86(a)kernel.org
Fixes: 540745ddbc70 ("x86/sgx: Introduce virtual EPC for use by KVM guests")
Reported-by: Yu Zhang <yu.zhang(a)ionos.com>
Tested-by: Yu Zhang <yu.zhang(a)ionos.com>
Acked-by: Haitao Huang <haitao.huang(a)linux.intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko(a)kernel.org>
Reviewed-by: Kai Huang <kai.huang(a)intel.com>
Signed-off-by: Jack Wang <jinpu.wang(a)ionos.com>
---
v4: add rob from Kai.
arch/x86/kernel/cpu/sgx/virt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index c3e37eaec8ec..7aaa3652e31d 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -204,6 +204,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
continue;
xa_erase(&vepc->page_array, index);
+ cond_resched();
}
/*
@@ -222,6 +223,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
list_add_tail(&epc_page->list, &secs_pages);
xa_erase(&vepc->page_array, index);
+ cond_resched();
}
/*
@@ -243,6 +245,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
if (sgx_vepc_free_page(epc_page))
list_add_tail(&epc_page->list, &secs_pages);
+ cond_resched();
}
if (!list_empty(&secs_pages))
--
2.34.1
I'm announcing the release of the 6.1.52 kernel.
All users of the 6.1 kernel series must upgrade.
The updated 6.1.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.1.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt | 46 +++++++++++++
Makefile | 2
arch/arm/mach-pxa/sharpsl_pm.c | 2
arch/arm/mach-pxa/spitz.c | 14 ---
arch/mips/alchemy/devboards/db1000.c | 8 --
arch/mips/alchemy/devboards/db1200.c | 19 -----
arch/mips/alchemy/devboards/db1300.c | 10 --
drivers/bluetooth/btsdio.c | 1
drivers/firmware/stratix10-svc.c | 2
drivers/fsi/fsi-master-ast-cf.c | 1
drivers/hid/wacom.h | 1
drivers/hid/wacom_sys.c | 25 +++++--
drivers/hid/wacom_wac.c | 1
drivers/hid/wacom_wac.h | 1
drivers/mmc/host/Kconfig | 5 -
drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2
drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 7 +
drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2
drivers/pinctrl/pinctrl-amd.c | 4 -
drivers/rtc/rtc-ds1685.c | 2
drivers/staging/rtl8712/os_intfs.c | 1
drivers/staging/rtl8712/usb_intf.c | 1
drivers/tty/serial/qcom_geni_serial.c | 5 +
drivers/tty/serial/sc16is7xx.c | 17 ++++
drivers/usb/chipidea/ci_hdrc_imx.c | 10 +-
drivers/usb/chipidea/usbmisc_imx.c | 6 +
drivers/usb/dwc3/dwc3-meson-g12a.c | 6 +
drivers/usb/serial/option.c | 7 +
drivers/usb/typec/tcpm/tcpci.c | 4 +
drivers/usb/typec/tcpm/tcpm.c | 7 +
fs/erofs/zdata.c | 2
fs/nilfs2/alloc.c | 3
fs/nilfs2/inode.c | 7 +
fs/nilfs2/segment.c | 5 +
fs/smb/server/auth.c | 3
fs/smb/server/oplock.c | 2
fs/smb/server/smb2pdu.c | 2
fs/smb/server/smb2pdu.h | 2
fs/smb/server/transport_rdma.c | 25 +++++--
include/linux/usb/tcpci.h | 1
kernel/module/main.c | 14 +++
sound/usb/stream.c | 11 ++-
42 files changed, 208 insertions(+), 88 deletions(-)
Aaron Armstrong Skomra (1):
HID: wacom: remove the battery when the EKR is off
Arnd Bergmann (1):
ARM: pxa: remove use of symbol_get()
Badhri Jagan Sridharan (1):
tcpm: Avoid soft reset when partner does not support get_status
Christoph Hellwig (4):
mmc: au1xmmc: force non-modular build and remove symbol_get usage
net: enetc: use EXPORT_SYMBOL_GPL for enetc_phc_index
rtc: ds1685: use EXPORT_SYMBOL_GPL for ds1685_rtc_poweroff
modules: only allow symbol_get of EXPORT_SYMBOL_GPL modules
Deren Wu (2):
wifi: mt76: mt7921: do not support one stream on secondary antenna only
wifi: mt76: mt7921: fix skb leak by txs missing in AMSDU
Gao Xiang (1):
erofs: ensure that the post-EOF tails are all zeroed
Greg Kroah-Hartman (1):
Linux 6.1.52
Hugo Villeneuve (3):
serial: sc16is7xx: fix broken port 0 uart init
serial: sc16is7xx: fix bug when first setting GPIO direction
dt-bindings: sc16is7xx: Add property to change GPIO function
Johan Hovold (1):
serial: qcom-geni: fix opp vote on shutdown
Juerg Haefliger (1):
fsi: master-ast-cf: Add MODULE_FIRMWARE macro
Luke Lu (1):
usb: dwc3: meson-g12a: do post init to fix broken usb after resumption
Marco Felsch (1):
usb: typec: tcpci: clear the fault status bit
Mario Limonciello (1):
pinctrl: amd: Don't show `Invalid config param` errors
Martin Kohn (1):
USB: serial: option: add Quectel EM05G variant (0x030e)
Nam Cao (1):
staging: rtl8712: fix race condition
Namjae Jeon (4):
ksmbd: fix wrong DataOffset validation of create context
ksmbd: fix slub overflow in ksmbd_decode_ntlmssp_auth_blob()
ksmbd: replace one-element array with flex-array member in struct smb2_ea_info
ksmbd: reduce descriptor size if remaining bytes is less than request size
Ryusuke Konishi (2):
nilfs2: fix general protection fault in nilfs_lookup_dirty_data_buffers()
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
Slark Xiao (1):
USB: serial: option: add FOXCONN T99W368/T99W373 product
Takashi Iwai (1):
ALSA: usb-audio: Fix init call orders for UAC1
Wang Ming (1):
firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe
Xu Yang (1):
usb: chipidea: imx: improve logic if samsung,picophy-* parameter is 0
Zheng Wang (1):
Bluetooth: btsdio: fix use after free bug in btsdio_remove due to race condition
I'm announcing the release of the 5.15.131 kernel.
All users of the 5.15 kernel series must upgrade.
The updated 5.15.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.15.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2
arch/arm/mach-pxa/sharpsl_pm.c | 2
arch/arm/mach-pxa/spitz.c | 14 -
arch/mips/alchemy/devboards/db1000.c | 8
arch/mips/alchemy/devboards/db1200.c | 19 --
arch/mips/alchemy/devboards/db1300.c | 10 -
drivers/bluetooth/btsdio.c | 1
drivers/firmware/stratix10-svc.c | 2
drivers/fsi/fsi-master-ast-cf.c | 1
drivers/hid/wacom.h | 1
drivers/hid/wacom_sys.c | 25 ++
drivers/hid/wacom_wac.c | 1
drivers/hid/wacom_wac.h | 1
drivers/mmc/host/Kconfig | 5
drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2
drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2
drivers/pinctrl/pinctrl-amd.c | 4
drivers/rtc/rtc-ds1685.c | 2
drivers/staging/rtl8712/os_intfs.c | 1
drivers/staging/rtl8712/usb_intf.c | 1
drivers/tty/serial/qcom_geni_serial.c | 5
drivers/tty/serial/sc16is7xx.c | 17 +
drivers/usb/chipidea/ci_hdrc_imx.c | 10 -
drivers/usb/chipidea/usbmisc_imx.c | 6
drivers/usb/dwc3/dwc3-meson-g12a.c | 6
drivers/usb/serial/option.c | 7
drivers/usb/typec/tcpm/tcpci.c | 7
drivers/usb/typec/tcpm/tcpci.h | 209 ----------------------
drivers/usb/typec/tcpm/tcpci_maxim.c | 3
drivers/usb/typec/tcpm/tcpci_mt6360.c | 3
drivers/usb/typec/tcpm/tcpci_rt1711h.c | 2
drivers/usb/typec/tcpm/tcpm.c | 7
fs/erofs/zdata.c | 2
fs/ksmbd/oplock.c | 2
fs/ksmbd/smb2pdu.c | 2
fs/ksmbd/smb2pdu.h | 2
fs/nilfs2/alloc.c | 3
fs/nilfs2/inode.c | 7
fs/nilfs2/segment.c | 5
include/linux/usb/tcpci.h | 211 +++++++++++++++++++++++
kernel/module.c | 14 +
sound/usb/stream.c | 11 +
42 files changed, 350 insertions(+), 295 deletions(-)
Aaron Armstrong Skomra (1):
HID: wacom: remove the battery when the EKR is off
Arnd Bergmann (1):
ARM: pxa: remove use of symbol_get()
Badhri Jagan Sridharan (1):
tcpm: Avoid soft reset when partner does not support get_status
Christoph Hellwig (4):
mmc: au1xmmc: force non-modular build and remove symbol_get usage
net: enetc: use EXPORT_SYMBOL_GPL for enetc_phc_index
rtc: ds1685: use EXPORT_SYMBOL_GPL for ds1685_rtc_poweroff
modules: only allow symbol_get of EXPORT_SYMBOL_GPL modules
Deren Wu (1):
wifi: mt76: mt7921: do not support one stream on secondary antenna only
Gao Xiang (1):
erofs: ensure that the post-EOF tails are all zeroed
Greg Kroah-Hartman (1):
Linux 5.15.131
Hugo Villeneuve (2):
serial: sc16is7xx: fix broken port 0 uart init
serial: sc16is7xx: fix bug when first setting GPIO direction
Johan Hovold (1):
serial: qcom-geni: fix opp vote on shutdown
Juerg Haefliger (1):
fsi: master-ast-cf: Add MODULE_FIRMWARE macro
Luke Lu (1):
usb: dwc3: meson-g12a: do post init to fix broken usb after resumption
Marco Felsch (1):
usb: typec: tcpci: clear the fault status bit
Mario Limonciello (1):
pinctrl: amd: Don't show `Invalid config param` errors
Martin Kohn (1):
USB: serial: option: add Quectel EM05G variant (0x030e)
Nam Cao (1):
staging: rtl8712: fix race condition
Namjae Jeon (2):
ksmbd: fix wrong DataOffset validation of create context
ksmbd: replace one-element array with flex-array member in struct smb2_ea_info
Ryusuke Konishi (2):
nilfs2: fix general protection fault in nilfs_lookup_dirty_data_buffers()
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
Slark Xiao (1):
USB: serial: option: add FOXCONN T99W368/T99W373 product
Takashi Iwai (1):
ALSA: usb-audio: Fix init call orders for UAC1
Wang Ming (1):
firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe
Xin Ji (1):
usb: typec: tcpci: move tcpci.h to include/linux/usb/
Xu Yang (1):
usb: chipidea: imx: improve logic if samsung,picophy-* parameter is 0
Zheng Wang (1):
Bluetooth: btsdio: fix use after free bug in btsdio_remove due to race condition
I'm announcing the release of the 6.4.15 kernel.
All users of the 6.4 kernel series must upgrade.
The updated 6.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.4.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt | 46 +++++++++++++
Makefile | 2
arch/arm/mach-pxa/sharpsl_pm.c | 2
arch/arm/mach-pxa/spitz.c | 14 ---
arch/mips/alchemy/devboards/db1000.c | 8 --
arch/mips/alchemy/devboards/db1200.c | 19 -----
arch/mips/alchemy/devboards/db1300.c | 10 --
drivers/firmware/stratix10-svc.c | 2
drivers/fsi/fsi-master-ast-cf.c | 1
drivers/hid/wacom.h | 1
drivers/hid/wacom_sys.c | 25 +++++--
drivers/hid/wacom_wac.c | 1
drivers/hid/wacom_wac.h | 1
drivers/mmc/host/Kconfig | 5 -
drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2
drivers/net/wireless/ath/ath11k/dp_tx.c | 10 +-
drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 7 +
drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2
drivers/net/wireless/realtek/rtw88/usb.c | 5 +
drivers/pinctrl/pinctrl-amd.c | 4 -
drivers/rtc/rtc-ds1685.c | 2
drivers/staging/rtl8712/os_intfs.c | 1
drivers/staging/rtl8712/usb_intf.c | 1
drivers/tty/serial/qcom_geni_serial.c | 5 +
drivers/tty/serial/sc16is7xx.c | 17 ++++
drivers/usb/chipidea/ci_hdrc_imx.c | 10 +-
drivers/usb/chipidea/usbmisc_imx.c | 6 +
drivers/usb/dwc3/dwc3-meson-g12a.c | 6 +
drivers/usb/serial/option.c | 7 +
drivers/usb/typec/tcpm/tcpci.c | 4 +
drivers/usb/typec/tcpm/tcpm.c | 7 +
fs/erofs/zdata.c | 2
fs/nilfs2/alloc.c | 3
fs/nilfs2/inode.c | 7 +
fs/smb/server/auth.c | 3
fs/smb/server/oplock.c | 2
fs/smb/server/smb2pdu.c | 2
fs/smb/server/smb2pdu.h | 2
fs/smb/server/transport_rdma.c | 25 +++++--
include/linux/usb/tcpci.h | 1
kernel/module/main.c | 14 +++
sound/usb/stream.c | 11 ++-
42 files changed, 211 insertions(+), 94 deletions(-)
Aaron Armstrong Skomra (1):
HID: wacom: remove the battery when the EKR is off
Arnd Bergmann (1):
ARM: pxa: remove use of symbol_get()
Badhri Jagan Sridharan (1):
tcpm: Avoid soft reset when partner does not support get_status
Christoph Hellwig (4):
mmc: au1xmmc: force non-modular build and remove symbol_get usage
net: enetc: use EXPORT_SYMBOL_GPL for enetc_phc_index
rtc: ds1685: use EXPORT_SYMBOL_GPL for ds1685_rtc_poweroff
modules: only allow symbol_get of EXPORT_SYMBOL_GPL modules
Deren Wu (2):
wifi: mt76: mt7921: do not support one stream on secondary antenna only
wifi: mt76: mt7921: fix skb leak by txs missing in AMSDU
Gao Xiang (1):
erofs: ensure that the post-EOF tails are all zeroed
Greg Kroah-Hartman (1):
Linux 6.4.15
Hugo Villeneuve (3):
serial: sc16is7xx: fix broken port 0 uart init
serial: sc16is7xx: fix bug when first setting GPIO direction
dt-bindings: sc16is7xx: Add property to change GPIO function
Johan Hovold (1):
serial: qcom-geni: fix opp vote on shutdown
Juerg Haefliger (1):
fsi: master-ast-cf: Add MODULE_FIRMWARE macro
Luke Lu (1):
usb: dwc3: meson-g12a: do post init to fix broken usb after resumption
Marco Felsch (1):
usb: typec: tcpci: clear the fault status bit
Mario Limonciello (1):
pinctrl: amd: Don't show `Invalid config param` errors
Martin Kohn (1):
USB: serial: option: add Quectel EM05G variant (0x030e)
Nam Cao (1):
staging: rtl8712: fix race condition
Namjae Jeon (4):
ksmbd: fix wrong DataOffset validation of create context
ksmbd: fix slub overflow in ksmbd_decode_ntlmssp_auth_blob()
ksmbd: replace one-element array with flex-array member in struct smb2_ea_info
ksmbd: reduce descriptor size if remaining bytes is less than request size
Ryusuke Konishi (1):
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
Sascha Hauer (1):
wifi: rtw88: usb: kill and free rx urbs on probe failure
Slark Xiao (1):
USB: serial: option: add FOXCONN T99W368/T99W373 product
Sven Eckelmann (2):
wifi: ath11k: Don't drop tx_status when peer cannot be found
wifi: ath11k: Cleanup mac80211 references on failure during tx_complete
Takashi Iwai (1):
ALSA: usb-audio: Fix init call orders for UAC1
Wang Ming (1):
firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe
Xu Yang (1):
usb: chipidea: imx: improve logic if samsung,picophy-* parameter is 0
I'm announcing the release of the 6.5.2 kernel.
All users of the 6.5 kernel series must upgrade.
The updated 6.5.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.5.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt | 46 +++++++++++++
Makefile | 2
arch/arm/mach-pxa/sharpsl_pm.c | 2
arch/arm/mach-pxa/spitz.c | 14 ---
arch/mips/alchemy/devboards/db1000.c | 8 --
arch/mips/alchemy/devboards/db1200.c | 19 -----
arch/mips/alchemy/devboards/db1300.c | 10 --
drivers/firmware/stratix10-svc.c | 2
drivers/fsi/fsi-master-ast-cf.c | 1
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 -
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 4 -
drivers/hid/wacom.h | 1
drivers/hid/wacom_sys.c | 25 +++++--
drivers/hid/wacom_wac.c | 1
drivers/hid/wacom_wac.h | 1
drivers/mmc/host/Kconfig | 5 -
drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2
drivers/net/wireless/ath/ath11k/dp_tx.c | 10 +-
drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 7 +
drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2
drivers/net/wireless/realtek/rtw88/usb.c | 5 +
drivers/pinctrl/pinctrl-amd.c | 4 -
drivers/rtc/rtc-ds1685.c | 2
drivers/staging/rtl8712/os_intfs.c | 1
drivers/staging/rtl8712/usb_intf.c | 1
drivers/tty/serial/qcom_geni_serial.c | 5 +
drivers/tty/serial/sc16is7xx.c | 17 ++++
drivers/usb/chipidea/ci_hdrc_imx.c | 10 +-
drivers/usb/chipidea/usbmisc_imx.c | 6 +
drivers/usb/dwc3/dwc3-meson-g12a.c | 6 +
drivers/usb/serial/option.c | 7 +
drivers/usb/typec/tcpm/tcpci.c | 4 +
drivers/usb/typec/tcpm/tcpm.c | 7 +
fs/erofs/zdata.c | 2
fs/nilfs2/alloc.c | 3
fs/nilfs2/inode.c | 7 +
fs/smb/server/auth.c | 3
fs/smb/server/oplock.c | 2
fs/smb/server/smb2pdu.c | 2
fs/smb/server/smb2pdu.h | 2
fs/smb/server/transport_rdma.c | 25 +++++--
include/linux/usb/tcpci.h | 1
kernel/module/main.c | 14 +++
kernel/trace/trace.c | 4 -
sound/usb/stream.c | 11 ++-
45 files changed, 219 insertions(+), 98 deletions(-)
Aaron Armstrong Skomra (1):
HID: wacom: remove the battery when the EKR is off
Arnd Bergmann (1):
ARM: pxa: remove use of symbol_get()
Badhri Jagan Sridharan (1):
tcpm: Avoid soft reset when partner does not support get_status
Brian Foster (1):
tracing: Zero the pipe cpumask on alloc to avoid spurious -EBUSY
Christoph Hellwig (4):
mmc: au1xmmc: force non-modular build and remove symbol_get usage
net: enetc: use EXPORT_SYMBOL_GPL for enetc_phc_index
rtc: ds1685: use EXPORT_SYMBOL_GPL for ds1685_rtc_poweroff
modules: only allow symbol_get of EXPORT_SYMBOL_GPL modules
Deren Wu (2):
wifi: mt76: mt7921: do not support one stream on secondary antenna only
wifi: mt76: mt7921: fix skb leak by txs missing in AMSDU
Gao Xiang (1):
erofs: ensure that the post-EOF tails are all zeroed
Greg Kroah-Hartman (1):
Linux 6.5.2
Hugo Villeneuve (3):
serial: sc16is7xx: fix broken port 0 uart init
serial: sc16is7xx: fix bug when first setting GPIO direction
dt-bindings: sc16is7xx: Add property to change GPIO function
Johan Hovold (1):
serial: qcom-geni: fix opp vote on shutdown
Juerg Haefliger (1):
fsi: master-ast-cf: Add MODULE_FIRMWARE macro
Lang Yu (1):
drm/amdgpu: correct vmhub index in GMC v10/11
Luke Lu (1):
usb: dwc3: meson-g12a: do post init to fix broken usb after resumption
Marco Felsch (1):
usb: typec: tcpci: clear the fault status bit
Mario Limonciello (1):
pinctrl: amd: Don't show `Invalid config param` errors
Martin Kohn (1):
USB: serial: option: add Quectel EM05G variant (0x030e)
Nam Cao (1):
staging: rtl8712: fix race condition
Namjae Jeon (4):
ksmbd: fix wrong DataOffset validation of create context
ksmbd: fix slub overflow in ksmbd_decode_ntlmssp_auth_blob()
ksmbd: replace one-element array with flex-array member in struct smb2_ea_info
ksmbd: reduce descriptor size if remaining bytes is less than request size
Ryusuke Konishi (1):
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
Sascha Hauer (1):
wifi: rtw88: usb: kill and free rx urbs on probe failure
Slark Xiao (1):
USB: serial: option: add FOXCONN T99W368/T99W373 product
Sven Eckelmann (2):
wifi: ath11k: Don't drop tx_status when peer cannot be found
wifi: ath11k: Cleanup mac80211 references on failure during tx_complete
Takashi Iwai (1):
ALSA: usb-audio: Fix init call orders for UAC1
Wang Ming (1):
firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe
Xu Yang (1):
usb: chipidea: imx: improve logic if samsung,picophy-* parameter is 0
System runs at minimum performance, once powercap RAPL package domain
enabled flag is changed from 1 to 0 to 1.
Setting RAPL package domain enabled flag to 0, results in setting of
power limit 4 (PL4) MSR 0x601 to 0. This implies disabling PL4 limit.
The PL4 limit controls the peak power. So setting 0, results in some
undesirable performance, which depends on hardware implementation.
Even worse, when the enabled flag is set to 1 again. This will set PL4
MSR value to 0x01, which means reduce peak power to 0.125W. This will
force system to run at the lowest possible performance on every PL4
supported system.
Setting enabled flag should only affect the "enable" bit, not other
bits. Here it is changing power limit.
This is caused by a change which assumes that there is an enable bit in
the PL4 MSR like other power limits. Although PL4 enable/disable bit is
present with TPMI RAPL interface, it is not present with the MSR
interface.
There is a rapl_primitive_info defined for non existent PL4 enable bit
and then it is used with the commit 9050a9cd5e4c ("powercap: intel_rapl:
Cleanup Power Limits support") to enable PL4. This is wrong, hence remove
this rapl primitive for PL4. Also in the function
rapl_detect_powerlimit(), PL_ENABLE is used to check for the presence of
power limits. Replace PL_ENABLE with PL_LIMIT, as PL_LIMIT must be
present. Without this change, PL4 controls will not be available in the
sysfs once rapl primitive for PL4 is removed.
Fixes: 9050a9cd5e4c ("powercap: intel_rapl: Cleanup Power Limits support")
Suggested-by: Zhang Rui <rui.zhang(a)intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar(a)intel.com>
Cc: stable(a)vger.kernel.org # v6.5+
---
v2
- Remove RAPL primitive for PL4 instead as suggedted by Rui
- Replace PL_ENABLE with PL_LIMIT for domain detect
- Update change log and header
drivers/powercap/intel_rapl_common.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 5c2e6d5eea2a..40a2cc649c79 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -658,8 +658,6 @@ static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
[PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
- RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
[TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
[TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
@@ -1458,7 +1456,7 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
}
}
- if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
+ if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64))
rd->rpl[i].name = NULL;
}
}
--
2.34.1
System runs at minimum performance, once powercap RAPL package domain
"enabled" flag is toggled.
Setting RAPL package domain enabled flag to 0, results in setting of
power limit 4 (PL4) MSR 0x601 to 0. This implies disabling PL4 limit.
The PL4 limit controls the peak power. This can significantly change
the performance. Even worse, when the enabled flag is set to 1 again.
This will set PL4 MSR value to 0x01, which means reduce peak power to
0.125W. This will force the system to run at the lowest possible
performance.
This is caused by a change which assumes that there is an enable bit
in the PL4 MSR like other power limits.
In functions set_floor_freq_default() and rapl_remove_package(), call
rapl_write_pl_data with PL_ENABLE and PL_CLAMP for only power limit 1
and 2. Similarly don't read PL_ENABLE for PL4 to check the presence of
power limit 4. Power limit 4 support is based on CPU model in this
driver. No additional checks can be done.
Fixes: 9050a9cd5e4c ("powercap: intel_rapl: Cleanup Power Limits support")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v6.5+
---
drivers/powercap/intel_rapl_common.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 5c2e6d5eea2a..0afedf7ad872 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -184,8 +184,6 @@ static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
case POWER_LIMIT4:
if (prim == PL_LIMIT)
return POWER_LIMIT4;
- if (prim == PL_ENABLE)
- return PL4_ENABLE;
/* PL4 would be around two times PL2, use same prim as PL2. */
if (prim == PL_MAX_POWER)
return MAX_POWER;
@@ -1033,17 +1031,13 @@ static void package_power_limit_irq_restore(struct rapl_package *rp)
static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
{
- int i;
-
/* always enable clamp such that p-state can go below OS requested
* range. power capping priority over guranteed frequency.
*/
rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode);
- for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) {
- rapl_write_pl_data(rd, i, PL_ENABLE, mode);
- rapl_write_pl_data(rd, i, PL_CLAMP, mode);
- }
+ rapl_write_pl_data(rd, POWER_LIMIT2, PL_ENABLE, mode);
+ rapl_write_pl_data(rd, POWER_LIMIT2, PL_CLAMP, mode);
}
static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
@@ -1458,7 +1452,7 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
}
}
- if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
+ if (i != POWER_LIMIT4 && rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
rd->rpl[i].name = NULL;
}
}
@@ -1510,7 +1504,7 @@ void rapl_remove_package(struct rapl_package *rp)
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
int i;
- for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
+ for (i = POWER_LIMIT1; i <= POWER_LIMIT2; i++) {
rapl_write_pl_data(rd, i, PL_ENABLE, 0);
rapl_write_pl_data(rd, i, PL_CLAMP, 0);
}
--
2.34.1
This is the start of the stable review cycle for the 5.15.131 release.
There are 28 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Sep 2023 18:29:29 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.131-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.15.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.15.131-rc1
Marco Felsch <m.felsch(a)pengutronix.de>
usb: typec: tcpci: clear the fault status bit
Xin Ji <xji(a)analogixsemi.com>
usb: typec: tcpci: move tcpci.h to include/linux/usb/
Mario Limonciello <mario.limonciello(a)amd.com>
pinctrl: amd: Don't show `Invalid config param` errors
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix general protection fault in nilfs_lookup_dirty_data_buffers()
Badhri Jagan Sridharan <badhri(a)google.com>
tcpm: Avoid soft reset when partner does not support get_status
Juerg Haefliger <juerg.haefliger(a)canonical.com>
fsi: master-ast-cf: Add MODULE_FIRMWARE macro
Wang Ming <machel(a)vivo.com>
firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: fix bug when first setting GPIO direction
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: fix broken port 0 uart init
Johan Hovold <johan+linaro(a)kernel.org>
serial: qcom-geni: fix opp vote on shutdown
Deren Wu <deren.wu(a)mediatek.com>
wifi: mt76: mt7921: do not support one stream on secondary antenna only
Zheng Wang <zyytlz.wz(a)163.com>
Bluetooth: btsdio: fix use after free bug in btsdio_remove due to race condition
Nam Cao <namcaov(a)gmail.com>
staging: rtl8712: fix race condition
Aaron Armstrong Skomra <aaron.skomra(a)wacom.com>
HID: wacom: remove the battery when the EKR is off
Xu Yang <xu.yang_2(a)nxp.com>
usb: chipidea: imx: improve logic if samsung,picophy-* parameter is 0
Luke Lu <luke.lu(a)libre.computer>
usb: dwc3: meson-g12a: do post init to fix broken usb after resumption
Takashi Iwai <tiwai(a)suse.de>
ALSA: usb-audio: Fix init call orders for UAC1
Slark Xiao <slark_xiao(a)163.com>
USB: serial: option: add FOXCONN T99W368/T99W373 product
Martin Kohn <m.kohn(a)welotec.com>
USB: serial: option: add Quectel EM05G variant (0x030e)
Christoph Hellwig <hch(a)lst.de>
modules: only allow symbol_get of EXPORT_SYMBOL_GPL modules
Christoph Hellwig <hch(a)lst.de>
rtc: ds1685: use EXPORT_SYMBOL_GPL for ds1685_rtc_poweroff
Christoph Hellwig <hch(a)lst.de>
net: enetc: use EXPORT_SYMBOL_GPL for enetc_phc_index
Christoph Hellwig <hch(a)lst.de>
mmc: au1xmmc: force non-modular build and remove symbol_get usage
Arnd Bergmann <arnd(a)arndb.de>
ARM: pxa: remove use of symbol_get()
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: replace one-element array with flex-array member in struct smb2_ea_info
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: fix wrong DataOffset validation of create context
Gao Xiang <hsiangkao(a)linux.alibaba.com>
erofs: ensure that the post-EOF tails are all zeroed
-------------
Diffstat:
Makefile | 4 ++--
arch/arm/mach-pxa/sharpsl_pm.c | 2 --
arch/arm/mach-pxa/spitz.c | 14 +-----------
arch/mips/alchemy/devboards/db1000.c | 8 +------
arch/mips/alchemy/devboards/db1200.c | 19 ++--------------
arch/mips/alchemy/devboards/db1300.c | 10 +--------
drivers/bluetooth/btsdio.c | 1 +
drivers/firmware/stratix10-svc.c | 2 +-
drivers/fsi/fsi-master-ast-cf.c | 1 +
drivers/hid/wacom.h | 1 +
drivers/hid/wacom_sys.c | 25 ++++++++++++++++++----
drivers/hid/wacom_wac.c | 1 +
drivers/hid/wacom_wac.h | 1 +
drivers/mmc/host/Kconfig | 5 +++--
drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2 +-
drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 +-
drivers/pinctrl/pinctrl-amd.c | 4 ++--
drivers/rtc/rtc-ds1685.c | 2 +-
drivers/staging/rtl8712/os_intfs.c | 1 +
drivers/staging/rtl8712/usb_intf.c | 1 -
drivers/tty/serial/qcom_geni_serial.c | 5 +++++
drivers/tty/serial/sc16is7xx.c | 17 ++++++++++++++-
drivers/usb/chipidea/ci_hdrc_imx.c | 10 +++++----
drivers/usb/chipidea/usbmisc_imx.c | 6 ++++--
drivers/usb/dwc3/dwc3-meson-g12a.c | 6 ++++++
drivers/usb/serial/option.c | 7 ++++++
drivers/usb/typec/tcpm/tcpci.c | 7 ++++--
drivers/usb/typec/tcpm/tcpci_maxim.c | 3 +--
drivers/usb/typec/tcpm/tcpci_mt6360.c | 3 +--
drivers/usb/typec/tcpm/tcpci_rt1711h.c | 2 +-
drivers/usb/typec/tcpm/tcpm.c | 7 ++++++
fs/erofs/zdata.c | 2 ++
fs/ksmbd/oplock.c | 2 +-
fs/ksmbd/smb2pdu.c | 2 +-
fs/ksmbd/smb2pdu.h | 2 +-
fs/nilfs2/alloc.c | 3 ++-
fs/nilfs2/inode.c | 7 ++++--
fs/nilfs2/segment.c | 5 +++++
.../usb/typec/tcpm => include/linux/usb}/tcpci.h | 2 ++
kernel/module.c | 14 +++++++++---
sound/usb/stream.c | 11 +++++++++-
41 files changed, 142 insertions(+), 87 deletions(-)
From: Zi Yan <ziy(a)nvidia.com>
When dealing with hugetlb pages, manipulating struct page pointers
directly can get to wrong struct page, since struct page is not guaranteed
to be contiguous on SPARSEMEM without VMEMMAP. Use nth_page() to handle
it properly.
Fixes: eeb0efd071d8 ("mm,memory_hotplug: fix scan_movable_pages() for gigantic hugepages")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Muchun Song <songmuchun(a)bytedance.com>
---
mm/memory_hotplug.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1b03f4ec6fd2..3b301c4023ff 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1689,7 +1689,7 @@ static int scan_movable_pages(unsigned long start, unsigned long end,
*/
if (HPageMigratable(head))
goto found;
- skip = compound_nr(head) - (page - head);
+ skip = compound_nr(head) - (pfn - page_to_pfn(head));
pfn += skip - 1;
}
return -ENOENT;
--
2.40.1
This is the start of the stable review cycle for the 6.4.15 release.
There are 32 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Sep 2023 18:29:29 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.4.15-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.4.15-rc1
Mario Limonciello <mario.limonciello(a)amd.com>
pinctrl: amd: Don't show `Invalid config param` errors
Marco Felsch <m.felsch(a)pengutronix.de>
usb: typec: tcpci: clear the fault status bit
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
dt-bindings: sc16is7xx: Add property to change GPIO function
Badhri Jagan Sridharan <badhri(a)google.com>
tcpm: Avoid soft reset when partner does not support get_status
Juerg Haefliger <juerg.haefliger(a)canonical.com>
fsi: master-ast-cf: Add MODULE_FIRMWARE macro
Wang Ming <machel(a)vivo.com>
firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: fix bug when first setting GPIO direction
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: fix broken port 0 uart init
Johan Hovold <johan+linaro(a)kernel.org>
serial: qcom-geni: fix opp vote on shutdown
Sven Eckelmann <sven(a)narfation.org>
wifi: ath11k: Cleanup mac80211 references on failure during tx_complete
Sven Eckelmann <sven(a)narfation.org>
wifi: ath11k: Don't drop tx_status when peer cannot be found
Sascha Hauer <s.hauer(a)pengutronix.de>
wifi: rtw88: usb: kill and free rx urbs on probe failure
Deren Wu <deren.wu(a)mediatek.com>
wifi: mt76: mt7921: fix skb leak by txs missing in AMSDU
Deren Wu <deren.wu(a)mediatek.com>
wifi: mt76: mt7921: do not support one stream on secondary antenna only
Nam Cao <namcaov(a)gmail.com>
staging: rtl8712: fix race condition
Aaron Armstrong Skomra <aaron.skomra(a)wacom.com>
HID: wacom: remove the battery when the EKR is off
Xu Yang <xu.yang_2(a)nxp.com>
usb: chipidea: imx: improve logic if samsung,picophy-* parameter is 0
Luke Lu <luke.lu(a)libre.computer>
usb: dwc3: meson-g12a: do post init to fix broken usb after resumption
Takashi Iwai <tiwai(a)suse.de>
ALSA: usb-audio: Fix init call orders for UAC1
Slark Xiao <slark_xiao(a)163.com>
USB: serial: option: add FOXCONN T99W368/T99W373 product
Martin Kohn <m.kohn(a)welotec.com>
USB: serial: option: add Quectel EM05G variant (0x030e)
Christoph Hellwig <hch(a)lst.de>
modules: only allow symbol_get of EXPORT_SYMBOL_GPL modules
Christoph Hellwig <hch(a)lst.de>
rtc: ds1685: use EXPORT_SYMBOL_GPL for ds1685_rtc_poweroff
Christoph Hellwig <hch(a)lst.de>
net: enetc: use EXPORT_SYMBOL_GPL for enetc_phc_index
Christoph Hellwig <hch(a)lst.de>
mmc: au1xmmc: force non-modular build and remove symbol_get usage
Arnd Bergmann <arnd(a)arndb.de>
ARM: pxa: remove use of symbol_get()
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: reduce descriptor size if remaining bytes is less than request size
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: replace one-element array with flex-array member in struct smb2_ea_info
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: fix slub overflow in ksmbd_decode_ntlmssp_auth_blob()
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: fix wrong DataOffset validation of create context
Gao Xiang <xiang(a)kernel.org>
erofs: ensure that the post-EOF tails are all zeroed
-------------
Diffstat:
.../devicetree/bindings/serial/nxp,sc16is7xx.txt | 46 ++++++++++++++++++++++
Makefile | 4 +-
arch/arm/mach-pxa/sharpsl_pm.c | 2 -
arch/arm/mach-pxa/spitz.c | 14 +------
arch/mips/alchemy/devboards/db1000.c | 8 +---
arch/mips/alchemy/devboards/db1200.c | 19 +--------
arch/mips/alchemy/devboards/db1300.c | 10 +----
drivers/firmware/stratix10-svc.c | 2 +-
drivers/fsi/fsi-master-ast-cf.c | 1 +
drivers/hid/wacom.h | 1 +
drivers/hid/wacom_sys.c | 25 ++++++++++--
drivers/hid/wacom_wac.c | 1 +
drivers/hid/wacom_wac.h | 1 +
drivers/mmc/host/Kconfig | 5 ++-
drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2 +-
drivers/net/wireless/ath/ath11k/dp_tx.c | 10 ++---
.../net/wireless/mediatek/mt76/mt76_connac_mac.c | 7 +++-
drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 +-
drivers/net/wireless/realtek/rtw88/usb.c | 5 ++-
drivers/pinctrl/pinctrl-amd.c | 4 +-
drivers/rtc/rtc-ds1685.c | 2 +-
drivers/staging/rtl8712/os_intfs.c | 1 +
drivers/staging/rtl8712/usb_intf.c | 1 -
drivers/tty/serial/qcom_geni_serial.c | 5 +++
drivers/tty/serial/sc16is7xx.c | 17 +++++++-
drivers/usb/chipidea/ci_hdrc_imx.c | 10 +++--
drivers/usb/chipidea/usbmisc_imx.c | 6 ++-
drivers/usb/dwc3/dwc3-meson-g12a.c | 6 +++
drivers/usb/serial/option.c | 7 ++++
drivers/usb/typec/tcpm/tcpci.c | 4 ++
drivers/usb/typec/tcpm/tcpm.c | 7 ++++
fs/erofs/zdata.c | 2 +
fs/nilfs2/alloc.c | 3 +-
fs/nilfs2/inode.c | 7 +++-
fs/smb/server/auth.c | 3 ++
fs/smb/server/oplock.c | 2 +-
fs/smb/server/smb2pdu.c | 2 +-
fs/smb/server/smb2pdu.h | 2 +-
fs/smb/server/transport_rdma.c | 25 ++++++++----
include/linux/usb/tcpci.h | 1 +
kernel/module/main.c | 14 +++++--
sound/usb/stream.c | 11 +++++-
42 files changed, 212 insertions(+), 95 deletions(-)
This is the start of the stable review cycle for the 6.1.52 release.
There are 31 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Sep 2023 18:29:29 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.1.52-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.1.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.1.52-rc1
Mario Limonciello <mario.limonciello(a)amd.com>
pinctrl: amd: Don't show `Invalid config param` errors
Marco Felsch <m.felsch(a)pengutronix.de>
usb: typec: tcpci: clear the fault status bit
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix general protection fault in nilfs_lookup_dirty_data_buffers()
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
dt-bindings: sc16is7xx: Add property to change GPIO function
Badhri Jagan Sridharan <badhri(a)google.com>
tcpm: Avoid soft reset when partner does not support get_status
Juerg Haefliger <juerg.haefliger(a)canonical.com>
fsi: master-ast-cf: Add MODULE_FIRMWARE macro
Wang Ming <machel(a)vivo.com>
firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: fix bug when first setting GPIO direction
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: fix broken port 0 uart init
Johan Hovold <johan+linaro(a)kernel.org>
serial: qcom-geni: fix opp vote on shutdown
Deren Wu <deren.wu(a)mediatek.com>
wifi: mt76: mt7921: fix skb leak by txs missing in AMSDU
Deren Wu <deren.wu(a)mediatek.com>
wifi: mt76: mt7921: do not support one stream on secondary antenna only
Zheng Wang <zyytlz.wz(a)163.com>
Bluetooth: btsdio: fix use after free bug in btsdio_remove due to race condition
Nam Cao <namcaov(a)gmail.com>
staging: rtl8712: fix race condition
Aaron Armstrong Skomra <aaron.skomra(a)wacom.com>
HID: wacom: remove the battery when the EKR is off
Xu Yang <xu.yang_2(a)nxp.com>
usb: chipidea: imx: improve logic if samsung,picophy-* parameter is 0
Luke Lu <luke.lu(a)libre.computer>
usb: dwc3: meson-g12a: do post init to fix broken usb after resumption
Takashi Iwai <tiwai(a)suse.de>
ALSA: usb-audio: Fix init call orders for UAC1
Slark Xiao <slark_xiao(a)163.com>
USB: serial: option: add FOXCONN T99W368/T99W373 product
Martin Kohn <m.kohn(a)welotec.com>
USB: serial: option: add Quectel EM05G variant (0x030e)
Christoph Hellwig <hch(a)lst.de>
modules: only allow symbol_get of EXPORT_SYMBOL_GPL modules
Christoph Hellwig <hch(a)lst.de>
rtc: ds1685: use EXPORT_SYMBOL_GPL for ds1685_rtc_poweroff
Christoph Hellwig <hch(a)lst.de>
net: enetc: use EXPORT_SYMBOL_GPL for enetc_phc_index
Christoph Hellwig <hch(a)lst.de>
mmc: au1xmmc: force non-modular build and remove symbol_get usage
Arnd Bergmann <arnd(a)arndb.de>
ARM: pxa: remove use of symbol_get()
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: reduce descriptor size if remaining bytes is less than request size
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: replace one-element array with flex-array member in struct smb2_ea_info
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: fix slub overflow in ksmbd_decode_ntlmssp_auth_blob()
Namjae Jeon <linkinjeon(a)kernel.org>
ksmbd: fix wrong DataOffset validation of create context
Gao Xiang <xiang(a)kernel.org>
erofs: ensure that the post-EOF tails are all zeroed
-------------
Diffstat:
.../devicetree/bindings/serial/nxp,sc16is7xx.txt | 46 ++++++++++++++++++++++
Makefile | 4 +-
arch/arm/mach-pxa/sharpsl_pm.c | 2 -
arch/arm/mach-pxa/spitz.c | 14 +------
arch/mips/alchemy/devboards/db1000.c | 8 +---
arch/mips/alchemy/devboards/db1200.c | 19 +--------
arch/mips/alchemy/devboards/db1300.c | 10 +----
drivers/bluetooth/btsdio.c | 1 +
drivers/firmware/stratix10-svc.c | 2 +-
drivers/fsi/fsi-master-ast-cf.c | 1 +
drivers/hid/wacom.h | 1 +
drivers/hid/wacom_sys.c | 25 ++++++++++--
drivers/hid/wacom_wac.c | 1 +
drivers/hid/wacom_wac.h | 1 +
drivers/mmc/host/Kconfig | 5 ++-
drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2 +-
.../net/wireless/mediatek/mt76/mt76_connac_mac.c | 7 +++-
drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 +-
drivers/pinctrl/pinctrl-amd.c | 4 +-
drivers/rtc/rtc-ds1685.c | 2 +-
drivers/staging/rtl8712/os_intfs.c | 1 +
drivers/staging/rtl8712/usb_intf.c | 1 -
drivers/tty/serial/qcom_geni_serial.c | 5 +++
drivers/tty/serial/sc16is7xx.c | 17 +++++++-
drivers/usb/chipidea/ci_hdrc_imx.c | 10 +++--
drivers/usb/chipidea/usbmisc_imx.c | 6 ++-
drivers/usb/dwc3/dwc3-meson-g12a.c | 6 +++
drivers/usb/serial/option.c | 7 ++++
drivers/usb/typec/tcpm/tcpci.c | 4 ++
drivers/usb/typec/tcpm/tcpm.c | 7 ++++
fs/erofs/zdata.c | 2 +
fs/nilfs2/alloc.c | 3 +-
fs/nilfs2/inode.c | 7 +++-
fs/nilfs2/segment.c | 5 +++
fs/smb/server/auth.c | 3 ++
fs/smb/server/oplock.c | 2 +-
fs/smb/server/smb2pdu.c | 2 +-
fs/smb/server/smb2pdu.h | 2 +-
fs/smb/server/transport_rdma.c | 25 ++++++++----
include/linux/usb/tcpci.h | 1 +
kernel/module/main.c | 14 +++++--
sound/usb/stream.c | 11 +++++-
42 files changed, 209 insertions(+), 89 deletions(-)
With the recent removal of vm_dev from devres its memory is only freed
via the callback virtio_mmio_release_dev. However, this only takes
effect after device_add is called by register_virtio_device. Until then
it's an unmanaged resource and must be explicitly freed on error exit.
This bug was discovered and resolved using Coverity Static Analysis
Security Testing (SAST) by Synopsys, Inc.
Cc: stable(a)vger.kernel.org
Fixes: 55c91fedd03d ("virtio-mmio: don't break lifecycle of vm_dev")
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
---
Please note that I have only compile tested this code.
drivers/virtio/virtio_mmio.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 97760f611295..b2a48d07e973 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -631,13 +631,16 @@ static int virtio_mmio_probe(struct platform_device *pdev)
spin_lock_init(&vm_dev->lock);
vm_dev->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(vm_dev->base))
+ if (IS_ERR(vm_dev->base)) {
+ kfree(vm_dev);
return PTR_ERR(vm_dev->base);
+ }
/* Check magic value */
magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE);
if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) {
dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic);
+ kfree(vm_dev);
return -ENODEV;
}
@@ -646,6 +649,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
if (vm_dev->version < 1 || vm_dev->version > 2) {
dev_err(&pdev->dev, "Version %ld not supported!\n",
vm_dev->version);
+ kfree(vm_dev);
return -ENXIO;
}
@@ -655,6 +659,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
* virtio-mmio device with an ID 0 is a (dummy) placeholder
* with no function. End probing now with no error reported.
*/
+ kfree(vm_dev);
return -ENODEV;
}
vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
--
2.40.1
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
From: Zi Yan <ziy(a)nvidia.com>
__flush_dcache_pages() is called during hugetlb migration via
migrate_pages() -> migrate_hugetlbs() -> unmap_and_move_huge_page()
-> move_to_new_folio() -> flush_dcache_folio(). And with hugetlb and
without sparsemem vmemmap, struct page is not guaranteed to be contiguous
beyond a section. Use nth_page() instead.
Fixes: 15fa3e8e3269 ("mips: implement the new page table range API")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
arch/mips/mm/cache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 02042100e267..7f830634dbe7 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -117,7 +117,7 @@ void __flush_dcache_pages(struct page *page, unsigned int nr)
* get faulted into the tlb (and thus flushed) anyways.
*/
for (i = 0; i < nr; i++) {
- addr = (unsigned long)kmap_local_page(page + i);
+ addr = (unsigned long)kmap_local_page(nth_page(page, i));
flush_data_cache_page(addr);
kunmap_local((void *)addr);
}
--
2.40.1
From: Zi Yan <ziy(a)nvidia.com>
When dealing with hugetlb pages, manipulating struct page pointers
directly can get to wrong struct page, since struct page is not guaranteed
to be contiguous on SPARSEMEM without VMEMMAP. Use nth_page() to handle
it properly.
Fixes: 57a196a58421 ("hugetlb: simplify hugetlb handling in follow_page_mask")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Muchun Song <songmuchun(a)bytedance.com>
---
mm/hugetlb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2e7188876672..2521cc694fd4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6489,7 +6489,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
}
}
- page += ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
+ page = nth_page(page, ((address & ~huge_page_mask(h)) >> PAGE_SHIFT));
/*
* Note that page may be a sub-page, and with vmemmap
--
2.40.1
From: Zi Yan <ziy(a)nvidia.com>
When dealing with hugetlb pages, manipulating struct page pointers
directly can get to wrong struct page, since struct page is not guaranteed
to be contiguous on SPARSEMEM without VMEMMAP. Use nth_page() to handle
it properly.
Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Muchun Song <songmuchun(a)bytedance.com>
---
mm/cma.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/cma.c b/mm/cma.c
index da2967c6a223..2b2494fd6b59 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -505,7 +505,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
*/
if (page) {
for (i = 0; i < count; i++)
- page_kasan_tag_reset(page + i);
+ page_kasan_tag_reset(nth_page(page, i));
}
if (ret && !no_warn) {
--
2.40.1
This patch will lookup existing nodes instead of always creating them
when dlm_midcomms_addr() is called. The idea is here to create midcomms
nodes when user space getting informed that nodes joins the cluster. This
is the case when dlm_midcomms_addr() is called, however it can be called
multiple times by user space to add several address configurations to one
node e.g. when using SCTP. Those multiple times need to be filtered out
and we doing that by looking up if the node exists before. Due configfs
entry it is safe that this function gets only called once at a time.
Cc: stable(a)vger.kernel.org
Fixes: 63e711b08160 ("fs: dlm: create midcomms nodes when configure")
Signed-off-by: Alexander Aring <aahringo(a)redhat.com>
---
fs/dlm/midcomms.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index f641b36a36db..455265c6ba53 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -337,13 +337,21 @@ static struct midcomms_node *nodeid2node(int nodeid)
int dlm_midcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
{
- int ret, r = nodeid_hash(nodeid);
+ int ret, idx, r = nodeid_hash(nodeid);
struct midcomms_node *node;
ret = dlm_lowcomms_addr(nodeid, addr, len);
if (ret)
return ret;
+ idx = srcu_read_lock(&nodes_srcu);
+ node = __find_node(nodeid, r);
+ if (node) {
+ srcu_read_unlock(&nodes_srcu, idx);
+ return 0;
+ }
+ srcu_read_unlock(&nodes_srcu, idx);
+
node = kmalloc(sizeof(*node), GFP_NOFS);
if (!node)
return -ENOMEM;
--
2.31.1
bvec_set_page has the following signature:
static inline void bvec_set_page(struct bio_vec *bv, struct page *page,
unsigned int len, unsigned int offset)
However, the usage in DRBD swaps the len and offset parameters. This
leads to a bvec with length=0 instead of the intended length=4096, which
causes sock_sendmsg to return -EIO.
This leaves DRBD unable to transmit any pages and thus completely
broken.
Swapping the parameters fixes the regression.
Fixes: eeac7405c735 ("drbd: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage()")
Reported-by: Serguei Ivantsov <manowar(a)gsc-game.com>
Link: https://lore.kernel.org/regressions/CAKH+VT3YLmAn0Y8=q37UTDShqxDLsqPcQ4hBMz…
Cc: stable(a)vger.kernel.org
Signed-off-by: Christoph Böhmwalder <christoph.boehmwalder(a)linbit.com>
---
drivers/block/drbd/drbd_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 79ab532aabaf..6bc86106c7b2 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1557,7 +1557,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
do {
int sent;
- bvec_set_page(&bvec, page, offset, len);
+ bvec_set_page(&bvec, page, len, offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
sent = sock_sendmsg(socket, &msg);
--
2.41.0
From: Johannes Berg <johannes.berg(a)intel.com>
[ Upstream commit 34d4e3eb67fed9c19719bedb748e5a8b6ccc97a5 ]
Since links are only controlled by userspace via cfg80211
in AP mode, also only remove them from the driver in that
case.
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Gregory Greenman <gregory.greenman(a)intel.com>
Link: https://lore.kernel.org/r/20230608163202.ed65b94916fa.I2458c46888284cc5ce30…
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
net/wireless/util.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 39680e7bad45a..f433f3fdd9e94 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,7 +5,7 @@
* Copyright 2007-2009 Johannes Berg <johannes(a)sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include <linux/export.h>
#include <linux/bitops.h>
@@ -2479,6 +2479,13 @@ void cfg80211_remove_links(struct wireless_dev *wdev)
{
unsigned int link_id;
+ /*
+ * links are controlled by upper layers (userspace/cfg)
+ * only for AP mode, so only remove them here for AP
+ */
+ if (wdev->iftype != NL80211_IFTYPE_AP)
+ return;
+
wdev_lock(wdev);
if (wdev->valid_links) {
for_each_valid_link(wdev, link_id)
--
2.40.1
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: qcom: camss: Fix csid-gen2 for test pattern generator
Author: Andrey Konovalov <andrey.konovalov(a)linaro.org>
Date: Wed Aug 30 16:16:15 2023 +0100
In the current driver csid Test Pattern Generator (TPG) doesn't work.
This change:
- fixes writing frame width and height values into CSID_TPG_DT_n_CFG_0
- fixes the shift by one between test_pattern control value and the
actual pattern.
- drops fixed VC of 0x0a which testing showed prohibited some test
patterns in the CSID to produce output.
So that TPG starts working, but with the below limitations:
- only test_pattern=9 works as it should
- test_pattern=8 and test_pattern=7 produce black frame (all zeroes)
- the rest of test_pattern's don't work (yavta doesn't get the data)
- regardless of the CFA pattern set by 'media-ctl -V' the actual pixel
order is always the same (RGGB for any RAW8 or RAW10P format in
4608x2592 resolution).
Tested with:
RAW10P format, VC0:
media-ctl -V '"msm_csid0":0[fmt:SRGGB10/4608x2592 field:none]'
media-ctl -V '"msm_vfe0_rdi0":0[fmt:SRGGB10/4608x2592 field:none]'
media-ctl -l '"msm_csid0":1->"msm_vfe0_rdi0":0[1]'
v4l2-ctl -d /dev/v4l-subdev6 -c test_pattern=9
yavta -B capture-mplane --capture=3 -n 3 -f SRGGB10P -s 4608x2592 /dev/video0
RAW10P format, VC1:
media-ctl -V '"msm_csid0":2[fmt:SRGGB10/4608x2592 field:none]'
media-ctl -V '"msm_vfe0_rdi1":0[fmt:SRGGB10/4608x2592 field:none]'
media-ctl -l '"msm_csid0":2->"msm_vfe0_rdi1":0[1]'
v4l2-ctl -d /dev/v4l-subdev6 -c test_pattern=9
yavta -B capture-mplane --capture=3 -n 3 -f SRGGB10P -s 4608x2592 /dev/video1
RAW8 format, VC0:
media-ctl --reset
media-ctl -V '"msm_csid0":0[fmt:SRGGB8/4608x2592 field:none]'
media-ctl -V '"msm_vfe0_rdi0":0[fmt:SRGGB8/4608x2592 field:none]'
media-ctl -l '"msm_csid0":1->"msm_vfe0_rdi0":0[1]'
yavta -B capture-mplane --capture=3 -n 3 -f SRGGB8 -s 4608x2592 /dev/video0
Fixes: eebe6d00e9bf ("media: camss: Add support for CSID hardware version Titan 170")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrey Konovalov <andrey.konovalov(a)linaro.org>
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/platform/qcom/camss/camss-csid-gen2.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
---
diff --git a/drivers/media/platform/qcom/camss/camss-csid-gen2.c b/drivers/media/platform/qcom/camss/camss-csid-gen2.c
index efc68f8b4de9..23acc387be5f 100644
--- a/drivers/media/platform/qcom/camss/camss-csid-gen2.c
+++ b/drivers/media/platform/qcom/camss/camss-csid-gen2.c
@@ -355,9 +355,6 @@ static void __csid_configure_stream(struct csid_device *csid, u8 enable, u8 vc)
u8 dt_id = vc;
if (tg->enabled) {
- /* Config Test Generator */
- vc = 0xa;
-
/* configure one DT, infinite frames */
val = vc << TPG_VC_CFG0_VC_NUM;
val |= INTELEAVING_MODE_ONE_SHOT << TPG_VC_CFG0_LINE_INTERLEAVING_MODE;
@@ -370,14 +367,14 @@ static void __csid_configure_stream(struct csid_device *csid, u8 enable, u8 vc)
writel_relaxed(0x12345678, csid->base + CSID_TPG_LFSR_SEED);
- val = input_format->height & 0x1fff << TPG_DT_n_CFG_0_FRAME_HEIGHT;
- val |= input_format->width & 0x1fff << TPG_DT_n_CFG_0_FRAME_WIDTH;
+ val = (input_format->height & 0x1fff) << TPG_DT_n_CFG_0_FRAME_HEIGHT;
+ val |= (input_format->width & 0x1fff) << TPG_DT_n_CFG_0_FRAME_WIDTH;
writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_0(0));
val = format->data_type << TPG_DT_n_CFG_1_DATA_TYPE;
writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_1(0));
- val = tg->mode << TPG_DT_n_CFG_2_PAYLOAD_MODE;
+ val = (tg->mode - 1) << TPG_DT_n_CFG_2_PAYLOAD_MODE;
val |= 0xBE << TPG_DT_n_CFG_2_USER_SPECIFIED_PAYLOAD;
val |= format->decode_format << TPG_DT_n_CFG_2_ENCODE_FORMAT;
writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_2(0));
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: qcom: camss: Fix set CSI2_RX_CFG1_VC_MODE when VC is greater than 3
Author: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Date: Wed Aug 30 16:16:14 2023 +0100
VC_MODE = 0 implies a two bit VC address.
VC_MODE = 1 is required for VCs with a larger address than two bits.
Fixes: eebe6d00e9bf ("media: camss: Add support for CSID hardware version Titan 170")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/platform/qcom/camss/camss-csid-gen2.c | 2 ++
1 file changed, 2 insertions(+)
---
diff --git a/drivers/media/platform/qcom/camss/camss-csid-gen2.c b/drivers/media/platform/qcom/camss/camss-csid-gen2.c
index 0f8ac29d038d..efc68f8b4de9 100644
--- a/drivers/media/platform/qcom/camss/camss-csid-gen2.c
+++ b/drivers/media/platform/qcom/camss/camss-csid-gen2.c
@@ -449,6 +449,8 @@ static void __csid_configure_stream(struct csid_device *csid, u8 enable, u8 vc)
writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG0);
val = 1 << CSI2_RX_CFG1_PACKET_ECC_CORRECTION_EN;
+ if (vc > 3)
+ val |= 1 << CSI2_RX_CFG1_VC_MODE;
val |= 1 << CSI2_RX_CFG1_MISR_EN;
writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG1);
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: qcom: camss: Fix invalid clock enable bit disjunction
Author: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Date: Wed Aug 30 16:16:13 2023 +0100
define CSIPHY_3PH_CMN_CSI_COMMON_CTRL5_CLK_ENABLE BIT(7)
disjunction for gen2 ? BIT(7) : is a nop we are setting the same bit
either way.
Fixes: 4abb21309fda ("media: camss: csiphy: Move to hardcode CSI Clock Lane number")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
---
diff --git a/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c b/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c
index 04baa80494c6..4dba61b8d3f2 100644
--- a/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c
+++ b/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c
@@ -476,7 +476,7 @@ static void csiphy_lanes_enable(struct csiphy_device *csiphy,
settle_cnt = csiphy_settle_cnt_calc(link_freq, csiphy->timer_clk_rate);
- val = is_gen2 ? BIT(7) : CSIPHY_3PH_CMN_CSI_COMMON_CTRL5_CLK_ENABLE;
+ val = CSIPHY_3PH_CMN_CSI_COMMON_CTRL5_CLK_ENABLE;
for (i = 0; i < c->num_data; i++)
val |= BIT(c->data[i].pos * 2);
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: qcom: camss: Fix pm_domain_on sequence in probe
Author: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Date: Wed Aug 30 16:16:06 2023 +0100
We need to make sure camss_configure_pd() happens before
camss_register_entities() as the vfe_get() path relies on the pointer
provided by camss_configure_pd().
Fix the ordering sequence in probe to ensure the pointers vfe_get() demands
are present by the time camss_register_entities() runs.
In order to facilitate backporting to stable kernels I've moved the
configure_pd() call pretty early on the probe() function so that
irrespective of the existence of the old error handling jump labels this
patch should still apply to -next circa Aug 2023 to v5.13 inclusive.
Fixes: 2f6f8af67203 ("media: camss: Refactor VFE power domain toggling")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/platform/qcom/camss/camss.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
---
diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c
index f11dc59135a5..75991d849b57 100644
--- a/drivers/media/platform/qcom/camss/camss.c
+++ b/drivers/media/platform/qcom/camss/camss.c
@@ -1619,6 +1619,12 @@ static int camss_probe(struct platform_device *pdev)
if (ret < 0)
goto err_cleanup;
+ ret = camss_configure_pd(camss);
+ if (ret < 0) {
+ dev_err(dev, "Failed to configure power domains: %d\n", ret);
+ goto err_cleanup;
+ }
+
ret = camss_init_subdevices(camss);
if (ret < 0)
goto err_cleanup;
@@ -1678,12 +1684,6 @@ static int camss_probe(struct platform_device *pdev)
}
}
- ret = camss_configure_pd(camss);
- if (ret < 0) {
- dev_err(dev, "Failed to configure power domains: %d\n", ret);
- return ret;
- }
-
pm_runtime_enable(dev);
return 0;
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: qcom: camss: Fix missing vfe_lite clocks check
Author: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Date: Wed Aug 30 16:16:12 2023 +0100
check_clock doesn't account for vfe_lite which means that vfe_lite will
never get validated by this routine. Add the clock name to the expected set
to remediate.
Fixes: 7319cdf189bb ("media: camss: Add support for VFE hardware version Titan 170")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/platform/qcom/camss/camss-vfe.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
---
diff --git a/drivers/media/platform/qcom/camss/camss-vfe.c b/drivers/media/platform/qcom/camss/camss-vfe.c
index 5d8462ec0af2..965500b83d07 100644
--- a/drivers/media/platform/qcom/camss/camss-vfe.c
+++ b/drivers/media/platform/qcom/camss/camss-vfe.c
@@ -535,7 +535,8 @@ static int vfe_check_clock_rates(struct vfe_device *vfe)
struct camss_clock *clock = &vfe->clock[i];
if (!strcmp(clock->name, "vfe0") ||
- !strcmp(clock->name, "vfe1")) {
+ !strcmp(clock->name, "vfe1") ||
+ !strcmp(clock->name, "vfe_lite")) {
u64 min_rate = 0;
unsigned long rate;
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: qcom: camss: Fix VFE-17x vfe_disable_output()
Author: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Date: Wed Aug 30 16:16:10 2023 +0100
There are two problems with the current vfe_disable_output() routine.
Firstly we rightly use a spinlock to protect output->gen2.active_num
everywhere except for in the IDLE timeout path of vfe_disable_output().
Even if that is not racy "in practice" somehow it is by happenstance not
by design.
Secondly we do not get consistent behaviour from this routine. On
sc8280xp 50% of the time I get "VFE idle timeout - resetting". In this
case the subsequent capture will succeed. The other 50% of the time, we
don't hit the idle timeout, never do the VFE reset and subsequent
captures stall indefinitely.
Rewrite the vfe_disable_output() routine to
- Quiesce write masters with vfe_wm_stop()
- Set active_num = 0
remembering to hold the spinlock when we do so followed by
- Reset the VFE
Testing on sc8280xp and sdm845 shows this to be a valid fix.
Fixes: 7319cdf189bb ("media: camss: Add support for VFE hardware version Titan 170")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/platform/qcom/camss/camss-vfe-170.c | 22 +++-------------------
1 file changed, 3 insertions(+), 19 deletions(-)
---
diff --git a/drivers/media/platform/qcom/camss/camss-vfe-170.c b/drivers/media/platform/qcom/camss/camss-vfe-170.c
index 02494c89da91..168baaa80d4e 100644
--- a/drivers/media/platform/qcom/camss/camss-vfe-170.c
+++ b/drivers/media/platform/qcom/camss/camss-vfe-170.c
@@ -7,7 +7,6 @@
* Copyright (C) 2020-2021 Linaro Ltd.
*/
-#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
@@ -494,35 +493,20 @@ static int vfe_enable_output(struct vfe_line *line)
return 0;
}
-static int vfe_disable_output(struct vfe_line *line)
+static void vfe_disable_output(struct vfe_line *line)
{
struct vfe_device *vfe = to_vfe(line);
struct vfe_output *output = &line->output;
unsigned long flags;
unsigned int i;
- bool done;
- int timeout = 0;
-
- do {
- spin_lock_irqsave(&vfe->output_lock, flags);
- done = !output->gen2.active_num;
- spin_unlock_irqrestore(&vfe->output_lock, flags);
- usleep_range(10000, 20000);
-
- if (timeout++ == 100) {
- dev_err(vfe->camss->dev, "VFE idle timeout - resetting\n");
- vfe_reset(vfe);
- output->gen2.active_num = 0;
- return 0;
- }
- } while (!done);
spin_lock_irqsave(&vfe->output_lock, flags);
for (i = 0; i < output->wm_num; i++)
vfe_wm_stop(vfe, output->wm_idx[i]);
+ output->gen2.active_num = 0;
spin_unlock_irqrestore(&vfe->output_lock, flags);
- return 0;
+ vfe_reset(vfe);
}
/*
The most critical issue with vm.memfd_noexec=2 (the fact that passing
MFD_EXEC would bypass it entirely[1]) has been fixed in Andrew's
tree[2], but there are still some outstanding issues that need to be
addressed:
* vm.memfd_noexec=2 shouldn't reject old-style memfd_create(2) syscalls
because it will make it far to difficult to ever migrate. Instead it
should imply MFD_EXEC.
* The dmesg warnings are pr_warn_once(), which on most systems means
that they will be used up by systemd or some other boot process and
userspace developers will never see it.
- For the !(flags & (MFD_EXEC | MFD_NOEXEC_SEAL)) case, outputting a
rate-limited message to the kernel log is necessary to tell
userspace that they should add the new flags.
Arguably the most ideal way to deal with the spam concern[3,4]
while still prompting userspace to switch to the new flags would be
to only log the warning once per task or something similar.
However, adding something to task_struct for tracking this would be
needless bloat for a single pr_warn_ratelimited().
So just switch to pr_info_ratelimited() to avoid spamming the log
with something that isn't a real warning. There's lots of
info-level stuff in dmesg, it seems really unlikely that this
should be an actual problem. Most programs are already switching to
the new flags anyway.
- For the vm.memfd_noexec=2 case, we need to log a warning for every
failure because otherwise userspace will have no idea why their
previously working program started returning -EACCES (previously
-EINVAL) from memfd_create(2). pr_warn_once() is simply wrong here.
* The racheting mechanism for vm.memfd_noexec makes it incredibly
unappealing for most users to enable the sysctl because enabling it
on &init_pid_ns means you need a system reboot to unset it. Given the
actual security threat being protected against, CAP_SYS_ADMIN users
being restricted in this way makes little sense.
The argument for this ratcheting by the original author was that it
allows you to have a hierarchical setting that cannot be unset by
child pidnses, but this is not accurate -- changing the parent
pidns's vm.memfd_noexec setting to be more restrictive didn't affect
children.
Instead, switch the vm.memfd_noexec sysctl to be properly
hierarchical and allow CAP_SYS_ADMIN users (in the pidns's owning
userns) to lower the setting as long as it is not lower than the
parent's effective setting. This change also makes it so that
changing a parent pidns's vm.memfd_noexec will affect all
descendants, providing a properly hierarchical setting. The
performance impact of this is incredibly minimal since the maximum
depth of pidns is 32 and it is only checked during memfd_create(2)
and unshare(CLONE_NEWPID).
* The memfd selftests would not exit with a non-zero error code when
certain tests that ran in a forked process (specifically the ones
related to MFD_EXEC and MFD_NOEXEC_SEAL) failed.
[1]: https://lore.kernel.org/all/ZJwcsU0vI-nzgOB_@codewreck.org/
[2]: https://lore.kernel.org/all/20230705063315.3680666-1-jeffxu@google.com/
[3]: https://lore.kernel.org/Y5yS8wCnuYGLHMj4@x1n/
[4]: https://lore.kernel.org/f185bb42-b29c-977e-312e-3349eea15383@linuxfoundatio…
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
---
Changes in v2:
- Make vm.memfd_noexec restrictions properly hierarchical.
- Allow vm.memfd_noexec setting to be lowered by CAP_SYS_ADMIN as long
as it is not lower than the parent's effective setting.
- Fix the logging behaviour related to the new flags and
vm.memfd_noexec=2.
- Add more thorough tests for vm.memfd_noexec in selftests.
- v1: <https://lore.kernel.org/r/20230713143406.14342-1-cyphar@cyphar.com>
---
Aleksa Sarai (5):
selftests: memfd: error out test process when child test fails
memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2
memfd: improve userspace warnings for missing exec-related flags
memfd: replace ratcheting feature from vm.memfd_noexec with hierarchy
selftests: improve vm.memfd_noexec sysctl tests
include/linux/pid_namespace.h | 39 ++--
kernel/pid.c | 3 +
kernel/pid_namespace.c | 6 +-
kernel/pid_sysctl.h | 28 ++-
mm/memfd.c | 33 ++-
tools/testing/selftests/memfd/memfd_test.c | 332 +++++++++++++++++++++++------
6 files changed, 322 insertions(+), 119 deletions(-)
---
base-commit: 3ff995246e801ea4de0a30860a1d8da4aeb538e7
change-id: 20230803-memfd-vm-noexec-uapi-fixes-ace725c67b0f
Best regards,
--
Aleksa Sarai <cyphar(a)cyphar.com>
Reset the i2c controller when an i2c transfer timeout occurs.
The remaining interrupts and device should be reset to avoid
unpredictable controller behavior.
Fixes: 2e57b7cebb98 ("i2c: aspeed: Add multi-master use case support")
Cc: Jae Hyun Yoo <jae.hyun.yoo(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v5.1+
Signed-off-by: Tommy Huang <tommy_huang(a)aspeedtech.com>
---
drivers/i2c/busses/i2c-aspeed.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 2e5acfeb76c8..5a416b39b818 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -698,13 +698,16 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
if (time_left == 0) {
/*
- * If timed out and bus is still busy in a multi master
- * environment, attempt recovery at here.
+ * In a multi-master setup, if a timeout occurs, attempt
+ * recovery. But if the bus is idle, we still need to reset the
+ * i2c controller to clear the remaining interrupts.
*/
if (bus->multi_master &&
(readl(bus->base + ASPEED_I2C_CMD_REG) &
ASPEED_I2CD_BUS_BUSY_STS))
aspeed_i2c_recover_bus(bus);
+ else
+ aspeed_i2c_reset(bus);
/*
* If timed out and the state is still pending, drop the pending
--
2.25.1
There are two places in apply_below_the_range() where it's possible for
a divide by zero error to occur. So, to fix this make sure the divisor
is non-zero before attempting the computation in both cases.
Cc: stable(a)vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2637
Fixes: a463b263032f ("drm/amd/display: Fix frames_to_insert math")
Fixes: ded6119e825a ("drm/amd/display: Reinstate LFC optimization")
Signed-off-by: Hamza Mahfooz <hamza.mahfooz(a)amd.com>
---
drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index dbd60811f95d..ef3a67409021 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -338,7 +338,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
* - Delta for CEIL: delta_from_mid_point_in_us_1
* - Delta for FLOOR: delta_from_mid_point_in_us_2
*/
- if ((last_render_time_in_us / mid_point_frames_ceil) < in_out_vrr->min_duration_in_us) {
+ if (mid_point_frames_ceil &&
+ (last_render_time_in_us / mid_point_frames_ceil) <
+ in_out_vrr->min_duration_in_us) {
/* Check for out of range.
* If using CEIL produces a value that is out of range,
* then we are forced to use FLOOR.
@@ -385,8 +387,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
/* Either we've calculated the number of frames to insert,
* or we need to insert min duration frames
*/
- if (last_render_time_in_us / frames_to_insert <
- in_out_vrr->min_duration_in_us){
+ if (frames_to_insert &&
+ (last_render_time_in_us / frames_to_insert) <
+ in_out_vrr->min_duration_in_us){
frames_to_insert -= (frames_to_insert > 1) ?
1 : 0;
}
--
2.41.0
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The function tracefs_create_dir() was missing a lockdown check and was
called by the RV code. This gave an inconsistent behavior of this function
returning success while other tracefs functions failed. This caused the
inode being freed by the wrong kmem_cache.
Link: https://lore.kernel.org/all/202309050916.58201dc6-oliver.sang@intel.com/
Cc: stable(a)vger.kernel.org
Fixes: bf8e602186ec4 ("tracing: Do not create tracefs files if tracefs lockdown is in effect")
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
fs/tracefs/inode.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index de5b72216b1a..3b8dd938b1c8 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -673,6 +673,9 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
*/
struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
{
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
return __create_dir(name, parent, &simple_dir_inode_operations);
}
--
2.40.1
The quilt patch titled
Subject: revert "memfd: improve userspace warnings for missing exec-related flags".
has been removed from the -mm tree. Its filename was
revert-memfd-improve-userspace-warnings-for-missing-exec-related-flags.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Andrew Morton <akpm(a)linux-foundation.org>
Subject: revert "memfd: improve userspace warnings for missing exec-related flags".
Date: Sat Sep 2 03:59:31 PM PDT 2023
This warning is telling userspace developers to pass MFD_EXEC and
MFD_NOEXEC_SEAL to memfd_create(). Commit 434ed3350f57 ("memfd: improve
userspace warnings for missing exec-related flags") made the warning more
frequent and visible in the hope that this would accelerate the fixing of
errant userspace.
But the overall effect is to generate far too much dmesg noise.
Fixes: 434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags")
Reported-by: Damian Tometzki <dtometzki(a)fedoraproject.org>
Closes: https://lkml.kernel.org/r/ZPFzCSIgZ4QuHsSC@fedora.fritz.box
Cc: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Daniel Verkamp <dverkamp(a)chromium.org>
Cc: Jeff Xu <jeffxu(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memfd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/memfd.c~revert-memfd-improve-userspace-warnings-for-missing-exec-related-flags
+++ a/mm/memfd.c
@@ -316,7 +316,7 @@ SYSCALL_DEFINE2(memfd_create,
return -EINVAL;
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- pr_info_ratelimited(
+ pr_warn_once(
"%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
current->comm, task_pid_nr(current));
}
_
Patches currently in -mm which might be from akpm(a)linux-foundation.org are
mm-shmem-fix-race-in-shmem_undo_range-w-thp-fix.patch
The quilt patch titled
Subject: rcu: dump vmalloc memory info safely
has been removed from the -mm tree. Its filename was
rcu-dump-vmalloc-memory-info-safely.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Zqiang <qiang.zhang1211(a)gmail.com>
Subject: rcu: dump vmalloc memory info safely
Date: Mon, 4 Sep 2023 18:08:05 +0000
Currently, for double invoke call_rcu(), will dump rcu_head objects memory
info, if the objects is not allocated from the slab allocator, the
vmalloc_dump_obj() will be invoke and the vmap_area_lock spinlock need to
be held, since the call_rcu() can be invoked in interrupt context,
therefore, there is a possibility of spinlock deadlock scenarios.
And in Preempt-RT kernel, the rcutorture test also trigger the following
lockdep warning:
BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0
preempt_count: 1, expected: 0
RCU nest depth: 1, expected: 1
3 locks held by swapper/0/1:
#0: ffffffffb534ee80 (fullstop_mutex){+.+.}-{4:4}, at: torture_init_begin+0x24/0xa0
#1: ffffffffb5307940 (rcu_read_lock){....}-{1:3}, at: rcu_torture_init+0x1ec7/0x2370
#2: ffffffffb536af40 (vmap_area_lock){+.+.}-{3:3}, at: find_vmap_area+0x1f/0x70
irq event stamp: 565512
hardirqs last enabled at (565511): [<ffffffffb379b138>] __call_rcu_common+0x218/0x940
hardirqs last disabled at (565512): [<ffffffffb5804262>] rcu_torture_init+0x20b2/0x2370
softirqs last enabled at (399112): [<ffffffffb36b2586>] __local_bh_enable_ip+0x126/0x170
softirqs last disabled at (399106): [<ffffffffb43fef59>] inet_register_protosw+0x9/0x1d0
Preemption disabled at:
[<ffffffffb58040c3>] rcu_torture_init+0x1f13/0x2370
CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.5.0-rc4-rt2-yocto-preempt-rt+ #15
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x68/0xb0
dump_stack+0x14/0x20
__might_resched+0x1aa/0x280
? __pfx_rcu_torture_err_cb+0x10/0x10
rt_spin_lock+0x53/0x130
? find_vmap_area+0x1f/0x70
find_vmap_area+0x1f/0x70
vmalloc_dump_obj+0x20/0x60
mem_dump_obj+0x22/0x90
__call_rcu_common+0x5bf/0x940
? debug_smp_processor_id+0x1b/0x30
call_rcu_hurry+0x14/0x20
rcu_torture_init+0x1f82/0x2370
? __pfx_rcu_torture_leak_cb+0x10/0x10
? __pfx_rcu_torture_leak_cb+0x10/0x10
? __pfx_rcu_torture_init+0x10/0x10
do_one_initcall+0x6c/0x300
? debug_smp_processor_id+0x1b/0x30
kernel_init_freeable+0x2b9/0x540
? __pfx_kernel_init+0x10/0x10
kernel_init+0x1f/0x150
ret_from_fork+0x40/0x50
? __pfx_kernel_init+0x10/0x10
ret_from_fork_asm+0x1b/0x30
</TASK>
The previous patch fixes this by using the deadlock-safe best-effort
version of find_vm_area. However, in case of failure print the fact that
the pointer was a vmalloc pointer so that we print at least something.
Link: https://lkml.kernel.org/r/20230904180806.1002832-2-joel@joelfernandes.org
Fixes: 98f180837a89 ("mm: Make mem_dump_obj() handle vmalloc() memory")
Signed-off-by: Zqiang <qiang.zhang1211(a)gmail.com>
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Reported-by: Zhen Lei <thunder.leizhen(a)huaweicloud.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Paul E. McKenney <paulmck(a)kernel.org>
Cc: Uladzislau Rezki (Sony) <urezki(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/util.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/mm/util.c~rcu-dump-vmalloc-memory-info-safely
+++ a/mm/util.c
@@ -1068,7 +1068,9 @@ void mem_dump_obj(void *object)
if (vmalloc_dump_obj(object))
return;
- if (virt_addr_valid(object))
+ if (is_vmalloc_addr(object))
+ type = "vmalloc memory";
+ else if (virt_addr_valid(object))
type = "non-slab/vmalloc memory";
else if (object == NULL)
type = "NULL pointer";
_
Patches currently in -mm which might be from qiang.zhang1211(a)gmail.com are
The quilt patch titled
Subject: memcontrol: ensure memcg acquired by id is properly set up
has been removed from the -mm tree. Its filename was
memcontrol-ensure-memcg-acquired-by-id-is-properly-set-up.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Johannes Weiner <hannes(a)cmpxchg.org>
Subject: memcontrol: ensure memcg acquired by id is properly set up
Date: Wed, 23 Aug 2023 15:54:30 -0700
In the eviction recency check, we attempt to retrieve the memcg to which
the folio belonged when it was evicted, by the memcg id stored in the
shadow entry. However, there is a chance that the retrieved memcg is not
the original memcg that has been killed, but a new one which happens to
have the same id.
This is a somewhat unfortunate, but acceptable and rare inaccuracy in the
heuristics. However, if we retrieve this new memcg between its allocation
and when it is properly attached to the memcg hierarchy, we could run into
the following NULL pointer exception during the memcg hierarchy traversal
done in mem_cgroup_get_nr_swap_pages():
[ 155757.793456] BUG: kernel NULL pointer dereference, address: 00000000000000c0
[ 155757.807568] #PF: supervisor read access in kernel mode
[ 155757.818024] #PF: error_code(0x0000) - not-present page
[ 155757.828482] PGD 401f77067 P4D 401f77067 PUD 401f76067 PMD 0
[ 155757.839985] Oops: 0000 [#1] SMP
[ 155757.887870] RIP: 0010:mem_cgroup_get_nr_swap_pages+0x3d/0xb0
[ 155757.899377] Code: 29 19 4a 02 48 39 f9 74 63 48 8b 97 c0 00 00 00 48 8b b7 58 02 00 00 48 2b b7 c0 01 00 00 48 39 f0 48 0f 4d c6 48 39 d1 74 42 <48> 8b b2 c0 00 00 00 48 8b ba 58 02 00 00 48 2b ba c0 01 00 00 48
[ 155757.937125] RSP: 0018:ffffc9002ecdfbc8 EFLAGS: 00010286
[ 155757.947755] RAX: 00000000003a3b1c RBX: 000007ffffffffff RCX: ffff888280183000
[ 155757.962202] RDX: 0000000000000000 RSI: 0007ffffffffffff RDI: ffff888bbc2d1000
[ 155757.976648] RBP: 0000000000000001 R08: 000000000000000b R09: ffff888ad9cedba0
[ 155757.991094] R10: ffffea0039c07900 R11: 0000000000000010 R12: ffff888b23a7b000
[ 155758.005540] R13: 0000000000000000 R14: ffff888bbc2d1000 R15: 000007ffffc71354
[ 155758.019991] FS: 00007f6234c68640(0000) GS:ffff88903f9c0000(0000) knlGS:0000000000000000
[ 155758.036356] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 155758.048023] CR2: 00000000000000c0 CR3: 0000000a83eb8004 CR4: 00000000007706e0
[ 155758.062473] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 155758.076924] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 155758.091376] PKRU: 55555554
[ 155758.096957] Call Trace:
[ 155758.102016] <TASK>
[ 155758.106502] ? __die+0x78/0xc0
[ 155758.112793] ? page_fault_oops+0x286/0x380
[ 155758.121175] ? exc_page_fault+0x5d/0x110
[ 155758.129209] ? asm_exc_page_fault+0x22/0x30
[ 155758.137763] ? mem_cgroup_get_nr_swap_pages+0x3d/0xb0
[ 155758.148060] workingset_test_recent+0xda/0x1b0
[ 155758.157133] workingset_refault+0xca/0x1e0
[ 155758.165508] filemap_add_folio+0x4d/0x70
[ 155758.173538] page_cache_ra_unbounded+0xed/0x190
[ 155758.182919] page_cache_sync_ra+0xd6/0x1e0
[ 155758.191738] filemap_read+0x68d/0xdf0
[ 155758.199495] ? mlx5e_napi_poll+0x123/0x940
[ 155758.207981] ? __napi_schedule+0x55/0x90
[ 155758.216095] __x64_sys_pread64+0x1d6/0x2c0
[ 155758.224601] do_syscall_64+0x3d/0x80
[ 155758.232058] entry_SYSCALL_64_after_hwframe+0x46/0xb0
[ 155758.242473] RIP: 0033:0x7f62c29153b5
[ 155758.249938] Code: e8 48 89 75 f0 89 7d f8 48 89 4d e0 e8 b4 e6 f7 ff 41 89 c0 4c 8b 55 e0 48 8b 55 e8 48 8b 75 f0 8b 7d f8 b8 11 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 45 f8 e8 e7 e6 f7 ff 48 8b
[ 155758.288005] RSP: 002b:00007f6234c5ffd0 EFLAGS: 00000293 ORIG_RAX: 0000000000000011
[ 155758.303474] RAX: ffffffffffffffda RBX: 00007f628c4e70c0 RCX: 00007f62c29153b5
[ 155758.318075] RDX: 000000000003c041 RSI: 00007f61d2986000 RDI: 0000000000000076
[ 155758.332678] RBP: 00007f6234c5fff0 R08: 0000000000000000 R09: 0000000064d5230c
[ 155758.347452] R10: 000000000027d450 R11: 0000000000000293 R12: 000000000003c041
[ 155758.362044] R13: 00007f61d2986000 R14: 00007f629e11b060 R15: 000000000027d450
[ 155758.376661] </TASK>
This patch fixes the issue by moving the memcg's id publication from the
alloc stage to online stage, ensuring that any memcg acquired via id must
be connected to the memcg tree.
Link: https://lkml.kernel.org/r/20230823225430.166925-1-nphamcs@gmail.com
Fixes: f78dfc7b77d5 ("workingset: fix confusion around eviction vs refault container")
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Co-developed-by: Nhat Pham <nphamcs(a)gmail.com>
Signed-off-by: Nhat Pham <nphamcs(a)gmail.com>
Acked-by: Shakeel Butt <shakeelb(a)google.com>
Cc: Yosry Ahmed <yosryahmed(a)google.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
--- a/mm/memcontrol.c~memcontrol-ensure-memcg-acquired-by-id-is-properly-set-up
+++ a/mm/memcontrol.c
@@ -5326,7 +5326,6 @@ static struct mem_cgroup *mem_cgroup_all
INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
memcg->deferred_split_queue.split_queue_len = 0;
#endif
- idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
lru_gen_init_memcg(memcg);
return memcg;
fail:
@@ -5398,14 +5397,27 @@ static int mem_cgroup_css_online(struct
if (alloc_shrinker_info(memcg))
goto offline_kmem;
- /* Online state pins memcg ID, memcg ID pins CSS */
- refcount_set(&memcg->id.ref, 1);
- css_get(css);
-
if (unlikely(mem_cgroup_is_root(memcg)))
queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
FLUSH_TIME);
lru_gen_online_memcg(memcg);
+
+ /* Online state pins memcg ID, memcg ID pins CSS */
+ refcount_set(&memcg->id.ref, 1);
+ css_get(css);
+
+ /*
+ * Ensure mem_cgroup_from_id() works once we're fully online.
+ *
+ * We could do this earlier and require callers to filter with
+ * css_tryget_online(). But right now there are no users that
+ * need earlier access, and the workingset code relies on the
+ * cgroup tree linkage (mem_cgroup_get_nr_swap_pages()). So
+ * publish it here at the end of onlining. This matches the
+ * regular ID destruction during offlining.
+ */
+ idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+
return 0;
offline_kmem:
memcg_offline_kmem(memcg);
_
Patches currently in -mm which might be from hannes(a)cmpxchg.org are
On Tue, Sep 05, 2023 at 05:50:06PM +0200, Cláudio Sampaio wrote:
> Hi Thorsten and Keith,
>
> Thanks for the details. I'm still unsure if responding by email is better
> or adding to the ticket, but here it goes: I have tried for days both with
> complete power off of the machine and cycle-booting all kernels in
> succession and without exception, 6.1.x LTS and the patched 6.5.1 kernel
> always recognize and operate the NVME, whilst the other kernels also fail
> with the same error message. As this is my "production" desktop, though,
> during the week it's more difficult to me to perform tests with it, but I
> will try to do it in a more methodic way and also with 6.5.1 vanilla.
>
> As for the reason the Lexar doesn't catch the quirk default, I can't say I
> catch the complex logic of the driver activation, but I found out how to
> "fix" for my case because there are three other Lexar models in the pci.c
> file: NM610, NM620 and NM760 (this one with an additional quirk marked on
> it on the code, NVME_QUIRK_IGNORE_DEV_SUBNQN) -- so I guess whatever
> justifies the exception for them also justifies for my model, NM790. Might
> even be the case that I would need NVME_QUIRK_IGNORE_DEV_SUBNQN (not sure
> what it does) like in the NM760 case, but it activates correctly without it.
The existing Lexar quirks for the identifier existed before the default
kernel behavior changed with respect to how identifiers are considered.
But the report says the device failed to enumerate with a "device not
ready" error message. That error message happens *before* identifiers
are checked, so the quirk should be a no-op with respect to that error
message. And the driver abandons the device after printing that message,
so no futher action should be taken no matter what quirk you've set.
In order for this quirk to have any effect at all, the error you should
have seen should look like a "duplicate IDs" message.
While reading from the tracing/trace, the ftrace reader rarely encounters
a KASAN invalid access issue.
It is likely that the writer has disrupted the ring_buffer that the reader
is currently parsing. the kasan report is as below:
[name:report&]BUG: KASAN: invalid-access in rb_iter_head_event+0x27c/0x3d0
[name:report&]Read of size 4 at addr 71ffff8111a18000 by task xxxx
[name:report_sw_tags&]Pointer tag: [71], memory tag: [0f]
[name:report&]
CPU: 2 PID: 380 Comm: xxxx
Call trace:
dump_backtrace+0x168/0x1b0
show_stack+0x2c/0x3c
dump_stack_lvl+0xa4/0xd4
print_report+0x268/0x9b0
kasan_report+0xdc/0x148
kasan_tag_mismatch+0x28/0x3c
__hwasan_tag_mismatch+0x2c/0x58
rb_event_length() [inline]
rb_iter_head_event+0x27c/0x3d0
ring_buffer_iter_peek+0x23c/0x6e0
__find_next_entry+0x1ac/0x3d8
s_next+0x1f0/0x310
seq_read_iter+0x4e8/0x77c
seq_read+0xf8/0x150
vfs_read+0x1a8/0x4cc
In some edge cases, ftrace reader could access to an invalid address,
specifically when reading 4 bytes beyond the end of the currently page.
While issue happened, the dump of rb_iter_head_event is shown as below:
in function rb_iter_head_event:
- iter->head = 0xFEC
- iter->next_event = 0xFEC
- commit = 0xFF0
- read_stamp = 0x2955AC46DB0
- page_stamp = 0x2955AC2439A
- iter->head_page->page = 0x71FFFF8111A17000
- iter->head_page->time_stamp = 0x2956A142267
- iter->head_page->page->commit = 0xFF0
- the content in iter->head_page->page
0x71FFFF8111A17FF0: 01010075 00002421 0A123B7C FFFFFFC0
In rb_iter_head_event, reader will call rb_event_length with argument
(struct ring_buffer_event *event = 0x71FFFF8111A17FFC).
Since the content data start at address 0x71FFFF8111A17FFC are 0xFFFFFFC0.
event->type will be interpret as 0x0, than the reader will try to get the
length by accessing event->array[0], which is an invalid address:
&event->array[0] = 0x71FFFF8111A18000
Cc: stable(a)vger.kernel.org
Signed-off-by: Tze-nan Wu <Tze-nan.Wu(a)mediatek.com>
---
resend again due to forget cc stable(a)vger.kernel.org
Following patch may not become a solution, it merely checks if the address
to be accessed is valid or not within the rb_event_length before access.
And not sure if there is any side-effect it can lead to.
I am curious about what a better solution for this issue would look like.
Should we address the problem from the writer or the reader?
Also I wonder if the cause of the issue is exactly as I suspected.
Any Suggestion will be appreciated.
Test below can reproduce the issue in 2 hours on kernel-6.1.24:
$cd /sys/kernel/tracing/
# make the reader and writer race more through resize the buffer to 8kb
$echo 8 > buffer_size_kn
# enable all events
$echo 1 > event/enable
# enable trace
$echo 1 > tracing_on
# write and run a script that keep reading trace
$./read_trace.sh
``` read_trace.sh
while :
do
cat /sys/kernel/tracing/trace > /dev/null
done
```
---
kernel/trace/ring_buffer.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 78502d4c7214..ed5ddc3a134b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -200,6 +200,8 @@ rb_event_length(struct ring_buffer_event *event)
if (rb_null_event(event))
/* undefined */
return -1;
+ if (((unsigned long)event & 0xfffUL) >= PAGE_SIZE - 4)
+ return -1;
return event->array[0] + RB_EVNT_HDR_SIZE;
case RINGBUF_TYPE_TIME_EXTEND:
@@ -209,6 +211,8 @@ rb_event_length(struct ring_buffer_event *event)
return RB_LEN_TIME_STAMP;
case RINGBUF_TYPE_DATA:
+ if ((((unsigned long)event & 0xfffUL) >= PAGE_SIZE - 4) && !event->type_len)
+ return -1;
return rb_event_data_length(event);
default:
WARN_ON_ONCE(1);
--
2.18.0