A ptp_clock should never be registered unless the driver implements at least
one of gettimex64() or gettime64(), as well as settime64(). WARN_ON_ONCE()
and error out if either requirement is not met.
Cc: stable(a)vger.kernel.org
Fixes: d7d38f5bd7be ("ptp: use the 64 bit get/set time methods for the posix clock.")
Suggested-by: Kuniyuki Iwashima <kuniyu(a)google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu(a)google.com>
Reviewed-by: Harshitha Ramamurthy <hramamurthy(a)google.com>
Signed-off-by: Tim Hostetler <thostet(a)google.com>
---
drivers/ptp/ptp_clock.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index ef020599b771..0bc79076771b 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -325,6 +325,10 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
if (info->n_alarm > PTP_MAX_ALARMS)
return ERR_PTR(-EINVAL);
+ if (WARN_ON_ONCE((!info->gettimex64 && !info->gettime64) ||
+ !info->settime64))
+ return ERR_PTR(-EINVAL);
+
/* Initialize a clock structure. */
ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL);
if (!ptp) {
--
2.51.1.851.g4ebd6896fd-goog
KASAN reports a global-out-of-bounds access when running these nfit
tests: clear.sh, pmem-errors.sh, pfn-meta-errors.sh, btt-errors.sh,
daxdev-errors.sh, and inject-error.sh.
[] BUG: KASAN: global-out-of-bounds in nfit_test_ctl+0x769f/0x7840 [nfit_test]
[] Read of size 4 at addr ffffffffc03ea01c by task ndctl/1215
[] The buggy address belongs to the variable:
[] handle+0x1c/0x1df4 [nfit_test]
nfit_test_search_spa() uses handle[nvdimm->id] to retrieve a device
handle and triggers a KASAN error when it reads past the end of the
handle array. It should not be indexing the handle array at all.
The correct device handle is stored in per-DIMM test data. Each DIMM
has a struct nfit_mem that embeds a struct acpi_nfit_memdev that
describes the NFIT device handle. Use that device handle here.
Fixes: 10246dc84dfc ("acpi nfit: nfit_test supports translate SPA")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Alison Schofield <alison.schofield(a)intel.com>
---
Changes in v2:
- Use the correct handle in per-DIMM test data (Dan)
- Update commit message and log
- Update Fixes Tag
tools/testing/nvdimm/test/nfit.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index cfd4378e2129..f87e9f251d13 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -670,6 +670,7 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
.addr = spa->spa,
.region = NULL,
};
+ struct nfit_mem *nfit_mem;
u64 dpa;
ret = device_for_each_child(&bus->dev, &ctx,
@@ -687,8 +688,12 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
*/
nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
nvdimm = nd_mapping->nvdimm;
+ nfit_mem = nvdimm_provider_data(nvdimm);
+ if (!nfit_mem)
+ return -EINVAL;
- spa->devices[0].nfit_device_handle = handle[nvdimm->id];
+ spa->devices[0].nfit_device_handle =
+ __to_nfit_memdev(nfit_mem)->device_handle;
spa->num_nvdimms = 1;
spa->devices[0].dpa = dpa;
base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
--
2.37.3
KASAN reports a global-out-of-bounds access when running these nfit
tests: clear.sh, pmem-errors.sh, pfn-meta-errors.sh, btt-errors.sh,
daxdev-errors.sh, and inject-error.sh.
[] BUG: KASAN: global-out-of-bounds in nfit_test_ctl+0x769f/0x7840 [nfit_test]
[] Read of size 4 at addr ffffffffc03ea01c by task ndctl/1215
[] The buggy address belongs to the variable:
[] handle+0x1c/0x1df4 [nfit_test]
The nfit_test mock platform defines a static table of 7 NFIT DIMM
handles, but nfit_test.0 builds 8 mock DIMMs total (5 DCR + 3 PM).
When the final DIMM (id == 7) is selected, this code:
spa->devices[0].nfit_device_handle = handle[nvdimm->id];
indexes past the end of the 7-entry table, triggering KASAN.
Fix this by adding an eighth entry to the handle[] table and a
defensive bounds check so the test fails cleanly instead of
dereferencing out-of-bounds memory.
To generate a unique handle, the new entry sets the 'imc' field rather
than the 'chan' field. This matches the pattern of earlier entries
and avoids introducing a non-zero 'chan' which is never used in the
table. Computing the new handle shows no collision.
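For reference, a minimal sketch of the handle encoding, assuming the
NFIT_DIMM_HANDLE() bit layout used by nfit.c (node, socket, imc, chan, dimm
packed from high to low bits; the EXAMPLE_ name is illustrative), showing why
the new entry does not collide with its neighbour:

	/* Sketch of the packing assumed above; see NFIT_DIMM_HANDLE in nfit.c. */
	#define EXAMPLE_NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm)	\
		((((node) & 0xfff) << 16) | (((socket) & 0xf) << 12)	\
		 | (((imc) & 0xf) << 8) | (((chan) & 0xf) << 4)		\
		 | ((dimm) & 0xf))

	/* [6] = (1, 0, 0, 0, 1) -> 0x10001 */
	/* [7] = (1, 0, 1, 0, 1) -> 0x10101, no collision */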
Notes from spelunking for a Fixes tag:
Commit 209851649dc4 ("acpi: nfit: Add support for hot-add") increased
the mock DIMMs to eight yet kept the handle[] array at seven.
Commit 10246dc84dfc ("acpi nfit: nfit_test supports translate SPA")
began using the last mock DIMM, triggering the KASAN.
Commit af31b04b67f4 ("tools/testing/nvdimm: Fix the array size for
dimm devices.") addressed a related KASAN warning but not the actual
handle array length.
Fixes: 209851649dc4 ("acpi: nfit: Add support for hot-add")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Alison Schofield <alison.schofield(a)intel.com>
---
tools/testing/nvdimm/test/nfit.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index cfd4378e2129..cdbf9e8ee80a 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -129,6 +129,7 @@ static u32 handle[] = {
[4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
[5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0),
[6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1),
+ [7] = NFIT_DIMM_HANDLE(1, 0, 1, 0, 1),
};
static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)];
@@ -688,6 +689,13 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
nvdimm = nd_mapping->nvdimm;
+ if (WARN_ON_ONCE(nvdimm->id >= ARRAY_SIZE(handle))) {
+ dev_err(&bus->dev,
+ "invalid nvdimm->id %u >= handle array size %zu\n",
+ nvdimm->id, ARRAY_SIZE(handle));
+ return -EINVAL;
+ }
+
spa->devices[0].nfit_device_handle = handle[nvdimm->id];
spa->num_nvdimms = 1;
spa->devices[0].dpa = dpa;
base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
--
2.37.3
This is the start of the stable review cycle for the 6.17.7 release.
There are 35 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 02 Nov 2025 14:00:34 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.17.7-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.17.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.17.7-rc1
Menglong Dong <menglong8.dong(a)gmail.com>
arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
Tejun Heo <tj(a)kernel.org>
sched_ext: Make qmap dump operation non-destructive
Filipe Manana <fdmanana(a)suse.com>
btrfs: use smp_mb__after_atomic() when forcing COW in create_pending_snapshot()
Qu Wenruo <wqu(a)suse.com>
btrfs: tree-checker: add inode extref checks
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction if we fail to update inode in log replay dir fixup
Filipe Manana <fdmanana(a)suse.com>
btrfs: use level argument in log tree walk callback replay_one_buffer()
Filipe Manana <fdmanana(a)suse.com>
btrfs: always drop log root tree reference in btrfs_replay_log()
Thorsten Blum <thorsten.blum(a)linux.dev>
btrfs: scrub: replace max_t()/min_t() with clamp() in scrub_throttle_dev_io()
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: refine extent allocator hint selection
Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
btrfs: zoned: return error from btrfs_zone_finish_endio()
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction in the process_one_buffer() log tree walk callback
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction on specific error places when walking log tree
Chen Ridong <chenridong(a)huawei.com>
cpuset: Use new excpus for nocpu error check when enabling root partition
Avadhut Naik <avadhut.naik(a)amd.com>
EDAC/mc_sysfs: Increase legacy channel support to 16
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Fix reporting of LFENCE retpoline
Aaron Lu <ziqianlu(a)bytedance.com>
sched/fair: update_cfs_group() for throttled cfs_rqs
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Add attack vector controls for VMSCAPE
Tejun Heo <tj(a)kernel.org>
sched_ext: Keep bypass on between enable failure and scx_disable_workfn()
Jiri Olsa <jolsa(a)kernel.org>
seccomp: passthrough uprobe systemcall without filtering
Kuan-Wei Chiu <visitorckw(a)gmail.com>
EDAC: Fix wrong executable file modes for C source files
Josh Poimboeuf <jpoimboe(a)kernel.org>
perf: Skip user unwind if the task is a kernel thread
Josh Poimboeuf <jpoimboe(a)kernel.org>
perf: Have get_perf_callchain() return NULL if crosstask and user are set
Steven Rostedt <rostedt(a)goodmis.org>
perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of current->mm == NULL
Dapeng Mi <dapeng1.mi(a)linux.intel.com>
perf/x86/intel: Add ICL_FIXED_0_ADAPTIVE bit into INTEL_FIXED_BITS_MASK
Kyle Manna <kyle(a)kylemanna.com>
EDAC/ie31200: Add two more Intel Alder Lake-S SoCs for EDAC support
Richard Guy Briggs <rgb(a)redhat.com>
audit: record fanotify event regardless of presence of rules
Charles Keepax <ckeepax(a)opensource.cirrus.com>
genirq/manage: Add buslock back in to enable_irq()
Charles Keepax <ckeepax(a)opensource.cirrus.com>
genirq/manage: Add buslock back in to __disable_irq_nosync()
Charles Keepax <ckeepax(a)opensource.cirrus.com>
genirq/chip: Add buslock back in to irq_set_handler()
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Qualify RETBLEED_INTEL_MSG
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Report correct retbleed mitigation status
Haofeng Li <lihaofeng(a)kylinos.cn>
timekeeping: Fix aux clocks sysfs initialization loop bound
Tejun Heo <tj(a)kernel.org>
sched_ext: Sync error_irq_work before freeing scx_sched
Tejun Heo <tj(a)kernel.org>
sched_ext: Put event_stats_cpu in struct scx_sched_pcpu
Tejun Heo <tj(a)kernel.org>
sched_ext: Move internal type and accessor definitions to ext_internal.h
-------------
Diffstat:
.../admin-guide/hw-vuln/attack_vector_controls.rst | 1 +
Makefile | 4 +-
arch/alpha/kernel/asm-offsets.c | 1 +
arch/arc/kernel/asm-offsets.c | 1 +
arch/arm/kernel/asm-offsets.c | 2 +
arch/arm64/kernel/asm-offsets.c | 1 +
arch/csky/kernel/asm-offsets.c | 1 +
arch/hexagon/kernel/asm-offsets.c | 1 +
arch/loongarch/kernel/asm-offsets.c | 2 +
arch/m68k/kernel/asm-offsets.c | 1 +
arch/microblaze/kernel/asm-offsets.c | 1 +
arch/mips/kernel/asm-offsets.c | 2 +
arch/nios2/kernel/asm-offsets.c | 1 +
arch/openrisc/kernel/asm-offsets.c | 1 +
arch/parisc/kernel/asm-offsets.c | 1 +
arch/powerpc/kernel/asm-offsets.c | 1 +
arch/riscv/kernel/asm-offsets.c | 1 +
arch/s390/kernel/asm-offsets.c | 1 +
arch/sh/kernel/asm-offsets.c | 1 +
arch/sparc/kernel/asm-offsets.c | 1 +
arch/um/kernel/asm-offsets.c | 2 +
arch/x86/events/intel/core.c | 10 +-
arch/x86/include/asm/perf_event.h | 6 +-
arch/x86/kernel/cpu/bugs.c | 27 +-
arch/x86/kvm/pmu.h | 2 +-
arch/xtensa/kernel/asm-offsets.c | 1 +
drivers/edac/ecs.c | 0
drivers/edac/edac_mc_sysfs.c | 24 +
drivers/edac/ie31200_edac.c | 4 +
drivers/edac/mem_repair.c | 0
drivers/edac/scrub.c | 0
fs/btrfs/disk-io.c | 2 +-
fs/btrfs/extent-tree.c | 6 +-
fs/btrfs/inode.c | 7 +-
fs/btrfs/scrub.c | 3 +-
fs/btrfs/transaction.c | 2 +-
fs/btrfs/tree-checker.c | 37 +
fs/btrfs/tree-log.c | 64 +-
fs/btrfs/zoned.c | 8 +-
fs/btrfs/zoned.h | 9 +-
include/linux/audit.h | 2 +-
kernel/cgroup/cpuset.c | 6 +-
kernel/events/callchain.c | 16 +-
kernel/events/core.c | 7 +-
kernel/irq/chip.c | 2 +-
kernel/irq/manage.c | 4 +-
kernel/sched/build_policy.c | 1 +
kernel/sched/ext.c | 1056 +------------------
kernel/sched/ext.h | 23 -
kernel/sched/ext_internal.h | 1064 ++++++++++++++++++++
kernel/sched/fair.c | 3 -
kernel/seccomp.c | 32 +-
kernel/time/timekeeping.c | 2 +-
tools/sched_ext/scx_qmap.bpf.c | 18 +-
54 files changed, 1326 insertions(+), 1150 deletions(-)
From: Quentin Schulz <quentin.schulz(a)cherry.de>
In commit 296602b8e5f7 ("arm64: dts: rockchip: Move RK3399 OPPs to dtsi
files for SoC variants"), everything shared between variants of RK3399
was put into rk3399-base.dtsi and the rest in variant-specific DTSI,
such as rk3399-t, rk3399-op1, rk3399, etc.
Therefore, the variant-specific DTSI should include rk3399-base.dtsi and
not another variant's DTSI.
rk3399-op1 wrongly includes the rk3399 (a variant) DTSI instead of the
rk3399-base DTSI. Let's fix this oversight by including the intended
DTSI.
Fortunately, this had no impact on the resulting DTB since all nodes
were named the same and all node properties were overridden in
rk3399-op1.dtsi. This was checked by doing a checksum of rk3399-op1 DTBs
before and after this commit.
No intended change in behavior.
Fixes: 296602b8e5f7 ("arm64: dts: rockchip: Move RK3399 OPPs to dtsi files for SoC variants")
Cc: stable(a)vger.kernel.org
Signed-off-by: Quentin Schulz <quentin.schulz(a)cherry.de>
---
arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi
index c4f4f1ff6117b..9da6fd82e46b2 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi
@@ -3,7 +3,7 @@
* Copyright (c) 2016-2017 Fuzhou Rockchip Electronics Co., Ltd
*/
-#include "rk3399.dtsi"
+#include "rk3399-base.dtsi"
/ {
cluster0_opp: opp-table-0 {
---
base-commit: e53642b87a4f4b03a8d7e5f8507fc3cd0c595ea6
change-id: 20251029-rk3399-op1-include-b311b4e16909
Best regards,
--
Quentin Schulz <quentin.schulz(a)cherry.de>
This is the start of the stable review cycle for the 6.6.116 release.
There are 32 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 02 Nov 2025 14:00:34 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.6.116-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.6.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.6.116-rc1
William Breathitt Gray <wbg(a)kernel.org>
gpio: idio-16: Define fixed direction of the GPIO lines
Ioana Ciornei <ioana.ciornei(a)nxp.com>
gpio: regmap: add the .fixed_direction_output configuration parameter
Mathieu Dubois-Briand <mathieu.dubois-briand(a)bootlin.com>
gpio: regmap: Allow to allocate regmap-irq device
Vincent Mailhol <mailhol.vincent(a)wanadoo.fr>
bits: introduce fixed-type GENMASK_U*()
Vincent Mailhol <mailhol.vincent(a)wanadoo.fr>
bits: add comments and newlines to #if, #else and #endif directives
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: dbc: fix bogus 1024 byte prefix if ttyDBC read races with stall event
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: dbc: Avoid event polling busyloop if pending rx transfers are inactive.
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: dbc: Improve performance by removing delay in transfer event polling.
Uday M Bhat <uday.m.bhat(a)intel.com>
xhci: dbc: Allow users to modify DbC poll interval via sysfs
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: dbc: poll at different rate depending on data transfer activity
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: remove useless enable of enhanced features
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: refactor EFR lock
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: reorder code to remove prototype declarations
Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
serial: sc16is7xx: remove unused to_sc16is7xx_port macro
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: join: mark 'delete re-add signal' as skipped if not supported
Geliang Tang <tanggeliang(a)kylinos.cn>
selftests: mptcp: disable add_addr retrans in endpoint_tests
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
mptcp: pm: in-kernel: C-flag: handle late ADD_ADDR
Menglong Dong <menglong8.dong(a)gmail.com>
arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
Filipe Manana <fdmanana(a)suse.com>
btrfs: use smp_mb__after_atomic() when forcing COW in create_pending_snapshot()
Filipe Manana <fdmanana(a)suse.com>
btrfs: use level argument in log tree walk callback replay_one_buffer()
Filipe Manana <fdmanana(a)suse.com>
btrfs: always drop log root tree reference in btrfs_replay_log()
Thorsten Blum <thorsten.blum(a)linux.dev>
btrfs: scrub: replace max_t()/min_t() with clamp() in scrub_throttle_dev_io()
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: refine extent allocator hint selection
Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
btrfs: zoned: return error from btrfs_zone_finish_endio()
Avadhut Naik <avadhut.naik(a)amd.com>
EDAC/mc_sysfs: Increase legacy channel support to 16
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Fix reporting of LFENCE retpoline
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Report correct retbleed mitigation status
Josh Poimboeuf <jpoimboe(a)kernel.org>
perf: Skip user unwind if the task is a kernel thread
Josh Poimboeuf <jpoimboe(a)kernel.org>
perf: Have get_perf_callchain() return NULL if crosstask and user are set
Steven Rostedt <rostedt(a)goodmis.org>
perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of current->mm == NULL
Richard Guy Briggs <rgb(a)redhat.com>
audit: record fanotify event regardless of presence of rules
Xiang Mei <xmei5(a)asu.edu>
net/sched: sch_qfq: Fix null-deref in agg_dequeue
-------------
Diffstat:
.../ABI/testing/sysfs-bus-pci-drivers-xhci_hcd | 10 ++
Makefile | 4 +-
arch/alpha/kernel/asm-offsets.c | 1 +
arch/arc/kernel/asm-offsets.c | 1 +
arch/arm/kernel/asm-offsets.c | 2 +
arch/arm64/kernel/asm-offsets.c | 1 +
arch/csky/kernel/asm-offsets.c | 1 +
arch/hexagon/kernel/asm-offsets.c | 1 +
arch/loongarch/kernel/asm-offsets.c | 2 +
arch/m68k/kernel/asm-offsets.c | 1 +
arch/microblaze/kernel/asm-offsets.c | 1 +
arch/mips/kernel/asm-offsets.c | 2 +
arch/nios2/kernel/asm-offsets.c | 1 +
arch/openrisc/kernel/asm-offsets.c | 1 +
arch/parisc/kernel/asm-offsets.c | 1 +
arch/powerpc/kernel/asm-offsets.c | 1 +
arch/riscv/kernel/asm-offsets.c | 1 +
arch/s390/kernel/asm-offsets.c | 1 +
arch/sh/kernel/asm-offsets.c | 1 +
arch/sparc/kernel/asm-offsets.c | 1 +
arch/um/kernel/asm-offsets.c | 2 +
arch/x86/kernel/cpu/bugs.c | 9 +-
arch/xtensa/kernel/asm-offsets.c | 1 +
drivers/edac/edac_mc_sysfs.c | 24 +++
drivers/gpio/gpio-idio-16.c | 5 +
drivers/gpio/gpio-regmap.c | 53 +++++-
drivers/tty/serial/sc16is7xx.c | 185 ++++++++++-----------
drivers/usb/host/xhci-dbgcap.c | 70 +++++++-
drivers/usb/host/xhci-dbgcap.h | 7 +-
fs/btrfs/disk-io.c | 2 +-
fs/btrfs/extent-tree.c | 6 +-
fs/btrfs/inode.c | 7 +-
fs/btrfs/scrub.c | 3 +-
fs/btrfs/transaction.c | 2 +-
fs/btrfs/tree-log.c | 9 +-
fs/btrfs/zoned.c | 8 +-
fs/btrfs/zoned.h | 9 +-
include/linux/audit.h | 2 +-
include/linux/bitops.h | 1 -
include/linux/bits.h | 38 ++++-
include/linux/gpio/regmap.h | 16 ++
include/net/pkt_sched.h | 25 ++-
kernel/events/callchain.c | 16 +-
kernel/events/core.c | 7 +-
net/mptcp/pm_netlink.c | 6 +
net/sched/sch_api.c | 10 --
net/sched/sch_hfsc.c | 16 --
net/sched/sch_qfq.c | 2 +-
tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 +-
49 files changed, 405 insertions(+), 174 deletions(-)
This is the start of the stable review cycle for the 6.12.57 release.
There are 40 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 02 Nov 2025 14:00:34 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.12.57-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.12.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.12.57-rc1
Edward Cree <ecree.xilinx(a)gmail.com>
sfc: fix NULL dereferences in ef100_process_design_param()
Xiaogang Chen <xiaogang.chen(a)amd.com>
udmabuf: fix a buf size overflow issue during udmabuf creation
Aditya Kumar Singh <quic_adisi(a)quicinc.com>
wifi: ath12k: fix read pointer after free in ath12k_mac_assign_vif_to_vdev()
Kees Bakker <kees(a)ijzerbout.nl>
iommu/vt-d: Avoid use of NULL after WARN_ON_ONCE
William Breathitt Gray <wbg(a)kernel.org>
gpio: idio-16: Define fixed direction of the GPIO lines
Ioana Ciornei <ioana.ciornei(a)nxp.com>
gpio: regmap: add the .fixed_direction_output configuration parameter
Mathieu Dubois-Briand <mathieu.dubois-briand(a)bootlin.com>
gpio: regmap: Allow to allocate regmap-irq device
Vincent Mailhol <mailhol.vincent(a)wanadoo.fr>
bits: introduce fixed-type GENMASK_U*()
Vincent Mailhol <mailhol.vincent(a)wanadoo.fr>
bits: add comments and newlines to #if, #else and #endif directives
Wang Liang <wangliang74(a)huawei.com>
bonding: check xdp prog when set bond mode
Hangbin Liu <liuhangbin(a)gmail.com>
bonding: return detailed error when loading native XDP fails
Alexander Wetzel <Alexander(a)wetzel-home.de>
wifi: cfg80211: Add missing lock in cfg80211_check_and_end_cac()
Chao Yu <chao(a)kernel.org>
f2fs: fix to avoid panic once fallocation fails for pinfile
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
mptcp: pm: in-kernel: C-flag: handle late ADD_ADDR
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: join: mark 'delete re-add signal' as skipped if not supported
Geliang Tang <geliang(a)kernel.org>
selftests: mptcp: disable add_addr retrans in endpoint_tests
Jonathan Corbet <corbet(a)lwn.net>
docs: kdoc: handle the obsolescensce of docutils.ErrorString()
Menglong Dong <menglong8.dong(a)gmail.com>
arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
Tejun Heo <tj(a)kernel.org>
sched_ext: Make qmap dump operation non-destructive
Filipe Manana <fdmanana(a)suse.com>
btrfs: use smp_mb__after_atomic() when forcing COW in create_pending_snapshot()
Qu Wenruo <wqu(a)suse.com>
btrfs: tree-checker: add inode extref checks
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction if we fail to update inode in log replay dir fixup
Filipe Manana <fdmanana(a)suse.com>
btrfs: use level argument in log tree walk callback replay_one_buffer()
Filipe Manana <fdmanana(a)suse.com>
btrfs: always drop log root tree reference in btrfs_replay_log()
Thorsten Blum <thorsten.blum(a)linux.dev>
btrfs: scrub: replace max_t()/min_t() with clamp() in scrub_throttle_dev_io()
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: refine extent allocator hint selection
Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
btrfs: zoned: return error from btrfs_zone_finish_endio()
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction in the process_one_buffer() log tree walk callback
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction on specific error places when walking log tree
Chen Ridong <chenridong(a)huawei.com>
cpuset: Use new excpus for nocpu error check when enabling root partition
Avadhut Naik <avadhut.naik(a)amd.com>
EDAC/mc_sysfs: Increase legacy channel support to 16
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Fix reporting of LFENCE retpoline
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Report correct retbleed mitigation status
Jiri Olsa <jolsa(a)kernel.org>
seccomp: passthrough uprobe systemcall without filtering
Josh Poimboeuf <jpoimboe(a)kernel.org>
perf: Skip user unwind if the task is a kernel thread
Josh Poimboeuf <jpoimboe(a)kernel.org>
perf: Have get_perf_callchain() return NULL if crosstask and user are set
Steven Rostedt <rostedt(a)goodmis.org>
perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of current->mm == NULL
Dapeng Mi <dapeng1.mi(a)linux.intel.com>
perf/x86/intel: Add ICL_FIXED_0_ADAPTIVE bit into INTEL_FIXED_BITS_MASK
Richard Guy Briggs <rgb(a)redhat.com>
audit: record fanotify event regardless of presence of rules
Xiang Mei <xmei5(a)asu.edu>
net/sched: sch_qfq: Fix null-deref in agg_dequeue
-------------
Diffstat:
Documentation/sphinx/kernel_abi.py | 4 +-
Documentation/sphinx/kernel_feat.py | 4 +-
Documentation/sphinx/kernel_include.py | 6 ++-
Documentation/sphinx/maintainers_include.py | 4 +-
Makefile | 4 +-
arch/alpha/kernel/asm-offsets.c | 1 +
arch/arc/kernel/asm-offsets.c | 1 +
arch/arm/kernel/asm-offsets.c | 2 +
arch/arm64/kernel/asm-offsets.c | 1 +
arch/csky/kernel/asm-offsets.c | 1 +
arch/hexagon/kernel/asm-offsets.c | 1 +
arch/loongarch/kernel/asm-offsets.c | 2 +
arch/m68k/kernel/asm-offsets.c | 1 +
arch/microblaze/kernel/asm-offsets.c | 1 +
arch/mips/kernel/asm-offsets.c | 2 +
arch/nios2/kernel/asm-offsets.c | 1 +
arch/openrisc/kernel/asm-offsets.c | 1 +
arch/parisc/kernel/asm-offsets.c | 1 +
arch/powerpc/kernel/asm-offsets.c | 1 +
arch/riscv/kernel/asm-offsets.c | 1 +
arch/s390/kernel/asm-offsets.c | 1 +
arch/sh/kernel/asm-offsets.c | 1 +
arch/sparc/kernel/asm-offsets.c | 1 +
arch/um/kernel/asm-offsets.c | 2 +
arch/x86/events/intel/core.c | 10 ++--
arch/x86/include/asm/perf_event.h | 6 ++-
arch/x86/kernel/cpu/bugs.c | 9 ++--
arch/x86/kvm/pmu.h | 2 +-
arch/xtensa/kernel/asm-offsets.c | 1 +
drivers/dma-buf/udmabuf.c | 2 +-
drivers/edac/edac_mc_sysfs.c | 24 ++++++++++
drivers/gpio/gpio-idio-16.c | 5 ++
drivers/gpio/gpio-regmap.c | 53 ++++++++++++++++++--
drivers/iommu/intel/iommu.c | 7 +--
drivers/net/bonding/bond_main.c | 11 +++--
drivers/net/bonding/bond_options.c | 3 ++
drivers/net/ethernet/sfc/ef100_netdev.c | 6 +--
drivers/net/ethernet/sfc/ef100_nic.c | 47 ++++++++----------
drivers/net/wireless/ath/ath12k/mac.c | 6 +--
fs/btrfs/disk-io.c | 2 +-
fs/btrfs/extent-tree.c | 6 ++-
fs/btrfs/inode.c | 7 +--
fs/btrfs/scrub.c | 3 +-
fs/btrfs/transaction.c | 2 +-
fs/btrfs/tree-checker.c | 37 ++++++++++++++
fs/btrfs/tree-log.c | 64 +++++++++++++++++++------
fs/btrfs/zoned.c | 8 ++--
fs/btrfs/zoned.h | 9 ++--
fs/f2fs/file.c | 8 ++--
fs/f2fs/segment.c | 20 ++++----
include/linux/audit.h | 2 +-
include/linux/bitops.h | 1 -
include/linux/bits.h | 38 ++++++++++++++-
include/linux/gpio/regmap.h | 16 +++++++
include/net/bonding.h | 1 +
include/net/pkt_sched.h | 25 +++++++++-
kernel/cgroup/cpuset.c | 6 +--
kernel/events/callchain.c | 16 +++----
kernel/events/core.c | 7 +--
kernel/seccomp.c | 32 ++++++++++---
net/mptcp/pm_netlink.c | 6 +++
net/sched/sch_api.c | 10 ----
net/sched/sch_hfsc.c | 16 -------
net/sched/sch_qfq.c | 2 +-
net/wireless/reg.c | 4 ++
tools/sched_ext/scx_qmap.bpf.c | 18 ++++++-
tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 +-
67 files changed, 442 insertions(+), 164 deletions(-)
The patch titled
Subject: mm/secretmem: fix use-after-free race in fault handler
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-secretmem-fix-use-after-free-race-in-fault-handler.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Lance Yang <lance.yang(a)linux.dev>
Subject: mm/secretmem: fix use-after-free race in fault handler
Date: Fri, 31 Oct 2025 20:09:55 +0800
When a page fault occurs in a secret memory file created with
`memfd_secret(2)`, the kernel will allocate a new folio for it, mark the
underlying page as not-present in the direct map, and add it to the file
mapping.
If two tasks cause a fault in the same page concurrently, both could end
up allocating a folio and removing the page from the direct map, but only
one would succeed in adding the folio to the file mapping. The task that
failed undoes the effects of its attempt by (a) freeing the folio again
and (b) putting the page back into the direct map. However, by doing
these two operations in this order, the page becomes available to the
allocator again before it is placed back in the direct mapping.
If another task attempts to allocate the page between (a) and (b), and the
kernel tries to access it via the direct map, it would result in a
supervisor not-present page fault.
Fix the ordering to restore the direct map before the folio is freed.
Link: https://lkml.kernel.org/r/20251031120955.92116-1-lance.yang@linux.dev
Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
Signed-off-by: Lance Yang <lance.yang(a)linux.dev>
Reported-by: Google Big Sleep <big-sleep-vuln-reports(a)google.com>
Closes: https://lore.kernel.org/linux-mm/CAEXGt5QeDpiHTu3K9tvjUTPqo+d-=wuCNYPa+6sWK…
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/secretmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/secretmem.c~mm-secretmem-fix-use-after-free-race-in-fault-handler
+++ a/mm/secretmem.c
@@ -82,13 +82,13 @@ retry:
__folio_mark_uptodate(folio);
err = filemap_add_folio(mapping, folio, offset, gfp);
if (unlikely(err)) {
- folio_put(folio);
/*
* If a split of large page was required, it
* already happened when we marked the page invalid
* which guarantees that this call won't fail
*/
set_direct_map_default_noflush(folio_page(folio, 0));
+ folio_put(folio);
if (err == -EEXIST)
goto retry;
_
Patches currently in -mm which might be from lance.yang(a)linux.dev are
mm-secretmem-fix-use-after-free-race-in-fault-handler.patch
mm-khugepaged-guard-is_zero_pfn-calls-with-pte_present.patch
From: Doug Berger <opendmb(a)gmail.com>
[ Upstream commit 382748c05e58a9f1935f5a653c352422375566ea ]
Commit 16b269436b72 ("sched/deadline: Modify cpudl::free_cpus
to reflect rd->online") introduced the cpudl_set/clear_freecpu
functions to allow the cpu_dl::free_cpus mask to be manipulated
by the deadline scheduler class rq_on/offline callbacks so the
mask would also reflect this state.
Commit 9659e1eeee28 ("sched/deadline: Remove cpu_active_mask
from cpudl_find()") removed the check of the cpu_active_mask to
save some processing on the premise that the cpudl::free_cpus
mask already reflected the runqueue online state.
Unfortunately, there are cases where it is possible for the
cpudl_clear function to set the free_cpus bit for a CPU when the
deadline runqueue is offline. When this occurs while a CPU is
connected to the default root domain the flag may retain the bad
state after the CPU has been unplugged. Later, a different CPU
that is transitioning through the default root domain may push a
deadline task to the powered down CPU when cpudl_find sees its
free_cpus bit is set. If this happens the task will not have the
opportunity to run.
One example is outlined here:
https://lore.kernel.org/lkml/20250110233010.2339521-1-opendmb@gmail.com
Another occurs when the last deadline task is migrated from a
CPU that has an offlined runqueue. The dequeue_task member of
the deadline scheduler class will eventually call cpudl_clear
and set the free_cpus bit for the CPU.
This commit modifies the cpudl_clear function to be aware of the
online state of the deadline runqueue so that the free_cpus mask
can be updated appropriately.
It is no longer necessary to manage the mask outside of the
cpudl_set/clear functions so the cpudl_set/clear_freecpu
functions are removed. In addition, since the free_cpus mask is
now only updated under the cpudl lock the code was changed to
use the non-atomic __cpumask functions.
Signed-off-by: Doug Berger <opendmb(a)gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
---
kernel/sched/cpudeadline.c | 34 +++++++++-------------------------
kernel/sched/cpudeadline.h | 4 +---
kernel/sched/deadline.c | 8 ++++----
3 files changed, 14 insertions(+), 32 deletions(-)
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 8cb06c8c7eb1..fd28ba4e1a28 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -166,12 +166,13 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
* cpudl_clear - remove a CPU from the cpudl max-heap
* @cp: the cpudl max-heap context
* @cpu: the target CPU
+ * @online: the online state of the deadline runqueue
*
* Notes: assumes cpu_rq(cpu)->lock is locked
*
* Returns: (void)
*/
-void cpudl_clear(struct cpudl *cp, int cpu)
+void cpudl_clear(struct cpudl *cp, int cpu, bool online)
{
int old_idx, new_cpu;
unsigned long flags;
@@ -184,7 +185,7 @@ void cpudl_clear(struct cpudl *cp, int cpu)
if (old_idx == IDX_INVALID) {
/*
* Nothing to remove if old_idx was invalid.
- * This could happen if a rq_offline_dl is
+ * This could happen if rq_online_dl or rq_offline_dl is
* called for a CPU without -dl tasks running.
*/
} else {
@@ -195,9 +196,12 @@ void cpudl_clear(struct cpudl *cp, int cpu)
cp->elements[new_cpu].idx = old_idx;
cp->elements[cpu].idx = IDX_INVALID;
cpudl_heapify(cp, old_idx);
-
- cpumask_set_cpu(cpu, cp->free_cpus);
}
+ if (likely(online))
+ __cpumask_set_cpu(cpu, cp->free_cpus);
+ else
+ __cpumask_clear_cpu(cpu, cp->free_cpus);
+
raw_spin_unlock_irqrestore(&cp->lock, flags);
}
@@ -228,7 +232,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
cp->elements[new_idx].cpu = cpu;
cp->elements[cpu].idx = new_idx;
cpudl_heapify_up(cp, new_idx);
- cpumask_clear_cpu(cpu, cp->free_cpus);
+ __cpumask_clear_cpu(cpu, cp->free_cpus);
} else {
cp->elements[old_idx].dl = dl;
cpudl_heapify(cp, old_idx);
@@ -237,26 +241,6 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
raw_spin_unlock_irqrestore(&cp->lock, flags);
}
-/*
- * cpudl_set_freecpu - Set the cpudl.free_cpus
- * @cp: the cpudl max-heap context
- * @cpu: rd attached CPU
- */
-void cpudl_set_freecpu(struct cpudl *cp, int cpu)
-{
- cpumask_set_cpu(cpu, cp->free_cpus);
-}
-
-/*
- * cpudl_clear_freecpu - Clear the cpudl.free_cpus
- * @cp: the cpudl max-heap context
- * @cpu: rd attached CPU
- */
-void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
-{
- cpumask_clear_cpu(cpu, cp->free_cpus);
-}
-
/*
* cpudl_init - initialize the cpudl structure
* @cp: the cpudl max-heap context
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index 0adeda93b5fb..ecff718d94ae 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -18,9 +18,7 @@ struct cpudl {
#ifdef CONFIG_SMP
int cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask);
void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
-void cpudl_clear(struct cpudl *cp, int cpu);
+void cpudl_clear(struct cpudl *cp, int cpu, bool online);
int cpudl_init(struct cpudl *cp);
-void cpudl_set_freecpu(struct cpudl *cp, int cpu);
-void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
void cpudl_cleanup(struct cpudl *cp);
#endif /* CONFIG_SMP */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6548bd90c5c3..85e4ef476686 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1414,7 +1414,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
if (!dl_rq->dl_nr_running) {
dl_rq->earliest_dl.curr = 0;
dl_rq->earliest_dl.next = 0;
- cpudl_clear(&rq->rd->cpudl, rq->cpu);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, rq->online);
} else {
struct rb_node *leftmost = dl_rq->root.rb_leftmost;
struct sched_dl_entity *entry;
@@ -2349,9 +2349,10 @@ static void rq_online_dl(struct rq *rq)
if (rq->dl.overloaded)
dl_set_overload(rq);
- cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
if (rq->dl.dl_nr_running > 0)
cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
+ else
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, true);
}
/* Assumes rq->lock is held */
@@ -2360,8 +2361,7 @@ static void rq_offline_dl(struct rq *rq)
if (rq->dl.overloaded)
dl_clear_overload(rq);
- cpudl_clear(&rq->rd->cpudl, rq->cpu);
- cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, false);
}
void __init init_sched_dl_class(void)
--
2.34.1
From: Kairui Song <kasong(a)tencent.com>
Since commit 1b7e90020eb77 ("mm, swap: use percpu cluster as allocation
fast path"), swap allocation is protected by a local lock, which means
we can't do any sleeping calls during allocation.
However, the discard routine is not as well taken care of. When the swap
allocator fails to find any usable cluster, it looks at the
pending discard clusters and tries to issue some blocking discards. It may
not necessarily sleep, but the cond_resched at the bio layer indicates
this is wrong when combined with a local lock. And the GFP flag used
for the discard bio is also wrong (not atomic).
It's arguable whether this synchronous discard is helpful at all. In
most cases, the async discard is good enough. And the swap allocator
organizes the clusters very differently since the recent change, so it
is very rare to see discard clusters piling up.
So far, no issues have been observed or reported with typical SSD setups
under months of high pressure. This issue was found during my code
review. But by hacking the kernel a bit (adding an mdelay(500) in the
async discard path), this issue becomes observable, with warnings triggered
by the wrong GFP flag and the cond_resched in the bio layer on debug builds.
So now let's apply a hotfix for this issue: remove the synchronous
discard in the swap allocation path. And when an order 0 allocation fails
with all cluster lists drained on all swap devices, try to do a discard
following the swap device priority list. If any discard released some
clusters, try the allocation again. This way, we can still avoid OOM due
to swap failure if the hardware is very slow and memory pressure is
extremely high.
This may cause more fragmentation issues if the discarding hardware is
really slow. Ideally, we want to discard pending clusters before
continuing to iterate the fragment cluster lists. This can be
implemented in a cleaner way if we clean up the device list iteration
part first.
Cc: stable(a)vger.kernel.org
Fixes: 1b7e90020eb77 ("mm, swap: use percpu cluster as allocation fast path")
Acked-by: Nhat Pham <nphamcs(a)gmail.com>
Signed-off-by: Kairui Song <kasong(a)tencent.com>
---
mm/swapfile.c | 40 +++++++++++++++++++++++++++++++++-------
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index cb2392ed8e0e..33e0bd905c55 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1101,13 +1101,6 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
goto done;
}
- /*
- * We don't have free cluster but have some clusters in discarding,
- * do discard now and reclaim them.
- */
- if ((si->flags & SWP_PAGE_DISCARD) && swap_do_scheduled_discard(si))
- goto new_cluster;
-
if (order)
goto done;
@@ -1394,6 +1387,33 @@ static bool swap_alloc_slow(swp_entry_t *entry,
return false;
}
+/*
+ * Discard pending clusters in a synchronized way when under high pressure.
+ * Return: true if any cluster is discarded.
+ */
+static bool swap_sync_discard(void)
+{
+ bool ret = false;
+ int nid = numa_node_id();
+ struct swap_info_struct *si, *next;
+
+ spin_lock(&swap_avail_lock);
+ plist_for_each_entry_safe(si, next, &swap_avail_heads[nid], avail_lists[nid]) {
+ spin_unlock(&swap_avail_lock);
+ if (get_swap_device_info(si)) {
+ if (si->flags & SWP_PAGE_DISCARD)
+ ret = swap_do_scheduled_discard(si);
+ put_swap_device(si);
+ }
+ if (ret)
+ return true;
+ spin_lock(&swap_avail_lock);
+ }
+ spin_unlock(&swap_avail_lock);
+
+ return false;
+}
+
/**
* folio_alloc_swap - allocate swap space for a folio
* @folio: folio we want to move to swap
@@ -1432,11 +1452,17 @@ int folio_alloc_swap(struct folio *folio, gfp_t gfp)
}
}
+again:
local_lock(&percpu_swap_cluster.lock);
if (!swap_alloc_fast(&entry, order))
swap_alloc_slow(&entry, order);
local_unlock(&percpu_swap_cluster.lock);
+ if (unlikely(!order && !entry.val)) {
+ if (swap_sync_discard())
+ goto again;
+ }
+
/* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */
if (mem_cgroup_try_charge_swap(folio, entry))
goto out_free;
--
2.51.0
Hi,
This compile-tested-only series aims to fix the DBI parsing issue reported in
[1]. The issue stems from the fact that the DT and binding have described the
'dbi' region as 'elbi' from the start.
Now, both binding and DTs are fixed and the driver is reworked to work with both
old and new DTs.
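For illustration only (the actual rework is in patch 3/3), a minimal sketch of
one way a glue driver could cope with both old and new DTs, assuming it maps
the DBI space by name and falls back to the legacy 'elbi' name ('pci' here
stands for the glue driver's dw_pcie handle):

	/* Sketch only: try the correct name first, then the legacy one. */
	pci->dbi_base = devm_platform_ioremap_resource_byname(pdev, "dbi");
	if (IS_ERR(pci->dbi_base)) {
		/* Old DTs describe the DBI region as "elbi". */
		pci->dbi_base = devm_platform_ioremap_resource_byname(pdev, "elbi");
		if (IS_ERR(pci->dbi_base))
			return PTR_ERR(pci->dbi_base);
	}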
Note: The driver patch is OK to be backported down to 6.2, where the common
resource parsing code was introduced. But the DTS patch should not be
backported. And I'm not sure about backporting the binding.
Please test this series on the Meson board with old and new DTs.
- Mani
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
---
Manivannan Sadhasivam (3):
dt-bindings: PCI: amlogic: Fix the register name of the DBI region
arm64: dts: amlogic: Fix the register name of the 'DBI' region
PCI: meson: Fix parsing the DBI register region
.../devicetree/bindings/pci/amlogic,axg-pcie.yaml | 6 +++---
arch/arm64/boot/dts/amlogic/meson-axg.dtsi | 4 ++--
arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 2 +-
drivers/pci/controller/dwc/pci-meson.c | 18 +++++++++++++++---
drivers/pci/controller/dwc/pcie-designware.c | 12 +++++++-----
5 files changed, 28 insertions(+), 14 deletions(-)
---
base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
change-id: 20251031-pci-meson-fix-c8b651bc6662
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
Make sure to drop the references taken to the vtg devices by
of_find_device_by_node() when looking up their driver data during
component probe.
Note that holding a reference to a platform device does not prevent its
driver data from going away so there is no point in keeping the
reference after the lookup helper returns.
Fixes: cc6b741c6f63 ("drm: sti: remove useless fields from vtg structure")
Cc: stable(a)vger.kernel.org # 4.16
Cc: Benjamin Gaignard <benjamin.gaignard(a)collabora.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/gpu/drm/sti/sti_vtg.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index ee81691b3203..ce6bc7e7b135 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -143,12 +143,17 @@ struct sti_vtg {
struct sti_vtg *of_vtg_find(struct device_node *np)
{
struct platform_device *pdev;
+ struct sti_vtg *vtg;
pdev = of_find_device_by_node(np);
if (!pdev)
return NULL;
- return (struct sti_vtg *)platform_get_drvdata(pdev);
+ vtg = platform_get_drvdata(pdev);
+
+ put_device(&pdev->dev);
+
+ return vtg;
}
static void vtg_reset(struct sti_vtg *vtg)
--
2.49.1
For the cases where the user includes a non-zero value in the 'token_uuid_ptr'
field of 'struct vfio_device_bind_iommufd', the copy_struct_from_user()
in vfio_df_ioctl_bind_iommufd() fails with -E2BIG. For the 'minsz' passed,
copy_struct_from_user() expects the newly introduced field to be zeroed,
which would be incorrect in this case.
Fix this by passing the actual size of the kernel struct. If working
with a newer userspace, copy_struct_from_user() would copy the
'token_uuid_ptr' field, and if working with an old userspace, it would
zero out this field, thus still retaining backward compatibility.
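For reference, a rough sketch of the copy_struct_from_user() semantics that
matter here, paraphrased from its kerneldoc (not the actual implementation;
the 'example_' name is illustrative):

	static int example_copy_struct_from_user(void *dst, size_t ksize,
						 const void __user *src,
						 size_t usize)
	{
		if (usize > ksize) {
			/* Newer userspace: unknown trailing bytes must be zero. */
			int ret = check_zeroed_user(src + ksize, usize - ksize);

			if (ret < 0)
				return ret;
			if (ret == 0)
				return -E2BIG;
		} else if (usize < ksize) {
			/* Older userspace: zero-fill the tail of the kernel struct. */
			memset(dst + usize, 0, ksize - usize);
		}
		/* Copy only the part both sides know about. */
		return copy_from_user(dst, src, min(ksize, usize)) ? -EFAULT : 0;
	}

With ksize == minsz, a newer userspace that sets 'token_uuid_ptr' hits the
-E2BIG branch; with ksize == sizeof(bind), the field is either copied or
zero-filled, preserving compatibility with older userspace.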
Fixes: 86624ba3b522 ("vfio/pci: Do vf_token checks for VFIO_DEVICE_BIND_IOMMUFD")
Cc: stable(a)vger.kernel.org
Signed-off-by: Raghavendra Rao Ananta <rananta(a)google.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
---
drivers/vfio/device_cdev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
index 480cac3a0c274..8ceca24ac136c 100644
--- a/drivers/vfio/device_cdev.c
+++ b/drivers/vfio/device_cdev.c
@@ -99,7 +99,7 @@ long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
return ret;
if (user_size < minsz)
return -EINVAL;
- ret = copy_struct_from_user(&bind, minsz, arg, user_size);
+ ret = copy_struct_from_user(&bind, sizeof(bind), arg, user_size);
if (ret)
return ret;
--
2.51.1.930.gacf6e81ea2-goog
Changes in v2:
- Add also resets
- Drop cc-stable tag in the last patch
- Link to v1: https://patch.msgid.link/20251029-dt-bindings-pci-qcom-fixes-power-domains-…
Recent binding changes forgot to make power-domains and resets required.
I am not updating SC8180xp because it will be fixed other way in my next
patchset.
Best regards,
Krzysztof
---
Krzysztof Kozlowski (9):
dt-bindings: PCI: qcom,pcie-sa8775p: Add missing required power-domains and resets
dt-bindings: PCI: qcom,pcie-sc7280: Add missing required power-domains and resets
dt-bindings: PCI: qcom,pcie-sc8280xp: Add missing required power-domains and resets
dt-bindings: PCI: qcom,pcie-sm8150: Add missing required power-domains and resets
dt-bindings: PCI: qcom,pcie-sm8250: Add missing required power-domains and resets
dt-bindings: PCI: qcom,pcie-sm8350: Add missing required power-domains and resets
dt-bindings: PCI: qcom,pcie-sm8450: Add missing required power-domains and resets
dt-bindings: PCI: qcom,pcie-sm8550: Add missing required power-domains and resets
dt-bindings: PCI: qcom,pcie-x1e80100: Add missing required power-domains and resets
Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml | 3 +++
Documentation/devicetree/bindings/pci/qcom,pcie-sc7280.yaml | 5 +++++
Documentation/devicetree/bindings/pci/qcom,pcie-sc8280xp.yaml | 3 +++
Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml | 5 +++++
Documentation/devicetree/bindings/pci/qcom,pcie-sm8250.yaml | 5 +++++
Documentation/devicetree/bindings/pci/qcom,pcie-sm8350.yaml | 5 +++++
Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml | 5 +++++
Documentation/devicetree/bindings/pci/qcom,pcie-sm8550.yaml | 5 +++++
Documentation/devicetree/bindings/pci/qcom,pcie-x1e80100.yaml | 5 +++++
9 files changed, 41 insertions(+)
---
base-commit: 326bbf6e2b1ce8d1e6166cce2ca414aa241c382f
change-id: 20251029-dt-bindings-pci-qcom-fixes-power-domains-36a669b2e252
Best regards,
--
Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Set the DMA mask before calling nvkm_device_ctor(), so that when the
flush page is created in nvkm_fb_ctor(), the allocation will not fail
if the page is outside of DMA address space, which can easily happen if
the IOMMU is disabled. In such situations, you will get an error like this:
nouveau 0000:65:00.0: DMA addr 0x0000000107c56000+4096 overflow (mask ffffffff, bus limit 0).
Commit 38f5359354d4 ("drm/nouveau/pci: set streaming DMA mask early")
set the mask after calling nvkm_device_ctor(), but back then there was
no flush page being created, which might explain why the mask wasn't
set earlier.
Flush page allocation was added in commit 5728d064190e ("drm/nouveau/fb:
handle sysmem flush page from common code"). nvkm_fb_ctor() calls
alloc_page(), which can allocate a page anywhere in system memory, but
then calls dma_map_page() on that page. But since the DMA mask is still
set to 32, the map can fail if the page is allocated above 4GB. This is
easy to reproduce on systems with a lot of memory and IOMMU disabled.
An alternative approach would be to force the allocation of the flush
page to low memory, by specifying __GFP_DMA32. However, this would
always allocate the page in low memory, even though the hardware can
access high memory.
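For completeness, the rejected alternative would have been roughly the
following (sketch only, variable name illustrative):

	/* Rejected approach: force the sysmem flush page below 4GB. */
	struct page *flush_page = alloc_page(GFP_KERNEL | __GFP_DMA32);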
Fixes: 5728d064190e ("drm/nouveau/fb: handle sysmem flush page from common code")
Signed-off-by: Timur Tabi <ttabi(a)nvidia.com>
---
.../gpu/drm/nouveau/nvkm/engine/device/pci.c | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 8f0261a0d618..7cc5a7499583 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1695,6 +1695,18 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg,
*pdevice = &pdev->device;
pdev->pdev = pci_dev;
+ /* Set DMA mask based on capabilities reported by the MMU subdev. */
+ if (pdev->device.mmu && !pdev->device.pci->agp.bridge)
+ bits = pdev->device.mmu->dma_bits;
+ else
+ bits = 32;
+
+ ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits));
+ if (ret && bits != 32) {
+ dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
+ pdev->device.mmu->dma_bits = 32;
+ }
+
ret = nvkm_device_ctor(&nvkm_device_pci_func, quirk, &pci_dev->dev,
pci_is_pcie(pci_dev) ? NVKM_DEVICE_PCIE :
pci_find_capability(pci_dev, PCI_CAP_ID_AGP) ?
@@ -1708,17 +1720,5 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg,
if (ret)
return ret;
- /* Set DMA mask based on capabilities reported by the MMU subdev. */
- if (pdev->device.mmu && !pdev->device.pci->agp.bridge)
- bits = pdev->device.mmu->dma_bits;
- else
- bits = 32;
-
- ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits));
- if (ret && bits != 32) {
- dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
- pdev->device.mmu->dma_bits = 32;
- }
-
return 0;
}
base-commit: 18a7e218cfcdca6666e1f7356533e4c988780b57
--
2.51.0
In the parse_adv_monitor_pattern() function, the value of
the 'length' variable is currently limited to HCI_MAX_EXT_AD_LENGTH (251).
The size of the 'value' array in the mgmt_adv_pattern structure is 31.
If the value of 'patterns[i].length' is set from user space
and exceeds 31, the 'patterns[i].value' array can be accessed
out of bounds when copied.
Increasing the size of the 'value' array in
the 'mgmt_adv_pattern' structure would break userspace.
Considering this, and to avoid the OOB access, revert the limits for
'offset' and 'length' back to the value of HCI_MAX_AD_LENGTH.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: db08722fc7d4 ("Bluetooth: hci_core: Fix missing instances using HCI_MAX_AD_LENGTH")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
---
include/net/bluetooth/mgmt.h | 2 +-
net/bluetooth/mgmt.c | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 74edea06985b..4b07ce6dfd69 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -780,7 +780,7 @@ struct mgmt_adv_pattern {
__u8 ad_type;
__u8 offset;
__u8 length;
- __u8 value[31];
+ __u8 value[HCI_MAX_AD_LENGTH];
} __packed;
#define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index a3d16eece0d2..500033b70a96 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -5391,9 +5391,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count,
for (i = 0; i < pattern_count; i++) {
offset = patterns[i].offset;
length = patterns[i].length;
- if (offset >= HCI_MAX_EXT_AD_LENGTH ||
- length > HCI_MAX_EXT_AD_LENGTH ||
- (offset + length) > HCI_MAX_EXT_AD_LENGTH)
+ if (offset >= HCI_MAX_AD_LENGTH ||
+ length > HCI_MAX_AD_LENGTH ||
+ (offset + length) > HCI_MAX_AD_LENGTH)
return MGMT_STATUS_INVALID_PARAMS;
p = kmalloc(sizeof(*p), GFP_KERNEL);
--
2.39.5
A kernel memory leak was identified by the 'ioctl_sg01' test from Linux
Test Project (LTP). The following bytes were mainly observed: 0x53425355.
When USB storage devices incorrectly skip the data phase with status data,
the code extracts/validates the CSW from the sg buffer, but fails to clear
it afterwards. This leaves status protocol data in srb's transfer buffer,
such as the US_BULK_CS_SIGN 'USBS' signature observed here. Thus, this can
lead to USB protocol data leaking to user space through SCSI generic
(/dev/sg*) interfaces, such as the one seen here when the LTP test requested
512 KiB.
Fix the leak by zeroing the CSW data in srb's transfer buffer immediately
after the validation of devices that skip data phase.
Note: Unlike CVE-2018-1000204, which fixed a big leak by zeroing pages
at allocation time, this leak occurs after allocation, when USB protocol
data is written to already-allocated sg pages.
Fixes: a45b599ad808 ("scsi: sg: allocate with __GFP_ZERO in sg_build_indirect()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Desnes Nunes <desnesn(a)redhat.com>
---
V2->V3: Changed memset to use sizeof(buf) and added a comment about the leak
V1->V2: Used the same code style found on usb_stor_Bulk_transport()
drivers/usb/storage/transport.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index 1aa1bd26c81f..9a4bf86e7b6a 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -1200,7 +1200,23 @@ int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us)
US_BULK_CS_WRAP_LEN &&
bcs->Signature ==
cpu_to_le32(US_BULK_CS_SIGN)) {
+ unsigned char buf[US_BULK_CS_WRAP_LEN];
+
usb_stor_dbg(us, "Device skipped data phase\n");
+
+ /*
+ * Devices skipping data phase might leave CSW data in srb's
+ * transfer buffer. Zero it to prevent USB protocol leakage.
+ */
+ sg = NULL;
+ offset = 0;
+ memset(buf, 0, sizeof(buf));
+ if (usb_stor_access_xfer_buf(buf,
+ US_BULK_CS_WRAP_LEN, srb, &sg,
+ &offset, TO_XFER_BUF) !=
+ US_BULK_CS_WRAP_LEN)
+ usb_stor_dbg(us, "Failed to clear CSW data\n");
+
scsi_set_resid(srb, transfer_length);
goto skipped_data_phase;
}
--
2.51.0
Since kernel 6.17, the display stack needs to reset the hardware properly to
ensure that we don't run into issues with the hardware configured by the
bootloader. The MDSS reset is necessary to have a working display when the
bootloader has already initialized it for the boot splash screen.
Signed-off-by: Alexey Minnekhanov <alexeymin(a)postmarketos.org>
---
Alexey Minnekhanov (3):
dt-bindings: clock: mmcc-sdm660: Add missing MDSS reset
clk: qcom: mmcc-sdm660: Add missing MDSS reset
arm64: dts: qcom: sdm630: Add missing MDSS reset
arch/arm64/boot/dts/qcom/sdm630.dtsi | 1 +
drivers/clk/qcom/mmcc-sdm660.c | 1 +
include/dt-bindings/clock/qcom,mmcc-sdm660.h | 1 +
3 files changed, 3 insertions(+)
---
base-commit: e53642b87a4f4b03a8d7e5f8507fc3cd0c595ea6
change-id: 20251031-sdm660-mdss-reset-015a46a238b5
Best regards,
--
Alexey Minnekhanov <alexeymin(a)postmarketos.org>
In order to set the AMCR register, which configures the
memory-region split between ospi1 and ospi2, we need to
identify the ospi instance.
Using memory-region-names allows identifying the
ospi instance this memory-region belongs to.
Fixes: cad2492de91c ("arm64: dts: st: Add SPI NOR flash support on stm32mp257f-ev1 board")
Cc: stable(a)vger.kernel.org
Signed-off-by: Patrice Chotard <patrice.chotard(a)foss.st.com>
---
Changes in v4:
- Rebase on v6.18-rc1
- Link to v3: https://lore.kernel.org/r/20250811-upstream_fix_dts_omm-v3-1-c4186b7667cb@f…
Changes in v3:
- Set again "Cc: <stable(a)vger.kernel.org>"
- Link to v2: https://lore.kernel.org/r/20250811-upstream_fix_dts_omm-v2-1-00ff55076bd5@f…
Changes in v2:
- Update commit message.
- Use correct memory-region-names value.
- Remove "Cc: <stable(a)vger.kernel.org>" tag as the fixed patch is not part of a LTS.
- Link to v1: https://lore.kernel.org/r/20250806-upstream_fix_dts_omm-v1-1-e68c15ed422d@f…
---
arch/arm64/boot/dts/st/stm32mp257f-ev1.dts | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts b/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
index 6e165073f732..bb6d6393d2e4 100644
--- a/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
+++ b/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
@@ -266,6 +266,7 @@ &i2c8 {
&ommanager {
memory-region = <&mm_ospi1>;
+ memory-region-names = "ospi1";
pinctrl-0 = <&ospi_port1_clk_pins_a
&ospi_port1_io03_pins_a
&ospi_port1_cs0_pins_a>;
---
base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
change-id: 20250806-upstream_fix_dts_omm-c006b69042f1
Best regards,
--
Patrice Chotard <patrice.chotard(a)foss.st.com>
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 2ba5772e530f73eb847fb96ce6c4017894869552
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102619-plaster-sitting-ed2e@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ba5772e530f73eb847fb96ce6c4017894869552 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <wbg(a)kernel.org>
Date: Mon, 20 Oct 2025 17:51:46 +0900
Subject: [PATCH] gpio: idio-16: Define fixed direction of the GPIO lines
The direction of the IDIO-16 GPIO lines is fixed with the first 16 lines
as output and the remaining 16 lines as input. Set the gpio_config
fixed_direction_output member to represent the fixed direction of the
GPIO lines.
Fixes: db02247827ef ("gpio: idio-16: Migrate to the regmap API")
Reported-by: Mark Cave-Ayland <mark.caveayland(a)nutanix.com>
Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com
Suggested-by: Michael Walle <mwalle(a)kernel.org>
Cc: stable(a)vger.kernel.org # ae495810cffe: gpio: regmap: add the .fixed_direction_output configuration parameter
Cc: stable(a)vger.kernel.org
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Signed-off-by: William Breathitt Gray <wbg(a)kernel.org>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-3-ebeb50e93c3…
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c
index 0103be977c66..4fbae6f6a497 100644
--- a/drivers/gpio/gpio-idio-16.c
+++ b/drivers/gpio/gpio-idio-16.c
@@ -6,6 +6,7 @@
#define DEFAULT_SYMBOL_NAMESPACE "GPIO_IDIO_16"
+#include <linux/bitmap.h>
#include <linux/bits.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -107,6 +108,7 @@ int devm_idio_16_regmap_register(struct device *const dev,
struct idio_16_data *data;
struct regmap_irq_chip *chip;
struct regmap_irq_chip_data *chip_data;
+ DECLARE_BITMAP(fixed_direction_output, IDIO_16_NGPIO);
if (!config->parent)
return -EINVAL;
@@ -164,6 +166,9 @@ int devm_idio_16_regmap_register(struct device *const dev,
gpio_config.irq_domain = regmap_irq_get_domain(chip_data);
gpio_config.reg_mask_xlate = idio_16_reg_mask_xlate;
+ bitmap_from_u64(fixed_direction_output, GENMASK_U64(15, 0));
+ gpio_config.fixed_direction_output = fixed_direction_output;
+
return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &gpio_config));
}
EXPORT_SYMBOL_GPL(devm_idio_16_regmap_register);
Hi
When people update docutils to 0.22, the Documentation build will start
failing, as documented in commit 00d95fcc4dee ("docs: kdoc: handle the
obsolescensce of docutils.ErrorString()").
So it would be nice if people could still build the documentation with
newer versions (this was, for instance, relevant for Debian unstable when
building the 6.17.y based packages): https://bugs.debian.org/1118100
Thus, can you please backport 00d95fcc4dee ("docs: kdoc: handle the
obsolescensce of docutils.ErrorString()") down to the 6.17.y stable
series? The commit does not apply cleanly, so I am adding a backport for it.
Actually, it would be nice to go further back, but I also tested 6.12.y,
and there, due to the missing faccc0ec64e1 ("docs: sphinx/kernel_abi:
adjust coding style"), more work is needed.
faccc0ec64e1 ("docs: sphinx/kernel_abi: adjust coding style") should
be applicable, but I'm not sure if you want to support that. Jonathan,
what do you think?
Regards,
Salvatore
From: Lance Yang <lance.yang(a)linux.dev>
When a page fault occurs in a secret memory file created with
`memfd_secret(2)`, the kernel will allocate a new folio for it, mark the
underlying page as not-present in the direct map, and add it to the file
mapping.
If two tasks cause a fault in the same page concurrently, both could end
up allocating a folio and removing the page from the direct map, but only
one would succeed in adding the folio to the file mapping. The task that
failed undoes the effects of its attempt by (a) freeing the folio again
and (b) putting the page back into the direct map. However, by doing
these two operations in this order, the page becomes available to the
allocator again before it is placed back in the direct mapping.
If another task attempts to allocate the page between (a) and (b), and
the kernel tries to access it via the direct map, it would result in a
supervisor not-present page fault.
Fix the ordering to restore the direct map before the folio is freed.
Cc: <stable(a)vger.kernel.org>
Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
Reported-by: Google Big Sleep <big-sleep-vuln-reports(a)google.com>
Closes: https://lore.kernel.org/linux-mm/CAEXGt5QeDpiHTu3K9tvjUTPqo+d-=wuCNYPa+6sWK…
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Signed-off-by: Lance Yang <lance.yang(a)linux.dev>
---
v1 -> v2:
- Collect Reviewed-by from Mike and Lorenzo - thanks!
- Collect Acked-by from David - thanks!
- Update the changelog as Mike suggested
- https://lore.kernel.org/linux-mm/aQSIdCpf-2pJLwAF@kernel.org/
mm/secretmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/secretmem.c b/mm/secretmem.c
index c1bd9a4b663d..37f6d1097853 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -82,13 +82,13 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
__folio_mark_uptodate(folio);
err = filemap_add_folio(mapping, folio, offset, gfp);
if (unlikely(err)) {
- folio_put(folio);
/*
* If a split of large page was required, it
* already happened when we marked the page invalid
* which guarantees that this call won't fail
*/
set_direct_map_default_noflush(folio_page(folio, 0));
+ folio_put(folio);
if (err == -EEXIST)
goto retry;
--
2.49.0
From: Lance Yang <lance.yang(a)linux.dev>
The error path in secretmem_fault() frees a folio before restoring its
direct map status, which is a race leading to a panic.
Fix the ordering to restore the map before the folio is freed.
Cc: <stable(a)vger.kernel.org>
Reported-by: Google Big Sleep <big-sleep-vuln-reports(a)google.com>
Closes: https://lore.kernel.org/linux-mm/CAEXGt5QeDpiHTu3K9tvjUTPqo+d-=wuCNYPa+6sWK…
Signed-off-by: Lance Yang <lance.yang(a)linux.dev>
---
mm/secretmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/secretmem.c b/mm/secretmem.c
index c1bd9a4b663d..37f6d1097853 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -82,13 +82,13 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
__folio_mark_uptodate(folio);
err = filemap_add_folio(mapping, folio, offset, gfp);
if (unlikely(err)) {
- folio_put(folio);
/*
* If a split of large page was required, it
* already happened when we marked the page invalid
* which guarantees that this call won't fail
*/
set_direct_map_default_noflush(folio_page(folio, 0));
+ folio_put(folio);
if (err == -EEXIST)
goto retry;
--
2.49.0
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 2ba5772e530f73eb847fb96ce6c4017894869552
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102619-shortage-tabby-5157@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ba5772e530f73eb847fb96ce6c4017894869552 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <wbg(a)kernel.org>
Date: Mon, 20 Oct 2025 17:51:46 +0900
Subject: [PATCH] gpio: idio-16: Define fixed direction of the GPIO lines
The direction of the IDIO-16 GPIO lines is fixed with the first 16 lines
as output and the remaining 16 lines as input. Set the gpio_config
fixed_direction_output member to represent the fixed direction of the
GPIO lines.
Fixes: db02247827ef ("gpio: idio-16: Migrate to the regmap API")
Reported-by: Mark Cave-Ayland <mark.caveayland(a)nutanix.com>
Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com
Suggested-by: Michael Walle <mwalle(a)kernel.org>
Cc: stable(a)vger.kernel.org # ae495810cffe: gpio: regmap: add the .fixed_direction_output configuration parameter
Cc: stable(a)vger.kernel.org
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Signed-off-by: William Breathitt Gray <wbg(a)kernel.org>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-3-ebeb50e93c3…
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c
index 0103be977c66..4fbae6f6a497 100644
--- a/drivers/gpio/gpio-idio-16.c
+++ b/drivers/gpio/gpio-idio-16.c
@@ -6,6 +6,7 @@
#define DEFAULT_SYMBOL_NAMESPACE "GPIO_IDIO_16"
+#include <linux/bitmap.h>
#include <linux/bits.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -107,6 +108,7 @@ int devm_idio_16_regmap_register(struct device *const dev,
struct idio_16_data *data;
struct regmap_irq_chip *chip;
struct regmap_irq_chip_data *chip_data;
+ DECLARE_BITMAP(fixed_direction_output, IDIO_16_NGPIO);
if (!config->parent)
return -EINVAL;
@@ -164,6 +166,9 @@ int devm_idio_16_regmap_register(struct device *const dev,
gpio_config.irq_domain = regmap_irq_get_domain(chip_data);
gpio_config.reg_mask_xlate = idio_16_reg_mask_xlate;
+ bitmap_from_u64(fixed_direction_output, GENMASK_U64(15, 0));
+ gpio_config.fixed_direction_output = fixed_direction_output;
+
return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &gpio_config));
}
EXPORT_SYMBOL_GPL(devm_idio_16_regmap_register);
This kernel version doesn't build with GCC 15:
In file included from include/uapi/linux/posix_types.h:5,
from include/uapi/linux/types.h:14,
from include/linux/types.h:6,
from arch/x86/realmode/rm/wakeup.h:11,
from arch/x86/realmode/rm/wakemain.c:2:
include/linux/stddef.h:11:9: error: cannot use keyword 'false' as enumeration constant
11 | false = 0,
| ^~~~~
include/linux/stddef.h:11:9: note: 'false' is a keyword with '-std=c23' onwards
include/linux/types.h:30:33: error: 'bool' cannot be defined via 'typedef'
30 | typedef _Bool bool;
| ^~~~
include/linux/types.h:30:33: note: 'bool' is a keyword with '-std=c23' onwards
include/linux/types.h:30:1: warning: useless type name in empty declaration
30 | typedef _Bool bool;
| ^~~~~~~
I initially fixed this by adding -std=gnu11 in arch/x86/Makefile, then I
realised this fix was already done in an upstream commit, created before
the GCC 15 release and not mentioning the error I had. This is the first
patch.
When I was investigating my error, I noticed other commits had already
been backported to stable versions. They were all adding -std=gnu11 in
different Makefiles. Their commit messages mentioned that 'gnu11' was
picked to match the standard used in the main Makefile. But this is not
the case in this kernel version. Patch 2 fixes that.
Finally, I noticed an extra warning that I didn't have in v5.15. Patch 3
fixes that.
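For reference, here is a minimal standalone reproduction of the C23 keyword
clash (my own sketch, not kernel code; it mimics the definitions from
include/linux/stddef.h and include/linux/types.h, and assumes GCC 15's
default of -std=gnu23):

/* c23-clash.c
 * builds with:  gcc -std=gnu11 -c c23-clash.c
 * fails with:   gcc -std=gnu23 -c c23-clash.c   (the GCC 15 default)
 * producing the same errors as quoted above.
 */
enum {
	false = 0,	/* 'false' is a keyword from C23 onwards */
	true = 1
};

typedef _Bool bool;	/* 'bool' is a keyword from C23 onwards */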
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Alexey Dobriyan (1):
x86/boot: Compile boot code with -std=gnu11 too
Matthieu Baerts (NGI0) (2):
arch: back to -std=gnu89 in < v5.18
tracing: fix declaration-after-statement warning
arch/parisc/boot/compressed/Makefile | 2 +-
arch/s390/Makefile | 2 +-
arch/s390/purgatory/Makefile | 2 +-
arch/x86/Makefile | 2 +-
arch/x86/boot/compressed/Makefile | 2 +-
drivers/firmware/efi/libstub/Makefile | 2 +-
kernel/trace/trace_events_synth.c | 3 ++-
7 files changed, 8 insertions(+), 7 deletions(-)
---
base-commit: a32db271d59d9f35f3a937ac27fcc2db1e029cdc
change-id: 20251017-v5-10-gcc-15-ad2510f784f7
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
The patch titled
Subject: nilfs2: avoid having an active sc_timer before freeing sci
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
nilfs2-avoid-having-an-active-sc_timer-before-freeing-sci.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Edward Adam Davis <eadavis(a)qq.com>
Subject: nilfs2: avoid having an active sc_timer before freeing sci
Date: Thu, 30 Oct 2025 07:51:52 +0900
Because kthread_stop() did not stop sc_task properly and returned -EINTR,
sc_timer was not properly shut down, ultimately causing the problem [1]
reported by syzbot when sci is freed with the timer still active.
Since the main function of the sc_task thread, nilfs_segctor_thread(),
returns 0 on success, a non-zero return value from kthread_stop() in
nilfs_segctor_destroy() means that sc_timer has not been properly shut
down.
In that case, use timer_shutdown_sync() to synchronously wait for sc_timer
to shut down, and set sc_task to NULL under the protection of
sc_state_lock, so as to avoid the issue caused by sc_timer not being
properly shut down.
[1]
ODEBUG: free active (active state 0) object: 00000000dacb411a object type: timer_list hint: nilfs_construction_timeout
Call trace:
nilfs_segctor_destroy fs/nilfs2/segment.c:2811 [inline]
nilfs_detach_log_writer+0x668/0x8cc fs/nilfs2/segment.c:2877
nilfs_put_super+0x4c/0x12c fs/nilfs2/super.c:509
Link: https://lkml.kernel.org/r/20251029225226.16044-1-konishi.ryusuke@gmail.com
Fixes: 3f66cc261ccb ("nilfs2: use kthread_create and kthread_stop for the log writer thread")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+24d8b70f039151f65590(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=24d8b70f039151f65590
Tested-by: syzbot+24d8b70f039151f65590(a)syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis(a)qq.com>
Cc: <stable(a)vger.kernel.org> [6.12+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/fs/nilfs2/segment.c~nilfs2-avoid-having-an-active-sc_timer-before-freeing-sci
+++ a/fs/nilfs2/segment.c
@@ -2768,7 +2768,12 @@ static void nilfs_segctor_destroy(struct
if (sci->sc_task) {
wake_up(&sci->sc_wait_daemon);
- kthread_stop(sci->sc_task);
+ if (kthread_stop(sci->sc_task)) {
+ spin_lock(&sci->sc_state_lock);
+ sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
+ spin_unlock(&sci->sc_state_lock);
+ }
}
spin_lock(&sci->sc_state_lock);
_
Patches currently in -mm which might be from eadavis(a)qq.com are
nilfs2-avoid-having-an-active-sc_timer-before-freeing-sci.patch
The patch titled
Subject: scripts/decode_stacktrace.sh: fix build ID and PC source parsing
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
scripts-decode_stacktracesh-fix-build-id-and-pc-source-parsing.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Carlos Llamas <cmllamas(a)google.com>
Subject: scripts/decode_stacktrace.sh: fix build ID and PC source parsing
Date: Thu, 30 Oct 2025 01:03:33 +0000
Support for parsing PC source info in stacktraces (e.g. '(P)') was added
in commit 2bff77c665ed ("scripts/decode_stacktrace.sh: fix decoding of
lines with an additional info"). However, this logic was placed after the
build ID processing. This incorrect order fails to parse lines containing
both elements, e.g.:
drm_gem_mmap_obj+0x114/0x200 [drm 03d0564e0529947d67bb2008c3548be77279fd27] (P)
This patch fixes the problem by extracting the PC source info first and
then processing the module build ID. With this change, the line above is
now properly parsed as such:
drm_gem_mmap_obj (./include/linux/mmap_lock.h:212 ./include/linux/mm.h:811 drivers/gpu/drm/drm_gem.c:1177) drm (P)
While here, also add a brief explanation of the build ID section.
Link: https://lkml.kernel.org/r/20251030010347.2731925-1-cmllamas@google.com
Fixes: bdf8eafbf7f5 ("arm64: stacktrace: report source of unwind data")
Fixes: 2bff77c665ed ("scripts/decode_stacktrace.sh: fix decoding of lines with an additional info")
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Reviewed-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Marc Rutland <mark.rutland(a)arm.com>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Matthieu Baerts <matttbe(a)kernel.org>
Cc: Miroslav Benes <mbenes(a)suse.cz>
Cc: Puranjay Mohan <puranjay(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/decode_stacktrace.sh | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
--- a/scripts/decode_stacktrace.sh~scripts-decode_stacktracesh-fix-build-id-and-pc-source-parsing
+++ a/scripts/decode_stacktrace.sh
@@ -277,12 +277,6 @@ handle_line() {
fi
done
- if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
- words[$last-1]="${words[$last-1]} ${words[$last]}"
- unset words[$last] spaces[$last]
- last=$(( $last - 1 ))
- fi
-
# Extract info after the symbol if present. E.g.:
# func_name+0x54/0x80 (P)
# ^^^
@@ -294,6 +288,14 @@ handle_line() {
unset words[$last] spaces[$last]
last=$(( $last - 1 ))
fi
+
+ # Join module name with its build id if present, as these were
+ # split during tokenization (e.g. "[module" and "modbuildid]").
+ if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
+ words[$last-1]="${words[$last-1]} ${words[$last]}"
+ unset words[$last] spaces[$last]
+ last=$(( $last - 1 ))
+ fi
if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then
module=${words[$last]}
_
Patches currently in -mm which might be from cmllamas(a)google.com are
scripts-decode_stacktracesh-fix-build-id-and-pc-source-parsing.patch
A kernel memory leak was identified by the 'ioctl_sg01' test from Linux
Test Project (LTP). The following bytes were mainly observed: 0x53425355.
When USB storage devices incorrectly skip the data phase with status data,
the code extracts/validates the CSW from the sg buffer, but fails to clear
it afterwards. This leaves status protocol data in srb's transfer buffer,
such as the US_BULK_CS_SIGN 'USBS' signature observed here. Thus, this can
lead to USB protocol leaks to user space through SCSI generic (/dev/sg*)
interfaces, such as the one seen here when the LTP test requested 512 KiB.
Fix the leak by zeroing the CSW data in srb's transfer buffer immediately
after the validation of devices that skip data phase.
Note: Unlike CVE-2018-1000204, which fixed a big leak by zeroing pages at
allocation time, this leak occurs after allocation, when USB protocol data
is written to already-allocated sg pages.
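As a side note, the 0x53425355 value reported by LTP is just the CSW
signature read back from the sg buffer. A tiny userspace sketch (my own
illustration, assuming a little-endian host; not part of the patch) shows
the decoding:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t leaked = 0x53425355;	/* US_BULK_CS_SIGN */
	char sig[5] = { 0 };

	/* on a little-endian host the bytes sit in memory as 55 53 42 53 */
	memcpy(sig, &leaked, sizeof(leaked));
	printf("leaked bytes spell \"%s\"\n", sig);	/* prints USBS */
	return 0;
}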
v2: Use the same code style found on usb_stor_Bulk_transport()
Fixes: a45b599ad808 ("scsi: sg: allocate with __GFP_ZERO in sg_build_indirect()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Desnes Nunes <desnesn(a)redhat.com>
---
drivers/usb/storage/transport.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index 1aa1bd26c81f..ee6b89f7f9ac 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -1200,7 +1200,19 @@ int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us)
US_BULK_CS_WRAP_LEN &&
bcs->Signature ==
cpu_to_le32(US_BULK_CS_SIGN)) {
+ unsigned char buf[US_BULK_CS_WRAP_LEN];
+
+ sg = NULL;
+ offset = 0;
+ memset(buf, 0, US_BULK_CS_WRAP_LEN);
usb_stor_dbg(us, "Device skipped data phase\n");
+
+ if (usb_stor_access_xfer_buf(buf,
+ US_BULK_CS_WRAP_LEN, srb, &sg,
+ &offset, TO_XFER_BUF) !=
+ US_BULK_CS_WRAP_LEN)
+ usb_stor_dbg(us, "Failed to clear CSW data\n");
+
scsi_set_resid(srb, transfer_length);
goto skipped_data_phase;
}
--
2.51.0
The patch titled
Subject: mm/damon/sysfs: change next_update_jiffies to a global variable
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-sysfs-change-next_update_jiffies-to-a-global-variable.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Quanmin Yan <yanquanmin1(a)huawei.com>
Subject: mm/damon/sysfs: change next_update_jiffies to a global variable
Date: Thu, 30 Oct 2025 10:07:46 +0800
In DAMON's damon_sysfs_repeat_call_fn(), time_before() is used to compare
the current jiffies with next_update_jiffies to determine whether to
update the sysfs files at this moment.
On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this causes time_before() in
damon_sysfs_repeat_call_fn() to unexpectedly return true during the first
5 minutes after boot on 32-bit systems (see [1] for more explanation,
which fixes another jiffies-related issue before). As a result, DAMON
does not update sysfs files during that period.
There is also an issue unrelated to the system's word size[2]: if the
user stops DAMON just after next_update_jiffies is updated and restarts
it after 'refresh_ms' or a longer delay, next_update_jiffies will retain
an older value, causing time_before() to return false and the update to
happen earlier than expected.
Fix these issues by making next_update_jiffies a global variable and
initializing it each time DAMON is started.
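To illustrate the 32-bit case, here is a small userspace model (my own
sketch, not kernel code; HZ=100 is assumed and the types are narrowed to
32 bits) of why a zero-initialized static deadline makes time_before()
return true right after boot:

#include <stdint.h>
#include <stdio.h>

#define HZ 100
/* the kernel starts jiffies at "-5 minutes" to surface wrap bugs early */
#define INITIAL_JIFFIES ((uint32_t)(-300 * HZ))

/* 32-bit model of the kernel's time_before() */
static int time_before32(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	uint32_t jiffies = INITIAL_JIFFIES;	/* just after boot */
	uint32_t next_update_jiffies = 0;	/* zero-initialized static */

	/* prints 1: the sysfs refresh is skipped for the first ~5 minutes */
	printf("%d\n", time_before32(jiffies, next_update_jiffies));
	return 0;
}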
Link: https://lkml.kernel.org/r/20251030020746.967174-3-yanquanmin1@huawei.com
Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1]
Link: https://lore.kernel.org/all/20251029013038.66625-1-sj@kernel.org/ [2]
Fixes: d809a7c64ba8 ("mm/damon/sysfs: implement refresh_ms file internal work")
Suggested-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Signed-off-by: Quanmin Yan <yanquanmin1(a)huawei.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: ze zuo <zuoze1(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/sysfs.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
--- a/mm/damon/sysfs.c~mm-damon-sysfs-change-next_update_jiffies-to-a-global-variable
+++ a/mm/damon/sysfs.c
@@ -1552,16 +1552,17 @@ static struct damon_ctx *damon_sysfs_bui
return ctx;
}
+static unsigned long damon_sysfs_next_update_jiffies;
+
static int damon_sysfs_repeat_call_fn(void *data)
{
struct damon_sysfs_kdamond *sysfs_kdamond = data;
- static unsigned long next_update_jiffies;
if (!sysfs_kdamond->refresh_ms)
return 0;
- if (time_before(jiffies, next_update_jiffies))
+ if (time_before(jiffies, damon_sysfs_next_update_jiffies))
return 0;
- next_update_jiffies = jiffies +
+ damon_sysfs_next_update_jiffies = jiffies +
msecs_to_jiffies(sysfs_kdamond->refresh_ms);
if (!mutex_trylock(&damon_sysfs_lock))
@@ -1607,6 +1608,9 @@ static int damon_sysfs_turn_damon_on(str
}
kdamond->damon_ctx = ctx;
+ damon_sysfs_next_update_jiffies =
+ jiffies + msecs_to_jiffies(kdamond->refresh_ms);
+
repeat_call_control->fn = damon_sysfs_repeat_call_fn;
repeat_call_control->data = kdamond;
repeat_call_control->repeat = true;
_
Patches currently in -mm which might be from yanquanmin1(a)huawei.com are
mm-damon-stat-change-last_refresh_jiffies-to-a-global-variable.patch
mm-damon-sysfs-change-next_update_jiffies-to-a-global-variable.patch
mm-damon-add-a-min_sz_region-parameter-to-damon_set_region_biggest_system_ram_default.patch
mm-damon-reclaim-use-min_sz_region-for-core-address-alignment-when-setting-regions.patch
The patch titled
Subject: mm/damon/stat: change last_refresh_jiffies to a global variable
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-stat-change-last_refresh_jiffies-to-a-global-variable.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Quanmin Yan <yanquanmin1(a)huawei.com>
Subject: mm/damon/stat: change last_refresh_jiffies to a global variable
Date: Thu, 30 Oct 2025 10:07:45 +0800
Patch series "mm/damon: fixes for the jiffies-related issues", v2.
On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this may cause the
time_before() series of functions to return unexpected values, resulting
in DAMON not functioning as intended. Meanwhile, similar issues exist in
some specific user operation scenarios.
This patchset addresses these issues. The first patch is about the
DAMON_STAT module, and the second patch is about the core layer's sysfs.
This patch (of 2):
In DAMON_STAT's damon_stat_damon_call_fn(), time_before_eq() is used to
avoid unnecessarily frequent stat update.
On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this causes time_before_eq()
in DAMON_STAT to unexpectedly return true during the first 5 minutes after
boot on 32-bit systems (see [1], which fixed an earlier jiffies-related
issue, for more explanation). As a result, DAMON_STAT does not update
any monitoring results during that period, which becomes more confusing
when DAMON_STAT_ENABLED_DEFAULT is enabled.
There is also an issue unrelated to the system's word size[2]: if the user
stops DAMON_STAT just after last_refresh_jiffies is updated and restarts
it after 5 seconds or a longer delay, last_refresh_jiffies will retain an
older value, causing time_before_eq() to return false and the update to
happen earlier than expected.
Fix these issues by making last_refresh_jiffies a global variable and
initializing it each time DAMON_STAT is started.
Link: https://lkml.kernel.org/r/20251030020746.967174-2-yanquanmin1@huawei.com
Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1]
Link: https://lore.kernel.org/all/20251028143250.50144-1-sj@kernel.org/ [2]
Fixes: fabdd1e911da ("mm/damon/stat: calculate and expose estimated memory bandwidth")
Signed-off-by: Quanmin Yan <yanquanmin1(a)huawei.com>
Suggested-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: ze zuo <zuoze1(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/stat.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
--- a/mm/damon/stat.c~mm-damon-stat-change-last_refresh_jiffies-to-a-global-variable
+++ a/mm/damon/stat.c
@@ -46,6 +46,8 @@ MODULE_PARM_DESC(aggr_interval_us,
static struct damon_ctx *damon_stat_context;
+static unsigned long damon_stat_last_refresh_jiffies;
+
static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
{
struct damon_target *t;
@@ -130,13 +132,12 @@ static void damon_stat_set_idletime_perc
static int damon_stat_damon_call_fn(void *data)
{
struct damon_ctx *c = data;
- static unsigned long last_refresh_jiffies;
/* avoid unnecessarily frequent stat update */
- if (time_before_eq(jiffies, last_refresh_jiffies +
+ if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
msecs_to_jiffies(5 * MSEC_PER_SEC)))
return 0;
- last_refresh_jiffies = jiffies;
+ damon_stat_last_refresh_jiffies = jiffies;
aggr_interval_us = c->attrs.aggr_interval;
damon_stat_set_estimated_memory_bandwidth(c);
@@ -210,6 +211,8 @@ static int damon_stat_start(void)
err = damon_start(&damon_stat_context, 1, true);
if (err)
return err;
+
+ damon_stat_last_refresh_jiffies = jiffies;
call_control.data = damon_stat_context;
return damon_call(damon_stat_context, &call_control);
}
_
Patches currently in -mm which might be from yanquanmin1(a)huawei.com are
mm-damon-stat-change-last_refresh_jiffies-to-a-global-variable.patch
mm-damon-sysfs-change-next_update_jiffies-to-a-global-variable.patch
mm-damon-add-a-min_sz_region-parameter-to-damon_set_region_biggest_system_ram_default.patch
mm-damon-reclaim-use-min_sz_region-for-core-address-alignment-when-setting-regions.patch
The quilt patch titled
Subject: s390: fix HugeTLB vmemmap optimization crash
has been removed from the -mm tree. Its filename was
s390-fix-hugetlb-vmemmap-optimization-crash.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Luiz Capitulino <luizcap(a)redhat.com>
Subject: s390: fix HugeTLB vmemmap optimization crash
Date: Tue, 28 Oct 2025 17:15:33 -0400
A reproducible crash occurs when enabling HugeTLB vmemmap optimization
(HVO) on s390. The crash and the proposed fix were worked on using an s390
KVM guest running on an older hypervisor, as I don't have access to an LPAR.
However, the same issue should occur on bare-metal.
Reproducer (it may take a few runs to trigger):
# sysctl vm.hugetlb_optimize_vmemmap=1
# echo 1 > /proc/sys/vm/nr_hugepages
# echo 0 > /proc/sys/vm/nr_hugepages
Crash log:
[ 52.340369] list_del corruption. prev->next should be 000000d382110008, but was 000000d7116d3880. (prev=000000d7116d3910)
[ 52.340420] ------------[ cut here ]------------
[ 52.340424] kernel BUG at lib/list_debug.c:62!
[ 52.340566] monitor event: 0040 ilc:2 [#1]SMP
[ 52.340573] Modules linked in: ctcm fsm qeth ccwgroup zfcp scsi_transport_fc qdio dasd_fba_mod dasd_eckd_mod dasd_mod xfs ghash_s390 prng des_s390 libdes sha3_512_s390 sha3_256_s390 virtio_net virtio_blk net_failover sha_common failover dm_mirror dm_region_hash dm_log dm_mod paes_s390 crypto_engine pkey_cca pkey_ep11 zcrypt pkey_pckmo pkey aes_s390
[ 52.340606] CPU: 1 UID: 0 PID: 1672 Comm: root-rep2 Kdump: loaded Not tainted 6.18.0-rc3 #1 NONE
[ 52.340610] Hardware name: IBM 3931 LA1 400 (KVM/Linux)
[ 52.340611] Krnl PSW : 0704c00180000000 0000015710cda7fe (__list_del_entry_valid_or_report+0xfe/0x128)
[ 52.340619] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
[ 52.340622] Krnl GPRS: c0000000ffffefff 0000000100000027 000000000000006d 0000000000000000
[ 52.340623] 000000d7116d35d8 000000d7116d35d0 0000000000000002 000000d7116d39b0
[ 52.340625] 000000d7116d3880 000000d7116d3910 000000d7116d3910 000000d382110008
[ 52.340626] 000003ffac1ccd08 000000d7116d39b0 0000015710cda7fa 000000d7116d37d0
[ 52.340632] Krnl Code: 0000015710cda7ee: c020003e496f larl %r2,00000157114a3acc
0000015710cda7f4: c0e5ffd5280e brasl %r14,000001571077f810
#0000015710cda7fa: af000000 mc 0,0
>0000015710cda7fe: b9040029 lgr %r2,%r9
0000015710cda802: c0e5ffe5e193 brasl %r14,0000015710996b28
0000015710cda808: e34090080004 lg %r4,8(%r9)
0000015710cda80e: b9040059 lgr %r5,%r9
0000015710cda812: b9040038 lgr %r3,%r8
[ 52.340643] Call Trace:
[ 52.340645] [<0000015710cda7fe>] __list_del_entry_valid_or_report+0xfe/0x128
[ 52.340649] ([<0000015710cda7fa>] __list_del_entry_valid_or_report+0xfa/0x128)
[ 52.340652] [<0000015710a30b2e>] hugetlb_vmemmap_restore_folios+0x96/0x138
[ 52.340655] [<0000015710a268ac>] update_and_free_pages_bulk+0x64/0x150
[ 52.340659] [<0000015710a26f8a>] set_max_huge_pages+0x4ca/0x6f0
[ 52.340662] [<0000015710a273ba>] hugetlb_sysctl_handler_common+0xea/0x120
[ 52.340665] [<0000015710a27484>] hugetlb_sysctl_handler+0x44/0x50
[ 52.340667] [<0000015710b53ffa>] proc_sys_call_handler+0x17a/0x280
[ 52.340672] [<0000015710a90968>] vfs_write+0x2c8/0x3a0
[ 52.340676] [<0000015710a90bd2>] ksys_write+0x72/0x100
[ 52.340679] [<00000157111483a8>] __do_syscall+0x150/0x318
[ 52.340682] [<0000015711153a5e>] system_call+0x6e/0x90
[ 52.340684] Last Breaking-Event-Address:
[ 52.340684] [<000001571077f85c>] _printk+0x4c/0x58
[ 52.340690] Kernel panic - not syncing: Fatal exception: panic_on_oops
This issue was introduced by commit f13b83fdd996 ("hugetlb: batch TLB
flushes when freeing vmemmap"). Before that change, the HVO
implementation called flush_tlb_kernel_range() each time a vmemmap PMD
split and remapping was performed. The mentioned commit changed this to
issue a few flush_tlb_all() calls after performing all remappings.
However, on s390, flush_tlb_kernel_range() expands to __tlb_flush_kernel()
while flush_tlb_all() is not implemented. As a result, we went from
flushing the TLB for every remapping to no flushing at all.
This commit fixes this by implementing flush_tlb_all() on s390 as an alias
to __tlb_flush_global(). This should cause a flush on all TLB entries on
all CPUs as expected by the flush_tlb_all() semantics.
Link: https://lkml.kernel.org/r/20251028211533.47694-1-luizcap@redhat.com
Fixes: f13b83fdd996 ("hugetlb: batch TLB flushes when freeing vmemmap")
Signed-off-by: Luiz Capitulino <luizcap(a)redhat.com>
Acked-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)kernel.org>
Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Joao Martins <joao.m.martins(a)oracle.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/s390/include/asm/tlbflush.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/arch/s390/include/asm/tlbflush.h~s390-fix-hugetlb-vmemmap-optimization-crash
+++ a/arch/s390/include/asm/tlbflush.h
@@ -103,9 +103,13 @@ static inline void __tlb_flush_mm_lazy(s
* flush_tlb_range functions need to do the flush.
*/
#define flush_tlb() do { } while (0)
-#define flush_tlb_all() do { } while (0)
#define flush_tlb_page(vma, addr) do { } while (0)
+static inline void flush_tlb_all(void)
+{
+ __tlb_flush_global();
+}
+
static inline void flush_tlb_mm(struct mm_struct *mm)
{
__tlb_flush_mm_lazy(mm);
_
Patches currently in -mm which might be from luizcap(a)redhat.com are
The patch titled
Subject: maple_tree: fix tracepoint string pointers
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
maple_tree-fix-tracepoint-string-pointers.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Martin Kaiser <martin(a)kaiser.cx>
Subject: maple_tree: fix tracepoint string pointers
Date: Thu, 30 Oct 2025 16:55:05 +0100
maple_tree tracepoints contain pointers to function names. Such a pointer
is saved when a tracepoint logs an event. There's no guarantee that it's
still valid when the event is parsed later and the pointer is dereferenced.
The kernel warns about these unsafe pointers.
event 'ma_read' has unsafe pointer field 'fn'
WARNING: kernel/trace/trace.c:3779 at ignore_event+0x1da/0x1e4
Mark the function names as tracepoint_string() to fix the events.
Link: https://lkml.kernel.org/r/20251030155537.87972-1-martin@kaiser.cx
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Martin Kaiser <martin(a)kaiser.cx>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/maple_tree.c | 30 ++++++++++++++++--------------
1 file changed, 16 insertions(+), 14 deletions(-)
--- a/lib/maple_tree.c~maple_tree-fix-tracepoint-string-pointers
+++ a/lib/maple_tree.c
@@ -64,6 +64,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>
+#define TP_FCT tracepoint_string(__func__)
+
/*
* Kernel pointer hashing renders much of the maple tree dump useless as tagged
* pointers get hashed to arbitrary values.
@@ -2756,7 +2758,7 @@ static inline void mas_rebalance(struct
MA_STATE(l_mas, mas->tree, mas->index, mas->last);
MA_STATE(r_mas, mas->tree, mas->index, mas->last);
- trace_ma_op(__func__, mas);
+ trace_ma_op(TP_FCT, mas);
/*
* Rebalancing occurs if a node is insufficient. Data is rebalanced
@@ -2997,7 +2999,7 @@ static void mas_split(struct ma_state *m
MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
- trace_ma_op(__func__, mas);
+ trace_ma_op(TP_FCT, mas);
mast.l = &l_mas;
mast.r = &r_mas;
@@ -3172,7 +3174,7 @@ static bool mas_is_span_wr(struct ma_wr_
return false;
}
- trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry);
+ trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry);
return true;
}
@@ -3416,7 +3418,7 @@ static noinline void mas_wr_spanning_sto
* of data may happen.
*/
mas = wr_mas->mas;
- trace_ma_op(__func__, mas);
+ trace_ma_op(TP_FCT, mas);
if (unlikely(!mas->index && mas->last == ULONG_MAX))
return mas_new_root(mas, wr_mas->entry);
@@ -3552,7 +3554,7 @@ done:
} else {
memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
}
- trace_ma_write(__func__, mas, 0, wr_mas->entry);
+ trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
mas_update_gap(mas);
mas->end = new_end;
return;
@@ -3596,7 +3598,7 @@ static inline void mas_wr_slot_store(str
mas->offset++; /* Keep mas accurate. */
}
- trace_ma_write(__func__, mas, 0, wr_mas->entry);
+ trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
/*
* Only update gap when the new entry is empty or there is an empty
* entry in the original two ranges.
@@ -3717,7 +3719,7 @@ static inline void mas_wr_append(struct
mas_update_gap(mas);
mas->end = new_end;
- trace_ma_write(__func__, mas, new_end, wr_mas->entry);
+ trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry);
return;
}
@@ -3731,7 +3733,7 @@ static void mas_wr_bnode(struct ma_wr_st
{
struct maple_big_node b_node;
- trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
+ trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry);
memset(&b_node, 0, sizeof(struct maple_big_node));
mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
mas_commit_b_node(wr_mas, &b_node);
@@ -5062,7 +5064,7 @@ void *mas_store(struct ma_state *mas, vo
{
MA_WR_STATE(wr_mas, mas, entry);
- trace_ma_write(__func__, mas, 0, entry);
+ trace_ma_write(TP_FCT, mas, 0, entry);
#ifdef CONFIG_DEBUG_MAPLE_TREE
if (MAS_WARN_ON(mas, mas->index > mas->last))
pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
@@ -5163,7 +5165,7 @@ void mas_store_prealloc(struct ma_state
}
store:
- trace_ma_write(__func__, mas, 0, entry);
+ trace_ma_write(TP_FCT, mas, 0, entry);
mas_wr_store_entry(&wr_mas);
MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
mas_destroy(mas);
@@ -5882,7 +5884,7 @@ void *mtree_load(struct maple_tree *mt,
MA_STATE(mas, mt, index, index);
void *entry;
- trace_ma_read(__func__, &mas);
+ trace_ma_read(TP_FCT, &mas);
rcu_read_lock();
retry:
entry = mas_start(&mas);
@@ -5925,7 +5927,7 @@ int mtree_store_range(struct maple_tree
MA_STATE(mas, mt, index, last);
int ret = 0;
- trace_ma_write(__func__, &mas, 0, entry);
+ trace_ma_write(TP_FCT, &mas, 0, entry);
if (WARN_ON_ONCE(xa_is_advanced(entry)))
return -EINVAL;
@@ -6148,7 +6150,7 @@ void *mtree_erase(struct maple_tree *mt,
void *entry = NULL;
MA_STATE(mas, mt, index, index);
- trace_ma_op(__func__, &mas);
+ trace_ma_op(TP_FCT, &mas);
mtree_lock(mt);
entry = mas_erase(&mas);
@@ -6485,7 +6487,7 @@ void *mt_find(struct maple_tree *mt, uns
unsigned long copy = *index;
#endif
- trace_ma_read(__func__, &mas);
+ trace_ma_read(TP_FCT, &mas);
if ((*index) > max)
return NULL;
_
Patches currently in -mm which might be from martin(a)kaiser.cx are
maple_tree-fix-tracepoint-string-pointers.patch
folio split clears PG_has_hwpoisoned, but the flag should be preserved in
after-split folios containing pages with PG_hwpoisoned flag if the folio is
split to >0 order folios. Scan all pages in a to-be-split folio to
determine which after-split folios need the flag.
An alternative is to change PG_has_hwpoisoned to PG_maybe_hwpoisoned to
avoid the scan and set it on all after-split folios, but the resulting false
positives have an undesirable negative impact. To remove the false positives,
callers of folio_test_has_hwpoisoned() and folio_contain_hwpoisoned_page()
would need to do the scan themselves. That might be a hassle for current and
future callers and more costly than doing the scan in the split code. More
details are discussed in [1].
This issue can be exposed via:
1. splitting a has_hwpoisoned folio to >0 order from debugfs interface;
2. truncating part of a has_hwpoisoned folio in
truncate_inode_partial_folio().
And later accesses to a hwpoisoned page could be possible due to the
missing has_hwpoisoned folio flag. This will lead to MCE errors.
Link: https://lore.kernel.org/all/CAHbLzkoOZm0PXxE9qwtF4gKR=cpRXrSrJ9V9Pm2DJexs98… [1]
Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
Cc: stable(a)vger.kernel.org
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
From V3[1]:
1. Separated from the original series;
2. Added Fixes tag and cc'd stable;
3. Simplified page_range_has_hwpoisoned();
4. Renamed check_poisoned_pages to handle_hwpoison, made it const, and
shorten the statement;
5. Removed poisoned_new_folio variable and checked the condition
directly.
[1] https://lore.kernel.org/all/20251022033531.389351-2-ziy@nvidia.com/
mm/huge_memory.c | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fc65ec3393d2..5215bb6aecfc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3455,6 +3455,14 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
caller_pins;
}
+static bool page_range_has_hwpoisoned(struct page *page, long nr_pages)
+{
+ for (; nr_pages; page++, nr_pages--)
+ if (PageHWPoison(page))
+ return true;
+ return false;
+}
+
/*
* It splits @folio into @new_order folios and copies the @folio metadata to
* all the resulting folios.
@@ -3462,17 +3470,24 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
static void __split_folio_to_order(struct folio *folio, int old_order,
int new_order)
{
+ /* Scan poisoned pages when split a poisoned folio to large folios */
+ const bool handle_hwpoison = folio_test_has_hwpoisoned(folio) && new_order;
long new_nr_pages = 1 << new_order;
long nr_pages = 1 << old_order;
long i;
+ folio_clear_has_hwpoisoned(folio);
+
+ /* Check first new_nr_pages since the loop below skips them */
+ if (handle_hwpoison &&
+ page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages))
+ folio_set_has_hwpoisoned(folio);
/*
* Skip the first new_nr_pages, since the new folio from them have all
* the flags from the original folio.
*/
for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) {
struct page *new_head = &folio->page + i;
-
/*
* Careful: new_folio is not a "real" folio before we cleared PageTail.
* Don't pass it around before clear_compound_head().
@@ -3514,6 +3529,10 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
(1L << PG_dirty) |
LRU_GEN_MASK | LRU_REFS_MASK));
+ if (handle_hwpoison &&
+ page_range_has_hwpoisoned(new_head, new_nr_pages))
+ folio_set_has_hwpoisoned(new_folio);
+
new_folio->mapping = folio->mapping;
new_folio->index = folio->index + i;
@@ -3600,8 +3619,6 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
int start_order = uniform_split ? new_order : old_order - 1;
int split_order;
- folio_clear_has_hwpoisoned(folio);
-
/*
* split to new_order one order at a time. For uniform split,
* folio is split to new_order directly.
--
2.51.0
A kernel memory leak was identified by the 'ioctl_sg01' test from Linux
Test Project (LTP). The following bytes were mainly observed: 0x53425355.
When USB storage devices incorrectly skip the data phase with status data,
the code extracts/validates the CSW from the sg buffer, but fails to clear
it afterwards. This leaves status protocol data in srb's transfer buffer,
such as the US_BULK_CS_SIGN 'USBS' signature observed here. Thus, this
leads to USB protocol leaks to user space through SCSI generic (/dev/sg*)
interfaces, such as the one seen here when the LTP test requested 512 KiB.
Fix the leak by zeroing the CSW data in srb's transfer buffer immediately
after the validation of devices that skip data phase.
Note: Unlike CVE-2018-1000204, which fixed a big leak by zeroing pages at
allocation time, this leak occurs after allocation, when USB protocol data
is written to already-allocated sg pages.
Fixes: a45b599ad808 ("scsi: sg: allocate with __GFP_ZERO in sg_build_indirect()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Desnes Nunes <desnesn(a)redhat.com>
---
drivers/usb/storage/transport.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index 1aa1bd26c81f..8e9f6459e197 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -1200,7 +1200,17 @@ int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us)
US_BULK_CS_WRAP_LEN &&
bcs->Signature ==
cpu_to_le32(US_BULK_CS_SIGN)) {
+ unsigned char buf[US_BULK_CS_WRAP_LEN];
+
+ sg = NULL;
+ offset = 0;
+ memset(buf, 0, US_BULK_CS_WRAP_LEN);
usb_stor_dbg(us, "Device skipped data phase\n");
+
+ if (usb_stor_access_xfer_buf(buf, US_BULK_CS_WRAP_LEN, srb,
+ &sg, &offset, TO_XFER_BUF) != US_BULK_CS_WRAP_LEN)
+ usb_stor_dbg(us, "Failed to clear CSW data\n");
+
scsi_set_resid(srb, transfer_length);
goto skipped_data_phase;
}
--
2.51.0
From: Al Viro <viro(a)zeniv.linux.org.uk>
commit c28f922c9dcee0e4876a2c095939d77fe7e15116 upstream.
What we want is to verify that clone won't expose something
hidden by a mount we wouldn't be able to undo. "Wouldn't be able to undo"
may be a result of MNT_LOCKED on a child, but it may also come from
lacking admin rights in the userns of the namespace the mount belongs to.
clone_private_mnt() checks the former, but not the latter.
There are a number of rather confusing CAP_SYS_ADMIN checks in various
userns during the mount, especially with the new mount API; they serve
different purposes and, in the case of clone_private_mnt(), they usually,
but not always, end up covering the missing check mentioned above.
Reviewed-by: Christian Brauner <brauner(a)kernel.org>
Reported-by: "Orlando, Noah" <Noah.Orlando(a)deshaw.com>
Fixes: 427215d85e8d ("ovl: prevent private clone if bind mount is not allowed")
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
[ merge conflict resolution: clone_private_mount() was reworked in
db04662e2f4f ("fs: allow detached mounts in clone_private_mount()").
Tweak the relevant ns_capable check so that it works on older kernels ]
Signed-off-by: Noah Orlando <Noah.Orlando(a)deshaw.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
(cherry picked from commit 36fecd740de2d542d2091d65d36554ee2bcf9c65)
[ kovalev: bp to fix CVE-2025-38499 ]
Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org>
---
fs/namespace.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fs/namespace.c b/fs/namespace.c
index e9b8d516f191..b6d9250e1b38 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1965,6 +1965,11 @@ struct vfsmount *clone_private_mount(const struct path *path)
if (!check_mnt(old_mnt))
goto invalid;
+ if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) {
+ up_read(&namespace_sem);
+ return ERR_PTR(-EPERM);
+ }
+
if (has_locked_children(old_mnt, path->dentry))
goto invalid;
--
2.50.1
From: Moon Hee Lee <moonhee.lee.ca(a)gmail.com>
commit 16ecdab5446f15a61ec88eb0d23d25d009821db0 upstream.
syzbot triggered a WARN in ieee80211_tdls_oper() by sending
NL80211_TDLS_ENABLE_LINK immediately after NL80211_CMD_CONNECT,
before association completed and without prior TDLS setup.
This left internal state like sdata->u.mgd.tdls_peer uninitialized,
leading to a WARN_ON() in code paths that assumed it was valid.
Reject the operation early if not in station mode or not associated.
Reported-by: syzbot+f73f203f8c9b19037380(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=f73f203f8c9b19037380
Fixes: 81dd2b882241 ("mac80211: move TDLS data to mgd private part")
Tested-by: syzbot+f73f203f8c9b19037380(a)syzkaller.appspotmail.com
Signed-off-by: Moon Hee Lee <moonhee.lee.ca(a)gmail.com>
Link: https://patch.msgid.link/20250715230904.661092-2-moonhee.lee.ca@gmail.com
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
[ kovalev: bp to fix CVE-2025-38644; adapted check from !sdata->vif.cfg.assoc
to !sdata->vif.bss_conf.assoc due to the older kernel not having the mac80211
configuration refactoring (see upstream commit f276e20b182d) ]
Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org>
---
net/mac80211/tdls.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 137be9ec94af..69a67fcf1112 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1349,7 +1349,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
return -ENOTSUPP;
- if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ if (sdata->vif.type != NL80211_IFTYPE_STATION || !sdata->vif.bss_conf.assoc)
return -EINVAL;
switch (oper) {
--
2.50.1
From: Ido Schimmel <idosch(a)nvidia.com>
commit 1f5d2fd1ca04a23c18b1bde9a43ce2fa2ffa1bce upstream.
When the "proxy" option is enabled on a VXLAN device, the device will
suppress ARP requests and IPv6 Neighbor Solicitation messages if it is
able to reply on behalf of the remote host. That is, if a matching and
valid neighbor entry is configured on the VXLAN device whose MAC address
is not behind the "any" remote (0.0.0.0 / ::).
The code currently assumes that the FDB entry for the neighbor's MAC
address points to a valid remote destination, but this is incorrect if
the entry is associated with an FDB nexthop group. This can result in an
NPD [1][3], which can be reproduced using [2][4].
Fix by checking that the remote destination exists before dereferencing
it.
[1]
BUG: kernel NULL pointer dereference, address: 0000000000000000
[...]
CPU: 4 UID: 0 PID: 365 Comm: arping Not tainted 6.17.0-rc2-virtme-g2a89cb21162c #2 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-4.fc41 04/01/2014
RIP: 0010:vxlan_xmit+0xb58/0x15f0
[...]
Call Trace:
<TASK>
dev_hard_start_xmit+0x5d/0x1c0
__dev_queue_xmit+0x246/0xfd0
packet_sendmsg+0x113a/0x1850
__sock_sendmsg+0x38/0x70
__sys_sendto+0x126/0x180
__x64_sys_sendto+0x24/0x30
do_syscall_64+0xa4/0x260
entry_SYSCALL_64_after_hwframe+0x4b/0x53
[2]
#!/bin/bash
ip address add 192.0.2.1/32 dev lo
ip nexthop add id 1 via 192.0.2.2 fdb
ip nexthop add id 10 group 1 fdb
ip link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 4789 proxy
ip neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm dev vx0
bridge fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10
arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3
[3]
BUG: kernel NULL pointer dereference, address: 0000000000000000
[...]
CPU: 13 UID: 0 PID: 372 Comm: ndisc6 Not tainted 6.17.0-rc2-virtmne-g6ee90cb26014 #3 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1v996), BIOS 1.17.0-4.fc41 04/01/2x014
RIP: 0010:vxlan_xmit+0x803/0x1600
[...]
Call Trace:
<TASK>
dev_hard_start_xmit+0x5d/0x1c0
__dev_queue_xmit+0x246/0xfd0
ip6_finish_output2+0x210/0x6c0
ip6_finish_output+0x1af/0x2b0
ip6_mr_output+0x92/0x3e0
ip6_send_skb+0x30/0x90
rawv6_sendmsg+0xe6e/0x12e0
__sock_sendmsg+0x38/0x70
__sys_sendto+0x126/0x180
__x64_sys_sendto+0x24/0x30
do_syscall_64+0xa4/0x260
entry_SYSCALL_64_after_hwframe+0x4b/0x53
RIP: 0033:0x7f383422ec77
[4]
#!/bin/bash
ip address add 2001:db8:1::1/128 dev lo
ip nexthop add id 1 via 2001:db8:1::1 fdb
ip nexthop add id 10 group 1 fdb
ip link add name vx0 up type vxlan id 10010 local 2001:db8:1::1 dstport 4789 proxy
ip neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud perm dev vx0
bridge fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10
ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0
Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries")
Reviewed-by: Petr Machata <petrm(a)nvidia.com>
Signed-off-by: Ido Schimmel <idosch(a)nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor(a)blackwall.org>
Link: https://patch.msgid.link/20250901065035.159644-3-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[ kovalev: bp to fix CVE-2025-39850 ]
Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org>
---
drivers/net/vxlan/vxlan_core.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 1b6b6acd3489..57606891e413 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1883,6 +1883,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
n = neigh_lookup(&arp_tbl, &tip, dev);
if (n) {
+ struct vxlan_rdst *rdst = NULL;
struct vxlan_fdb *f;
struct sk_buff *reply;
@@ -1892,7 +1893,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
}
f = vxlan_find_mac(vxlan, n->ha, vni);
- if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
+ if (f)
+ rdst = first_remote_rcu(f);
+ if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
/* bridge-local neighbor */
neigh_release(n);
goto out;
@@ -2047,6 +2050,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
if (n) {
+ struct vxlan_rdst *rdst = NULL;
struct vxlan_fdb *f;
struct sk_buff *reply;
@@ -2056,7 +2060,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
}
f = vxlan_find_mac(vxlan, n->ha, vni);
- if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
+ if (f)
+ rdst = first_remote_rcu(f);
+ if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
/* bridge-local neighbor */
neigh_release(n);
goto out;
--
2.50.1
A malicious user could pass an arbitrarily large byte count
to memdup_user_nul(), potentially causing a kernel crash.
This follows the same pattern as commit ee76746387f6
("netdevsim: prevent bad user input in nsim_dev_health_break_write()")
and commit 7ef4c19d245f
("smackfs: restrict bytes count in smackfs write functions")
Found via static analysis and code review.
Fixes: d0e6a8064c42 ("bna: use memdup_user to copy userspace buffers")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/net/ethernet/brocade/bna/bnad_debugfs.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index 8f0972e6737c..ad33ab1d266d 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -311,6 +311,9 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf,
unsigned long flags;
void *kern_buf;
+ if (nbytes == 0 || nbytes > PAGE_SIZE)
+ return -EINVAL;
+
/* Copy the user space buf */
kern_buf = memdup_user_nul(buf, nbytes);
if (IS_ERR(kern_buf))
--
2.39.5 (Apple Git-154)
These patches backport the following upstream commits fixing CVEs to the Linux 6.12.y stable tree.
CVE-2025-21833 -> 60f030f7418d ("iommu/vt-d: Avoid use of NULL after WARN_ON_ONCE")
CVE-2025-37803 -> 021ba7f1babd ("udmabuf: fix a buf size overflow issue during udmabuf creation")
CVE-2024-57995 -> 5a10971c7645 ("wifi: ath12k: fix read pointer after free in ath12k_mac_assign_vif_to_vdev()")
CVE-2025-37860 -> 8241ecec1cdc6 ("sfc: fix NULL dereferences in ef100_process_design_param()")
The following upstream commit applies cleanly to v6.12.y, please pick it up.
CVE-2024-58097 -> 16c6c35c03ea ("wifi: ath11k: fix RCU stall while reaping monitor destination ring")
The of_get_parent() function increments the reference count of the
returned parent node, and of_node_put() is required to
release the reference when it is no longer needed.
The uli_init() function has a device_node reference leak.
The issue occurs in two scenarios:
1. When the function finds a matching device, it breaks out of the loop
and the reference held by 'node' is not released.
2. When the loop terminates normally (of_get_parent returns NULL),
the final parent node reference is not released.
Fix this by adding of_node_put(node).
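For illustration, here is a minimal, hypothetical sketch of the pattern (not the actual uli_init() body): every of_get_parent() hands back a counted reference, so whichever way the loop ends, the reference still held in 'node' must be dropped, and since of_node_put(NULL) is a no-op a single call after the loop covers both exits.
#include <linux/of.h>
/* Hypothetical example only: climb the parent chain looking for a match. */
static void scan_parents_example(struct device_node *start)
{
	struct device_node *node = of_node_get(start);
	struct device_node *parent;
	while (node) {
		if (of_device_is_compatible(node, "example,uli1575"))
			break;			/* 'node' still holds a reference */
		parent = of_get_parent(node);	/* counted reference to the parent */
		of_node_put(node);		/* drop the child's reference */
		node = parent;			/* NULL once past the root */
	}
	/* Needed for the break path; harmless on the NULL exit because
	 * of_node_put(NULL) is a no-op. */
	of_node_put(node);
}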
Fixes: 91a6f347921e ("powerpc/mpc85xx_ds: convert to unified PCI init")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
arch/powerpc/platforms/fsl_uli1575.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index b8d37a9932f1..36624c88d5f3 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -376,4 +376,5 @@ void __init uli_init(void)
break;
}
}
+ of_node_put(node);
}
--
2.39.5 (Apple Git-154)
From: Donet Tom <donettom(a)linux.ibm.com>
On systems using the hash MMU, there is a software SLB preload cache that
mirrors the entries loaded into the hardware SLB buffer. This preload
cache is subject to periodic eviction — typically after every 256 context
switches — to remove old entries.
To optimize performance, the kernel skips switch_mmu_context() in
switch_mm_irqs_off() when the prev and next mm_struct are the same.
However, on hash MMU systems, this can lead to inconsistencies between
the hardware SLB and the software preload cache.
If an SLB entry for a process is evicted from the software cache on one
CPU, and the same process later runs on another CPU without executing
switch_mmu_context(), the hardware SLB may retain stale entries. If the
kernel then attempts to reload that entry, it can trigger an SLB
multi-hit error.
The following timeline shows how stale SLB entries are created and can
cause a multi-hit error when a process moves between CPUs without an
MMU context switch.
CPU 0 CPU 1
----- -----
Process P
exec swapper/1
load_elf_binary
begin_new_exc
activate_mm
switch_mm_irqs_off
switch_mmu_context
switch_slb
/*
* This invalidates all
* the entries in the HW
* and setup the new HW
* SLB entries as per the
* preload cache.
*/
context_switch
sched_migrate_task migrates process P to cpu-1
Process swapper/0 context switch (to process P)
(uses mm_struct of Process P) switch_mm_irqs_off()
switch_slb
load_slb++
/*
* load_slb becomes 0 here
* and we evict an entry from
* the preload cache with
* preload_age(). We still
* keep HW SLB and preload
* cache in sync, that is
* because all HW SLB entries
* anyways gets evicted in
* switch_slb during SLBIA.
* We then only add those
* entries back in HW SLB,
* which are currently
* present in preload_cache
* (after eviction).
*/
load_elf_binary continues...
setup_new_exec()
slb_setup_new_exec()
sched_switch event
sched_migrate_task migrates
process P to cpu-0
context_switch from swapper/0 to Process P
switch_mm_irqs_off()
/*
* Since both prev and next mm struct are same we don't call
* switch_mmu_context(). This will cause the HW SLB and SW preload
* cache to go out of sync in preload_new_slb_context. Because there
* was an SLB entry which was evicted from both HW and preload cache
* on cpu-1. Now later in preload_new_slb_context(), when we will try
* to add the same preload entry again, we will add this to the SW
* preload cache and then will add it to the HW SLB. Since on cpu-0
* this entry was never invalidated, hence adding this entry to the HW
* SLB will cause a SLB multi-hit error.
*/
load_elf_binary continues...
START_THREAD
start_thread
preload_new_slb_context
/*
* This tries to add a new EA to preload cache which was earlier
* evicted from both cpu-1 HW SLB and preload cache. This caused the
* HW SLB of cpu-0 to go out of sync with the SW preload cache. The
* reason for this was, that when we context switched back on CPU-0,
* we should have ideally called switch_mmu_context() which will
* bring the HW SLB entries on CPU-0 in sync with SW preload cache
* entries by setting up the mmu context properly. But we didn't do
* that since the prev mm_struct running on cpu-0 was same as the
* next mm_struct (which is true for swapper / kernel threads). So
* now when we try to add this new entry into the HW SLB of cpu-0,
* we hit a SLB multi-hit error.
*/
WARNING: CPU: 0 PID: 1810970 at arch/powerpc/mm/book3s64/slb.c:62
assert_slb_presence+0x2c/0x50
Modules linked in:
CPU: 0 UID: 0 PID: 1810970 Comm: dd Not tainted 6.16.0-rc3-dirty #12
VOLUNTARY
Hardware name: IBM pSeries (emulated by qemu) POWER8 (architected)
0x4d0200 0xf000004 of:SLOF,HEAD hv:linux,kvm pSeries
NIP: c00000000015426c LR: c0000000001543b4 CTR: 0000000000000000
REGS: c0000000497c77e0 TRAP: 0700 Not tainted (6.16.0-rc3-dirty)
MSR: 8000000002823033 <SF,VEC,VSX,FP,ME,IR,DR,RI,LE> CR: 28888482 XER: 00000000
CFAR: c0000000001543b0 IRQMASK: 3
<...>
NIP [c00000000015426c] assert_slb_presence+0x2c/0x50
LR [c0000000001543b4] slb_insert_entry+0x124/0x390
Call Trace:
0x7fffceb5ffff (unreliable)
preload_new_slb_context+0x100/0x1a0
start_thread+0x26c/0x420
load_elf_binary+0x1b04/0x1c40
bprm_execve+0x358/0x680
do_execveat_common+0x1f8/0x240
sys_execve+0x58/0x70
system_call_exception+0x114/0x300
system_call_common+0x160/0x2c4
From the above analysis, during early exec the hardware SLB is cleared,
and entries from the software preload cache are reloaded into hardware
by switch_slb. However, preload_new_slb_context and slb_setup_new_exec
also attempt to load some of the same entries, which can trigger a
multi-hit. In most cases, these additional preloads simply hit existing
entries and add nothing new. Removing these functions avoids redundant
preloads and eliminates the multi-hit issue. This patch removes these
two functions.
We tested process switching performance using the context_switch
benchmark on POWER9/hash, and observed no regression.
Without this patch: 129041 ops/sec
With this patch: 129341 ops/sec
We also measured SLB faults during boot, and the counts are essentially
the same with and without this patch.
SLB faults without this patch: 19727
SLB faults with this patch: 19786
Cc: Madhavan Srinivasan <maddy(a)linux.ibm.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Paul Mackerras <paulus(a)ozlabs.org>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)kernel.org>
Cc: Donet Tom <donettom(a)linux.ibm.com>
Cc: <linuxppc-dev(a)lists.ozlabs.org>
Fixes: 5434ae74629a ("powerpc/64s/hash: Add a SLB preload cache")
cc: <stable(a)vger.kernel.org>
Suggested-by: Nicholas Piggin <npiggin(a)gmail.com>
Signed-off-by: Donet Tom <donettom(a)linux.ibm.com>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list(a)gmail.com>
---
arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 -
arch/powerpc/kernel/process.c | 5 --
arch/powerpc/mm/book3s64/internal.h | 2 -
arch/powerpc/mm/book3s64/mmu_context.c | 2 -
arch/powerpc/mm/book3s64/slb.c | 88 -------------------
5 files changed, 98 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 346351423207..af12e2ba8eb8 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -524,7 +524,6 @@ void slb_save_contents(struct slb_entry *slb_ptr);
void slb_dump_contents(struct slb_entry *slb_ptr);
extern void slb_vmalloc_update(void);
-void preload_new_slb_context(unsigned long start, unsigned long sp);
#ifdef CONFIG_PPC_64S_HASH_MMU
void slb_set_size(u16 size);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index eb23966ac0a9..a45fe147868b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1897,8 +1897,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
return 0;
}
-void preload_new_slb_context(unsigned long start, unsigned long sp);
-
/*
* Set up a thread for executing a new program
*/
@@ -1906,9 +1904,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
{
#ifdef CONFIG_PPC64
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
-
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
- preload_new_slb_context(start, sp);
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
index a57a25f06a21..c26a6f0c90fc 100644
--- a/arch/powerpc/mm/book3s64/internal.h
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -24,8 +24,6 @@ static inline bool stress_hpt(void)
void hpt_do_stress(unsigned long ea, unsigned long hpte_group);
-void slb_setup_new_exec(void);
-
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
#endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 4e1e45420bd4..fb9dcf9ca599 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -150,8 +150,6 @@ static int hash__init_new_context(struct mm_struct *mm)
void hash__setup_new_exec(void)
{
slice_setup_new_exec();
-
- slb_setup_new_exec();
}
#else
static inline int hash__init_new_context(struct mm_struct *mm)
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 6b783552403c..7e053c561a09 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -328,94 +328,6 @@ static void preload_age(struct thread_info *ti)
ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
}
-void slb_setup_new_exec(void)
-{
- struct thread_info *ti = current_thread_info();
- struct mm_struct *mm = current->mm;
- unsigned long exec = 0x10000000;
-
- WARN_ON(irqs_disabled());
-
- /*
- * preload cache can only be used to determine whether a SLB
- * entry exists if it does not start to overflow.
- */
- if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
- return;
-
- hard_irq_disable();
-
- /*
- * We have no good place to clear the slb preload cache on exec,
- * flush_thread is about the earliest arch hook but that happens
- * after we switch to the mm and have already preloaded the SLBEs.
- *
- * For the most part that's probably okay to use entries from the
- * previous exec, they will age out if unused. It may turn out to
- * be an advantage to clear the cache before switching to it,
- * however.
- */
-
- /*
- * preload some userspace segments into the SLB.
- * Almost all 32 and 64bit PowerPC executables are linked at
- * 0x10000000 so it makes sense to preload this segment.
- */
- if (!is_kernel_addr(exec)) {
- if (preload_add(ti, exec))
- slb_allocate_user(mm, exec);
- }
-
- /* Libraries and mmaps. */
- if (!is_kernel_addr(mm->mmap_base)) {
- if (preload_add(ti, mm->mmap_base))
- slb_allocate_user(mm, mm->mmap_base);
- }
-
- /* see switch_slb */
- asm volatile("isync" : : : "memory");
-
- local_irq_enable();
-}
-
-void preload_new_slb_context(unsigned long start, unsigned long sp)
-{
- struct thread_info *ti = current_thread_info();
- struct mm_struct *mm = current->mm;
- unsigned long heap = mm->start_brk;
-
- WARN_ON(irqs_disabled());
-
- /* see above */
- if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
- return;
-
- hard_irq_disable();
-
- /* Userspace entry address. */
- if (!is_kernel_addr(start)) {
- if (preload_add(ti, start))
- slb_allocate_user(mm, start);
- }
-
- /* Top of stack, grows down. */
- if (!is_kernel_addr(sp)) {
- if (preload_add(ti, sp))
- slb_allocate_user(mm, sp);
- }
-
- /* Bottom of heap, grows up. */
- if (heap && !is_kernel_addr(heap)) {
- if (preload_add(ti, heap))
- slb_allocate_user(mm, heap);
- }
-
- /* see switch_slb */
- asm volatile("isync" : : : "memory");
-
- local_irq_enable();
-}
-
static void slb_cache_slbie_kernel(unsigned int index)
{
unsigned long slbie_data = get_paca()->slb_cache[index];
--
2.51.0
Since commit 4959aebba8c0 ("virtio-net: use mtu size as buffer length
for big packets"), when guest gso is off, the allocated size for big
packets is not MAX_SKB_FRAGS * PAGE_SIZE anymore but depends on
negotiated MTU. The number of allocated frags for big packets is stored
in vi->big_packets_num_skbfrags.
Because the host announced buffer length can be malicious (e.g. the host
vhost_net driver's get_rx_bufs is modified to announce incorrect
length), we need a check in the virtio_net receive path. Currently, the
check is not adapted to the new change, which can lead to a NULL page
pointer dereference in the while loop below when the received length is
larger than the allocated one.
This commit fixes the received length check corresponding to the new
change.
Fixes: 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bui Quang Minh <minhquangbui99(a)gmail.com>
---
Changes in v7:
- Fix typos
- Link to v6: https://lore.kernel.org/netdev/20251028143116.4532-1-minhquangbui99@gmail.c…
Changes in v6:
- Fix the length check
- Link to v5: https://lore.kernel.org/netdev/20251024150649.22906-1-minhquangbui99@gmail.…
Changes in v5:
- Move the length check to receive_big
- Link to v4: https://lore.kernel.org/netdev/20251022160623.51191-1-minhquangbui99@gmail.…
Changes in v4:
- Remove unrelated changes, add more comments
- Link to v3: https://lore.kernel.org/netdev/20251021154534.53045-1-minhquangbui99@gmail.…
Changes in v3:
- Convert BUG_ON to WARN_ON_ONCE
- Link to v2: https://lore.kernel.org/netdev/20250708144206.95091-1-minhquangbui99@gmail.…
Changes in v2:
- Remove incorrect give_pages call
- Link to v1: https://lore.kernel.org/netdev/20250706141150.25344-1-minhquangbui99@gmail.…
---
drivers/net/virtio_net.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a757cbcab87f..421b9aa190a0 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -910,17 +910,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
goto ok;
}
- /*
- * Verify that we can indeed put this data into a skb.
- * This is here to handle cases when the device erroneously
- * tries to receive more than is possible. This is usually
- * the case of a broken device.
- */
- if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
- net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
- dev_kfree_skb(skb);
- return NULL;
- }
BUG_ON(offset >= PAGE_SIZE);
while (len) {
unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
@@ -2107,9 +2096,19 @@ static struct sk_buff *receive_big(struct net_device *dev,
struct virtnet_rq_stats *stats)
{
struct page *page = buf;
- struct sk_buff *skb =
- page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
+ struct sk_buff *skb;
+
+ /* Make sure that len does not exceed the size allocated in
+ * add_recvbuf_big.
+ */
+ if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) {
+ pr_debug("%s: rx error: len %u exceeds allocated size %lu\n",
+ dev->name, len,
+ (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE);
+ goto err;
+ }
+ skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
u64_stats_add(&stats->bytes, len - vi->hdr_len);
if (unlikely(!skb))
goto err;
--
2.43.0
According to documentation, the DP PHY on x1e80100 has another clock
called ref.
The current X Elite devices supported upstream work fine without this
clock, because the boot firmware leaves this clock enabled. But we should
not rely on that. Also, when it comes to power management, this clock
needs to be disabled on suspend as well. So even though this change breaks
the ABI, it is needed in order to make sure we disable this clock on runtime
PM, when that is going to be enabled in the driver.
So rework the driver to allow a different number of clocks, fix the
dt-bindings schema and add the clock to the DT node as well.
Signed-off-by: Abel Vesa <abel.vesa(a)linaro.org>
---
Changes in v5:
- Picked-up Bjorn's R-b tags.
- Replaced "parse" with "get" on clocks acquiring failure.
- Link to v4: https://lore.kernel.org/r/20251029-phy-qcom-edp-add-missing-refclk-v4-0-adb…
Changes in v4:
- Picked Dmitry's R-b tag for the driver patch
- Added x1e80100 substring to subject of dts patch
- Link to v3 (resend): https://lore.kernel.org/r/20251014-phy-qcom-edp-add-missing-refclk-v3-0-078…
Changes in v3 (resend)
- picked-up Krzysztof's R-b tag for bindings patch
- Link to v3: https://lore.kernel.org/r/20250909-phy-qcom-edp-add-missing-refclk-v3-0-4ec…
Changes in v3:
- Use dev_err_probe() on clocks parsing failure.
- Explain why the ABI break is necessary.
- Drop the extra 'clk' suffix from the clock name. So ref instead of
refclk.
- Link to v2: https://lore.kernel.org/r/20250903-phy-qcom-edp-add-missing-refclk-v2-0-d88…
Changes in v2:
- Fix schema by adding the minItems, as suggested by Krzysztof.
- Use devm_clk_bulk_get_all, as suggested by Konrad.
- Rephrase the commit messages to reflect the flexible number of clocks.
- Link to v1: https://lore.kernel.org/r/20250730-phy-qcom-edp-add-missing-refclk-v1-0-6f7…
---
Abel Vesa (3):
dt-bindings: phy: qcom-edp: Add missing clock for X Elite
phy: qcom: edp: Make the number of clocks flexible
arm64: dts: qcom: x1e80100: Add missing TCSR ref clock to the DP PHYs
.../devicetree/bindings/phy/qcom,edp-phy.yaml | 28 +++++++++++++++++++++-
arch/arm64/boot/dts/qcom/hamoa.dtsi | 12 ++++++----
drivers/phy/qualcomm/phy-qcom-edp.c | 16 ++++++-------
3 files changed, 43 insertions(+), 13 deletions(-)
---
base-commit: 131f3d9446a6075192cdd91f197989d98302faa6
change-id: 20250730-phy-qcom-edp-add-missing-refclk-5ab82828f8e7
Best regards,
--
Abel Vesa <abel.vesa(a)linaro.org>
According to documentation, the DP PHY on x1e80100 has another clock
called ref.
The current X Elite devices supported upstream work fine without this
clock, because the boot firmware leaves this clock enabled. But we should
not rely on that. Also, when it comes to power management, this clock
needs to be disabled on suspend as well. So even though this change breaks
the ABI, it is needed in order to make sure we disable this clock on runtime
PM, when that is going to be enabled in the driver.
So rework the driver to allow a different number of clocks, fix the
dt-bindings schema and add the clock to the DT node as well.
Signed-off-by: Abel Vesa <abel.vesa(a)linaro.org>
---
Changes in v4:
- Picked Dmitry's R-b tag for the driver patch
- Added x1e80100 substring to subject of dts patch
- Link to v3 (resend): https://lore.kernel.org/r/20251014-phy-qcom-edp-add-missing-refclk-v3-0-078…
Changes in v3 (resend)
- picked-up Krzysztof's R-b tag for bindings patch
- Link to v3: https://lore.kernel.org/r/20250909-phy-qcom-edp-add-missing-refclk-v3-0-4ec…
Changes in v3:
- Use dev_err_probe() on clocks parsing failure.
- Explain why the ABI break is necessary.
- Drop the extra 'clk' suffix from the clock name. So ref instead of
refclk.
- Link to v2: https://lore.kernel.org/r/20250903-phy-qcom-edp-add-missing-refclk-v2-0-d88…
Changes in v2:
- Fix schema by adding the minItems, as suggested by Krzysztof.
- Use devm_clk_bulk_get_all, as suggested by Konrad.
- Rephrase the commit messages to reflect the flexible number of clocks.
- Link to v1: https://lore.kernel.org/r/20250730-phy-qcom-edp-add-missing-refclk-v1-0-6f7…
---
Abel Vesa (3):
dt-bindings: phy: qcom-edp: Add missing clock for X Elite
phy: qcom: edp: Make the number of clocks flexible
arm64: dts: qcom: x1e80100: Add missing TCSR ref clock to the DP PHYs
.../devicetree/bindings/phy/qcom,edp-phy.yaml | 28 +++++++++++++++++++++-
arch/arm64/boot/dts/qcom/hamoa.dtsi | 12 ++++++----
drivers/phy/qualcomm/phy-qcom-edp.c | 16 ++++++-------
3 files changed, 43 insertions(+), 13 deletions(-)
---
base-commit: f9ba12abc5282bf992f9a9ae87ad814fd03a0270
change-id: 20250730-phy-qcom-edp-add-missing-refclk-5ab82828f8e7
Best regards,
--
Abel Vesa <abel.vesa(a)linaro.org>
The RFCOMM driver confuses the local and remote modem control signals,
which specifically means that the reported DTR and RTS state will
instead reflect the remote end (i.e. DSR and CTS).
This issue dates back to the original driver (and a follow-on update)
merged in 2002, which resulted in a non-standard implementation of
TIOCMSET that allowed controlling also the TS07.10 IC and DV signals by
mapping them to the RI and DCD input flags, while TIOCMGET failed to
return the actual state of DTR and RTS.
Note that the bogus control of input signals in tiocmset() is just
dead code as those flags will have been masked out by the tty layer
since 2003.
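To make the corrected mapping easier to follow, here is a small sketch (derived from the diff below, not a verbatim copy of it) of how the peer's V.24 status bits translate into the local TIOCM input bits:
/* Sketch only: the remote side's V.24 outputs show up locally as inputs. */
static unsigned int v24_to_tiocm_example(u8 v24_sig)
{
	unsigned int status = 0;
	if (v24_sig & RFCOMM_V24_RTC)	/* peer ready to communicate */
		status |= TIOCM_DSR;
	if (v24_sig & RFCOMM_V24_RTR)	/* peer ready to receive */
		status |= TIOCM_CTS;
	if (v24_sig & RFCOMM_V24_IC)	/* incoming call indication */
		status |= TIOCM_RI;
	if (v24_sig & RFCOMM_V24_DV)	/* data valid, i.e. carrier */
		status |= TIOCM_CD;
	return status;
}
The local DTR and RTS, in turn, map onto the locally asserted RFCOMM_V24_RTC and RFCOMM_V24_RTR, which is all that the tiocmset() changes below keep.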
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
Changes in v2
- fix a compilation issue discovered before sending v1 but never folded
into the actual patch...
net/bluetooth/rfcomm/tty.c | 26 +++++++++++---------------
1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 376ce6de84be..b783526ab588 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -643,8 +643,8 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
tty_port_tty_hangup(&dev->port, true);
dev->modem_status =
- ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) |
- ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) |
+ ((v24_sig & RFCOMM_V24_RTC) ? TIOCM_DSR : 0) |
+ ((v24_sig & RFCOMM_V24_RTR) ? TIOCM_CTS : 0) |
((v24_sig & RFCOMM_V24_IC) ? TIOCM_RI : 0) |
((v24_sig & RFCOMM_V24_DV) ? TIOCM_CD : 0);
}
@@ -1055,10 +1055,14 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
static int rfcomm_tty_tiocmget(struct tty_struct *tty)
{
struct rfcomm_dev *dev = tty->driver_data;
+ struct rfcomm_dlc *dlc = dev->dlc;
+ u8 v24_sig;
BT_DBG("tty %p dev %p", tty, dev);
- return dev->modem_status;
+ rfcomm_dlc_get_modem_status(dlc, &v24_sig);
+
+ return (v24_sig & (TIOCM_DTR | TIOCM_RTS)) | dev->modem_status;
}
static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear)
@@ -1071,23 +1075,15 @@ static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigne
rfcomm_dlc_get_modem_status(dlc, &v24_sig);
- if (set & TIOCM_DSR || set & TIOCM_DTR)
+ if (set & TIOCM_DTR)
v24_sig |= RFCOMM_V24_RTC;
- if (set & TIOCM_RTS || set & TIOCM_CTS)
+ if (set & TIOCM_RTS)
v24_sig |= RFCOMM_V24_RTR;
- if (set & TIOCM_RI)
- v24_sig |= RFCOMM_V24_IC;
- if (set & TIOCM_CD)
- v24_sig |= RFCOMM_V24_DV;
- if (clear & TIOCM_DSR || clear & TIOCM_DTR)
+ if (clear & TIOCM_DTR)
v24_sig &= ~RFCOMM_V24_RTC;
- if (clear & TIOCM_RTS || clear & TIOCM_CTS)
+ if (clear & TIOCM_RTS)
v24_sig &= ~RFCOMM_V24_RTR;
- if (clear & TIOCM_RI)
- v24_sig &= ~RFCOMM_V24_IC;
- if (clear & TIOCM_CD)
- v24_sig &= ~RFCOMM_V24_DV;
rfcomm_dlc_set_modem_status(dlc, v24_sig);
--
2.49.1
Hi.
I've checked the C reproducer [1] on the 6.1.y branch and found that it still
produces the crash on 6.1.y. I noticed that the syzbot bisection result [2]
is incorrect: the hang was in fact fixed by upstream commit b0ad381fa769
("btrfs: fix deadlock with fiemap and extent locking"). I also came across
the CVE-2024-35784 [3][4] vulnerability, which is directly related to that
syzbot report. The syzbot reproducer therefore provides an additional way to
check for CVE-2024-35784.
I attempted to fix CVE-2024-35784 in stable 6.1.y (over v6.1.157), and
found that the initial fix commit b0ad381fa769 ("btrfs: fix deadlock with
fiemap and extent locking") introduced regressions [5][6].
IMHO here is the minimum patch series to eliminate CVE-2024-35784 from 6.1.y:
b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking") (Initial fix of the CVE-2024-35784)
a1a4a9ca77f1 ("btrfs: fix race between ordered extent completion and fiemap") (Fixes: b0ad381fa769)
978b63f7464a ("btrfs: fix race when detecting delalloc ranges during fiemap") (Fixes: b0ad381fa769)
1cab1375ba6d ("btrfs: reuse cloned extent buffer during fiemap to avoid re-allocations") (Optimization: 978b63f7464a)
53e24158684b ("btrfs: set start on clone before calling copy_extent_buffer_full") (Fixes: 1cab1375ba6d)
Required patches attached.
Only two patches in the series have minor backport modifications due to v6.1.157 btrfs code differences.
The remaining patches are identical to the upstream.
Regards,
AK
Reported-by: syzbot+f8217aae382555004877(a)syzkaller.appspotmail.com
----
[1] https://syzkaller.appspot.com/text?tag=ReproC&x=12b4c88b280000
[2] https://syzkaller.appspot.com/bug?extid=f8217aae382555004877
[3] https://lore.kernel.org/all/2024051704-CVE-2024-35784-6dec@gregkh/
[4] https://cve.org/CVERecord/?id=CVE-2024-35784
[5] https://lore.kernel.org/linux-btrfs/cover.1709202499.git.fdmanana@suse.com/
[6] https://lore.kernel.org/all/20240304211551.880347593@linuxfoundation.org/
Since npages is declared as int, shifting npages << PAGE_SHIFT
for a 2 GB+ scatter-gather list overflows before reaching
__unmap_single(), leading to incorrect unmapping.
A 2 GB region equals 524,288 pages. The expression
npages << PAGE_SHIFT yields 0x80000000, which exceeds
INT32_MAX (0x7FFFFFFF), so the signed result wraps negative. When that
negative int is converted to the 64-bit size argument it sign-extends to
0xFFFFFFFF80000000, which breaks the unmap size calculation.
Fix the overflow by casting npages to size_t before the
PAGE_SHIFT left-shift.
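A minimal user-space sketch of the arithmetic (illustrative only, assuming 4 KiB pages and a 64-bit size_t):
#include <stdio.h>
#include <stddef.h>
#define PAGE_SHIFT 12
int main(void)
{
	int npages = 524288;	/* 2 GB worth of 4 KiB pages */
	/* The 32-bit shift wraps to INT_MIN (strictly, signed overflow is
	 * undefined; typical compilers produce 0x80000000), which then
	 * sign-extends when converted to a 64-bit size. */
	size_t bad = npages << PAGE_SHIFT;
	/* Widening before the shift keeps the value positive. */
	size_t good = (size_t)npages << PAGE_SHIFT;
	printf("bad  = %#zx\n", bad);	/* 0xffffffff80000000 */
	printf("good = %#zx\n", good);	/* 0x80000000 */
	return 0;
}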
Fixes: 89736a0ee81d ("Revert "iommu/amd: Remove the leftover of bypass support"")
Cc: stable(a)vger.kernel.org # 5.4
Signed-off-by: Jinhui Guo <guojinhui.liam(a)bytedance.com>
---
Hi,
We hit an IO_PAGE_FAULT on AMD with 5.4-stable when mapping a
2 GB scatter-gather list.
The fault is caused by an overflow in unmap_sg(): on stable-5.4
the scatter-gather map path was never moved to the IOMMU framework, so the
bug exists only in this branch.
Regards,
Jinhui
drivers/iommu/amd_iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index a30aac41af42..60872d7be52b 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2682,7 +2682,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
dma_dom = to_dma_ops_domain(domain);
npages = sg_num_pages(dev, sglist, nelems);
- __unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir);
+ __unmap_single(dma_dom, startaddr, (size_t)npages << PAGE_SHIFT, dir);
}
/*
--
2.20.1
Titas reports that the accelerometer sensor on their laptop only
works after a warm boot or unloading/reloading the amd-sfh kernel
module.
Presumably the sensor is in a bad state on cold boot and failing to
start, so explicitly stop it before starting.
Cc: stable(a)vger.kernel.org
Fixes: 93ce5e0231d79 ("HID: amd_sfh: Implement SFH1.1 functionality")
Reported-by: Titas <novatitas366(a)gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220670
Tested-by: Titas <novatitas366(a)gmail.com>
Signed-off-by: Mario Limonciello (AMD) <superm1(a)kernel.org>
---
drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
index 0a9b44ce4904e..b0bab2a1ddcc5 100644
--- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
+++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
@@ -194,6 +194,8 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
if (rc)
goto cleanup;
+ mp2_ops->stop(privdata, cl_data->sensor_idx[i]);
+ amd_sfh_wait_for_response(privdata, cl_data->sensor_idx[i], DISABLE_SENSOR);
writel(0, privdata->mmio + amd_get_p2c_val(privdata, 0));
mp2_ops->start(privdata, info);
status = amd_sfh_wait_for_response
--
2.43.0
The ethtool tsconfig Netlink path can trigger a null pointer
dereference. A call chain such as:
tsconfig_prepare_data() ->
dev_get_hwtstamp_phylib() ->
vlan_hwtstamp_get() ->
generic_hwtstamp_get_lower() ->
generic_hwtstamp_ioctl_lower()
results in generic_hwtstamp_ioctl_lower() being called with
kernel_cfg->ifr as NULL.
The generic_hwtstamp_ioctl_lower() function does not expect a
NULL ifr and dereferences it, leading to a system crash.
Fix this by adding a NULL check for kernel_cfg->ifr in
generic_hwtstamp_get/set_lower(). If ifr is NULL, return
-EOPNOTSUPP to prevent the call to the legacy IOCTL helper.
Fixes: 6e9e2eed4f39 ("net: ethtool: Add support for tsconfig command to get/set hwtstamp config")
Closes: https://lore.kernel.org/lkml/cd6a7056-fa6d-43f8-b78a-f5e811247ba8@linux.dev…
Signed-off-by: Jiaming Zhang <r772577952(a)gmail.com>
---
net/core/dev_ioctl.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index ad54b12d4b4c..39eaf6ba981a 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -474,6 +474,10 @@ int generic_hwtstamp_get_lower(struct net_device *dev,
return err;
}
+ /* Netlink path with unconverted driver */
+ if (!kernel_cfg->ifr)
+ return -EOPNOTSUPP;
+
/* Legacy path: unconverted lower driver */
return generic_hwtstamp_ioctl_lower(dev, SIOCGHWTSTAMP, kernel_cfg);
}
@@ -498,6 +502,10 @@ int generic_hwtstamp_set_lower(struct net_device *dev,
return err;
}
+ /* Netlink path with unconverted driver */
+ if (!kernel_cfg->ifr)
+ return -EOPNOTSUPP;
+
/* Legacy path: unconverted lower driver */
return generic_hwtstamp_ioctl_lower(dev, SIOCSHWTSTAMP, kernel_cfg);
}
--
2.34.1
drm_sched_job_init() races when checking an entity's run queue, because it
does not take the entity's spinlock.
Add the lock.
Cc: stable(a)vger.kernel.org # 6.7+
Fixes: 56e449603f0a ("drm/sched: Convert the GPU scheduler to variable number of run-queues")
Signed-off-by: Philipp Stanner <phasta(a)kernel.org>
---
drivers/gpu/drm/scheduler/sched_main.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 7f938f491b6f..30028054385f 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -799,7 +799,12 @@ int drm_sched_job_init(struct drm_sched_job *job,
u32 credits, void *owner,
uint64_t drm_client_id)
{
- if (!entity->rq) {
+ struct drm_sched_rq *rq;
+
+ spin_lock(&entity->lock);
+ rq = entity->rq;
+ spin_unlock(&entity->lock);
+ if (!rq) {
/* This will most likely be followed by missing frames
* or worse--a blank screen--leave a trail in the
* logs, so this can be debugged easier.
--
2.49.0
A malicious user could pass an arbitrarily large byte count
to memdup_user_nul(), potentially causing a kernel crash.
This follows the same pattern as commit ee76746387f6
("netdevsim: prevent bad user input in nsim_dev_health_break_write()")
and commit 7ef4c19d245f
("smackfs: restrict bytes count in smackfs write functions")
Found via static analysis and code review.
Fixes: 183238ffb886 ("misc: eeprom/idt_89hpesx: Switch to memdup_user_nul() helper")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/misc/eeprom/idt_89hpesx.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c
index 60c42170d147..b2e771bfc6da 100644
--- a/drivers/misc/eeprom/idt_89hpesx.c
+++ b/drivers/misc/eeprom/idt_89hpesx.c
@@ -907,6 +907,9 @@ static ssize_t idt_dbgfs_csr_write(struct file *filep, const char __user *ubuf,
if (*offp)
return 0;
+ if (count == 0 || count > PAGE_SIZE)
+ return -EINVAL;
+
/* Copy data from User-space */
buf = memdup_user_nul(ubuf, count);
if (IS_ERR(buf))
--
2.39.5 (Apple Git-154)
Backport commit: 094ee6017ea0 ("bonding: check xdp prog when set bond
mode") to 6.12.y to fix a bond issue.
It depends on commit: 22ccb684c1ca ("bonding: return detailed
error when loading native XDP fails)
In order to make a clean backport on stable kernel, backport 2 commits.
Hangbin Liu (1):
bonding: return detailed error when loading native XDP fails
Wang Liang (1):
bonding: check xdp prog when set bond mode
drivers/net/bonding/bond_main.c | 11 +++++++----
drivers/net/bonding/bond_options.c | 3 +++
include/net/bonding.h | 1 +
3 files changed, 11 insertions(+), 4 deletions(-)
--
2.17.1
A malicious user could pass an arbitrarily large byte count
to memdup_user_nul(), potentially causing a kernel crash.
This follows the same pattern as commit ee76746387f6
("netdevsim: prevent bad user input in nsim_dev_health_break_write()")
Found via static analysis and code review.
Fixes: 3783225130f0 ("powerpc/pseries: use memdup_user_nul")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
arch/powerpc/platforms/pseries/reconfig.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 599bd2c78514..b6bc1d8b2207 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -366,6 +366,9 @@ static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t coun
if (rv)
return rv;
+ if (count == 0 || count > PAGE_SIZE)
+ return -EINVAL;
+
kbuf = memdup_user_nul(buf, count);
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
--
2.39.5 (Apple Git-154)
The code did not check the return value of usbnet_get_endpoints.
Add checks and propagate the error when it fails.
Found via static analysis; this is similar to
commit 07161b2416f7 ("sr9800: Add check for usbnet_get_endpoints").
Fixes: 933a27d39e0e ("USB: asix - Add AX88178 support and many other changes")
Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
- v1:http://lore.kernel.org/all/20250830103743.2118777-1-linmq006@gmail.com
changes in v2:
- fix the blank line.
- update message to clarify how this is detected
- add Cc: stable
---
drivers/net/usb/asix_devices.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 85bd5d845409..232bbd79a4de 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -230,7 +230,9 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
int i;
unsigned long gpio_bits = dev->driver_info->data;
- usbnet_get_endpoints(dev,intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ goto out;
/* Toggle the GPIOs in a manufacturer/model specific way */
for (i = 2; i >= 0; i--) {
@@ -848,7 +850,9 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
dev->driver_priv = priv;
- usbnet_get_endpoints(dev, intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ return ret;
/* Maybe the boot loader passed the MAC address via device tree */
if (!eth_platform_get_mac_address(&dev->udev->dev, buf)) {
@@ -1281,7 +1285,9 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf)
int ret;
u8 buf[ETH_ALEN] = {0};
- usbnet_get_endpoints(dev,intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ return ret;
/* Get the MAC address */
ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
--
2.39.5 (Apple Git-154)
Here are various fixes from Paolo, addressing very occasional issues on
the sending side:
- Patch 1: drop an optimisation that could lead to timeout in case of
race conditions. A fix for up to v5.11.
- Patch 2: fix stream corruption under very specific conditions. A fix
for up to v5.13.
- Patch 3: restore MPTCP-level zero window probe after a recent fix. A
fix for up to v5.16.
- Patch 4: new MIB counter to track MPTCP-level zero windows probe to
help catching issues similar to the one fixed by the previous patch.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Paolo Abeni (4):
mptcp: drop bogus optimization in __mptcp_check_push()
mptcp: fix MSG_PEEK stream corruption
mptcp: restore window probe
mptcp: zero window probe mib
net/mptcp/mib.c | 1 +
net/mptcp/mib.h | 1 +
net/mptcp/protocol.c | 57 +++++++++++++++++++++++++++++++++-------------------
net/mptcp/protocol.h | 2 +-
4 files changed, 39 insertions(+), 22 deletions(-)
---
base-commit: 210b35d6a7ea415494ce75490c4b43b4e717d935
change-id: 20251027-net-mptcp-send-timeout-7fc1474fd849
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
The patch titled
Subject: kasan: unpoison vms[area] addresses with a common tag
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kasan-unpoison-vms-addresses-with-a-common-tag.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Subject: kasan: unpoison vms[area] addresses with a common tag
Date: Wed, 29 Oct 2025 19:06:03 +0000
The problem presented here is related to NUMA systems and tag-based KASAN
modes - software and hardware ones. It can be explained in the following
points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits the
tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the first
chunk.
5. Thus, the subsequent chunks need to have their tag set to match
that of the first chunk.
Unpoison all vms[]->addr memory and pointers with the same tag to resolve
the mismatch.
Link: https://lkml.kernel.org/r/932121edc75be8e2038d64ecb4853df2e2b258df.17617636…
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Bill Wendling <morbo(a)google.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: FUJITA Tomonori <fujita.tomonori(a)gmail.com>
Cc: Guilherme Giacomo Simoes <trintaeoitogc(a)gmail.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Jeremy Linton <jeremy.linton(a)arm.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Justin Stitt <justinstitt(a)google.com>
Cc: Kalesh Singh <kaleshsingh(a)google.com>
Cc: Kees Cook <kees(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: levi.yun <yeoreum.yun(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Marco Elver <elver(a)google.com>
Cc: Marc Rutland <mark.rutland(a)arm.com>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Miguel Ojeda <ojeda(a)kernel.org>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Mostafa Saleh <smostafa(a)google.com>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Pankaj Gupta <pankaj.gupta(a)amd.com>
Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Samuel Holland <samuel.holland(a)sifive.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Thomas Huth <thuth(a)redhat.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: Uros Bizjak <ubizjak(a)gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Will Deacon <will(a)kernel.org>
Cc: Xin Li (Intel) <xin(a)zytor.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org> [6.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kasan/tags.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
--- a/mm/kasan/tags.c~kasan-unpoison-vms-addresses-with-a-common-tag
+++ a/mm/kasan/tags.c
@@ -148,12 +148,20 @@ void __kasan_save_free_info(struct kmem_
save_stack_info(cache, object, 0, true);
}
+/*
+ * A tag mismatch happens when calculating per-cpu chunk addresses, because
+ * they all inherit the tag from vms[0]->addr, even when nr_vms is bigger
+ * than 1. This is a problem because all the vms[]->addr come from separate
+ * allocations and have different tags so while the calculated address is
+ * correct the tag isn't.
+ */
void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
{
int area;
for (area = 0 ; area < nr_vms ; area++) {
kasan_poison(vms[area]->addr, vms[area]->size,
- arch_kasan_get_tag(vms[area]->addr), false);
+ arch_kasan_get_tag(vms[0]->addr), false);
+ arch_kasan_set_tag(vms[area]->addr, arch_kasan_get_tag(vms[0]->addr));
}
}
_
Patches currently in -mm which might be from maciej.wieczor-retman(a)intel.com are
kasan-unpoison-pcpu-chunks-with-base-address-tag.patch
kasan-unpoison-vms-addresses-with-a-common-tag.patch
The patch titled
Subject: kasan: unpoison pcpu chunks with base address tag
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kasan-unpoison-pcpu-chunks-with-base-address-tag.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Subject: kasan: unpoison pcpu chunks with base address tag
Date: Wed, 29 Oct 2025 19:05:49 +0000
The problem presented here is related to NUMA systems and tag-based KASAN
modes - software and hardware ones. It can be explained in the following
points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits the
tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the first
chunk.
5. Thus, the subsequent chunks need to have their tag set to match
that of the first chunk.
Refactor code by moving it into a helper in preparation for the actual
fix.
Link: https://lkml.kernel.org/r/fbce40a59b0a22a5735cb6e9b95c5a45a34b23cb.17617636…
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Bill Wendling <morbo(a)google.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: FUJITA Tomonori <fujita.tomonori(a)gmail.com>
Cc: Guilherme Giacomo Simoes <trintaeoitogc(a)gmail.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Jeremy Linton <jeremy.linton(a)arm.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Justin Stitt <justinstitt(a)google.com>
Cc: Kalesh Singh <kaleshsingh(a)google.com>
Cc: Kees Cook <kees(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: levi.yun <yeoreum.yun(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Marco Elver <elver(a)google.com>
Cc: Marc Rutland <mark.rutland(a)arm.com>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Miguel Ojeda <ojeda(a)kernel.org>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Mostafa Saleh <smostafa(a)google.com>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Pankaj Gupta <pankaj.gupta(a)amd.com>
Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Samuel Holland <samuel.holland(a)sifive.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Thomas Huth <thuth(a)redhat.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: Uros Bizjak <ubizjak(a)gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Will Deacon <will(a)kernel.org>
Cc: Xin Li (Intel) <xin(a)zytor.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org> [6.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/kasan.h | 10 ++++++++++
mm/kasan/tags.c | 11 +++++++++++
mm/vmalloc.c | 4 +---
3 files changed, 22 insertions(+), 3 deletions(-)
--- a/include/linux/kasan.h~kasan-unpoison-pcpu-chunks-with-base-address-tag
+++ a/include/linux/kasan.h
@@ -614,6 +614,13 @@ static __always_inline void kasan_poison
__kasan_poison_vmalloc(start, size);
}
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms);
+static __always_inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+ if (kasan_enabled())
+ __kasan_unpoison_vmap_areas(vms, nr_vms);
+}
+
#else /* CONFIG_KASAN_VMALLOC */
static inline void kasan_populate_early_vm_area_shadow(void *start,
@@ -638,6 +645,9 @@ static inline void *kasan_unpoison_vmall
static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
{ }
+static inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{ }
+
#endif /* CONFIG_KASAN_VMALLOC */
#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
--- a/mm/kasan/tags.c~kasan-unpoison-pcpu-chunks-with-base-address-tag
+++ a/mm/kasan/tags.c
@@ -18,6 +18,7 @@
#include <linux/static_key.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
#include "kasan.h"
#include "../slab.h"
@@ -146,3 +147,13 @@ void __kasan_save_free_info(struct kmem_
{
save_stack_info(cache, object, 0, true);
}
+
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+ int area;
+
+ for (area = 0 ; area < nr_vms ; area++) {
+ kasan_poison(vms[area]->addr, vms[area]->size,
+ arch_kasan_get_tag(vms[area]->addr), false);
+ }
+}
--- a/mm/vmalloc.c~kasan-unpoison-pcpu-chunks-with-base-address-tag
+++ a/mm/vmalloc.c
@@ -4870,9 +4870,7 @@ retry:
* With hardware tag-based KASAN, marking is skipped for
* non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
*/
- for (area = 0; area < nr_vms; area++)
- vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
- vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
+ kasan_unpoison_vmap_areas(vms, nr_vms);
kfree(vas);
return vms;
_
Patches currently in -mm which might be from maciej.wieczor-retman(a)intel.com are
kasan-unpoison-pcpu-chunks-with-base-address-tag.patch
kasan-unpoison-vms-addresses-with-a-common-tag.patch
Hi
We got in Debian a request to backport 3c591faadd8a ("Reapply "Revert
drm/amd/display: Enable Freesync Video Mode by default"") for the
kernel in Debian bookworm, based on 6.1.y stable series.
https://bugs.debian.org/1119232
While looking at the request, I noticed that the series of commits had
a bit of a convoluted history. AFAICT the story began with:
de05abe6b9d0 ("drm/amd/display: Enable Freesync Video Mode by
default"), this landed in 5.18-rc1 (and backported to v6.1.5,
v6.0.19).
This was then reverted with 4243c84aa082 ("Revert "drm/amd/display:
Enable Freesync Video Mode by default""), which landed in v6.3-rc1
(and in turn was backported to v6.1.53).
So far we are in sync.
The above was then reverted again, via 11b92df8a2f7 ("Revert "Revert
drm/amd/display: Enable Freesync Video Mode by default"") applied in
v6.5-rc1 and as well backported to v6.1.53 (so still in sync).
Now comes where we diverge: 3c591faadd8a ("Reapply "Revert
drm/amd/display: Enable Freesync Video Mode by default"") got applied
later on, landing in v6.9-rc1 but *not* in 6.1.y anymore.
I suspect this one was not applied to 6.1.y because, in the meantime, it
stopped cherry-picking cleanly due to context changes from
3e094a287526 ("drm/amd/display: Use drm_connector in
create_stream_for_sink").
If this is correct, then the 6.1.y series can be brought back in sync by
cherry-picking the commit and adjusting the context around the change.
I'm attaching the proposed change.
Alex in particular, does that make sense?
Regards,
Salvatore
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
The problem presented here is related to NUMA systems and tag-based
KASAN modes - software and hardware ones. It can be explained in the
following points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits
the tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the
first chunk.
5. Thus, the subsequent chunks need to have their tag set to
match that of the first chunk.
Unpoison all vms[]->addr memory and pointers with the same tag to
resolve the mismatch.
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Cc: <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
---
Changelog v6:
- Add Baoquan's tested-by tag.
- Move patch to the beginning of the series as it is a fix.
- Add fixes tag.
mm/kasan/tags.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index ecc17c7c675a..c6b40cbffae3 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -148,12 +148,20 @@ void __kasan_save_free_info(struct kmem_cache *cache, void *object)
save_stack_info(cache, object, 0, true);
}
+/*
+ * A tag mismatch happens when calculating per-cpu chunk addresses, because
+ * they all inherit the tag from vms[0]->addr, even when nr_vms is bigger
+ * than 1. This is a problem because all the vms[]->addr come from separate
+ * allocations and have different tags so while the calculated address is
+ * correct the tag isn't.
+ */
void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
{
int area;
for (area = 0 ; area < nr_vms ; area++) {
kasan_poison(vms[area]->addr, vms[area]->size,
- arch_kasan_get_tag(vms[area]->addr), false);
+ arch_kasan_get_tag(vms[0]->addr), false);
+ arch_kasan_set_tag(vms[area]->addr, arch_kasan_get_tag(vms[0]->addr));
}
}
--
2.51.0
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
The problem presented here is related to NUMA systems and tag-based
KASAN modes - software and hardware ones. It can be explained in the
following points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits
the tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the
first chunk.
5. Thus, the subsequent chunks need to have their tag set to
match that of the first chunk.
Refactor code by moving it into a helper in preparation for the actual
fix.
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Cc: <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
---
Changelog v6:
- Add Baoquan's tested-by tag.
- Move patch to the beginning of the series as it is a fix.
- Move the refactored code to tags.c because both software and hardware
modes compile it.
- Add fixes tag.
Changelog v4:
- Redo the patch message numbered list.
- Do the refactoring in this patch and move additions to the next new
one.
Changelog v3:
- Remove last version of this patch that just resets the tag on
base_addr and add this patch that unpoisons all areas with the same
tag instead.
include/linux/kasan.h | 10 ++++++++++
mm/kasan/tags.c | 11 +++++++++++
mm/vmalloc.c | 4 +---
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index d12e1a5f5a9a..b00849ea8ffd 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -614,6 +614,13 @@ static __always_inline void kasan_poison_vmalloc(const void *start,
__kasan_poison_vmalloc(start, size);
}
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms);
+static __always_inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+ if (kasan_enabled())
+ __kasan_unpoison_vmap_areas(vms, nr_vms);
+}
+
#else /* CONFIG_KASAN_VMALLOC */
static inline void kasan_populate_early_vm_area_shadow(void *start,
@@ -638,6 +645,9 @@ static inline void *kasan_unpoison_vmalloc(const void *start,
static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
{ }
+static inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{ }
+
#endif /* CONFIG_KASAN_VMALLOC */
#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index b9f31293622b..ecc17c7c675a 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -18,6 +18,7 @@
#include <linux/static_key.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
#include "kasan.h"
#include "../slab.h"
@@ -146,3 +147,13 @@ void __kasan_save_free_info(struct kmem_cache *cache, void *object)
{
save_stack_info(cache, object, 0, true);
}
+
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+ int area;
+
+ for (area = 0 ; area < nr_vms ; area++) {
+ kasan_poison(vms[area]->addr, vms[area]->size,
+ arch_kasan_get_tag(vms[area]->addr), false);
+ }
+}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 798b2ed21e46..934c8bfbcebf 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4870,9 +4870,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
* With hardware tag-based KASAN, marking is skipped for
* non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
*/
- for (area = 0; area < nr_vms; area++)
- vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
- vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
+ kasan_unpoison_vmap_areas(vms, nr_vms);
kfree(vas);
return vms;
--
2.51.0
Hello.
We have observed a huge latency increase in `fork()` after ingesting the CVE-2025-38085 fix, which corresponds to commit `1013af4f585f: mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race`. On large machines with 1.5TB of memory and 196 cores, mmapping 1.2TB of shared memory and forking dozens or hundreds of times, we see execution times increase by a factor of 4. The reproducer is at the end of the email.
Comparing a kernel without this patch to a kernel with this patch applied, when spawning 1000 children we see the following execution times:
Patched kernel:
$ time make stress
...
real 0m11.275s
user 0m0.177s
sys 0m23.905s
Original kernel:
$ time make stress
...
real 0m2.475s
user 0m1.398s
sys 0m2.501s
The patch in question: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
My observation/assumption is:
- each child touches 100 random pages and despawns
- on each despawn `huge_pmd_unshare()` is called
- each call to `huge_pmd_unshare()` synchronizes all threads using `tlb_remove_table_sync_one()`, leading to the regression
I'm happy to provide more information.
Thank you
Stanislav Uschakow
=== Reproducer ===
Setup:
#!/bin/bash
echo "Setting up hugepages for reproduction..."
# hugepages (1.2TB / 2MB = 614400 pages)
REQUIRED_PAGES=614400
# Check current hugepage allocation
CURRENT_PAGES=$(cat /proc/sys/vm/nr_hugepages)
echo "Current hugepages: $CURRENT_PAGES"
if [ "$CURRENT_PAGES" -lt "$REQUIRED_PAGES" ]; then
echo "Allocating $REQUIRED_PAGES hugepages..."
echo $REQUIRED_PAGES | sudo tee /proc/sys/vm/nr_hugepages
ALLOCATED=$(cat /proc/sys/vm/nr_hugepages)
echo "Allocated hugepages: $ALLOCATED"
if [ "$ALLOCATED" -lt "$REQUIRED_PAGES" ]; then
echo "Warning: Could not allocate all required hugepages"
echo "Available: $ALLOCATED, Required: $REQUIRED_PAGES"
fi
fi
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
echo -e "\nHugepage information:"
cat /proc/meminfo | grep -i huge
echo -e "\nSetup complete. You can now run the reproduction test."
Makefile:
CXX = gcc
CXXFLAGS = -O2 -Wall
TARGET = hugepage_repro
SOURCE = hugepage_repro.c
$(TARGET): $(SOURCE)
	$(CXX) $(CXXFLAGS) -o $(TARGET) $(SOURCE)

clean:
	rm -f $(TARGET)

setup:
	chmod +x setup_hugepages.sh
	./setup_hugepages.sh

test: $(TARGET)
	./$(TARGET) 20 3

stress: $(TARGET)
	./$(TARGET) 1000 1

.PHONY: clean setup test stress
hugepage_repro.c:
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdio.h>
#define HUGEPAGE_SIZE (2 * 1024 * 1024) // 2MB
#define TOTAL_SIZE (1200ULL * 1024 * 1024 * 1024) // 1.2TB
#define NUM_HUGEPAGES (TOTAL_SIZE / HUGEPAGE_SIZE)
void* create_hugepage_mapping() {
    void* addr = mmap(NULL, TOTAL_SIZE, PROT_READ | PROT_WRITE,
                      MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    if (addr == MAP_FAILED) {
        perror("mmap hugepages failed");
        exit(1);
    }
    return addr;
}

void touch_random_pages(void* addr, int num_touches) {
    char* base = (char*)addr;
    for (int i = 0; i < num_touches; ++i) {
        size_t offset = (rand() % NUM_HUGEPAGES) * HUGEPAGE_SIZE;
        volatile char val = base[offset];
        (void)val;
    }
}

void child_process(void* shared_mem, int child_id) {
    struct timespec start, end;
    clock_gettime(CLOCK_MONOTONIC, &start);
    touch_random_pages(shared_mem, 100);
    clock_gettime(CLOCK_MONOTONIC, &end);
    long duration = (end.tv_sec - start.tv_sec) * 1000000 +
                    (end.tv_nsec - start.tv_nsec) / 1000;
    printf("Child %d completed in %ld μs\n", child_id, duration);
}

int main(int argc, char* argv[]) {
    int num_processes = argc > 1 ? atoi(argv[1]) : 50;
    int iterations = argc > 2 ? atoi(argv[2]) : 5;
    printf("Creating %lluGB hugepage mapping...\n", TOTAL_SIZE / (1024*1024*1024));
    void* shared_mem = create_hugepage_mapping();
    for (int iter = 0; iter < iterations; ++iter) {
        printf("\nIteration %d: Forking %d processes\n", iter + 1, num_processes);
        pid_t children[num_processes];
        struct timespec iter_start, iter_end;
        clock_gettime(CLOCK_MONOTONIC, &iter_start);
        for (int i = 0; i < num_processes; ++i) {
            pid_t pid = fork();
            if (pid == 0) {
                child_process(shared_mem, i);
                exit(0);
            } else if (pid > 0) {
                children[i] = pid;
            }
        }
        for (int i = 0; i < num_processes; ++i) {
            waitpid(children[i], NULL, 0);
        }
        clock_gettime(CLOCK_MONOTONIC, &iter_end);
        long iter_duration = (iter_end.tv_sec - iter_start.tv_sec) * 1000 +
                             (iter_end.tv_nsec - iter_start.tv_nsec) / 1000000;
        printf("Iteration completed in %ld ms\n", iter_duration);
    }
    munmap(shared_mem, TOTAL_SIZE);
    return 0;
}
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
Fix a memory leak in gpi_peripheral_config() where the original memory
pointed to by gchan->config could be lost if krealloc() fails.
The issue occurs when:
1. gchan->config points to previously allocated memory
2. krealloc() fails and returns NULL
3. The function directly assigns NULL to gchan->config, losing the
reference to the original memory
4. The original memory becomes unreachable and cannot be freed
Fix this by using a temporary variable to hold the krealloc() result
and only updating gchan->config when the allocation succeeds.
Found via static analysis and code review.
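For reference, the general pattern the fix follows looks like the sketch below (hypothetical struct and helper names, not the gpi driver code):

struct foo {
	void *buf;
};

/* krealloc() returns NULL on failure and leaves the old buffer untouched,
 * so the result must not be assigned back to the only pointer directly. */
static int foo_resize(struct foo *f, size_t new_size)
{
	void *tmp = krealloc(f->buf, new_size, GFP_KERNEL);

	if (!tmp)
		return -ENOMEM;	/* f->buf still points to the old, freeable buffer */

	f->buf = tmp;
	return 0;
}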
Fixes: 5d0c3533a19f ("dmaengine: qcom: Add GPI dma driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/dma/qcom/gpi.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index 8e87738086b2..8908b7c71900 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -1605,14 +1605,16 @@ static int
gpi_peripheral_config(struct dma_chan *chan, struct dma_slave_config *config)
{
struct gchan *gchan = to_gchan(chan);
+ void *new_config;
if (!config->peripheral_config)
return -EINVAL;
- gchan->config = krealloc(gchan->config, config->peripheral_size, GFP_NOWAIT);
- if (!gchan->config)
+ new_config = krealloc(gchan->config, config->peripheral_size, GFP_NOWAIT);
+ if (!new_config)
return -ENOMEM;
+ gchan->config = new_config;
memcpy(gchan->config, config->peripheral_config, config->peripheral_size);
return 0;
--
2.39.5 (Apple Git-154)
Sasha has also been maintaining the stable branch in conjunction with Greg
since commit cb5d21946d2a2f ("MAINTAINERS: Add Sasha as a stable branch
maintainer"). Mention him in 2.Process.rst.
Cc: stable(a)vger.kernel.org
Signed-off-by: Bagas Sanjaya <bagasdotme(a)gmail.com>
---
Documentation/process/2.Process.rst | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst
index 8e63d171767db8..7bd41838a5464f 100644
--- a/Documentation/process/2.Process.rst
+++ b/Documentation/process/2.Process.rst
@@ -99,8 +99,10 @@ go out with a handful of known regressions, though, hopefully, none of them
are serious.
Once a stable release is made, its ongoing maintenance is passed off to the
-"stable team," currently Greg Kroah-Hartman. The stable team will release
-occasional updates to the stable release using the 9.x.y numbering scheme.
+"stable team," currently consists of Greg Kroah-Hartman and Sasha Levin. The
+stable team will release occasional updates to the stable release using the
+9.x.y numbering scheme.
+
To be considered for an update release, a patch must (1) fix a significant
bug, and (2) already be merged into the mainline for the next development
kernel. Kernels will typically receive stable updates for a little more
base-commit: 0aa760051f4eb3d3bcd812125557bd09629a71e8
--
An old man doll... just what I always wanted! - Clara
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from
log base 2 of the allocation size. This is not correct if the
allocation size is smaller than a page, and yields a negative value
for the order as seen below:
TCP established hash table entries: 32 (order: -4, 256 bytes, linear)
TCP bind hash table entries: 32 (order: -2, 1024 bytes, linear)
Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is
smaller than a page:
TCP established hash table entries: 32 (order: 0, 256 bytes, linear)
TCP bind hash table entries: 32 (order: 0, 1024 bytes, linear)
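For illustration only (a small user-space sketch, not kernel code; PAGE_SHIFT is assumed to be 12 and get_order_sketch() mimics the kernel's get_order()), the two computations differ as follows for the 256-byte table above:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* floor(log2(x)) for x > 0 */
static int ilog2_ul(unsigned long x) { int i = -1; while (x) { x >>= 1; i++; } return i; }

/* smallest order such that (1 << order) pages hold size bytes */
static int get_order_sketch(unsigned long size)
{
	unsigned long pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	return pages <= 1 ? 0 : ilog2_ul(pages - 1) + 1;
}

int main(void)
{
	unsigned long size = 256;

	printf("old: %d\n", ilog2_ul(size) - PAGE_SHIFT);   /* -4, as in the bogus log line */
	printf("new: %d\n", get_order_sketch(size));        /*  0, one page is still consumed */
	return 0;
}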
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org # v5.4+
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
---
mm/mm_init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..7712d887b696 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(const char *tablename,
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
--
2.51.1.851.g4ebd6896fd-goog
Since commit 4959aebba8c0 ("virtio-net: use mtu size as buffer length
for big packets"), when guest gso is off, the allocated size for big
packets is not MAX_SKB_FRAGS * PAGE_SIZE anymore but depends on
negotiated MTU. The number of allocated frags for big packets is stored
in vi->big_packets_num_skbfrags.
Because the host-announced buffer length can be malicious (e.g. the host
vhost_net driver's get_rx_bufs is modified to announce an incorrect
length), we need a check in the virtio_net receive path. Currently, the
check is not adapted to the new change, which can lead to a NULL page
pointer dereference in the while loop below when the received length is
larger than the allocated one.
This commit fixes the received length check corresponding to the new
change.
Fixes: 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bui Quang Minh <minhquangbui99(a)gmail.com>
---
Changes in v6:
- Fix the length check
- Link to v5: https://lore.kernel.org/netdev/20251024150649.22906-1-minhquangbui99@gmail.…
Changes in v5:
- Move the length check to receive_big
- Link to v4: https://lore.kernel.org/netdev/20251022160623.51191-1-minhquangbui99@gmail.…
Changes in v4:
- Remove unrelated changes, add more comments
- Link to v3: https://lore.kernel.org/netdev/20251021154534.53045-1-minhquangbui99@gmail.…
Changes in v3:
- Convert BUG_ON to WARN_ON_ONCE
- Link to v2: https://lore.kernel.org/netdev/20250708144206.95091-1-minhquangbui99@gmail.…
Changes in v2:
- Remove incorrect give_pages call
- Link to v1: https://lore.kernel.org/netdev/20250706141150.25344-1-minhquangbui99@gmail.…
---
drivers/net/virtio_net.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a757cbcab87f..461ad1019c37 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -910,17 +910,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
goto ok;
}
- /*
- * Verify that we can indeed put this data into a skb.
- * This is here to handle cases when the device erroneously
- * tries to receive more than is possible. This is usually
- * the case of a broken device.
- */
- if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
- net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
- dev_kfree_skb(skb);
- return NULL;
- }
BUG_ON(offset >= PAGE_SIZE);
while (len) {
unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
@@ -2107,9 +2096,19 @@ static struct sk_buff *receive_big(struct net_device *dev,
struct virtnet_rq_stats *stats)
{
struct page *page = buf;
- struct sk_buff *skb =
- page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
+ struct sk_buff *skb;
+
+ /* Make sure that len does not exceed the allocated size in
+ * add_recvbuf_big.
+ */
+ if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) {
+ pr_debug("%s: rx error: len %u exceeds allocate size %lu\n",
+ dev->name, len,
+ (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE);
+ goto err;
+ }
+ skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
u64_stats_add(&stats->bytes, len - vi->hdr_len);
if (unlikely(!skb))
goto err;
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6f40e50ceb99fc8ef37e5c56e2ec1d162733fef0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102920-mace-herbal-edee@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6f40e50ceb99fc8ef37e5c56e2ec1d162733fef0 Mon Sep 17 00:00:00 2001
From: Qianchang Zhao <pioooooooooip(a)gmail.com>
Date: Wed, 22 Oct 2025 15:27:47 +0900
Subject: [PATCH] ksmbd: transport_ipc: validate payload size before reading
handle
handle_response() dereferences the payload as a 4-byte handle without
verifying that the declared payload size is at least 4 bytes. A malformed
or truncated message from ksmbd.mountd can lead to a 4-byte read past the
declared payload size. Validate the size before dereferencing.
This is a minimal fix to guard the initial handle read.
Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers")
Cc: stable(a)vger.kernel.org
Reported-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
Signed-off-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 46f87fd1ce1c..2c08cccfa680 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -263,10 +263,16 @@ static void ipc_msg_handle_free(int handle)
static int handle_response(int type, void *payload, size_t sz)
{
- unsigned int handle = *(unsigned int *)payload;
+ unsigned int handle;
struct ipc_msg_table_entry *entry;
int ret = 0;
+ /* Prevent 4-byte read beyond declared payload size */
+ if (sz < sizeof(unsigned int))
+ return -EINVAL;
+
+ handle = *(unsigned int *)payload;
+
ipc_update_last_active();
down_read(&ipc_msg_table_lock);
hash_for_each_possible(ipc_msg_table, entry, ipc_table_hlist, handle) {
When the SLAB_STORE_USER debug flag is used, any metadata placed after
the original kmalloc request size (orig_size) is not properly aligned
on 64-bit architectures because its type is unsigned int. When both KASAN
and SLAB_STORE_USER are enabled, kasan_alloc_meta is misaligned.
Note that 64-bit architectures without HAVE_EFFICIENT_UNALIGNED_ACCESS
are assumed to require 64-bit accesses to be 64-bit aligned.
See HAVE_64BIT_ALIGNED_ACCESS and commit adab66b71abf ("Revert:
"ring-buffer: Remove HAVE_64BIT_ALIGNED_ACCESS"") for more details.
Because not all architectures support unaligned memory accesses,
ensure that all metadata (track, orig_size, kasan_{alloc,free}_meta)
in a slab object are word-aligned. struct track, kasan_{alloc,free}_meta
are aligned by adding __aligned(__alignof__(unsigned long)).
For orig_size, use ALIGN(sizeof(unsigned int), sizeof(unsigned long)) to
make clear that its size remains unsigned int but it must be aligned to
a word boundary. On 64-bit architectures, this reserves 8 bytes for
orig_size, which is acceptable since kmalloc's original request size
tracking is intended for debugging rather than production use.
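As an illustration only (assuming a 64-bit architecture, where both sizeof(unsigned long) and __alignof__(unsigned long) are 8), the space reserved for orig_size works out as follows:

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	/* Space reserved for the orig_size field before and after the patch. */
	printf("before: %zu bytes\n", sizeof(unsigned int));                                /* 4 */
	printf("after : %lu bytes\n", ALIGN(sizeof(unsigned int), sizeof(unsigned long)));  /* 8 */
	return 0;
}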
Cc: stable(a)vger.kernel.org
Fixes: 6edf2576a6cc ("mm/slub: enable debugging memory wasting of kmalloc")
Acked-by: Andrey Konovalov <andreyknvl(a)gmail.com>
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
---
v1 -> v2:
- Added Andrey's Acked-by.
- Added references to HAVE_64BIT_ALIGNED_ACCESS and the commit that
resurrected it.
- Used __alignof__() instead of sizeof(), as suggested by Pedro (off-list).
Note: either __alignof__ or sizeof() produces the exactly same mm/slub.o
files, so there's no functional difference.
Thanks!
mm/kasan/kasan.h | 4 ++--
mm/slub.c | 16 +++++++++++-----
2 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 129178be5e64..b86b6e9f456a 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -265,7 +265,7 @@ struct kasan_alloc_meta {
struct kasan_track alloc_track;
/* Free track is stored in kasan_free_meta. */
depot_stack_handle_t aux_stack[2];
-};
+} __aligned(__alignof__(unsigned long));
struct qlist_node {
struct qlist_node *next;
@@ -289,7 +289,7 @@ struct qlist_node {
struct kasan_free_meta {
struct qlist_node quarantine_link;
struct kasan_track free_track;
-};
+} __aligned(__alignof__(unsigned long));
#endif /* CONFIG_KASAN_GENERIC */
diff --git a/mm/slub.c b/mm/slub.c
index a585d0ac45d4..462a39d57b3a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -344,7 +344,7 @@ struct track {
int cpu; /* Was running on cpu */
int pid; /* Pid context */
unsigned long when; /* When did the operation occur */
-};
+} __aligned(__alignof__(unsigned long));
enum track_item { TRACK_ALLOC, TRACK_FREE };
@@ -1196,7 +1196,7 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
off += 2 * sizeof(struct track);
if (slub_debug_orig_size(s))
- off += sizeof(unsigned int);
+ off += ALIGN(sizeof(unsigned int), __alignof__(unsigned long));
off += kasan_metadata_size(s, false);
@@ -1392,7 +1392,8 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
off += 2 * sizeof(struct track);
if (s->flags & SLAB_KMALLOC)
- off += sizeof(unsigned int);
+ off += ALIGN(sizeof(unsigned int),
+ __alignof__(unsigned long));
}
off += kasan_metadata_size(s, false);
@@ -7820,9 +7821,14 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
*/
size += 2 * sizeof(struct track);
- /* Save the original kmalloc request size */
+ /*
+ * Save the original kmalloc request size.
+ * Although the request size is an unsigned int,
+ * make sure that is aligned to word boundary.
+ */
if (flags & SLAB_KMALLOC)
- size += sizeof(unsigned int);
+ size += ALIGN(sizeof(unsigned int),
+ __alignof__(unsigned long));
}
#endif
--
2.43.0
From: Ian Abbott <abbotti(a)mev.co.uk>
commit 3cd212e895ca2d58963fdc6422502b10dd3966bb upstream.
syzbot reports a KMSAN kernel-infoleak in `do_insn_ioctl()`. A kernel
buffer is allocated to hold `insn->n` samples (each of which is an
`unsigned int`). For some instruction types, `insn->n` samples are
copied back to user-space, unless an error code is being returned. The
problem is that not all the instruction handlers that need to return
data to userspace fill in the whole `insn->n` samples, so that there is
an information leak. There is a similar syzbot report for
`do_insnlist_ioctl()`, although it does not have a reproducer for it at
the time of writing.
One culprit is `insn_rw_emulate_bits()` which is used as the handler for
`INSN_READ` or `INSN_WRITE` instructions for subdevices that do not have
a specific handler for that instruction, but do have an `INSN_BITS`
handler. For `INSN_READ` it only fills in at most 1 sample, so if
`insn->n` is greater than 1, the remaining `insn->n - 1` samples copied
to userspace will be uninitialized kernel data.
Another culprit is `vm80xx_ai_insn_read()` in the "vm80xx" driver. It
never returns an error, even if it fails to fill the buffer.
Fix it in `do_insn_ioctl()` and `do_insnlist_ioctl()` by making sure
that uninitialized parts of the allocated buffer are zeroed before
handling each instruction.
Thanks to Arnaud Lecomte for their fix to `do_insn_ioctl()`. That fix
replaced the call to `kmalloc_array()` with `kcalloc()`, but it is not
always necessary to clear the whole buffer.
Fixes: ed9eccbe8970 ("Staging: add comedi core")
Reported-by: syzbot+a5e45f768aab5892da5d(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=a5e45f768aab5892da5d
Reported-by: syzbot+fb4362a104d45ab09cf9(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=fb4362a104d45ab09cf9
Cc: stable <stable(a)kernel.org> # 5.13+
Cc: Arnaud Lecomte <contact(a)arnaud-lcm.com>
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20250725125324.80276-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[Andrey Troshin: backport fix from drivers/comedi/comedi_fops.c to drivers/staging/comedi/comedi_fops.c]
Signed-off-by: Andrey Troshin <drtrosh(a)yandex-team.ru>
---
Backport fix for CVE-2025-39684
Link: https://nvd.nist.gov/vuln/detail/CVE-2025-39684
---
drivers/staging/comedi/comedi_fops.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 854b8bdc57a1..0af6e4a2fad9 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1582,6 +1582,9 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
memset(&data[n], 0, (MIN_SAMPLES - n) *
sizeof(unsigned int));
}
+ } else {
+ memset(data, 0, max_t(unsigned int, n, MIN_SAMPLES) *
+ sizeof(unsigned int));
}
ret = parse_insn(dev, insns + i, data, file);
if (ret < 0)
@@ -1665,6 +1668,8 @@ static int do_insn_ioctl(struct comedi_device *dev,
memset(&data[insn->n], 0,
(MIN_SAMPLES - insn->n) * sizeof(unsigned int));
}
+ } else {
+ memset(data, 0, n_data * sizeof(unsigned int));
}
ret = parse_insn(dev, insn, data, file);
if (ret < 0)
--
2.34.1
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
When the driver supports DMA, it enqueues four DMA descriptors per
substream before the substream is started. New descriptors are enqueued in
the DMA completion callback, and each time a new descriptor is queued, the
dma_buffer_pos is incremented.
During suspend, the DMA transactions are terminated. There might be cases
where the four extra enqueued DMA descriptors are not completed and are
instead canceled on suspend. However, the cancel operation does not take
into account that the dma_buffer_pos was already incremented.
Previously, the suspend code reinitialized dma_buffer_pos to zero, but this
is not always correct.
To avoid losing any audio periods during suspend/resume and to prevent
clipped sound, save the completed DMA buffer position in the DMA callback and
reinitialize dma_buffer_pos on resume.
Cc: stable(a)vger.kernel.org
Fixes: 1fc778f7c833a ("ASoC: renesas: rz-ssi: Add suspend to RAM support")
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
sound/soc/renesas/rz-ssi.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c
index e00940814157..81b883e8ac92 100644
--- a/sound/soc/renesas/rz-ssi.c
+++ b/sound/soc/renesas/rz-ssi.c
@@ -85,6 +85,7 @@ struct rz_ssi_stream {
struct snd_pcm_substream *substream;
int fifo_sample_size; /* sample capacity of SSI FIFO */
int dma_buffer_pos; /* The address for the next DMA descriptor */
+ int completed_dma_buf_pos; /* The address of the last completed DMA descriptor. */
int period_counter; /* for keeping track of periods transferred */
int sample_width;
int buffer_pos; /* current frame position in the buffer */
@@ -215,6 +216,7 @@ static void rz_ssi_stream_init(struct rz_ssi_stream *strm,
rz_ssi_set_substream(strm, substream);
strm->sample_width = samples_to_bytes(runtime, 1);
strm->dma_buffer_pos = 0;
+ strm->completed_dma_buf_pos = 0;
strm->period_counter = 0;
strm->buffer_pos = 0;
@@ -437,6 +439,10 @@ static void rz_ssi_pointer_update(struct rz_ssi_stream *strm, int frames)
snd_pcm_period_elapsed(strm->substream);
strm->period_counter = current_period;
}
+
+ strm->completed_dma_buf_pos += runtime->period_size;
+ if (strm->completed_dma_buf_pos >= runtime->buffer_size)
+ strm->completed_dma_buf_pos = 0;
}
static int rz_ssi_pio_recv(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm)
@@ -778,10 +784,14 @@ static int rz_ssi_dma_request(struct rz_ssi_priv *ssi, struct device *dev)
return -ENODEV;
}
-static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi)
+static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm)
{
+ struct snd_pcm_substream *substream = strm->substream;
+ struct snd_pcm_runtime *runtime = substream->runtime;
int ret;
+ strm->dma_buffer_pos = strm->completed_dma_buf_pos + runtime->period_size;
+
if (rz_ssi_is_stream_running(&ssi->playback) ||
rz_ssi_is_stream_running(&ssi->capture))
return 0;
@@ -794,16 +804,6 @@ static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi)
ssi->hw_params_cache.channels);
}
-static void rz_ssi_streams_suspend(struct rz_ssi_priv *ssi)
-{
- if (rz_ssi_is_stream_running(&ssi->playback) ||
- rz_ssi_is_stream_running(&ssi->capture))
- return;
-
- ssi->playback.dma_buffer_pos = 0;
- ssi->capture.dma_buffer_pos = 0;
-}
-
static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd,
struct snd_soc_dai *dai)
{
@@ -813,7 +813,7 @@ static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd,
switch (cmd) {
case SNDRV_PCM_TRIGGER_RESUME:
- ret = rz_ssi_trigger_resume(ssi);
+ ret = rz_ssi_trigger_resume(ssi, strm);
if (ret)
return ret;
@@ -852,7 +852,6 @@ static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd,
case SNDRV_PCM_TRIGGER_SUSPEND:
rz_ssi_stop(ssi, strm);
- rz_ssi_streams_suspend(ssi);
break;
case SNDRV_PCM_TRIGGER_STOP:
--
2.43.0
The future move of pin-init to `syn` uncovers the following broken
intra-doc link:
error: unresolved link to `crate::pin_init`
--> rust/kernel/sync/condvar.rs:39:40
|
39 | /// instances is with the [`pin_init`](crate::pin_init!) and [`new_condvar`] macros.
| ^^^^^^^^^^^^^^^^ no item named `pin_init` in module `kernel`
|
= note: `-D rustdoc::broken-intra-doc-links` implied by `-D warnings`
= help: to override `-D warnings` add `#[allow(rustdoc::broken_intra_doc_links)]`
Currently, when rendered, the link points to a literal `crate::pin_init!`
URL.
Thus fix it.
Cc: stable(a)vger.kernel.org
Fixes: 129e97be8e28 ("rust: pin-init: fix documentation links")
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
---
rust/kernel/sync/condvar.rs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs
index c6ec64295c9f..aa5b9a7a726d 100644
--- a/rust/kernel/sync/condvar.rs
+++ b/rust/kernel/sync/condvar.rs
@@ -36,7 +36,7 @@ macro_rules! new_condvar {
/// spuriously.
///
/// Instances of [`CondVar`] need a lock class and to be pinned. The recommended way to create such
-/// instances is with the [`pin_init`](crate::pin_init!) and [`new_condvar`] macros.
+/// instances is with the [`pin_init`](pin_init::pin_init!) and [`new_condvar`] macros.
///
/// # Examples
///
base-commit: dcb6fa37fd7bc9c3d2b066329b0d27dedf8becaa
--
2.51.0
[ Upstream commit 83b0177a6c48 ]
[ Patch for 6.1.y, 6.6.y, 6.12.y trees ]
To avoid racing with should_flush_tlb, setting loaded_mm to
LOADED_MM_SWITCHING must happen before reading tlb_gen.
This patch differs from the upstream fix since the ordering issue in
stable trees is different: here, the relevant code blocks are in the
wrong order due to a bad rebase earlier, so the fix is to reorder
them.
Signed-off-by: Stephen Dolan <sdolan(a)janestreet.com>
Acked-by: Dave Hansen <dave.hansen(a)intel.com>
Link: https://lore.kernel.org/lkml/CAHDw0oGd0B4=uuv8NGqbUQ_ZVmSheU2bN70e4QhFXWvuA…
---
arch/x86/mm/tlb.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8629d90fdcd9..ed182831063c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -606,6 +606,14 @@ void switch_mm_irqs_off(struct mm_struct *unused,
struct mm_struct *next,
*/
cond_mitigation(tsk);
+ /*
+ * Indicate that CR3 is about to change. nmi_uaccess_okay()
+ * and others are sensitive to the window where mm_cpumask(),
+ * CR3 and cpu_tlbstate.loaded_mm are not all in sync.
+ */
+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ barrier();
+
/*
* Stop remote flushes for the previous mm.
* Skip kernel threads; we never send init_mm TLB
flushing IPIs,
@@ -623,14 +631,6 @@ void switch_mm_irqs_off(struct mm_struct *unused,
struct mm_struct *next,
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
-
- /*
- * Indicate that CR3 is about to change. nmi_uaccess_okay()
- * and others are sensitive to the window where mm_cpumask(),
- * CR3 and cpu_tlbstate.loaded_mm are not all in sync.
- */
- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
- barrier();
}
new_lam = mm_lam_cr3_mask(next);
--
2.43.7
From: Edward Adam Davis <eadavis(a)qq.com>
commit 96cb948408b3adb69df7e451ba7da9d21f814d00 upstream.
The reproducer passed in an irq number (0x80008000) that was too large,
which triggered the out-of-bounds access.
Added an interrupt number check to prevent users from passing in an irq
number that was too large.
If `it->options[1]` is 31, then `1 << it->options[1]` is still invalid
because it shifts a 1-bit into the sign bit (which is UB in C).
Possible solutions include reducing the upper bound on the
`it->options[1]` value to 30 or lower, or using `1U << it->options[1]`.
The old code would just not attempt to request the IRQ if the
`options[1]` value were invalid. And it would still configure the
device without interrupts even if the call to `request_irq` returned an
error. So it would be better to combine this test with the test below.
Fixes: fff46207245c ("staging: comedi: pcl726: enable the interrupt support code")
Cc: stable <stable(a)kernel.org> # 5.13+
Reported-by: syzbot+5cd373521edd68bebcb3(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=5cd373521edd68bebcb3
Tested-by: syzbot+5cd373521edd68bebcb3(a)syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis(a)qq.com>
Reviewed-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/tencent_3C66983CC1369E962436264A50759176BF09@qq.c…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[Andrey Troshin: backport fix from drivers/comedi/drivers/pcl726.c to drivers/staging/comedi/drivers/pcl726.c]
Signed-off-by: Andrey Troshin <drtrosh(a)yandex-team.ru>
---
Backport fix for CVE-2025-39685
Link: https://nvd.nist.gov/vuln/detail/CVE-2025-39685
---
drivers/staging/comedi/drivers/pcl726.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/comedi/drivers/pcl726.c b/drivers/staging/comedi/drivers/pcl726.c
index 64eb649c9813..f26d7f07d749 100644
--- a/drivers/staging/comedi/drivers/pcl726.c
+++ b/drivers/staging/comedi/drivers/pcl726.c
@@ -327,7 +327,8 @@ static int pcl726_attach(struct comedi_device *dev,
* Hook up the external trigger source interrupt only if the
* user config option is valid and the board supports interrupts.
*/
- if (it->options[1] && (board->irq_mask & (1 << it->options[1]))) {
+ if (it->options[1] > 0 && it->options[1] < 16 &&
+ (board->irq_mask & (1U << it->options[1]))) {
ret = request_irq(it->options[1], pcl726_interrupt, 0,
dev->board_name, dev);
if (ret == 0) {
--
2.34.1
The future move of pin-init to `syn` uncovers the following private
intra-doc link:
error: public documentation for `Devres` links to private item `Self::inner`
--> rust/kernel/devres.rs:106:7
|
106 | /// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only.
| ^^^^^^^^^^^ this item is private
|
= note: this link will resolve properly if you pass `--document-private-items`
= note: `-D rustdoc::private-intra-doc-links` implied by `-D warnings`
= help: to override `-D warnings` add `#[allow(rustdoc::private_intra_doc_links)]`
Currently, when rendered, the link points to "nowhere" (an inexistent
anchor for a "method").
Thus fix it.
Cc: stable(a)vger.kernel.org
Fixes: f5d3ef25d238 ("rust: devres: get rid of Devres' inner Arc")
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
---
rust/kernel/devres.rs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs
index 10a6a1789854..2392c281459e 100644
--- a/rust/kernel/devres.rs
+++ b/rust/kernel/devres.rs
@@ -103,7 +103,7 @@ struct Inner<T: Send> {
///
/// # Invariants
///
-/// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only.
+/// `Self::inner` is guaranteed to be initialized and is always accessed read-only.
#[pin_data(PinnedDrop)]
pub struct Devres<T: Send> {
dev: ARef<Device>,
base-commit: dcb6fa37fd7bc9c3d2b066329b0d27dedf8becaa
--
2.51.0
The padding field in the structure was previously reserved to
maintain a stable interface for potential new fields, ensuring
compatibility with user-space shared data structures.
However, it was accidentally removed by tiantao in a prior commit,
which may lead to incompatibility between user space and the kernel.
This patch reinstates the padding to restore the original structure
layout and preserve compatibility.
Fixes: 8ddde07a3d28 ("dma-mapping: benchmark: extract a common header file for map_benchmark definition")
Cc: stable(a)vger.kernel.org
Acked-by: Barry Song <baohua(a)kernel.org>
Signed-off-by: Qinxin Xia <xiaqinxin(a)huawei.com>
---
include/linux/map_benchmark.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h
index 62674c83bde4..48e2ff95332f 100644
--- a/include/linux/map_benchmark.h
+++ b/include/linux/map_benchmark.h
@@ -27,5 +27,6 @@ struct map_benchmark {
__u32 dma_dir; /* DMA data direction */
__u32 dma_trans_ns; /* time for DMA transmission in ns */
__u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
+ __u8 expansion[76]; /* For future use */
};
#endif /* _KERNEL_DMA_BENCHMARK_H */
--
2.33.0
On s390 systems, which use a machine level hypervisor, PCI devices are
always accessed through a form of PCI pass-through which fundamentally
operates on a per PCI function granularity. This is also reflected in the
s390 PCI hotplug driver which creates hotplug slots for individual PCI
functions. Its reset_slot() function, which is a wrapper for
zpci_hot_reset_device(), thus also resets individual functions.
Currently, the kernel's PCI_SLOT() macro assigns the same pci_slot object
to multifunction devices. This approach worked fine on s390 systems that
only exposed virtual functions as individual PCI domains to the operating
system. Since commit 44510d6fa0c0 ("s390/pci: Handling multifunctions")
s390 supports exposing the topology of multifunction PCI devices by
grouping them in a shared PCI domain. When attempting to reset a function
through the hotplug driver, the shared slot assignment causes the wrong
function to be reset instead of the intended one. It also leaks memory as
we do create a pci_slot object for the function, but don't correctly free
it in pci_slot_release().
Add a flag for struct pci_slot to allow per function PCI slots for
functions managed through a hypervisor, which exposes individual PCI
functions while retaining the topology.
Fixes: 44510d6fa0c0 ("s390/pci: Handling multifunctions")
Cc: stable(a)vger.kernel.org
Suggested-by: Niklas Schnelle <schnelle(a)linux.ibm.com>
Signed-off-by: Farhan Ali <alifm(a)linux.ibm.com>
---
drivers/pci/pci.c | 5 +++--
drivers/pci/slot.c | 25 ++++++++++++++++++++++---
include/linux/pci.h | 1 +
3 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b14dd064006c..36ee38e0d817 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4980,8 +4980,9 @@ static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, bool probe)
static int pci_dev_reset_slot_function(struct pci_dev *dev, bool probe)
{
- if (dev->multifunction || dev->subordinate || !dev->slot ||
- dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)
+ if (dev->subordinate || !dev->slot ||
+ dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
+ (dev->multifunction && !dev->slot->per_func_slot))
return -ENOTTY;
return pci_reset_hotplug_slot(dev->slot->hotplug, probe);
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 50fb3eb595fe..ed10fa3ae727 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -63,6 +63,22 @@ static ssize_t cur_speed_read_file(struct pci_slot *slot, char *buf)
return bus_speed_read(slot->bus->cur_bus_speed, buf);
}
+static bool pci_dev_matches_slot(struct pci_dev *dev, struct pci_slot *slot)
+{
+ if (slot->per_func_slot)
+ return dev->devfn == slot->number;
+
+ return PCI_SLOT(dev->devfn) == slot->number;
+}
+
+static bool pci_slot_enabled_per_func(void)
+{
+ if (IS_ENABLED(CONFIG_S390))
+ return true;
+
+ return false;
+}
+
static void pci_slot_release(struct kobject *kobj)
{
struct pci_dev *dev;
@@ -73,7 +89,7 @@ static void pci_slot_release(struct kobject *kobj)
down_read(&pci_bus_sem);
list_for_each_entry(dev, &slot->bus->devices, bus_list)
- if (PCI_SLOT(dev->devfn) == slot->number)
+ if (pci_dev_matches_slot(dev, slot))
dev->slot = NULL;
up_read(&pci_bus_sem);
@@ -166,7 +182,7 @@ void pci_dev_assign_slot(struct pci_dev *dev)
mutex_lock(&pci_slot_mutex);
list_for_each_entry(slot, &dev->bus->slots, list)
- if (PCI_SLOT(dev->devfn) == slot->number)
+ if (pci_dev_matches_slot(dev, slot))
dev->slot = slot;
mutex_unlock(&pci_slot_mutex);
}
@@ -265,6 +281,9 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
slot->bus = pci_bus_get(parent);
slot->number = slot_nr;
+ if (pci_slot_enabled_per_func())
+ slot->per_func_slot = 1;
+
slot->kobj.kset = pci_slots_kset;
slot_name = make_slot_name(name);
@@ -285,7 +304,7 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
down_read(&pci_bus_sem);
list_for_each_entry(dev, &parent->devices, bus_list)
- if (PCI_SLOT(dev->devfn) == slot_nr)
+ if (pci_dev_matches_slot(dev, slot))
dev->slot = slot;
up_read(&pci_bus_sem);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d1fdf81fbe1e..6ad194597ab5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -78,6 +78,7 @@ struct pci_slot {
struct list_head list; /* Node in list of slots */
struct hotplug_slot *hotplug; /* Hotplug info (move here) */
unsigned char number; /* PCI_SLOT(pci_dev->devfn) */
+ unsigned int per_func_slot:1; /* Allow per function slot */
struct kobject kobj;
};
--
2.43.0
Commit 56a06bd40fab ("virtio_net: enable gso over UDP tunnel support.")
switches to checking the alignment of virtio_net_hdr_v1_hash_tunnel on
transmission even if the feature is not negotiated. This causes a serious
performance degradation in pktgen, as skb->data can't satisfy the
alignment requirement due to the increased header size, so virtio-net
must prepare at least 2 sgs with indirect descriptors, which introduces
overhead in the device.
Fix this by calculating the header alignment during probe, so that when
tunnel GSO is not negotiated the alignment check can be less strict.
Pktgen in guest + XDP_DROP on TAP + vhost_net shows the TX PPS is
recovered from 2.4Mpps to 4.45Mpps.
Note that we still need a way to recover the performance when tunnel
gso is enabled, probably a new vnet header format.
Fixes: 56a06bd40fab ("virtio_net: enable gso over UDP tunnel support.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jason Wang <jasowang(a)redhat.com>
---
drivers/net/virtio_net.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 31bd32bdecaf..5b851df749c0 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -441,6 +441,9 @@ struct virtnet_info {
/* Packet virtio header size */
u8 hdr_len;
+ /* header alignment */
+ size_t hdr_align;
+
/* Work struct for delayed refilling if we run low on memory. */
struct delayed_work refill;
@@ -3308,8 +3311,9 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
can_push = vi->any_header_sg &&
- !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
+ !((unsigned long)skb->data & (vi->hdr_align - 1)) &&
!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
+
/* Even if we can, don't push here yet as this would skew
* csum_start offset below. */
if (can_push)
@@ -6926,15 +6930,20 @@ static int virtnet_probe(struct virtio_device *vdev)
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) ||
- virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO))
+ virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) {
vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel);
- else if (vi->has_rss_hash_report)
+ vi->hdr_align = __alignof__(struct virtio_net_hdr_v1_hash_tunnel);
+ } else if (vi->has_rss_hash_report) {
vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
- else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
- virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+ vi->hdr_align = __alignof__(struct virtio_net_hdr_v1_hash);
+ } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
+ virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- else
+ vi->hdr_align = __alignof__(struct virtio_net_hdr_mrg_rxbuf);
+ } else {
vi->hdr_len = sizeof(struct virtio_net_hdr);
+ vi->hdr_align = __alignof__(struct virtio_net_hdr);
+ }
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM))
vi->rx_tnl_csum = true;
--
2.31.1
The function mtk_dp_dt_parse() calls of_graph_get_endpoint_by_regs()
to get the endpoint device node, but fails to call of_node_put() to release
the reference when the function returns. This results in a device node
reference leak.
Fix this by adding the missing of_node_put() call before returning from
the function.
Found via static analysis and code review.
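For reference, the balanced get/put pattern the fix applies looks roughly like this (simplified sketch with a hypothetical helper, not the mtk_dp code):

static int parse_data_lanes(struct device *dev)
{
	struct device_node *endpoint;
	int len;

	endpoint = of_graph_get_endpoint_by_regs(dev->of_node, 1, -1);
	len = of_property_count_elems_of_size(endpoint, "data-lanes", sizeof(u32));
	of_node_put(endpoint);	/* drop the reference taken by the lookup; NULL is a no-op */

	return len;
}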
Fixes: f70ac097a2cf ("drm/mediatek: Add MT8195 Embedded DisplayPort driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/gpu/drm/mediatek/mtk_dp.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c
index bef6eeb30d3e..b0b1e158600f 100644
--- a/drivers/gpu/drm/mediatek/mtk_dp.c
+++ b/drivers/gpu/drm/mediatek/mtk_dp.c
@@ -2087,6 +2087,7 @@ static int mtk_dp_dt_parse(struct mtk_dp *mtk_dp,
endpoint = of_graph_get_endpoint_by_regs(pdev->dev.of_node, 1, -1);
len = of_property_count_elems_of_size(endpoint,
"data-lanes", sizeof(u32));
+ of_node_put(endpoint);
if (len < 0 || len > 4 || len == 3) {
dev_err(dev, "invalid data lane size: %d\n", len);
return -EINVAL;
--
2.39.5 (Apple Git-154)
of_graph_get_endpoint_by_regs() gets a reference to the endpoint node
to read the "bus-width" property but fails to call of_node_put()
to release the reference, causing a reference count leak.
Add the missing of_node_put() call to fix this.
Found via static analysis and code review.
Fixes: d284ccd8588c ("drm/bridge: sii902x: Set input bus format based on bus-width")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/gpu/drm/bridge/sii902x.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c
index d537b1d036fb..3a247ac3c7dd 100644
--- a/drivers/gpu/drm/bridge/sii902x.c
+++ b/drivers/gpu/drm/bridge/sii902x.c
@@ -1189,8 +1189,10 @@ static int sii902x_probe(struct i2c_client *client)
sii902x->bus_width = 24;
endpoint = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1);
- if (endpoint)
+ if (endpoint) {
of_property_read_u32(endpoint, "bus-width", &sii902x->bus_width);
+ of_node_put(endpoint);
+ }
endpoint = of_graph_get_endpoint_by_regs(dev->of_node, 1, -1);
if (endpoint) {
--
2.39.5 (Apple Git-154)
The function samsung_dsim_parse_dt() calls of_graph_get_endpoint_by_regs()
to get the endpoint device node, but fails to call of_node_put() to release
the reference when the function returns. This results in a device node
reference leak.
Fix this by adding the missing of_node_put() call before returning from
the function.
Found via static analysis and code review.
Fixes: 77169a11d4e9 ("drm/bridge: samsung-dsim: add driver support for exynos7870 DSIM bridge")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/gpu/drm/bridge/samsung-dsim.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index eabc4c32f6ab..1a5acd5077ad 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -2086,6 +2086,7 @@ static int samsung_dsim_parse_dt(struct samsung_dsim *dsi)
if (lane_polarities[1])
dsi->swap_dn_dp_data = true;
}
+ of_node_put(endpoint);
return 0;
}
--
2.39.5 (Apple Git-154)
The bus_find_device_by_name() function returns a device pointer with an
incremented reference count, but the original code was missing put_device()
calls in some return paths, leading to reference count leaks.
Fix this by ensuring put_device() is called before function exit after
bus_find_device_by_name() succeeds.
This follows the same pattern used elsewhere in the kernel where
bus_find_device_by_name() is properly paired with put_device().
Found via static analysis and code review.
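A minimal sketch of that pairing (hypothetical helper, not the actual soc_sdw_utils code):

static int sdw_dev_check_example(const char *sdw_codec_name)
{
	struct device *sdw_dev;
	int ret = 0;

	sdw_dev = bus_find_device_by_name(&sdw_bus_type, NULL, sdw_codec_name);
	if (!sdw_dev)
		return -EPROBE_DEFER;

	/* ... inspect the device; every exit path from here must drop the reference ... */

	put_device(sdw_dev);	/* balance the reference taken by bus_find_device_by_name() */
	return ret;
}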
Fixes: 4f8ef33dd44a ("ASoC: soc_sdw_utils: skip the endpoint that doesn't present")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
sound/soc/sdw_utils/soc_sdw_utils.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c
index 270c66b90228..ea594f84f11a 100644
--- a/sound/soc/sdw_utils/soc_sdw_utils.c
+++ b/sound/soc/sdw_utils/soc_sdw_utils.c
@@ -1278,7 +1278,7 @@ static int is_sdca_endpoint_present(struct device *dev,
struct sdw_slave *slave;
struct device *sdw_dev;
const char *sdw_codec_name;
- int i;
+ int ret, i;
dlc = kzalloc(sizeof(*dlc), GFP_KERNEL);
if (!dlc)
@@ -1308,13 +1308,16 @@ static int is_sdca_endpoint_present(struct device *dev,
}
slave = dev_to_sdw_dev(sdw_dev);
- if (!slave)
- return -EINVAL;
+ if (!slave) {
+ ret = -EINVAL;
+ goto put_device;
+ }
/* Make sure BIOS provides SDCA properties */
if (!slave->sdca_data.interface_revision) {
dev_warn(&slave->dev, "SDCA properties not found in the BIOS\n");
- return 1;
+ ret = 1;
+ goto put_device;
}
for (i = 0; i < slave->sdca_data.num_functions; i++) {
@@ -1323,7 +1326,8 @@ static int is_sdca_endpoint_present(struct device *dev,
if (dai_type == dai_info->dai_type) {
dev_dbg(&slave->dev, "DAI type %d sdca function %s found\n",
dai_type, slave->sdca_data.function[i].name);
- return 1;
+ ret = 1;
+ goto put_device;
}
}
@@ -1331,7 +1335,11 @@ static int is_sdca_endpoint_present(struct device *dev,
"SDCA device function for DAI type %d not supported, skip endpoint\n",
dai_info->dai_type);
- return 0;
+ ret = 0;
+
+put_device:
+ put_device(sdw_dev);
+ return ret;
}
int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card,
--
2.39.5 (Apple Git-154)
If SMT is disabled or a partial SMT state is enabled, when a new kernel
image is loaded for kexec, on reboot the following warning is observed:
kexec: Waking offline cpu 228.
WARNING: CPU: 0 PID: 9062 at arch/powerpc/kexec/core_64.c:223 kexec_prepare_cpus+0x1b0/0x1bc
[snip]
NIP kexec_prepare_cpus+0x1b0/0x1bc
LR kexec_prepare_cpus+0x1a0/0x1bc
Call Trace:
kexec_prepare_cpus+0x1a0/0x1bc (unreliable)
default_machine_kexec+0x160/0x19c
machine_kexec+0x80/0x88
kernel_kexec+0xd0/0x118
__do_sys_reboot+0x210/0x2c4
system_call_exception+0x124/0x320
system_call_vectored_common+0x15c/0x2ec
This occurs as add_cpu() fails due to cpu_bootable() returning false for
CPUs that fail the cpu_smt_thread_allowed() check, or for non-primary
threads if SMT is disabled.
Fix the issue by enabling SMT and resetting the number of SMT threads to
the number of threads per core, before attempting to wake up all present
CPUs.
Fixes: 38253464bc82 ("cpu/SMT: Create topology_smt_thread_allowed()")
Reported-by: Sachin P Bappalige <sachinpb(a)linux.ibm.com>
Cc: stable(a)vger.kernel.org # v6.6+
Signed-off-by: Nysal Jan K.A. <nysal(a)linux.ibm.com>
---
arch/powerpc/kexec/core_64.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 222aa326dace..ff6df43720c4 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -216,6 +216,11 @@ static void wake_offline_cpus(void)
{
int cpu = 0;
+ lock_device_hotplug();
+ cpu_smt_num_threads = threads_per_core;
+ cpu_smt_control = CPU_SMT_ENABLED;
+ unlock_device_hotplug();
+
for_each_present_cpu(cpu) {
if (!cpu_online(cpu)) {
printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
--
2.51.0
From: Hao Ge <gehao(a)kylinos.cn>
When alloc_slab_obj_exts() fails and then later succeeds in allocating
a slab extension vector, it calls handle_failed_objexts_alloc() to
mark all objects in the vector as empty. As a result all objects in
this slab (slabA) will have their extensions set to CODETAG_EMPTY.
Later on if this slabA is used to allocate a slabobj_ext vector for
another slab (slabB), we end up with the slabB->obj_exts pointing to a
slabobj_ext vector that itself has a non-NULL slabobj_ext equal to
CODETAG_EMPTY. When slabB gets freed, free_slab_obj_exts() is called
to free slabB->obj_exts vector. free_slab_obj_exts() calls
mark_objexts_empty(slabB->obj_exts) which will generate a warning
because it expects slabobj_ext vectors to have a NULL obj_ext, not
CODETAG_EMPTY.
Modify mark_objexts_empty() to skip the warning and setting the
obj_ext value if it's already set to CODETAG_EMPTY.
Fixes: 09c46563ff6d ("codetag: debug: introduce OBJEXTS_ALLOC_FAIL to mark failed slab_ext allocations")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hao Ge <gehao(a)kylinos.cn>
---
v2: Update the commit message and code comments for greater accuracy,
incorporating Suren's suggestions.
Thanks for Suren's help.
---
mm/slub.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/slub.c b/mm/slub.c
index d4367f25b20d..589c596163c4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2046,7 +2046,11 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts)
if (slab_exts) {
unsigned int offs = obj_to_index(obj_exts_slab->slab_cache,
obj_exts_slab, obj_exts);
- /* codetag should be NULL */
+
+ if (unlikely(is_codetag_empty(&slab_exts[offs].ref)))
+ return;
+
+ /* codetag should be NULL here */
WARN_ON(slab_exts[offs].ref.ct);
set_codetag_empty(&slab_exts[offs].ref);
}
--
2.25.1
The patch titled
Subject: codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for slabobj_ext
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
codetag-debug-handle-existing-codetag_empty-in-mark_objexts_empty-for-slabobj_ext.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Hao Ge <gehao(a)kylinos.cn>
Subject: codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for slabobj_ext
Date: Wed, 29 Oct 2025 09:43:17 +0800
When alloc_slab_obj_exts() fails and then later succeeds in allocating a
slab extension vector, it calls handle_failed_objexts_alloc() to mark all
objects in the vector as empty. As a result all objects in this slab
(slabA) will have their extensions set to CODETAG_EMPTY.
Later on if this slabA is used to allocate a slabobj_ext vector for
another slab (slabB), we end up with the slabB->obj_exts pointing to a
slabobj_ext vector that itself has a non-NULL slabobj_ext equal to
CODETAG_EMPTY. When slabB gets freed, free_slab_obj_exts() is called to
free slabB->obj_exts vector.
free_slab_obj_exts() calls mark_objexts_empty(slabB->obj_exts) which will
generate a warning because it expects slabobj_ext vectors to have a NULL
obj_ext, not CODETAG_EMPTY.
Modify mark_objexts_empty() to skip the warning and setting the obj_ext
value if it's already set to CODETAG_EMPTY.
To quickly detect this WARN, I modified the code from
WARN_ON(slab_exts[offs].ref.ct) to WARN_ON(slab_exts[offs].ref.ct == 1);
we then obtained this message:
[21630.898561] ------------[ cut here ]------------
[21630.898596] kernel BUG at mm/slub.c:2050!
[21630.898611] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
[21630.900372] Modules linked in: squashfs isofs vfio_iommu_type1
vhost_vsock vfio vhost_net vmw_vsock_virtio_transport_common vhost tap
vhost_iotlb iommufd vsock binfmt_misc nfsv3 nfs_acl nfs lockd grace
netfs tls rds dns_resolver tun brd overlay ntfs3 exfat btrfs
blake2b_generic xor xor_neon raid6_pq loop sctp ip6_udp_tunnel
udp_tunnel nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib
nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct
nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4
nf_tables rfkill ip_set sunrpc vfat fat joydev sg sch_fq_codel nfnetlink
virtio_gpu sr_mod cdrom drm_client_lib virtio_dma_buf drm_shmem_helper
drm_kms_helper drm ghash_ce backlight virtio_net virtio_blk virtio_scsi
net_failover virtio_console failover virtio_mmio dm_mirror
dm_region_hash dm_log dm_multipath dm_mod fuse i2c_dev virtio_pci
virtio_pci_legacy_dev virtio_pci_modern_dev virtio virtio_ring autofs4
aes_neon_bs aes_ce_blk [last unloaded: hwpoison_inject]
[21630.909177] CPU: 3 UID: 0 PID: 3787 Comm: kylin-process-m Kdump: loaded Tainted: G        W          6.18.0-rc1+ #74 PREEMPT(voluntary)
[21630.910495] Tainted: [W]=WARN
[21630.910867] Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
[21630.911625] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[21630.912392] pc : __free_slab+0x228/0x250
[21630.912868] lr : __free_slab+0x18c/0x250
[21630.913334] sp : ffff8000a02f73e0
[21630.913830] x29: ffff8000a02f73e0 x28: fffffdffc43fc800 x27: ffff0000c0011c40
[21630.914677] x26: ffff0000c000cac0 x25: ffff00010fe5e5f0 x24: ffff000102199b40
[21630.915469] x23: 0000000000000003 x22: 0000000000000003 x21: ffff0000c0011c40
[21630.916259] x20: fffffdffc4086600 x19: fffffdffc43fc800 x18: 0000000000000000
[21630.917048] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[21630.917837] x14: 0000000000000000 x13: 0000000000000000 x12: ffff70001405ee66
[21630.918640] x11: 1ffff0001405ee65 x10: ffff70001405ee65 x9 : ffff800080a295dc
[21630.919442] x8 : ffff8000a02f7330 x7 : 0000000000000000 x6 : 0000000000003000
[21630.920232] x5 : 0000000024924925 x4 : 0000000000000001 x3 : 0000000000000007
[21630.921021] x2 : 0000000000001b40 x1 : 000000000000001f x0 : 0000000000000001
[21630.921810] Call trace:
[21630.922130]  __free_slab+0x228/0x250 (P)
[21630.922669]  free_slab+0x38/0x118
[21630.923079]  free_to_partial_list+0x1d4/0x340
[21630.923591]  __slab_free+0x24c/0x348
[21630.924024]  ___cache_free+0xf0/0x110
[21630.924468]  qlist_free_all+0x78/0x130
[21630.924922]  kasan_quarantine_reduce+0x114/0x148
[21630.925525]  __kasan_slab_alloc+0x7c/0xb0
[21630.926006]  kmem_cache_alloc_noprof+0x164/0x5c8
[21630.926699]  __alloc_object+0x44/0x1f8
[21630.927153]  __create_object+0x34/0xc8
[21630.927604]  kmemleak_alloc+0xb8/0xd8
[21630.928052]  kmem_cache_alloc_noprof+0x368/0x5c8
[21630.928606]  getname_flags.part.0+0xa4/0x610
[21630.929112]  getname_flags+0x80/0xd8
[21630.929557]  vfs_fstatat+0xc8/0xe0
[21630.929975]  __do_sys_newfstatat+0xa0/0x100
[21630.930469]  __arm64_sys_newfstatat+0x90/0xd8
[21630.931046]  invoke_syscall+0xd4/0x258
[21630.931685]  el0_svc_common.constprop.0+0xb4/0x240
[21630.932467]  do_el0_svc+0x48/0x68
[21630.932972]  el0_svc+0x40/0xe0
[21630.933472]  el0t_64_sync_handler+0xa0/0xe8
[21630.934151]  el0t_64_sync+0x1ac/0x1b0
[21630.934923] Code: aa1803e0 97ffef2b a9446bf9 17ffff9c (d4210000)
[21630.936461] SMP: stopping secondary CPUs
[21630.939550] Starting crashdump kernel...
[21630.940108] Bye!
Link: https://lkml.kernel.org/r/20251029014317.1533488-1-hao.ge@linux.dev
Fixes: 09c46563ff6d ("codetag: debug: introduce OBJEXTS_ALLOC_FAIL to mark failed slab_ext allocations")
Signed-off-by: Hao Ge <gehao(a)kylinos.cn>
Cc: Christoph Lameter (Ampere) <cl(a)gentwo.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: gehao <gehao(a)kylinos.cn>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slub.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/mm/slub.c~codetag-debug-handle-existing-codetag_empty-in-mark_objexts_empty-for-slabobj_ext
+++ a/mm/slub.c
@@ -2046,7 +2046,11 @@ static inline void mark_objexts_empty(st
if (slab_exts) {
unsigned int offs = obj_to_index(obj_exts_slab->slab_cache,
obj_exts_slab, obj_exts);
- /* codetag should be NULL */
+
+ if (unlikely(is_codetag_empty(&slab_exts[offs].ref)))
+ return;
+
+ /* codetag should be NULL here */
WARN_ON(slab_exts[offs].ref.ct);
set_codetag_empty(&slab_exts[offs].ref);
}
_
Patches currently in -mm which might be from gehao(a)kylinos.cn are
codetag-debug-handle-existing-codetag_empty-in-mark_objexts_empty-for-slabobj_ext.patch
--
The patch titled
Subject: mm/mremap: honour writable bit in mremap pte batching
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-mremap-honour-writable-bit-in-mremap-pte-batching.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Dev Jain <dev.jain(a)arm.com>
Subject: mm/mremap: honour writable bit in mremap pte batching
Date: Tue, 28 Oct 2025 12:09:52 +0530
Currently, mremap folio pte batching ignores the writable bit when
determining a set of similar ptes mapping the same folio. Suppose the first
pte of the batch is writable while the others are not - set_ptes will end
up setting the writable bit on the other ptes, which is a violation of
mremap semantics. Therefore, use FPB_RESPECT_WRITE to check the writable
bit while determining the pte batch.
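For intuition, here is a small user-space sketch (purely illustrative, not the kernel's folio_pte_batch_flags()) of how a batch built while ignoring the write bit lets the first pte's permission leak onto the rest, whereas a FPB_RESPECT_WRITE-style check stops the batch at the first mismatch:

#include <stdio.h>
#include <stdbool.h>

/* Toy pte: only the bits needed for the illustration. */
#define PTE_WRITE	0x1u
#define PFN_SHIFT	1

typedef unsigned int pte_t;

static unsigned int pte_pfn(pte_t pte) { return pte >> PFN_SHIFT; }
static bool pte_write(pte_t pte) { return pte & PTE_WRITE; }

/* Batch length when the write bit is ignored (old behaviour). */
static int batch_ignore_write(const pte_t *ptes, int max_nr)
{
	int nr = 1;

	while (nr < max_nr && pte_pfn(ptes[nr]) == pte_pfn(ptes[0]) + nr)
		nr++;
	return nr;
}

/* Batch length when the write bit must match (FPB_RESPECT_WRITE-like). */
static int batch_respect_write(const pte_t *ptes, int max_nr)
{
	int nr = 1;

	while (nr < max_nr &&
	       pte_pfn(ptes[nr]) == pte_pfn(ptes[0]) + nr &&
	       pte_write(ptes[nr]) == pte_write(ptes[0]))
		nr++;
	return nr;
}

int main(void)
{
	/* First pte writable, the rest write-protected. */
	pte_t ptes[4] = {
		(10 << PFN_SHIFT) | PTE_WRITE,
		(11 << PFN_SHIFT),
		(12 << PFN_SHIFT),
		(13 << PFN_SHIFT),
	};

	/*
	 * With the old batching all 4 ptes move as one batch, and a
	 * set_ptes()-style replay based on ptes[0] would mark all of them
	 * writable. Respecting the write bit limits the batch to 1.
	 */
	printf("ignore write:  batch=%d\n", batch_ignore_write(ptes, 4));
	printf("respect write: batch=%d\n", batch_respect_write(ptes, 4));
	return 0;
}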
Link: https://lkml.kernel.org/r/20251028063952.90313-1-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain(a)arm.com>
Fixes: f822a9a81a31 ("mm: optimize mremap() by PTE batching")
Reported-by: David Hildenbrand <david(a)redhat.com>
Debugged-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Pedro Falcato <pfalcato(a)suse.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Jann Horn <jannh(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [6.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mremap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/mremap.c~mm-mremap-honour-writable-bit-in-mremap-pte-batching
+++ a/mm/mremap.c
@@ -187,7 +187,7 @@ static int mremap_folio_pte_batch(struct
if (!folio || !folio_test_large(folio))
return 1;
- return folio_pte_batch(folio, ptep, pte, max_nr);
+ return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE);
}
static int move_ptes(struct pagetable_move_control *pmc,
_
Patches currently in -mm which might be from dev.jain(a)arm.com are
mm-mremap-honour-writable-bit-in-mremap-pte-batching.patch
The patch titled
Subject: gcov: add support for GCC 15
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
gcov-add-support-for-gcc-15.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Oberparleiter <oberpar(a)linux.ibm.com>
Subject: gcov: add support for GCC 15
Date: Tue, 28 Oct 2025 12:51:25 +0100
Using gcov on kernels compiled with GCC 15 results in truncated 16-byte
long .gcda files with no usable data. To fix this, update GCOV_COUNTERS
to match the value defined by GCC 15.
Tested with GCC 14.3.0 and GCC 15.2.0.
Link: https://lkml.kernel.org/r/20251028115125.1319410-1-oberpar@linux.ibm.com
Signed-off-by: Peter Oberparleiter <oberpar(a)linux.ibm.com>
Reported-by: Matthieu Baerts <matttbe(a)kernel.org>
Closes: https://github.com/linux-test-project/lcov/issues/445
Tested-by: Matthieu Baerts <matttbe(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/gcov/gcc_4_7.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/kernel/gcov/gcc_4_7.c~gcov-add-support-for-gcc-15
+++ a/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
#include <linux/mm.h>
#include "gcov.h"
-#if (__GNUC__ >= 14)
+#if (__GNUC__ >= 15)
+#define GCOV_COUNTERS 10
+#elif (__GNUC__ >= 14)
#define GCOV_COUNTERS 9
#elif (__GNUC__ >= 10)
#define GCOV_COUNTERS 8
_
Patches currently in -mm which might be from oberpar(a)linux.ibm.com are
gcov-add-support-for-gcc-15.patch
The patch titled
Subject: s390: fix HugeTLB vmemmap optimization crash
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
s390-fix-hugetlb-vmemmap-optimization-crash.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Luiz Capitulino <luizcap(a)redhat.com>
Subject: s390: fix HugeTLB vmemmap optimization crash
Date: Tue, 28 Oct 2025 17:15:33 -0400
A reproducible crash occurs when enabling HugeTLB vmemmap optimization
(HVO) on s390. The crash was reproduced and the fix developed on an s390
KVM guest running on an older hypervisor, as I don't have access to an
LPAR. However, the same issue should occur on bare metal.
Reproducer (it may take a few runs to trigger):
# sysctl vm.hugetlb_optimize_vmemmap=1
# echo 1 > /proc/sys/vm/nr_hugepages
# echo 0 > /proc/sys/vm/nr_hugepages
Crash log:
[ 52.340369] list_del corruption. prev->next should be 000000d382110008, but was 000000d7116d3880. (prev=000000d7116d3910)
[ 52.340420] ------------[ cut here ]------------
[ 52.340424] kernel BUG at lib/list_debug.c:62!
[ 52.340566] monitor event: 0040 ilc:2 [#1]SMP
[ 52.340573] Modules linked in: ctcm fsm qeth ccwgroup zfcp scsi_transport_fc qdio dasd_fba_mod dasd_eckd_mod dasd_mod xfs ghash_s390 prng des_s390 libdes sha3_512_s390 sha3_256_s390 virtio_net virtio_blk net_failover sha_common failover dm_mirror dm_region_hash dm_log dm_mod paes_s390 crypto_engine pkey_cca pkey_ep11 zcrypt pkey_pckmo pkey aes_s390
[ 52.340606] CPU: 1 UID: 0 PID: 1672 Comm: root-rep2 Kdump: loaded Not tainted 6.18.0-rc3 #1 NONE
[ 52.340610] Hardware name: IBM 3931 LA1 400 (KVM/Linux)
[ 52.340611] Krnl PSW : 0704c00180000000 0000015710cda7fe (__list_del_entry_valid_or_report+0xfe/0x128)
[ 52.340619] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
[ 52.340622] Krnl GPRS: c0000000ffffefff 0000000100000027 000000000000006d 0000000000000000
[ 52.340623] 000000d7116d35d8 000000d7116d35d0 0000000000000002 000000d7116d39b0
[ 52.340625] 000000d7116d3880 000000d7116d3910 000000d7116d3910 000000d382110008
[ 52.340626] 000003ffac1ccd08 000000d7116d39b0 0000015710cda7fa 000000d7116d37d0
[ 52.340632] Krnl Code: 0000015710cda7ee: c020003e496f larl %r2,00000157114a3acc
0000015710cda7f4: c0e5ffd5280e brasl %r14,000001571077f810
#0000015710cda7fa: af000000 mc 0,0
>0000015710cda7fe: b9040029 lgr %r2,%r9
0000015710cda802: c0e5ffe5e193 brasl %r14,0000015710996b28
0000015710cda808: e34090080004 lg %r4,8(%r9)
0000015710cda80e: b9040059 lgr %r5,%r9
0000015710cda812: b9040038 lgr %r3,%r8
[ 52.340643] Call Trace:
[ 52.340645] [<0000015710cda7fe>] __list_del_entry_valid_or_report+0xfe/0x128
[ 52.340649] ([<0000015710cda7fa>] __list_del_entry_valid_or_report+0xfa/0x128)
[ 52.340652] [<0000015710a30b2e>] hugetlb_vmemmap_restore_folios+0x96/0x138
[ 52.340655] [<0000015710a268ac>] update_and_free_pages_bulk+0x64/0x150
[ 52.340659] [<0000015710a26f8a>] set_max_huge_pages+0x4ca/0x6f0
[ 52.340662] [<0000015710a273ba>] hugetlb_sysctl_handler_common+0xea/0x120
[ 52.340665] [<0000015710a27484>] hugetlb_sysctl_handler+0x44/0x50
[ 52.340667] [<0000015710b53ffa>] proc_sys_call_handler+0x17a/0x280
[ 52.340672] [<0000015710a90968>] vfs_write+0x2c8/0x3a0
[ 52.340676] [<0000015710a90bd2>] ksys_write+0x72/0x100
[ 52.340679] [<00000157111483a8>] __do_syscall+0x150/0x318
[ 52.340682] [<0000015711153a5e>] system_call+0x6e/0x90
[ 52.340684] Last Breaking-Event-Address:
[ 52.340684] [<000001571077f85c>] _printk+0x4c/0x58
[ 52.340690] Kernel panic - not syncing: Fatal exception: panic_on_oops
This issue was introduced by commit f13b83fdd996 ("hugetlb: batch TLB
flushes when freeing vmemmap"). Before that change, the HVO
implementation called flush_tlb_kernel_range() each time a vmemmap PMD
split and remapping was performed. The mentioned commit changed this to
issue a few flush_tlb_all() calls after performing all remappings.
However, on s390, flush_tlb_kernel_range() expands to __tlb_flush_kernel()
while flush_tlb_all() is not implemented. As a result, we went from
flushing the TLB for every remapping to no flushing at all.
Fix this by implementing flush_tlb_all() on s390 as an alias for
__tlb_flush_global(). This should flush all TLB entries on all CPUs, as
the flush_tlb_all() semantics expect.
Link: https://lkml.kernel.org/r/20251028211533.47694-1-luizcap@redhat.com
Fixes: f13b83fdd996 ("hugetlb: batch TLB flushes when freeing vmemmap")
Signed-off-by: Luiz Capitulino <luizcap(a)redhat.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)kernel.org>
Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Joao Martins <joao.m.martins(a)oracle.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/s390/include/asm/tlbflush.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/arch/s390/include/asm/tlbflush.h~s390-fix-hugetlb-vmemmap-optimization-crash
+++ a/arch/s390/include/asm/tlbflush.h
@@ -103,9 +103,13 @@ static inline void __tlb_flush_mm_lazy(s
* flush_tlb_range functions need to do the flush.
*/
#define flush_tlb() do { } while (0)
-#define flush_tlb_all() do { } while (0)
#define flush_tlb_page(vma, addr) do { } while (0)
+static inline void flush_tlb_all(void)
+{
+ __tlb_flush_global();
+}
+
static inline void flush_tlb_mm(struct mm_struct *mm)
{
__tlb_flush_mm_lazy(mm);
_
Patches currently in -mm which might be from luizcap(a)redhat.com are
s390-fix-hugetlb-vmemmap-optimization-crash.patch
The patch titled
Subject: codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for slabobj_ext
has been added to the -mm mm-new branch. Its filename is
codetag-debug-handle-existing-codetag_empty-in-mark_objexts_empty-for-slabobj_ext.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Hao Ge <gehao(a)kylinos.cn>
Subject: codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for slabobj_ext
Date: Mon, 27 Oct 2025 16:52:14 +0800
Even though obj_exts was created with the __GFP_NO_OBJ_EXT flag, objects
in the same slab may have their extensions allocated via
alloc_slab_obj_exts, and handle_failed_objexts_alloc may be called within
alloc_slab_obj_exts to set their codetag to CODETAG_EMPTY.
Therefore, both NULL and CODETAG_EMPTY are valid for the codetag of a
slabobj_ext, and there is no need to re-set it to CODETAG_EMPTY if it is
already CODETAG_EMPTY. This also resolves the warning triggered when the
codetag is CODETAG_EMPTY during slab freeing.
This issue also occurred during our memory stress testing.
The likely sequence of events is as follows:
When a slab allocates a slabobj_ext, the slab to which this slabobj_ext
belongs may have already allocated its own slabobj_ext
and called handle_failed_objexts_alloc. That is to say, the codetag of
this slabobj_ext has been set to CODETAG_EMPTY.
To quickly detect this WARN, I modified the code from
WARN_ON(slab_exts[offs].ref.ct) to
WARN_ON(slab_exts[offs].ref.ct == 1);
We then obtained this message:
[21630.898561] ------------[ cut here ]------------
[21630.898596] kernel BUG at mm/slub.c:2050!
[21630.898611] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
[21630.900372] Modules linked in: squashfs isofs vfio_iommu_type1
vhost_vsock vfio vhost_net vmw_vsock_virtio_transport_common vhost tap
vhost_iotlb iommufd vsock binfmt_misc nfsv3 nfs_acl nfs lockd grace
netfs tls rds dns_resolver tun brd overlay ntfs3 exfat btrfs
blake2b_generic xor xor_neon raid6_pq loop sctp ip6_udp_tunnel
udp_tunnel nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib
nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct
nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4
nf_tables rfkill ip_set sunrpc vfat fat joydev sg sch_fq_codel nfnetlink
virtio_gpu sr_mod cdrom drm_client_lib virtio_dma_buf drm_shmem_helper
drm_kms_helper drm ghash_ce backlight virtio_net virtio_blk virtio_scsi
net_failover virtio_console failover virtio_mmio dm_mirror
dm_region_hash dm_log dm_multipath dm_mod fuse i2c_dev virtio_pci
virtio_pci_legacy_dev virtio_pci_modern_dev virtio virtio_ring autofs4
aes_neon_bs aes_ce_blk [last unloaded: hwpoison_inject]
[21630.909177] CPU: 3 UID: 0 PID: 3787 Comm: kylin-process-m Kdump: loaded Tainted: G        W          6.18.0-rc1+ #74 PREEMPT(voluntary)
[21630.910495] Tainted: [W]=WARN
[21630.910867] Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
[21630.911625] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[21630.912392] pc : __free_slab+0x228/0x250
[21630.912868] lr : __free_slab+0x18c/0x250
[21630.913334] sp : ffff8000a02f73e0
[21630.913830] x29: ffff8000a02f73e0 x28: fffffdffc43fc800 x27: ffff0000c0011c40
[21630.914677] x26: ffff0000c000cac0 x25: ffff00010fe5e5f0 x24: ffff000102199b40
[21630.915469] x23: 0000000000000003 x22: 0000000000000003 x21: ffff0000c0011c40
[21630.916259] x20: fffffdffc4086600 x19: fffffdffc43fc800 x18: 0000000000000000
[21630.917048] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[21630.917837] x14: 0000000000000000 x13: 0000000000000000 x12: ffff70001405ee66
[21630.918640] x11: 1ffff0001405ee65 x10: ffff70001405ee65 x9 : ffff800080a295dc
[21630.919442] x8 : ffff8000a02f7330 x7 : 0000000000000000 x6 : 0000000000003000
[21630.920232] x5 : 0000000024924925 x4 : 0000000000000001 x3 : 0000000000000007
[21630.921021] x2 : 0000000000001b40 x1 : 000000000000001f x0 : 0000000000000001
[21630.921810] Call trace:
[21630.922130]  __free_slab+0x228/0x250 (P)
[21630.922669]  free_slab+0x38/0x118
[21630.923079]  free_to_partial_list+0x1d4/0x340
[21630.923591]  __slab_free+0x24c/0x348
[21630.924024]  ___cache_free+0xf0/0x110
[21630.924468]  qlist_free_all+0x78/0x130
[21630.924922]  kasan_quarantine_reduce+0x114/0x148
[21630.925525]  __kasan_slab_alloc+0x7c/0xb0
[21630.926006]  kmem_cache_alloc_noprof+0x164/0x5c8
[21630.926699]  __alloc_object+0x44/0x1f8
[21630.927153]  __create_object+0x34/0xc8
[21630.927604]  kmemleak_alloc+0xb8/0xd8
[21630.928052]  kmem_cache_alloc_noprof+0x368/0x5c8
[21630.928606]  getname_flags.part.0+0xa4/0x610
[21630.929112]  getname_flags+0x80/0xd8
[21630.929557]  vfs_fstatat+0xc8/0xe0
[21630.929975]  __do_sys_newfstatat+0xa0/0x100
[21630.930469]  __arm64_sys_newfstatat+0x90/0xd8
[21630.931046]  invoke_syscall+0xd4/0x258
[21630.931685]  el0_svc_common.constprop.0+0xb4/0x240
[21630.932467]  do_el0_svc+0x48/0x68
[21630.932972]  el0_svc+0x40/0xe0
[21630.933472]  el0t_64_sync_handler+0xa0/0xe8
[21630.934151]  el0t_64_sync+0x1ac/0x1b0
[21630.934923] Code: aa1803e0 97ffef2b a9446bf9 17ffff9c (d4210000)
[21630.936461] SMP: stopping secondary CPUs
[21630.939550] Starting crashdump kernel...
[21630.940108] Bye!
Link: https://lkml.kernel.org/r/20251027085214.184672-1-hao.ge@linux.dev
Fixes: 09c46563ff6d ("codetag: debug: introduce OBJEXTS_ALLOC_FAIL to mark failed slab_ext allocations")
Signed-off-by: Hao Ge <gehao(a)kylinos.cn>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Christoph Lameter (Ampere) <cl(a)gentwo.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slub.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
--- a/mm/slub.c~codetag-debug-handle-existing-codetag_empty-in-mark_objexts_empty-for-slabobj_ext
+++ a/mm/slub.c
@@ -2046,7 +2046,17 @@ static inline void mark_objexts_empty(st
if (slab_exts) {
unsigned int offs = obj_to_index(obj_exts_slab->slab_cache,
obj_exts_slab, obj_exts);
- /* codetag should be NULL */
+
+ /*
+ * codetag should be either NULL or CODETAG_EMPTY.
+ * When the same slab calls handle_failed_objexts_alloc,
+ * it will set us to CODETAG_EMPTY.
+ *
+ * If codetag is already CODETAG_EMPTY, no action is needed here.
+ */
+ if (unlikely(is_codetag_empty(&slab_exts[offs].ref)))
+ return;
+
WARN_ON(slab_exts[offs].ref.ct);
set_codetag_empty(&slab_exts[offs].ref);
}
_
Patches currently in -mm which might be from gehao(a)kylinos.cn are
codetag-debug-handle-existing-codetag_empty-in-mark_objexts_empty-for-slabobj_ext.patch
The patch titled
Subject: mm/mm_init: fix hash table order logging in alloc_large_system_hash()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-mm_init-fix-hash-table-order-logging-in-alloc_large_system_hash.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Subject: mm/mm_init: fix hash table order logging in alloc_large_system_hash()
Date: Tue, 28 Oct 2025 12:10:12 -0700
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from log
base 2 of the allocation size. This is not correct if the allocation size
is smaller than a page, and yields a negative value for the order as seen
below:
TCP established hash table entries: 32 (order: -4, 256 bytes, linear)
TCP bind hash table entries: 32 (order: -2, 1024 bytes, linear)
Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is smaller
than a page:
TCP established hash table entries: 32 (order: 0, 256 bytes, linear)
TCP bind hash table entries: 32 (order: 0, 1024 bytes, linear)
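A quick user-space check of the arithmetic (assuming 4 KiB pages, i.e. PAGE_SHIFT = 12; ilog2() and get_order() are re-implemented here only for the illustration):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* floor(log2(n)), like the kernel's ilog2() for runtime values */
static int ilog2_ul(unsigned long n)
{
	int log = -1;

	while (n) {
		n >>= 1;
		log++;
	}
	return log;
}

/* Smallest order such that (PAGE_SIZE << order) >= size, like get_order(). */
static int order_of(unsigned long size)
{
	int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long sizes[] = { 256, 1024, PAGE_SIZE, 8 * PAGE_SIZE };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned long size = sizes[i];

		printf("size=%5lu  ilog2(size)-PAGE_SHIFT=%3d  get_order(size)=%d\n",
		       size, ilog2_ul(size) - PAGE_SHIFT, order_of(size));
	}
	return 0;
}

For size = 256 this prints -4 for the old expression and 0 for get_order(), matching the log lines above.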
Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mm_init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/mm_init.c~mm-mm_init-fix-hash-table-order-logging-in-alloc_large_system_hash
+++ a/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(con
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
_
Patches currently in -mm which might be from isaacmanjarres(a)google.com are
mm-mm_init-fix-hash-table-order-logging-in-alloc_large_system_hash.patch
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102058-citadel-crinkly-125f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger(a)amd.com>
Date: Fri, 10 Oct 2025 12:08:35 -0500
Subject: [PATCH] x86/resctrl: Fix miscount of bandwidth event when
reactivating previously unavailable RMID
Users can create as many monitoring groups as the number of RMIDs supported
by the hardware. However, on AMD systems, only a limited number of RMIDs
are guaranteed to be actively tracked by the hardware. RMIDs that exceed
this limit are placed in an "Unavailable" state.
When a bandwidth counter is read for such an RMID, the hardware sets
MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked
again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable
remains set on first read after tracking re-starts and is clear on all
subsequent reads as long as the RMID is tracked.
resctrl miscounts the bandwidth events after an RMID transitions from the
"Unavailable" state back to being tracked. This happens because when the
hardware starts counting again after resetting the counter to zero, resctrl
in turn compares the new count against the counter value stored from the
previous time the RMID was tracked.
This results in resctrl computing an event value that is either undercounting
(when new counter is more than stored counter) or a mistaken overflow (when
new counter is less than stored counter).
Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to
zero whenever the RMID is in the "Unavailable" state to ensure accurate
counting after the RMID resets to zero when it starts to be tracked again.
Example scenario that results in mistaken overflow
==================================================
1. The resctrl filesystem is mounted, and a task is assigned to a
monitoring group.
$mount -t resctrl resctrl /sys/fs/resctrl
$mkdir /sys/fs/resctrl/mon_groups/test1/
$echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
21323 <- Total bytes on domain 0
"Unavailable" <- Total bytes on domain 1
Task is running on domain 0. Counter on domain 1 is "Unavailable".
2. The task runs on domain 0 for a while and then moves to domain 1. The
counter starts incrementing on domain 1.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
7345357 <- Total bytes on domain 0
4545 <- Total bytes on domain 1
3. At some point, the RMID in domain 0 transitions to the "Unavailable"
state because the task is no longer executing in that domain.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
"Unavailable" <- Total bytes on domain 0
434341 <- Total bytes on domain 1
4. Since the task continues to migrate between domains, it may eventually
return to domain 0.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
17592178699059 <- Overflow on domain 0
3232332 <- Total bytes on domain 1
In this case, the RMID on domain 0 transitions from "Unavailable" state to
active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when
the counter is read and begins tracking the RMID counting from 0.
Subsequent reads succeed but return a value smaller than the previously
saved MSR value (7345357). Consequently, resctrl's overflow logic is
triggered: it compares the previous value (7345357) with the new, smaller
value and incorrectly interprets this as a counter overflow, adding a large
delta.
In reality, this is a false positive: the counter did not overflow but was
simply reset when the RMID transitioned from "Unavailable" back to active
state.
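A small user-space model of the delta computation may help here (this is not the resctrl code; the 44-bit counter width and the structure layout are assumptions made only for the illustration):

#include <stdio.h>
#include <stdint.h>

/* Toy model of MBM chunk accounting against a saved prev_msr value. */
#define MBM_CNTR_WIDTH 44			/* assumed counter width */
#define MBM_CNTR_MASK  ((1ULL << MBM_CNTR_WIDTH) - 1)

struct mbm_state {
	uint64_t prev_msr;	/* last raw MSR_IA32_QM_CTR value seen */
	uint64_t chunks;	/* accumulated chunks reported to the user */
};

/* Overflow-correcting delta: assumes the counter only moves forward. */
static void mbm_update(struct mbm_state *s, uint64_t msr_val)
{
	uint64_t delta = (msr_val - s->prev_msr) & MBM_CNTR_MASK;

	s->chunks += delta;
	s->prev_msr = msr_val;
}

int main(void)
{
	struct mbm_state buggy = { 0, 0 };

	mbm_update(&buggy, 7345357);		/* RMID actively tracked */
	/*
	 * RMID goes "Unavailable", the hardware resets its counter, and the
	 * next successful read returns a small value again. Without clearing
	 * prev_msr the wrap-around math produces a huge bogus delta:
	 */
	mbm_update(&buggy, 4545);
	printf("without reset: chunks=%llu\n", (unsigned long long)buggy.chunks);

	/* With the fix: prev_msr is reset to 0 on an "Unavailable" read. */
	struct mbm_state fixed = { 0, 0 };
	mbm_update(&fixed, 7345357);
	fixed.prev_msr = 0;			/* read returned "Unavailable" */
	mbm_update(&fixed, 4545);
	printf("with reset:    chunks=%llu\n", (unsigned long long)fixed.chunks);
	return 0;
}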
Here is the text from APM [1] available from [2].
"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the
first QM_CTR read when it begins tracking an RMID that it was not
previously tracking. The U bit will be zero for all subsequent reads from
that RMID while it is still tracked by the hardware. Therefore, a QM_CTR
read with the U bit set when that RMID is in use by a processor can be
considered 0 when calculating the difference with a subsequent read."
[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming
Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory
Bandwidth (MBM).
[ bp: Split commit message into smaller paragraph chunks for better
consumption. ]
Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature")
Signed-off-by: Babu Moger <babu.moger(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Tested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Cc: stable(a)vger.kernel.org # needs adjustments for <= v6.17
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2]
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index c8945610d455..2cd25a0d4637 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *ignored)
{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
int cpu = cpumask_any(&d->hdr.cpu_mask);
+ struct arch_mbm_state *am;
u64 msr_val;
u32 prmid;
int ret;
@@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
prmid = logical_rmid_to_physical_rmid(cpu, rmid);
ret = __rmid_read_phys(prmid, eventid, &msr_val);
- if (ret)
- return ret;
- *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+ if (!ret) {
+ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+ } else if (ret == -EINVAL) {
+ am = get_arch_mbm_state(hw_dom, rmid, eventid);
+ if (am)
+ am->prev_msr = 0;
+ }
- return 0;
+ return ret;
}
static int __cntr_id_read(u32 cntr_id, u64 *val)
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 388eff894d6bc5f921e9bfff0e4b0ab2684a96e9
Gitweb: https://git.kernel.org/tip/388eff894d6bc5f921e9bfff0e4b0ab2684a96e9
Author: Chang S. Bae <chang.seok.bae(a)intel.com>
AuthorDate: Mon, 09 Jun 2025 17:16:59 -07:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Tue, 28 Oct 2025 12:10:59 -07:00
x86/fpu: Ensure XFD state on signal delivery
Sean reported [1] the following splat when running KVM tests:
WARNING: CPU: 232 PID: 15391 at xfd_validate_state+0x65/0x70
Call Trace:
<TASK>
fpu__clear_user_states+0x9c/0x100
arch_do_signal_or_restart+0x142/0x210
exit_to_user_mode_loop+0x55/0x100
do_syscall_64+0x205/0x2c0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
Chao further identified [2] a reproducible scenario involving signal
delivery: a non-AMX task is preempted by an AMX-enabled task which
modifies the XFD MSR.
When the non-AMX task resumes and reloads XSTATE with init values,
a warning is triggered due to a mismatch between fpstate::xfd and the
CPU's current XFD state. fpu__clear_user_states() does not currently
re-synchronize the XFD state after such preemption.
Invoke xfd_update_state() which detects and corrects the mismatch if
there is a dynamic feature.
This also benefits the sigreturn path, as fpu__restore_sig() may call
fpu__clear_user_states() when the sigframe is inaccessible.
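Conceptually the fix reuses the usual write-only-on-mismatch synchronisation for the XFD MSR; a user-space model of that pattern is sketched below (this is not the kernel's xfd_update_state(), whose details differ, and the XFD bit value is only an example):

#include <stdio.h>
#include <stdint.h>

/* Toy model: a lazily-written per-CPU MSR shadow vs. a per-task value. */
static uint64_t cpu_xfd_shadow;		/* what the CPU's XFD MSR currently holds */

static void wrmsr_xfd(uint64_t val)
{
	cpu_xfd_shadow = val;
	printf("WRMSR XFD <- %#llx\n", (unsigned long long)val);
}

/* Like the sync step: write the MSR only when it does not match the task. */
static void xfd_sync(uint64_t task_xfd)
{
	if (cpu_xfd_shadow != task_xfd)
		wrmsr_xfd(task_xfd);
}

int main(void)
{
	uint64_t non_amx_task_xfd = 1ULL << 18;	/* AMX disarmed (example bit) */
	uint64_t amx_task_xfd = 0;		/* AMX task has XFD cleared */

	xfd_sync(non_amx_task_xfd);	/* non-AMX task runs */
	xfd_sync(amx_task_xfd);		/* preempted by an AMX task: MSR changes */

	/*
	 * Back on the non-AMX task: before reloading init XSTATE in
	 * fpu__clear_user_states(), the XFD MSR must be brought back in
	 * sync, otherwise the validation warning in the report fires.
	 */
	xfd_sync(non_amx_task_xfd);
	return 0;
}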
[ dhansen: minor changelog munging ]
Closes: https://lore.kernel.org/lkml/aDCo_SczQOUaB2rS@google.com [1]
Fixes: 672365477ae8a ("x86/fpu: Update XFD state where required")
Reported-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Chang S. Bae <chang.seok.bae(a)intel.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Reviewed-by: Chao Gao <chao.gao(a)intel.com>
Tested-by: Chao Gao <chao.gao(a)intel.com>
Link: https://lore.kernel.org/all/aDWbctO%2FRfTGiCg3@intel.com [2]
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20250610001700.4097-1-chang.seok.bae%40intel.com
---
arch/x86/kernel/fpu/core.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 1f71cc1..e88eacb 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -825,6 +825,9 @@ void fpu__clear_user_states(struct fpu *fpu)
!fpregs_state_valid(fpu, smp_processor_id()))
os_xrstor_supervisor(fpu->fpstate);
+ /* Ensure XFD state is in sync before reloading XSTATE */
+ xfd_update_state(fpu->fpstate);
+
/* Reset user states in registers. */
restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE);
The L1 substates support requires additional steps to work, namely:
- Proper handling of the CLKREQ# sideband signal. (It is mostly handled by
  hardware, but software still needs to set the clkreq fields in the
  PCIE_CLIENT_POWER_CON register to match the hardware implementation.)
- Program the frequency of the aux clock into the
  DSP_PCIE_PL_AUX_CLK_FREQ_OFF register. (During L1 substates the core_clk
  is turned off and the aux_clk is used instead.)
These steps are currently missing from the driver.
For more details, see section '18.6.6.4 L1 Substate' in the RK3568 TRM 1.1
Part 2, or section '11.6.6.4 L1 Substate' in the RK3588 TRM 1.0 Part 2.
While this has always been a problem when using e.g.
CONFIG_PCIEASPM_POWER_SUPERSAVE=y, or when modifying
/sys/bus/pci/devices/.../link/l1_2_aspm, the lacking driver support for L1
substates became more apparent after commit f3ac2ff14834 ("PCI/ASPM:
Enable all ClockPM and ASPM states for devicetree platforms"), which
enabled ASPM also for CONFIG_PCIEASPM_DEFAULT=y.
When using e.g. an NVMe drive connected to the PCIe controller, the
problem will be seen as:
nvme nvme0: controller is down; will reset: CSTS=0xffffffff, PCI_STATUS=0x10
nvme nvme0: Does your device have a faulty power saving mode enabled?
nvme nvme0: Try "nvme_core.default_ps_max_latency_us=0 pcie_aspm=off pcie_port_pm=off" and report a bug
Thus, prevent advertising L1 Substates support until proper driver support
is added.
Cc: stable(a)vger.kernel.org
Fixes: 0e898eb8df4e ("PCI: rockchip-dwc: Add Rockchip RK356X host controller driver")
Fixes: f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms")
Acked-by: Shawn Lin <shawn.lin(a)rock-chips.com>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
Changes since v2:
-Improve commit message (Bjorn)
drivers/pci/controller/dwc/pcie-dw-rockchip.c | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
index 3e2752c7dd09..84f882abbca5 100644
--- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -200,6 +200,25 @@ static bool rockchip_pcie_link_up(struct dw_pcie *pci)
return FIELD_GET(PCIE_LINKUP_MASK, val) == PCIE_LINKUP;
}
+/*
+ * See e.g. section '11.6.6.4 L1 Substate' in the RK3588 TRM V1.0 for the steps
+ * needed to support L1 substates. Currently, not a single rockchip platform
+ * performs these steps, so disable L1 substates until there is proper support.
+ */
+static void rockchip_pcie_disable_l1sub(struct dw_pcie *pci)
+{
+ u32 cap, l1subcap;
+
+ cap = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS);
+ if (cap) {
+ l1subcap = dw_pcie_readl_dbi(pci, cap + PCI_L1SS_CAP);
+ l1subcap &= ~(PCI_L1SS_CAP_L1_PM_SS | PCI_L1SS_CAP_ASPM_L1_1 |
+ PCI_L1SS_CAP_ASPM_L1_2 | PCI_L1SS_CAP_PCIPM_L1_1 |
+ PCI_L1SS_CAP_PCIPM_L1_2);
+ dw_pcie_writel_dbi(pci, cap + PCI_L1SS_CAP, l1subcap);
+ }
+}
+
static void rockchip_pcie_enable_l0s(struct dw_pcie *pci)
{
u32 cap, lnkcap;
@@ -264,6 +283,7 @@ static int rockchip_pcie_host_init(struct dw_pcie_rp *pp)
irq_set_chained_handler_and_data(irq, rockchip_pcie_intx_handler,
rockchip);
+ rockchip_pcie_disable_l1sub(pci);
rockchip_pcie_enable_l0s(pci);
return 0;
@@ -301,6 +321,7 @@ static void rockchip_pcie_ep_init(struct dw_pcie_ep *ep)
struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
enum pci_barno bar;
+ rockchip_pcie_disable_l1sub(pci);
rockchip_pcie_enable_l0s(pci);
rockchip_pcie_ep_hide_broken_ats_cap_rk3588(ep);
--
2.51.0
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102056-qualify-unopposed-be08@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger(a)amd.com>
Date: Fri, 10 Oct 2025 12:08:35 -0500
Subject: [PATCH] x86/resctrl: Fix miscount of bandwidth event when
reactivating previously unavailable RMID
Users can create as many monitoring groups as the number of RMIDs supported
by the hardware. However, on AMD systems, only a limited number of RMIDs
are guaranteed to be actively tracked by the hardware. RMIDs that exceed
this limit are placed in an "Unavailable" state.
When a bandwidth counter is read for such an RMID, the hardware sets
MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked
again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable
remains set on first read after tracking re-starts and is clear on all
subsequent reads as long as the RMID is tracked.
resctrl miscounts the bandwidth events after an RMID transitions from the
"Unavailable" state back to being tracked. This happens because when the
hardware starts counting again after resetting the counter to zero, resctrl
in turn compares the new count against the counter value stored from the
previous time the RMID was tracked.
This results in resctrl computing an event value that is either undercounting
(when new counter is more than stored counter) or a mistaken overflow (when
new counter is less than stored counter).
Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to
zero whenever the RMID is in the "Unavailable" state to ensure accurate
counting after the RMID resets to zero when it starts to be tracked again.
Example scenario that results in mistaken overflow
==================================================
1. The resctrl filesystem is mounted, and a task is assigned to a
monitoring group.
$mount -t resctrl resctrl /sys/fs/resctrl
$mkdir /sys/fs/resctrl/mon_groups/test1/
$echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
21323 <- Total bytes on domain 0
"Unavailable" <- Total bytes on domain 1
Task is running on domain 0. Counter on domain 1 is "Unavailable".
2. The task runs on domain 0 for a while and then moves to domain 1. The
counter starts incrementing on domain 1.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
7345357 <- Total bytes on domain 0
4545 <- Total bytes on domain 1
3. At some point, the RMID in domain 0 transitions to the "Unavailable"
state because the task is no longer executing in that domain.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
"Unavailable" <- Total bytes on domain 0
434341 <- Total bytes on domain 1
4. Since the task continues to migrate between domains, it may eventually
return to domain 0.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
17592178699059 <- Overflow on domain 0
3232332 <- Total bytes on domain 1
In this case, the RMID on domain 0 transitions from "Unavailable" state to
active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when
the counter is read and begins tracking the RMID counting from 0.
Subsequent reads succeed but return a value smaller than the previously
saved MSR value (7345357). Consequently, resctrl's overflow logic is
triggered: it compares the previous value (7345357) with the new, smaller
value and incorrectly interprets this as a counter overflow, adding a large
delta.
In reality, this is a false positive: the counter did not overflow but was
simply reset when the RMID transitioned from "Unavailable" back to active
state.
Here is the text from APM [1] available from [2].
"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the
first QM_CTR read when it begins tracking an RMID that it was not
previously tracking. The U bit will be zero for all subsequent reads from
that RMID while it is still tracked by the hardware. Therefore, a QM_CTR
read with the U bit set when that RMID is in use by a processor can be
considered 0 when calculating the difference with a subsequent read."
[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming
Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory
Bandwidth (MBM).
[ bp: Split commit message into smaller paragraph chunks for better
consumption. ]
Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature")
Signed-off-by: Babu Moger <babu.moger(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Tested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Cc: stable(a)vger.kernel.org # needs adjustments for <= v6.17
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2]
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index c8945610d455..2cd25a0d4637 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *ignored)
{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
int cpu = cpumask_any(&d->hdr.cpu_mask);
+ struct arch_mbm_state *am;
u64 msr_val;
u32 prmid;
int ret;
@@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
prmid = logical_rmid_to_physical_rmid(cpu, rmid);
ret = __rmid_read_phys(prmid, eventid, &msr_val);
- if (ret)
- return ret;
- *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+ if (!ret) {
+ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+ } else if (ret == -EINVAL) {
+ am = get_arch_mbm_state(hw_dom, rmid, eventid);
+ if (am)
+ am->prev_msr = 0;
+ }
- return 0;
+ return ret;
}
static int __cntr_id_read(u32 cntr_id, u64 *val)
From: Viken Dadhaniya <viken.dadhaniya(a)oss.qualcomm.com>
[ Upstream commit fc6a5b540c02d1ec624e4599f45a17f2941a5c00 ]
GENI UART driver currently supports only non-DFS (Dynamic Frequency
Scaling) mode for source frequency selection. However, to operate correctly
in DFS mode, the GENI SCLK register must be programmed with the appropriate
DFS index. Failing to do so can result in incorrect frequency selection.
Add support for Dynamic Frequency Scaling (DFS) mode in the GENI UART
driver by configuring the GENI_CLK_SEL register with the appropriate DFS
index. This ensures correct frequency selection when operating in DFS mode.
Replace the UART driver-specific logic for clock selection with the GENI
common driver function to obtain the desired frequency and corresponding
clock index. This improves maintainability and consistency across
GENI-based drivers.
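For reference, a stand-alone sketch of the divider math used by the new code path (the CLK_DIV_* values and the sampling rate below are assumptions mirroring the driver, made only for the illustration):

#include <stdio.h>

/* Assumed to mirror the GENI serial clock config field layout. */
#define CLK_DIV_SHFT	4
#define CLK_DIV_MSK	(0xfffu << CLK_DIV_SHFT)

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int baud = 115200;
	unsigned int sampling_rate = 32;	/* example; halved on newer QUP IP */
	unsigned long clk_rate = 7372800;	/* rate matched for the chosen DFS index */
	unsigned int desired = baud * sampling_rate;
	unsigned int clk_div = DIV_ROUND_UP(clk_rate, desired);

	/* Reject dividers that do not fit in the register field. */
	if (clk_div > (CLK_DIV_MSK >> CLK_DIV_SHFT)) {
		fprintf(stderr, "divider %u exceeds maximum\n", clk_div);
		return 1;
	}

	/* The driver then programs SER_CLK_EN | (clk_div << CLK_DIV_SHFT). */
	printf("desired=%u clk_rate=%lu clk_div=%u\n", desired, clk_rate, clk_div);
	return 0;
}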
Signed-off-by: Viken Dadhaniya <viken.dadhaniya(a)oss.qualcomm.com>
Link: https://lore.kernel.org/r/20250903063136.3015237-1-viken.dadhaniya@oss.qual…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
YES
- Fixes a real bug in DFS mode: The UART driver previously never
programmed the GENI DFS clock selection register, so on platforms
where the GENI core clock runs in Dynamic Frequency Scaling (DFS)
mode, UART could pick the wrong source clock and thus the wrong baud.
This change explicitly programs the DFS index so the selected source
frequency matches the computed divider.
- New write of the DFS index to the hardware register:
drivers/tty/serial/qcom_geni_serial.c:1306
- DFS clock select register and mask exist in the common header:
include/linux/soc/qcom/geni-se.h:85, include/linux/soc/qcom/geni-se.h:145
- Uses the common GENI clock-matching helper instead of ad‑hoc logic:
The patch replaces driver-local clock rounding/tolerance code with the
GENI core’s frequency matching routine, ensuring consistent clock
selection across GENI-based drivers and improving maintainability.
- New source frequency selection via common helper:
drivers/tty/serial/qcom_geni_serial.c:1270
- Common helper is present and exported in the GENI core:
drivers/soc/qcom/qcom-geni-se.c:720
- Maintains existing divisor programming and adds a safety check: The
driver still computes and programs the serial clock divider, now with
a guard to avoid overflow of the divider field.
- Divider computation and range check:
drivers/tty/serial/qcom_geni_serial.c:1277,
drivers/tty/serial/qcom_geni_serial.c:1279
- Divider write to both M/S clock cfg registers remains as before:
drivers/tty/serial/qcom_geni_serial.c:1303,
drivers/tty/serial/qcom_geni_serial.c:1304
- Consistency with other GENI drivers already using DFS index
programming: Other GENI protocol drivers (e.g., SPI) already program
`SE_GENI_CLK_SEL` with the index returned by the common helper, so
this change aligns UART with established practice and reduces risk.
- SPI uses the same pattern: drivers/spi/spi-geni-qcom.c:383,
drivers/spi/spi-geni-qcom.c:385–386
- Small, contained, and low-risk:
- Touches a single driver file with a localized change in clock setup.
- No ABI or architectural changes; relies on existing GENI core
helpers and headers.
- Additional register write is standard and used by other GENI
drivers; masks index with `CLK_SEL_MSK`
(include/linux/soc/qcom/geni-se.h:145) for safety.
- Includes defensive error handling if no matching clock level is
found and a divider overflow guard
(drivers/tty/serial/qcom_geni_serial.c:1271–1275,
drivers/tty/serial/qcom_geni_serial.c:1279–1281).
- User impact: Without this, UART on DFS-enabled platforms can run at an
incorrect baud, causing broken serial communication (including
console). The fix directly addresses that functional issue.
- Stable backport criteria:
- Fixes an important, user-visible bug (incorrect baud under DFS).
- Minimal and self-contained change, no new features or interfaces.
- Leverages existing, widely used GENI core APIs already present in
stable series.
Note: One minor nit in the debug print includes an extra newline before
`clk_idx`, but it’s harmless and does not affect functionality
(drivers/tty/serial/qcom_geni_serial.c:1284).
drivers/tty/serial/qcom_geni_serial.c | 92 ++++++---------------------
1 file changed, 21 insertions(+), 71 deletions(-)
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 81f385d900d06..ff401e331f1bb 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
+/*
+ * Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
/* Disable MMIO tracing to prevent excessive logging of unwanted MMIO traces */
#define __DISABLE_TRACE_MMIO__
@@ -1253,75 +1256,15 @@ static int qcom_geni_serial_startup(struct uart_port *uport)
return 0;
}
-static unsigned long find_clk_rate_in_tol(struct clk *clk, unsigned int desired_clk,
- unsigned int *clk_div, unsigned int percent_tol)
-{
- unsigned long freq;
- unsigned long div, maxdiv;
- u64 mult;
- unsigned long offset, abs_tol, achieved;
-
- abs_tol = div_u64((u64)desired_clk * percent_tol, 100);
- maxdiv = CLK_DIV_MSK >> CLK_DIV_SHFT;
- div = 1;
- while (div <= maxdiv) {
- mult = (u64)div * desired_clk;
- if (mult != (unsigned long)mult)
- break;
-
- offset = div * abs_tol;
- freq = clk_round_rate(clk, mult - offset);
-
- /* Can only get lower if we're done */
- if (freq < mult - offset)
- break;
-
- /*
- * Re-calculate div in case rounding skipped rates but we
- * ended up at a good one, then check for a match.
- */
- div = DIV_ROUND_CLOSEST(freq, desired_clk);
- achieved = DIV_ROUND_CLOSEST(freq, div);
- if (achieved <= desired_clk + abs_tol &&
- achieved >= desired_clk - abs_tol) {
- *clk_div = div;
- return freq;
- }
-
- div = DIV_ROUND_UP(freq, desired_clk);
- }
-
- return 0;
-}
-
-static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
- unsigned int sampling_rate, unsigned int *clk_div)
-{
- unsigned long ser_clk;
- unsigned long desired_clk;
-
- desired_clk = baud * sampling_rate;
- if (!desired_clk)
- return 0;
-
- /*
- * try to find a clock rate within 2% tolerance, then within 5%
- */
- ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, 2);
- if (!ser_clk)
- ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, 5);
-
- return ser_clk;
-}
-
static int geni_serial_set_rate(struct uart_port *uport, unsigned int baud)
{
struct qcom_geni_serial_port *port = to_dev_port(uport);
unsigned long clk_rate;
- unsigned int avg_bw_core;
+ unsigned int avg_bw_core, clk_idx;
unsigned int clk_div;
u32 ver, sampling_rate;
u32 ser_clk_cfg;
+ int ret;
sampling_rate = UART_OVERSAMPLING;
/* Sampling rate is halved for IP versions >= 2.5 */
@@ -1329,17 +1272,22 @@ static int geni_serial_set_rate(struct uart_port *uport, unsigned int baud)
if (ver >= QUP_SE_VERSION_2_5)
sampling_rate /= 2;
- clk_rate = get_clk_div_rate(port->se.clk, baud,
- sampling_rate, &clk_div);
- if (!clk_rate) {
- dev_err(port->se.dev,
- "Couldn't find suitable clock rate for %u\n",
- baud * sampling_rate);
+ ret = geni_se_clk_freq_match(&port->se, baud * sampling_rate, &clk_idx, &clk_rate, false);
+ if (ret) {
+ dev_err(port->se.dev, "Failed to find src clk for baud rate: %d ret: %d\n",
+ baud, ret);
+ return ret;
+ }
+
+ clk_div = DIV_ROUND_UP(clk_rate, baud * sampling_rate);
+ /* Check if calculated divider exceeds maximum allowed value */
+ if (clk_div > (CLK_DIV_MSK >> CLK_DIV_SHFT)) {
+ dev_err(port->se.dev, "Calculated clock divider %u exceeds maximum\n", clk_div);
return -EINVAL;
}
- dev_dbg(port->se.dev, "desired_rate = %u, clk_rate = %lu, clk_div = %u\n",
- baud * sampling_rate, clk_rate, clk_div);
+ dev_dbg(port->se.dev, "desired_rate = %u, clk_rate = %lu, clk_div = %u\n, clk_idx = %u\n",
+ baud * sampling_rate, clk_rate, clk_div, clk_idx);
uport->uartclk = clk_rate;
port->clk_rate = clk_rate;
@@ -1359,6 +1307,8 @@ static int geni_serial_set_rate(struct uart_port *uport, unsigned int baud)
writel(ser_clk_cfg, uport->membase + GENI_SER_M_CLK_CFG);
writel(ser_clk_cfg, uport->membase + GENI_SER_S_CLK_CFG);
+ /* Configure clock selection register with the selected clock index */
+ writel(clk_idx & CLK_SEL_MSK, uport->membase + SE_GENI_CLK_SEL);
return 0;
}
--
2.51.0
From: Sakari Ailus <sakari.ailus(a)linux.intel.com>
[ Upstream commit d9f866b2bb3eec38b3734f1fed325ec7c55ccdfa ]
fwnode_graph_get_next_subnode() may return fwnodes backed by ACPI
device nodes, and there has been no check that these devices are present
in the system, unlike in the OF fwnode backend.
In order to provide consistent behaviour towards callers,
add a check for device presence by introducing
a new function acpi_get_next_present_subnode(), used as the
get_next_child_node() fwnode operation that also checks device
node presence.
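As a usage illustration (a hypothetical consumer, not taken from the patch): a driver iterating child firmware nodes through the generic property API now skips ACPI child devices whose _STA reports them as absent:

#include <linux/device.h>
#include <linux/property.h>

/* Hypothetical consumer: count the child firmware nodes of a device. */
static unsigned int count_present_children(struct device *dev)
{
	struct fwnode_handle *child;
	unsigned int n = 0;

	/*
	 * fwnode_for_each_child_node() goes through the fwnode backend's
	 * get_next_child_node() op; on ACPI it now skips device nodes whose
	 * _STA says the device is not present, matching the OF behaviour.
	 */
	fwnode_for_each_child_node(dev_fwnode(dev), child)
		n++;

	return n;
}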
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas(a)ideasonboard.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron(a)huawei.com>
Link: https://patch.msgid.link/20251001102636.1272722-2-sakari.ailus@linux.intel.…
[ rjw: Kerneldoc comment and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
YES – this change fixes a real behavioural bug in the ACPI fwnode
backend and should go to stable.
- `drivers/acpi/property.c:1375` adds `acpi_get_next_present_subnode()`
so `.get_next_child_node` skips ACPI child devices whose `_STA` says
they are absent (`acpi_device_is_present()`), while still returning
data subnodes unchanged.
- The new helper is now wired into the ACPI fwnode ops
(`drivers/acpi/property.c:1731`), making generic helpers such as
`fwnode_get_next_child_node()` and macros like
`fwnode_for_each_child_node` (`include/linux/property.h:167`) behave
the same as the OF backend, which already filtered unavailable
children via `of_get_next_available_child()`
(`drivers/of/property.c:1070`).
- Several core helpers assume disabled endpoints never surface: e.g.
`fwnode_graph_get_endpoint_by_id()` in `drivers/base/property.c:1286`
promises to hide endpoints on disabled devices, and higher layers such
as `v4l2_fwnode_reference_get_int_prop()`
(`drivers/media/v4l2-core/v4l2-fwnode.c:1064`) iterate child nodes
without rechecking availability. On ACPI systems today they still see
powered-off devices, leading to asynchronous notifiers that wait
forever for hardware that can’t appear, or to bogus graph
enumerations. This patch closes that gap.
Risk is low: it only suppresses ACPI device nodes already known to be
absent, aligns behaviour with DT userspace expectations, and leaves data
nodes untouched. No extra dependencies are required, so the fix is self-
contained and appropriate for stable backporting. Suggest running
existing ACPI graph users (media/typec drivers) after backport to
confirm no regressions.
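As a rough usage illustration (not part of the patch; "dev" and
handle_child() are placeholders), a consumer iterating child nodes now
only sees present ACPI devices, matching what DT users already expect:

	struct fwnode_handle *child;

	/* On ACPI, children whose _STA reports them absent are now
	 * skipped by the backend, so no extra availability check is
	 * needed here (same as the OF behaviour). */
	fwnode_for_each_child_node(dev_fwnode(dev), child)
		handle_child(child);	/* placeholder callback */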
drivers/acpi/property.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index c086786fe84cb..d74678f0ba4af 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1357,6 +1357,28 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
return NULL;
}
+/*
+ * acpi_get_next_present_subnode - Return the next present child node handle
+ * @fwnode: Firmware node to find the next child node for.
+ * @child: Handle to one of the device's child nodes or a null handle.
+ *
+ * Like acpi_get_next_subnode(), but the device nodes returned by
+ * acpi_get_next_present_subnode() are guaranteed to be present.
+ *
+ * Returns: The fwnode handle of the next present sub-node.
+ */
+static struct fwnode_handle *
+acpi_get_next_present_subnode(const struct fwnode_handle *fwnode,
+ struct fwnode_handle *child)
+{
+ do {
+ child = acpi_get_next_subnode(fwnode, child);
+ } while (is_acpi_device_node(child) &&
+ !acpi_device_is_present(to_acpi_device_node(child)));
+
+ return child;
+}
+
/**
* acpi_node_get_parent - Return parent fwnode of this fwnode
* @fwnode: Firmware node whose parent to get
@@ -1701,7 +1723,7 @@ static int acpi_fwnode_irq_get(const struct fwnode_handle *fwnode,
.property_read_string_array = \
acpi_fwnode_property_read_string_array, \
.get_parent = acpi_node_get_parent, \
- .get_next_child_node = acpi_get_next_subnode, \
+ .get_next_child_node = acpi_get_next_present_subnode, \
.get_named_child_node = acpi_fwnode_get_named_child_node, \
.get_name = acpi_fwnode_get_name, \
.get_name_prefix = acpi_fwnode_get_name_prefix, \
--
2.51.0
Sean reported [1] the following splat when running KVM tests:
WARNING: CPU: 232 PID: 15391 at xfd_validate_state+0x65/0x70
Call Trace:
<TASK>
fpu__clear_user_states+0x9c/0x100
arch_do_signal_or_restart+0x142/0x210
exit_to_user_mode_loop+0x55/0x100
do_syscall_64+0x205/0x2c0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
Chao further identified [2] a reproducible scenario involving signal
delivery: a non-AMX task is preempted by an AMX-enabled task which
modifies the XFD MSR.
When the non-AMX task resumes and reloads XSTATE with init values,
a warning is triggered due to a mismatch between fpstate::xfd and the
CPU's current XFD state. fpu__clear_user_states() does not currently
re-synchronize the XFD state after such preemption.
Invoke xfd_update_state() which detects and corrects the mismatch if the
dynamic feature is enabled.
This also benefits the sigreturn path, as fpu__restore_sig() may call
fpu__clear_user_states() when the sigframe is inaccessible.
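For context, the helper added on this path is cheap for tasks with no
dynamic features in use. Its logic is roughly the following (a
paraphrased sketch from memory, not copied from the tree; the per-CPU
variable name is an assumption):

	/* Sketch: only touch MSR_IA32_XFD when the cached per-CPU value
	 * differs from the value expected by the task's fpstate. */
	static inline void xfd_update_state(struct fpstate *fpstate)
	{
		if (fpu_state_size_dynamic()) {
			u64 xfd = fpstate->xfd;

			if (__this_cpu_read(xfd_state) != xfd) {
				wrmsrl(MSR_IA32_XFD, xfd);
				__this_cpu_write(xfd_state, xfd);
			}
		}
	}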
Fixes: 672365477ae8a ("x86/fpu: Update XFD state where required")
Reported-by: Sean Christopherson <seanjc(a)google.com>
Closes: https://lore.kernel.org/lkml/aDCo_SczQOUaB2rS@google.com [1]
Tested-by: Chao Gao <chao.gao(a)intel.com>
Signed-off-by: Chang S. Bae <chang.seok.bae(a)intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/aDWbctO%2FRfTGiCg3@intel.com [2]
---
arch/x86/kernel/fpu/core.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ea138583dd92..5fa782a2ae7c 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -800,6 +800,9 @@ void fpu__clear_user_states(struct fpu *fpu)
!fpregs_state_valid(fpu, smp_processor_id()))
os_xrstor_supervisor(fpu->fpstate);
+ /* Ensure XFD state is in sync before reloading XSTATE */
+ xfd_update_state(fpu->fpstate);
+
/* Reset user states in registers. */
restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE);
--
2.48.1
Currently, on ASUS projects, the TAS2781 codec attaches the speaker GPIO
to the first tasdevice_priv instance using devm. This causes
tas2781_read_acpi to fail on subsequent probes since the GPIO is already
managed by the first device. As a result, probing fails on the Xbox Ally X,
which has two amplifiers, and the Xbox Ally and Xbox Ally X cannot both be
quirked in the Realtek codec driver.
It is unnecessary to attach the GPIO to a device as it is static.
Therefore, instead of attaching it and then reading it when loading the
firmware, read its value directly in tas2781_read_acpi and store it in
the private data structure. Then, make reading the value non-fatal so
that ASUS projects that lack a speaker id pin can still work, perhaps using
fallback firmware.
Fixes: 4e7035a75da9 ("ALSA: hda/tas2781: Add speaker id check for ASUS projects")
Cc: stable(a)vger.kernel.org # 6.17
Signed-off-by: Antheas Kapenekakis <lkml(a)antheas.dev>
---
include/sound/tas2781.h | 2 +-
.../hda/codecs/side-codecs/tas2781_hda_i2c.c | 44 +++++++++++--------
2 files changed, 26 insertions(+), 20 deletions(-)
diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index 0fbcdb15c74b..29d15ba65f04 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -197,7 +197,6 @@ struct tasdevice_priv {
struct acoustic_data acou_data;
#endif
struct tasdevice_fw *fmw;
- struct gpio_desc *speaker_id;
struct gpio_desc *reset;
struct mutex codec_lock;
struct regmap *regmap;
@@ -215,6 +214,7 @@ struct tasdevice_priv {
unsigned int magic_num;
unsigned int chip_id;
unsigned int sysclk;
+ int speaker_id;
int irq;
int cur_prog;
diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c
index 0357401a6023..c8619995b1d7 100644
--- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c
+++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c
@@ -87,6 +87,7 @@ static const struct acpi_gpio_mapping tas2781_speaker_id_gpios[] = {
static int tas2781_read_acpi(struct tasdevice_priv *p, const char *hid)
{
+ struct gpio_desc *speaker_id;
struct acpi_device *adev;
struct device *physdev;
LIST_HEAD(resources);
@@ -119,19 +120,31 @@ static int tas2781_read_acpi(struct tasdevice_priv *p, const char *hid)
/* Speaker id was needed for ASUS projects. */
ret = kstrtou32(sub, 16, &subid);
if (!ret && upper_16_bits(subid) == PCI_VENDOR_ID_ASUSTEK) {
- ret = devm_acpi_dev_add_driver_gpios(p->dev,
- tas2781_speaker_id_gpios);
- if (ret < 0)
+ ret = acpi_dev_add_driver_gpios(adev, tas2781_speaker_id_gpios);
+ if (ret < 0) {
dev_err(p->dev, "Failed to add driver gpio %d.\n",
ret);
- p->speaker_id = devm_gpiod_get(p->dev, "speakerid", GPIOD_IN);
- if (IS_ERR(p->speaker_id)) {
- dev_err(p->dev, "Failed to get Speaker id.\n");
- ret = PTR_ERR(p->speaker_id);
- goto err;
+ p->speaker_id = -1;
+ goto end_2563;
+ }
+
+ speaker_id = fwnode_gpiod_get_index(acpi_fwnode_handle(adev),
+ "speakerid", 0, GPIOD_IN, NULL);
+ if (!IS_ERR(speaker_id)) {
+ p->speaker_id = gpiod_get_value_cansleep(speaker_id);
+ dev_dbg(p->dev, "Got speaker id gpio from ACPI: %d.\n",
+ p->speaker_id);
+ gpiod_put(speaker_id);
+ } else {
+ p->speaker_id = -1;
+ ret = PTR_ERR(speaker_id);
+ dev_err(p->dev, "Get speaker id gpio failed %d.\n",
+ ret);
}
+
+ acpi_dev_remove_driver_gpios(adev);
} else {
- p->speaker_id = NULL;
+ p->speaker_id = -1;
}
end_2563:
@@ -432,23 +445,16 @@ static void tasdevice_dspfw_init(void *context)
struct tas2781_hda *tas_hda = dev_get_drvdata(tas_priv->dev);
struct tas2781_hda_i2c_priv *hda_priv = tas_hda->hda_priv;
struct hda_codec *codec = tas_priv->codec;
- int ret, spk_id;
+ int ret;
tasdevice_dsp_remove(tas_priv);
tas_priv->fw_state = TASDEVICE_DSP_FW_PENDING;
- if (tas_priv->speaker_id != NULL) {
- // Speaker id need to be checked for ASUS only.
- spk_id = gpiod_get_value(tas_priv->speaker_id);
- if (spk_id < 0) {
- // Speaker id is not valid, use default.
- dev_dbg(tas_priv->dev, "Wrong spk_id = %d\n", spk_id);
- spk_id = 0;
- }
+ if (tas_priv->speaker_id >= 0) {
snprintf(tas_priv->coef_binaryname,
sizeof(tas_priv->coef_binaryname),
"TAS2XXX%04X%d.bin",
lower_16_bits(codec->core.subsystem_id),
- spk_id);
+ tas_priv->speaker_id);
} else {
snprintf(tas_priv->coef_binaryname,
sizeof(tas_priv->coef_binaryname),
base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
--
2.51.1
From: Shawn Guo <shawnguo(a)kernel.org>
Per commit 9442490a0286 ("regmap: irq: Support wake IRQ mask inversion")
the wake_invert flag is to support enable register, so cleared bits are
wake disabled.
Fixes: 68622bdfefb9 ("regmap: irq: document mask/wake_invert flags")
Cc: stable(a)vger.kernel.org
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
---
include/linux/regmap.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 4e1ac1fbcec4..55343795644b 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1643,7 +1643,7 @@ struct regmap_irq_chip_data;
* @status_invert: Inverted status register: cleared bits are active interrupts.
* @status_is_level: Status register is actuall signal level: Xor status
* register with previous value to get active interrupts.
- * @wake_invert: Inverted wake register: cleared bits are wake enabled.
+ * @wake_invert: Inverted wake register: cleared bits are wake disabled.
* @type_in_mask: Use the mask registers for controlling irq type. Use this if
* the hardware provides separate bits for rising/falling edge
* or low/high level interrupts and they should be combined into
--
2.43.0
Currently mremap folio pte batch ignores the writable bit during figuring
out a set of similar ptes mapping the same folio. Suppose that the first
pte of the batch is writable while the others are not - set_ptes will
end up setting the writable bit on the other ptes, which is a violation
of mremap semantics. Therefore, use FPB_RESPECT_WRITE to check the writable
bit while determining the pte batch.
Cc: stable(a)vger.kernel.org #6.17
Fixes: f822a9a81a31 ("mm: optimize mremap() by PTE batching")
Reported-by: David Hildenbrand <david(a)redhat.com>
Debugged-by: David Hildenbrand <david(a)redhat.com>
Signed-off-by: Dev Jain <dev.jain(a)arm.com>
---
mm-selftests pass. Based on mm-new. Need David H. to confirm whether
the repro passes.
mm/mremap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index a7f531c17b79..8ad06cf50783 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -187,7 +187,7 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr
if (!folio || !folio_test_large(folio))
return 1;
- return folio_pte_batch(folio, ptep, pte, max_nr);
+ return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE);
}
static int move_ptes(struct pagetable_move_control *pmc,
--
2.30.2
From: Junrui Luo <moonafterrain(a)outlook.com>
The asd_pci_remove() function fails to synchronize with pending tasklets
before freeing the asd_ha structure, leading to a potential use-after-free
vulnerability.
When device removal is triggered (via hot-unplug or module unload), a race
condition can occur: the dl_tasklet scheduled from the interrupt path may
still be pending or running while the asd_ha structure is freed.
The fix adds tasklet_kill() before freeing the asd_ha structure, ensuring
all scheduled tasklets complete before cleanup proceeds.
Reported-by: Yuhao Jiang <danisjiang(a)gmail.com>
Reported-by: Junrui Luo <moonafterrain(a)outlook.com>
Fixes: 2908d778ab3e ("[SCSI] aic94xx: new driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Junrui Luo <moonafterrain(a)outlook.com>
---
drivers/scsi/aic94xx/aic94xx_init.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index adf3d9145606..95f3620059f7 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -882,6 +882,9 @@ static void asd_pci_remove(struct pci_dev *dev)
asd_disable_ints(asd_ha);
+ /* Ensure all scheduled tasklets complete before freeing resources */
+ tasklet_kill(&asd_ha->seq.dl_tasklet);
+
asd_remove_dev_attrs(asd_ha);
/* XXX more here as needed */
--
2.51.1.dirty
From: Junrui Luo <moonafterrain(a)outlook.com>
The uninorth_insert_memory and uninorth_remove_memory functions lack
proper validation of the pg_start parameter before using it as an array
index into the GATT (Graphics Address Translation Table).
The current bounds check fails to reject negative
pg_start values and potentially causes out-of-bounds writes.
Fix by explicitly checking that pg_start is non-negative before
performing bounds checking. This makes the security requirement clear
and does not rely on implicit type conversion behavior.
The uninorth_remove_memory function has no bounds checking at all, so
add the same validation there.
Reported-by: Yuhao Jiang <danisjiang(a)gmail.com>
Reported-by: Junrui Luo <moonafterrain(a)outlook.com>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Junrui Luo <moonafterrain(a)outlook.com>
---
drivers/char/agp/uninorth-agp.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index b8d7115b8c9e..4e0b949016f7 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -169,7 +169,9 @@ static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start, int ty
temp = agp_bridge->current_size;
num_entries = A_SIZE_32(temp)->num_entries;
- if ((pg_start + mem->page_count) > num_entries)
+ if (pg_start < 0 ||
+ (pg_start + mem->page_count) > num_entries ||
+ (pg_start + mem->page_count) < pg_start)
return -EINVAL;
gp = (u32 *) &agp_bridge->gatt_table[pg_start];
@@ -200,6 +202,9 @@ static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start, int ty
static int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
{
size_t i;
+ int num_entries;
+ void *temp;
+
u32 *gp;
int mask_type;
@@ -215,6 +220,14 @@ static int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int ty
if (mem->page_count == 0)
return 0;
+ temp = agp_bridge->current_size;
+ num_entries = A_SIZE_32(temp)->num_entries;
+
+ if (pg_start < 0 ||
+ (pg_start + mem->page_count) > num_entries ||
+ (pg_start + mem->page_count) < pg_start)
+ return -EINVAL;
+
gp = (u32 *) &agp_bridge->gatt_table[pg_start];
for (i = 0; i < mem->page_count; ++i) {
gp[i] = scratch_value;
--
2.51.1.dirty
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102054-slacks-ambush-f774@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger(a)amd.com>
Date: Fri, 10 Oct 2025 12:08:35 -0500
Subject: [PATCH] x86/resctrl: Fix miscount of bandwidth event when
reactivating previously unavailable RMID
Users can create as many monitoring groups as the number of RMIDs supported
by the hardware. However, on AMD systems, only a limited number of RMIDs
are guaranteed to be actively tracked by the hardware. RMIDs that exceed
this limit are placed in an "Unavailable" state.
When a bandwidth counter is read for such an RMID, the hardware sets
MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked
again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable
remains set on first read after tracking re-starts and is clear on all
subsequent reads as long as the RMID is tracked.
resctrl miscounts the bandwidth events after an RMID transitions from the
"Unavailable" state back to being tracked. This happens because when the
hardware starts counting again after resetting the counter to zero, resctrl
in turn compares the new count against the counter value stored from the
previous time the RMID was tracked.
This results in resctrl computing an event value that is either undercounting
(when new counter is more than stored counter) or a mistaken overflow (when
new counter is less than stored counter).
Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to
zero whenever the RMID is in the "Unavailable" state to ensure accurate
counting after the RMID resets to zero when it starts to be tracked again.
Example scenario that results in mistaken overflow
==================================================
1. The resctrl filesystem is mounted, and a task is assigned to a
monitoring group.
$mount -t resctrl resctrl /sys/fs/resctrl
$mkdir /sys/fs/resctrl/mon_groups/test1/
$echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
21323 <- Total bytes on domain 0
"Unavailable" <- Total bytes on domain 1
Task is running on domain 0. Counter on domain 1 is "Unavailable".
2. The task runs on domain 0 for a while and then moves to domain 1. The
counter starts incrementing on domain 1.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
7345357 <- Total bytes on domain 0
4545 <- Total bytes on domain 1
3. At some point, the RMID in domain 0 transitions to the "Unavailable"
state because the task is no longer executing in that domain.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
"Unavailable" <- Total bytes on domain 0
434341 <- Total bytes on domain 1
4. Since the task continues to migrate between domains, it may eventually
return to domain 0.
$cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
17592178699059 <- Overflow on domain 0
3232332 <- Total bytes on domain 1
In this case, the RMID on domain 0 transitions from "Unavailable" state to
active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when
the counter is read and begins tracking the RMID counting from 0.
Subsequent reads succeed but return a value smaller than the previously
saved MSR value (7345357). Consequently, the resctrl's overflow logic is
triggered, it compares the previous value (7345357) with the new, smaller
value and incorrectly interprets this as a counter overflow, adding a large
delta.
In reality, this is a false positive: the counter did not overflow but was
simply reset when the RMID transitioned from "Unavailable" back to active
state.
Here is the text from APM [1] available from [2].
"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the
first QM_CTR read when it begins tracking an RMID that it was not
previously tracking. The U bit will be zero for all subsequent reads from
that RMID while it is still tracked by the hardware. Therefore, a QM_CTR
read with the U bit set when that RMID is in use by a processor can be
considered 0 when calculating the difference with a subsequent read."
[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming
Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory
Bandwidth (MBM).
[ bp: Split commit message into smaller paragraph chunks for better
consumption. ]
Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature")
Signed-off-by: Babu Moger <babu.moger(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Tested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Cc: stable(a)vger.kernel.org # needs adjustments for <= v6.17
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2]
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index c8945610d455..2cd25a0d4637 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *ignored)
{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
int cpu = cpumask_any(&d->hdr.cpu_mask);
+ struct arch_mbm_state *am;
u64 msr_val;
u32 prmid;
int ret;
@@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
prmid = logical_rmid_to_physical_rmid(cpu, rmid);
ret = __rmid_read_phys(prmid, eventid, &msr_val);
- if (ret)
- return ret;
- *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+ if (!ret) {
+ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+ } else if (ret == -EINVAL) {
+ am = get_arch_mbm_state(hw_dom, rmid, eventid);
+ if (am)
+ am->prev_msr = 0;
+ }
- return 0;
+ return ret;
}
static int __cntr_id_read(u32 cntr_id, u64 *val)
From: Christian Hitz <christian.hitz(a)bbv.ch>
If a GPIO is used to control the chip's enable pin, it needs to be pulled
high before any i2c communication is attempted.
Currently, the enable GPIO handling is not correct.
Assume the enable GPIO is low when the probe function is entered. In this
case the device is in SHUTDOWN mode and does not react to i2c commands.
During probe the following sequence happens:
1. The call to lp50xx_reset() on line 548 has no effect as i2c is not
possible yet.
2. Then - on line 552 - lp50xx_enable_disable() is called. As
"priv->enable_gpio“ has not yet been initialized, setting the GPIO has
no effect. Also the i2c enable command is not executed as the device
is still in SHUTDOWN.
3. On line 556 the call to lp50xx_probe_dt() finally parses the rest of
the DT and the configured priv->enable_gpio is set up.
As a result the device is still in SHUTDOWN mode and not ready for
operation.
Split lp50xx_enable_disable() into distinct enable and disable functions
to enforce correct ordering between enable_gpio manipulations and i2c
commands.
Read enable_gpio configuration from DT before attempting to manipulate
enable_gpio.
Add delays to observe correct wait timing after manipulating enable_gpio
and before any i2c communication.
Fixes: 242b81170fb8 ("leds: lp50xx: Add the LP50XX family of the RGB LED driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Hitz <christian.hitz(a)bbv.ch>
---
Changes in v2:
- Unconditionally reset in lp50xx_enable
- Define magic numbers
- Improve log message
---
drivers/leds/leds-lp50xx.c | 55 +++++++++++++++++++++++++++-----------
1 file changed, 40 insertions(+), 15 deletions(-)
diff --git a/drivers/leds/leds-lp50xx.c b/drivers/leds/leds-lp50xx.c
index 94f8ef6b482c..d3485d814cf4 100644
--- a/drivers/leds/leds-lp50xx.c
+++ b/drivers/leds/leds-lp50xx.c
@@ -50,6 +50,12 @@
#define LP50XX_SW_RESET 0xff
#define LP50XX_CHIP_EN BIT(6)
+#define LP50XX_CHIP_DISABLE 0x00
+#define LP50XX_START_TIME_US 500
+#define LP50XX_RESET_TIME_US 3
+
+#define LP50XX_EN_GPIO_LOW 0
+#define LP50XX_EN_GPIO_HIGH 1
/* There are 3 LED outputs per bank */
#define LP50XX_LEDS_PER_MODULE 3
@@ -371,19 +377,42 @@ static int lp50xx_reset(struct lp50xx *priv)
return regmap_write(priv->regmap, priv->chip_info->reset_reg, LP50XX_SW_RESET);
}
-static int lp50xx_enable_disable(struct lp50xx *priv, int enable_disable)
+static int lp50xx_enable(struct lp50xx *priv)
{
int ret;
- ret = gpiod_direction_output(priv->enable_gpio, enable_disable);
+ if (priv->enable_gpio) {
+ ret = gpiod_direction_output(priv->enable_gpio, LP50XX_EN_GPIO_HIGH);
+ if (ret)
+ return ret;
+
+ udelay(LP50XX_START_TIME_US);
+ }
+
+ ret = lp50xx_reset(priv);
if (ret)
return ret;
- if (enable_disable)
- return regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_EN);
- else
- return regmap_write(priv->regmap, LP50XX_DEV_CFG0, 0);
+ return regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_EN);
+}
+static int lp50xx_disable(struct lp50xx *priv)
+{
+ int ret;
+
+ ret = regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_DISABLE);
+ if (ret)
+ return ret;
+
+ if (priv->enable_gpio) {
+ ret = gpiod_direction_output(priv->enable_gpio, LP50XX_EN_GPIO_LOW);
+ if (ret)
+ return ret;
+
+ udelay(LP50XX_RESET_TIME_US);
+ }
+
+ return 0;
}
static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
@@ -447,6 +476,10 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
return dev_err_probe(priv->dev, PTR_ERR(priv->enable_gpio),
"Failed to get enable GPIO\n");
+ ret = lp50xx_enable(priv);
+ if (ret)
+ return ret;
+
priv->regulator = devm_regulator_get(priv->dev, "vled");
if (IS_ERR(priv->regulator))
priv->regulator = NULL;
@@ -547,14 +580,6 @@ static int lp50xx_probe(struct i2c_client *client)
return ret;
}
- ret = lp50xx_reset(led);
- if (ret)
- return ret;
-
- ret = lp50xx_enable_disable(led, 1);
- if (ret)
- return ret;
-
return lp50xx_probe_dt(led);
}
@@ -563,7 +588,7 @@ static void lp50xx_remove(struct i2c_client *client)
struct lp50xx *led = i2c_get_clientdata(client);
int ret;
- ret = lp50xx_enable_disable(led, 0);
+ ret = lp50xx_disable(led);
if (ret)
dev_err(led->dev, "Failed to disable chip\n");
--
2.51.1
A user reports that on their Lenovo Corsola Magneton with EC firmware
steelix-15194.270.0 the driver probe fails with EINVAL. It turns out
that the power LED does not contain any color components as indicated
by the following "ectool led power query" output:
Brightness range for LED 1:
red : 0x0
green : 0x0
blue : 0x0
yellow : 0x0
white : 0x0
amber : 0x0
The LED also does not react to commands sent manually through ectool and
is generally non-functional.
Instead of failing the probe for all LEDs managed by the EC when one
without color components is encountered, silently skip those.
Fixes: 8d6ce6f3ec9d ("leds: Add ChromeOS EC driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
---
drivers/leds/leds-cros_ec.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/leds/leds-cros_ec.c b/drivers/leds/leds-cros_ec.c
index 377cf04e202a..bea3cc3fbfd2 100644
--- a/drivers/leds/leds-cros_ec.c
+++ b/drivers/leds/leds-cros_ec.c
@@ -142,9 +142,6 @@ static int cros_ec_led_count_subleds(struct device *dev,
}
}
- if (!num_subleds)
- return -EINVAL;
-
*max_brightness = common_range;
return num_subleds;
}
@@ -189,6 +186,8 @@ static int cros_ec_led_probe_one(struct device *dev, struct cros_ec_device *cros
&priv->led_mc_cdev.led_cdev.max_brightness);
if (num_subleds < 0)
return num_subleds;
+ if (num_subleds == 0)
+ return 0; /* LED without any colors, skip */
priv->cros_ec = cros_ec;
priv->led_id = id;
---
base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
change-id: 20251028-cros_ec-leds-no-colors-18eb8d1efa92
Best regards,
--
Thomas Weißschuh <linux(a)weissschuh.net>
When simple_write_to_buffer() succeeds, it returns the number of bytes
actually copied to the buffer, which may be less than the requested
'count' if the buffer size is insufficient. However, the current code
incorrectly uses 'count' as the index for null termination instead of
the actual number of bytes copied, leading to an out-of-bounds write.
Add a check for the count and use the return value as the index.
Found via static analysis. This is similar to the
commit da9374819eb3 ("iio: backend: fix out-of-bound write")
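For reference, the safe debugfs write pattern the fix follows is roughly
the following (a sketch only; everything except simple_write_to_buffer()
is a placeholder):

	static ssize_t example_write(struct file *f, const char __user *userbuf,
				     size_t count, loff_t *ppos)
	{
		char buf[64];
		ssize_t ret;

		if (count >= sizeof(buf))
			return -ENOSPC;

		ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos,
					     userbuf, count);
		if (ret < 0)
			return ret;

		/* terminate at the number of bytes actually copied */
		buf[ret] = '\0';

		return count;
	}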
Fixes: b1c5d68ea66e ("iio: dac: ad3552r-hs: add support for internal ramp")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/iio/dac/ad3552r-hs.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/dac/ad3552r-hs.c b/drivers/iio/dac/ad3552r-hs.c
index 41b96b48ba98..a9578afa7015 100644
--- a/drivers/iio/dac/ad3552r-hs.c
+++ b/drivers/iio/dac/ad3552r-hs.c
@@ -549,12 +549,15 @@ static ssize_t ad3552r_hs_write_data_source(struct file *f,
guard(mutex)(&st->lock);
+ if (count >= sizeof(buf))
+ return -ENOSPC;
+
ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, userbuf,
count);
if (ret < 0)
return ret;
- buf[count] = '\0';
+ buf[ret] = '\0';
ret = match_string(dbgfs_attr_source, ARRAY_SIZE(dbgfs_attr_source),
buf);
--
2.39.5 (Apple Git-154)
A recent change fixed device reference leaks when looking up drm
platform device driver data during bind() but failed to remove a partial
fix which had been added by commit 80805b62ea5b ("drm/mediatek: Fix
kobject put for component sub-drivers").
This results in a reference imbalance on component bind() failures and
on unbind(), which could lead to a use-after-free.
Make sure to only drop the references after retrieving the driver data
by effectively reverting the previous partial fix.
Note that holding a reference to a device does not prevent its driver
data from going away so there is no point in keeping the reference.
Fixes: 1f403699c40f ("drm/mediatek: Fix device/node reference count leaks in mtk_drm_get_all_drm_priv")
Reported-by: Sjoerd Simons <sjoerd(a)collabora.com>
Link: https://lore.kernel.org/r/20251003-mtk-drm-refcount-v1-1-3b3f2813b0db@colla…
Cc: stable(a)vger.kernel.org
Cc: Ma Ke <make24(a)iscas.ac.cn>
Cc: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/gpu/drm/mediatek/mtk_drm_drv.c | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 384b0510272c..a94c51a83261 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -686,10 +686,6 @@ static int mtk_drm_bind(struct device *dev)
for (i = 0; i < private->data->mmsys_dev_num; i++)
private->all_drm_private[i]->drm = NULL;
err_put_dev:
- for (i = 0; i < private->data->mmsys_dev_num; i++) {
- /* For device_find_child in mtk_drm_get_all_priv() */
- put_device(private->all_drm_private[i]->dev);
- }
put_device(private->mutex_dev);
return ret;
}
@@ -697,18 +693,12 @@ static int mtk_drm_bind(struct device *dev)
static void mtk_drm_unbind(struct device *dev)
{
struct mtk_drm_private *private = dev_get_drvdata(dev);
- int i;
/* for multi mmsys dev, unregister drm dev in mmsys master */
if (private->drm_master) {
drm_dev_unregister(private->drm);
mtk_drm_kms_deinit(private->drm);
drm_dev_put(private->drm);
-
- for (i = 0; i < private->data->mmsys_dev_num; i++) {
- /* For device_find_child in mtk_drm_get_all_priv() */
- put_device(private->all_drm_private[i]->dev);
- }
put_device(private->mutex_dev);
}
private->mtk_drm_bound = false;
--
2.49.1
The code in bmc150-accel-core.c unconditionally calls
bmc150_accel_set_interrupt() in the iio_buffer_setup_ops,
such as on the runtime PM resume path giving a kernel
splat like this if the device has no interrupts:
Unable to handle kernel NULL pointer dereference at virtual
address 00000001 when read
CPU: 0 UID: 0 PID: 393 Comm: iio-sensor-prox Not tainted
6.18.0-rc1-postmarketos-stericsson-00001-g6b43386e3737 #73 PREEMPT
Hardware name: ST-Ericsson Ux5x0 platform (Device Tree Support)
PC is at bmc150_accel_set_interrupt+0x98/0x194
LR is at __pm_runtime_resume+0x5c/0x64
(...)
Call trace:
bmc150_accel_set_interrupt from bmc150_accel_buffer_postenable+0x40/0x108
bmc150_accel_buffer_postenable from __iio_update_buffers+0xbe0/0xcbc
__iio_update_buffers from enable_store+0x84/0xc8
enable_store from kernfs_fop_write_iter+0x154/0x1b4
kernfs_fop_write_iter from do_iter_readv_writev+0x178/0x1e4
do_iter_readv_writev from vfs_writev+0x158/0x3f4
vfs_writev from do_writev+0x74/0xe4
do_writev from __sys_trace_return+0x0/0x10
This bug seems to have been in the driver since the beginning,
but it has only started to manifest recently; I do not know why.
Cc: stable(a)vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij(a)linaro.org>
---
drivers/iio/accel/bmc150-accel-core.c | 5 +++++
drivers/iio/accel/bmc150-accel.h | 1 +
2 files changed, 6 insertions(+)
diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index 3c5d1560b163..f4421a3b0ef2 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -523,6 +523,10 @@ static int bmc150_accel_set_interrupt(struct bmc150_accel_data *data, int i,
const struct bmc150_accel_interrupt_info *info = intr->info;
int ret;
+ /* We do not always have an IRQ */
+ if (!data->has_irq)
+ return 0;
+
if (state) {
if (atomic_inc_return(&intr->users) > 1)
return 0;
@@ -1696,6 +1700,7 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq,
}
if (irq > 0) {
+ data->has_irq = true;
ret = devm_request_threaded_irq(dev, irq,
bmc150_accel_irq_handler,
bmc150_accel_irq_thread_handler,
diff --git a/drivers/iio/accel/bmc150-accel.h b/drivers/iio/accel/bmc150-accel.h
index 7a7baf52e595..6e9bb69a1fd4 100644
--- a/drivers/iio/accel/bmc150-accel.h
+++ b/drivers/iio/accel/bmc150-accel.h
@@ -59,6 +59,7 @@ enum bmc150_accel_trigger_id {
struct bmc150_accel_data {
struct regmap *regmap;
struct regulator_bulk_data regulators[2];
+ bool has_irq;
struct bmc150_accel_interrupt interrupts[BMC150_ACCEL_INTERRUPTS];
struct bmc150_accel_trigger triggers[BMC150_ACCEL_TRIGGERS];
struct mutex mutex;
---
base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
change-id: 20251027-fix-bmc150-7e568122b265
Best regards,
--
Linus Walleij <linus.walleij(a)linaro.org>
From: Mika Westerberg <mika.westerberg(a)linux.intel.com>
[ Upstream commit f7e37affbc9085f2b77ccb6596521a44eabf7505 ]
Add Intel Wildcat Lake SPI serial flash PCI ID to the list of supported
devices.
Signed-off-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Link: https://patch.msgid.link/20251020145415.3377022-4-mika.westerberg@linux.int…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
## BACKPORT ANALYSIS COMPLETE
**Backport Status: YES**
---
## DETAILED ANALYSIS
### 1. COMMIT CHARACTERIZATION
**Type**: Hardware enablement (PCI device ID addition)
**Code Changes**:
- Single line addition to `drivers/spi/spi-intel-pci.c`
- Adds PCI device ID `0x4d23` (Intel Wildcat Lake SPI serial flash)
- Maps to existing `cnl_info` configuration structure
- No behavioral changes to existing code paths
### 2. SEMANTIC CODE ANALYSIS FINDINGS
#### Tools Used:
- **Read**: Examined the complete file structure (drivers/spi/spi-intel-
pci.c:68-99)
- **Grep**: Located PCI device table usage and MODULE_DEVICE_TABLE macro
- **Git analysis**: Reviewed commit history and backport precedents
#### Impact Scope Analysis:
- **Direct impact**: Only affects systems with PCI ID 0x4d23 (Wildcat
Lake hardware)
- **Caller analysis**: The PCI device table is used by the kernel's PCI
subsystem for automatic device-driver matching
- **Dependency analysis**: Uses `cnl_info` structure (drivers/spi/spi-
intel-pci.c:38-41), which has existed since 2022
- **Risk assessment**: Zero risk to existing hardware - new entry only
triggers on matching PCI ID
### 3. BACKPORT PRECEDENT (Strong Evidence)
I found multiple similar commits that **WERE backported** to stable
trees:
**Example 1 - Lunar Lake-M** (commit ec33549be99fe):
```
commit ec33549be99fe25c6927c8b3d6ed13918b27656e
Author: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Commit: Sasha Levin <sashal(a)kernel.org> [STABLE MAINTAINER]
spi: intel-pci: Add support for Lunar Lake-M SPI serial flash
[ Upstream commit 8f44e3808200c1434c26ef459722f88f48b306df ]
```
**Example 2 - Granite Rapids** (commit 60446b5e74865):
```
commit 60446b5e74865acff1af5f2d89d99551c8e6e2c1
Author: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Commit: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> [STABLE
MAINTAINER]
spi: intel-pci: Add support for Granite Rapids SPI serial flash
[ Upstream commit 9855d60cfc720ff32355484c119acafd3c4dc806 ]
```
**Key Finding**: Both commits were backported to stable trees by stable
maintainers (Sasha Levin, Greg Kroah-Hartman) **despite having NO
explicit stable tags** in the original commits.
### 4. STABLE TREE COMPLIANCE
**Positive Factors**:
- ✅ Fixes a real user problem (Wildcat Lake users cannot access SPI
flash on stable kernels)
- ✅ Extremely low risk (single line, static table entry)
- ✅ No architectural changes
- ✅ No dependencies on unreleased features
- ✅ Follows established backporting pattern for PCI ID additions
- ✅ "Obviously correct" - uses tested configuration from 20+ other Intel
platforms
**Considerations**:
- ⚠️ Technically a "new feature" (new hardware support)
- ✅ However, PCI ID additions are a **special exception** routinely
accepted by stable maintainers
- ✅ Hardware enablement is considered "fixing" missing support for
existing hardware
### 5. DEPENDENCY VERIFICATION
- **cnl_info structure**: Present since 2022, exists in all stable
kernels that would receive this backport
- **PCI_VDEVICE macro**: Standard kernel infrastructure
- **intel_spi_probe()**: Core function, no recent changes that would
conflict
- **Clean backport**: No conflicts expected with any stable kernel
version
### 6. RISK ANALYSIS
**Regression Risk**: **MINIMAL**
- Only triggered when PCI ID 0x4d23 is detected
- No effect on systems without Wildcat Lake hardware
- Uses well-tested code path (cnl_info shared with 20+ other platforms)
- No user-space API changes
**Testing Coverage**:
- Same code path used by numerous Intel platforms (Ice Lake, Tiger Lake,
Alder Lake, etc.)
- Configuration structure (cnl_info) battle-tested across multiple
generations
### 7. COMMIT MESSAGE ANALYSIS
The commit message is:
- ✅ Clear and descriptive
- ✅ Identifies specific hardware (Intel Wildcat Lake)
- ✅ Consistent with previous similar commits
- ❌ No "Fixes:" tag (not applicable - this is new hardware support)
- ❌ No "Cc: stable(a)vger.kernel.org" tag (but precedent shows this
doesn't prevent backporting)
---
## RECOMMENDATION
**YES - This commit SHOULD be backported to stable kernel trees**
### Rationale:
1. **Strong precedent**: Multiple similar PCI ID additions for Intel
platforms (Lunar Lake, Granite Rapids, Arrow Lake) have been
backported to stable trees by stable maintainers
2. **User benefit**: Enables critical SPI flash functionality for
Wildcat Lake hardware owners running stable kernels
3. **Minimal risk**: Single-line change to static table with no impact
on existing hardware
4. **Well-established pattern**: PCI ID additions for hardware
enablement are routinely accepted for stable backports as they "fix"
missing hardware support
5. **Clean backport**: No dependencies or conflicts expected
### Target Stable Trees:
- All currently maintained stable kernels (6.17.x, 6.16.x, 6.15.x, etc.)
- The commit is from v6.18-rc3, so it would benefit users on any stable
kernel before 6.18
drivers/spi/spi-intel-pci.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c
index 4b63cb98df9cc..4bb158a23349e 100644
--- a/drivers/spi/spi-intel-pci.c
+++ b/drivers/spi/spi-intel-pci.c
@@ -75,6 +75,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0x38a4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info },
+ { PCI_VDEVICE(INTEL, 0x4d23), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info },
--
2.51.0
MPTCP creates subflows for data transmission, and these sockets should not
be added to sockmap because MPTCP sets specialized data_ready handlers
that would be overridden by sockmap.
Additionally, the parent socket of MPTCP subflows (the MPTCP sk, as opposed
to the plain TCP subflow sockets) requires specific protocol handling
(mptcp_prot) that conflicts with sockmap's operation.
This patch adds proper checks to prevent MPTCP subflows and their parent
sockets from being added to sockmap, while preserving compatibility with
reuseport functionality for listening MPTCP sockets.
We cannot add this logic to sock_map_sk_state_allowed() because the sockops
path doesn't execute this function, and the socket state coming from
sockops might be in states like SYN_RECV. So moving
sock_map_sk_state_allowed() to sock_{map,hash}_update_common() is not
appropriate. Instead, we introduce a new function to handle MPTCP checks.
Fixes: 0b4f33def7bb ("mptcp: fix tcp fallback crash")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Suggested-by: Jakub Sitnicki <jakub(a)cloudflare.com>
---
net/core/sock_map.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 5947b38e4f8b..5be38cdfb5cc 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -467,6 +467,27 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
return 0;
}
+/* Disallow MPTCP subflows and their parent sockets. However, a TCP_LISTEN
+ * MPTCP socket is permitted because sockmap can also serve for reuseport
+ * socket selection.
+ */
+static inline bool sock_map_sk_type_allowed(const struct sock *sk)
+{
+ /* MPTCP subflows are not intended for data I/O by user */
+ if (sk_is_tcp(sk) && sk_is_mptcp(sk))
+ goto disallow;
+
+ /* MPTCP parents use mptcp_prot - not supported with sockmap yet */
+ if (sk->sk_protocol == IPPROTO_MPTCP && sk->sk_state != TCP_LISTEN)
+ goto disallow;
+
+ return true;
+
+disallow:
+ pr_err_once("sockmap/sockhash: MPTCP sockets are not supported\n");
+ return false;
+}
+
static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
{
@@ -482,6 +503,9 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
if (unlikely(idx >= map->max_entries))
return -E2BIG;
+ if (!sock_map_sk_type_allowed(sk))
+ return -EOPNOTSUPP;
+
link = sk_psock_init_link();
if (!link)
return -ENOMEM;
@@ -1003,6 +1027,9 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
+ if (!sock_map_sk_type_allowed(sk))
+ return -EOPNOTSUPP;
+
link = sk_psock_init_link();
if (!link)
return -ENOMEM;
--
2.43.0
In thermal_of_cm_lookup(), of_parse_phandle() returns a device node with
its reference count incremented. The caller is responsible for releasing
this reference when the node is no longer needed.
Add of_node_put(tr_np) to fix the reference leaks.
Found via static analysis.
Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/thermal/thermal_of.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index 1a51a4d240ff..2bb1b8e471cf 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -284,8 +284,11 @@ static bool thermal_of_cm_lookup(struct device_node *cm_np,
int count, i;
tr_np = of_parse_phandle(child, "trip", 0);
- if (tr_np != trip->priv)
+ if (tr_np != trip->priv) {
+ of_node_put(tr_np);
continue;
+ }
+ of_node_put(tr_np);
/* The trip has been found, look up the cdev. */
count = of_count_phandle_with_args(child, "cooling-device",
--
2.39.5 (Apple Git-154)
When the server has MPTCP enabled but receives a non-MP-capable request
from a client, it calls mptcp_fallback_tcp_ops().
Since non-MPTCP connections are allowed to use sockmap, which replaces
sk->sk_prot, using sk->sk_prot to determine the IP version in
mptcp_fallback_tcp_ops() becomes unreliable. This can lead to assigning
incorrect ops to sk->sk_socket->ops.
Additionally, when BPF Sockmap modifies the protocol handlers, the
original WARN_ON_ONCE(sk->sk_prot != &tcp_prot) check would falsely
trigger warnings.
Fix this by using the more stable sk_family to distinguish between IPv4
and IPv6 connections, ensuring correct fallback protocol operations are
selected even when BPF Sockmap has modified the socket protocol handlers.
Fixes: 0b4f33def7bb ("mptcp: fix tcp fallback crash")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Reviewed-by: Jakub Sitnicki <jakub(a)cloudflare.com>
---
net/mptcp/protocol.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0292162a14ee..2393741bc310 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -61,11 +61,16 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
+ /* When BPF sockmap is used, it may replace sk->sk_prot.
+ * Using sk_family is a reliable way to determine the IP version.
+ */
+ unsigned short family = READ_ONCE(sk->sk_family);
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (sk->sk_prot == &tcpv6_prot)
+ if (family == AF_INET6)
return &inet6_stream_ops;
#endif
- WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
+ WARN_ON_ONCE(family != AF_INET);
return &inet_stream_ops;
}
--
2.43.0
The 2 argument version of v4l2_subdev_state_get_format() takes the pad
as second argument, not the stream.
Fixes: bc0e8d91feec ("media: v4l: subdev: Switch to stream-aware state functions")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hansg(a)kernel.org>
---
Note the problem pre-exists the Fixes: commit but backporting further
will require manual backports and seems unnecessary.
---
drivers/media/i2c/ov01a10.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/media/i2c/ov01a10.c b/drivers/media/i2c/ov01a10.c
index f92867f542f0..0405ec7c75fd 100644
--- a/drivers/media/i2c/ov01a10.c
+++ b/drivers/media/i2c/ov01a10.c
@@ -731,7 +731,7 @@ static int ov01a10_set_format(struct v4l2_subdev *sd,
h_blank);
}
- format = v4l2_subdev_state_get_format(sd_state, fmt->stream);
+ format = v4l2_subdev_state_get_format(sd_state, fmt->pad);
*format = fmt->format;
return 0;
--
2.51.0
From: Benjamin Berg <benjamin.berg(a)intel.com>
The normal timer mechanism assume that timeout further in the future
need a lower accuracy. As an example, the granularity for a timer
scheduled 4096 ms in the future on a 1000 Hz system is already 512 ms.
This granularity is perfectly sufficient for e.g. timeouts, but there
are other types of events that will happen at a future point in time and
require a higher accuracy.
Add a new wiphy_hrtimer_work type that uses an hrtimer internally. The
API is almost identical to the existing wiphy_delayed_work and it can be
used as a drop-in replacement after minor adjustments. The work will be
scheduled relative to the current time with a slack of 1 millisecond.
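Converting an existing user is mostly mechanical. A minimal sketch with an
invented work item (not taken from any in-tree user) might look like:

	static struct wiphy_hrtimer_work scan_timeout;

	static void scan_timeout_wk(struct wiphy *wiphy, struct wiphy_work *work)
	{
		/* runs under the wiphy mutex, as with wiphy_delayed_work */
	}

	/* at init time: */
	wiphy_hrtimer_work_init(&scan_timeout, scan_timeout_wk);

	/* queueing; previously wiphy_delayed_work_queue(wiphy, &dwork,
	 * msecs_to_jiffies(120)), the delay is now a ktime_t */
	wiphy_hrtimer_work_queue(wiphy, &scan_timeout, ms_to_ktime(120));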
CC: stable(a)vger.kernel.org # 6.4+
Signed-off-by: Benjamin Berg <benjamin.berg(a)intel.com>
Reviewed-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit(a)intel.com>
---
include/net/cfg80211.h | 78 ++++++++++++++++++++++++++++++++++++++++++
net/wireless/core.c | 56 ++++++++++++++++++++++++++++++
net/wireless/trace.h | 21 ++++++++++++
3 files changed, 155 insertions(+)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 781624f5913a..820e299f06b5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6435,6 +6435,11 @@ static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork,
* after wiphy_lock() was called. Therefore, wiphy_cancel_work() can
* use just cancel_work() instead of cancel_work_sync(), it requires
* being in a section protected by wiphy_lock().
+ *
+ * Note that these are scheduled with a timer where the accuracy
+ * becomes less the longer in the future the scheduled timer is. Use
+ * wiphy_hrtimer_work_queue() if the timer must not be late by more
+ * than approximately 10 percent.
*/
void wiphy_delayed_work_queue(struct wiphy *wiphy,
struct wiphy_delayed_work *dwork,
@@ -6506,6 +6511,79 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
bool wiphy_delayed_work_pending(struct wiphy *wiphy,
struct wiphy_delayed_work *dwork);
+struct wiphy_hrtimer_work {
+ struct wiphy_work work;
+ struct wiphy *wiphy;
+ struct hrtimer timer;
+};
+
+enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t);
+
+static inline void wiphy_hrtimer_work_init(struct wiphy_hrtimer_work *hrwork,
+ wiphy_work_func_t func)
+{
+ hrtimer_setup(&hrwork->timer, wiphy_hrtimer_work_timer,
+ CLOCK_BOOTTIME, HRTIMER_MODE_REL);
+ wiphy_work_init(&hrwork->work, func);
+}
+
+/**
+ * wiphy_hrtimer_work_queue - queue hrtimer work for the wiphy
+ * @wiphy: the wiphy to queue for
+ * @hrwork: the high resolution timer worker
+ * @delay: the delay given as a ktime_t
+ *
+ * Please refer to wiphy_delayed_work_queue(). The difference is that
+ * the hrtimer work uses a high resolution timer for scheduling. This
+ * may be needed if timeouts might be scheduled further in the future
+ * and the accuracy of the normal timer is not sufficient.
+ *
+ * Expect a delay of a few milliseconds as the timer is scheduled
+ * with some slack and some more time may pass between queueing the
+ * work and its start.
+ */
+void wiphy_hrtimer_work_queue(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork,
+ ktime_t delay);
+
+/**
+ * wiphy_hrtimer_work_cancel - cancel previously queued hrtimer work
+ * @wiphy: the wiphy, for debug purposes
+ * @hrtimer: the hrtimer work to cancel
+ *
+ * Cancel the work *without* waiting for it, this assumes being
+ * called under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_hrtimer_work_cancel(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrtimer);
+
+/**
+ * wiphy_hrtimer_work_flush - flush previously queued hrtimer work
+ * @wiphy: the wiphy, for debug purposes
+ * @hrwork: the hrtimer work to flush
+ *
+ * Flush the work (i.e. run it if pending). This must be called
+ * under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_hrtimer_work_flush(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork);
+
+/**
+ * wiphy_hrtimer_work_pending - Find out whether a wiphy hrtimer
+ * work item is currently pending.
+ *
+ * @wiphy: the wiphy, for debug purposes
+ * @hrwork: the hrtimer work in question
+ *
+ * Return: true if timer is pending, false otherwise
+ *
+ * Please refer to the wiphy_delayed_work_pending() documentation as
+ * this is the equivalent function for hrtimer based delayed work
+ * items.
+ */
+bool wiphy_hrtimer_work_pending(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork);
+
/**
* enum ieee80211_ap_reg_power - regulatory power for an Access Point
*
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 797f9f2004a6..54a34d8d356e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1787,6 +1787,62 @@ bool wiphy_delayed_work_pending(struct wiphy *wiphy,
}
EXPORT_SYMBOL_GPL(wiphy_delayed_work_pending);
+enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t)
+{
+ struct wiphy_hrtimer_work *hrwork =
+ container_of(t, struct wiphy_hrtimer_work, timer);
+
+ wiphy_work_queue(hrwork->wiphy, &hrwork->work);
+
+ return HRTIMER_NORESTART;
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_timer);
+
+void wiphy_hrtimer_work_queue(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork,
+ ktime_t delay)
+{
+ trace_wiphy_hrtimer_work_queue(wiphy, &hrwork->work, delay);
+
+ if (!delay) {
+ hrtimer_cancel(&hrwork->timer);
+ wiphy_work_queue(wiphy, &hrwork->work);
+ return;
+ }
+
+ hrwork->wiphy = wiphy;
+ hrtimer_start_range_ns(&hrwork->timer, delay,
+ 1000 * NSEC_PER_USEC, HRTIMER_MODE_REL);
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_queue);
+
+void wiphy_hrtimer_work_cancel(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork)
+{
+ lockdep_assert_held(&wiphy->mtx);
+
+ hrtimer_cancel(&hrwork->timer);
+ wiphy_work_cancel(wiphy, &hrwork->work);
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_cancel);
+
+void wiphy_hrtimer_work_flush(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork)
+{
+ lockdep_assert_held(&wiphy->mtx);
+
+ hrtimer_cancel(&hrwork->timer);
+ wiphy_work_flush(wiphy, &hrwork->work);
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_flush);
+
+bool wiphy_hrtimer_work_pending(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork)
+{
+ return hrtimer_is_queued(&hrwork->timer);
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_pending);
+
static int __init cfg80211_init(void)
{
int err;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 8a4c34112eb5..2b71f1d867a0 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -304,6 +304,27 @@ TRACE_EVENT(wiphy_delayed_work_queue,
__entry->delay)
);
+TRACE_EVENT(wiphy_hrtimer_work_queue,
+ TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work,
+ ktime_t delay),
+ TP_ARGS(wiphy, work, delay),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(void *, instance)
+ __field(void *, func)
+ __field(ktime_t, delay)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->instance = work;
+ __entry->func = work->func;
+ __entry->delay = delay;
+ ),
+ TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%llu",
+ WIPHY_PR_ARG, __entry->instance, __entry->func,
+ __entry->delay)
+);
+
TRACE_EVENT(wiphy_work_worker_start,
TP_PROTO(struct wiphy *wiphy),
TP_ARGS(wiphy),
--
2.34.1
In omap_usb2_probe(), of_parse_phandle() returns a device node with its
reference count incremented. The caller is responsible for releasing this
reference when the node is no longer needed.
Add of_node_put(control_node) after usage to fix the
reference leak.
Found via static analysis.
Fixes: 478b6c7436c2 ("usb: phy: omap-usb2: Don't use omap_get_control_dev()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/phy/ti/phy-omap-usb2.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c
index 1eb252604441..660df3181e4f 100644
--- a/drivers/phy/ti/phy-omap-usb2.c
+++ b/drivers/phy/ti/phy-omap-usb2.c
@@ -426,6 +426,7 @@ static int omap_usb2_probe(struct platform_device *pdev)
}
control_pdev = of_find_device_by_node(control_node);
+ of_node_put(control_node);
if (!control_pdev) {
dev_err(&pdev->dev, "Failed to get control device\n");
return -EINVAL;
--
2.39.5 (Apple Git-154)
The driver_find_device_by_of_node() function calls driver_find_device
and returns a device with its reference count incremented.
Add the missing put_device() call to
release this reference after the device is used.
Found via static analysis.
Fixes: 0b7c6075022c ("soc: samsung: exynos-pmu: Add regmap support for SoCs that protect PMU regs")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/soc/samsung/exynos-pmu.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/soc/samsung/exynos-pmu.c b/drivers/soc/samsung/exynos-pmu.c
index 22c50ca2aa79..a53c1f882e1a 100644
--- a/drivers/soc/samsung/exynos-pmu.c
+++ b/drivers/soc/samsung/exynos-pmu.c
@@ -346,6 +346,7 @@ struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np,
if (!dev)
return ERR_PTR(-EPROBE_DEFER);
+ put_device(dev);
return syscon_node_to_regmap(pmu_np);
}
EXPORT_SYMBOL_GPL(exynos_get_pmu_regmap_by_phandle);
--
2.39.5 (Apple Git-154)
There are two reference count leaks in this driver:
1. In nforce2_fsb_read(): pci_get_subsys() increases the reference count
of the PCI device, but pci_dev_put() is never called to release it,
thus leaking the reference.
2. In nforce2_detect_chipset(): pci_get_subsys() gets a reference to the
nforce2_dev which is stored in a global variable, but the reference
is never released when the module is unloaded.
Fix both by:
- Adding pci_dev_put(nforce2_sub5) in nforce2_fsb_read() after reading
the configuration.
- Adding pci_dev_put(nforce2_dev) in nforce2_exit() to release the
global device reference.
Found via static analysis.
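The two acquisition sites are outside the diff context, so roughly (a
sketch only, with the subsystem device IDs elided as placeholders):

    /* nforce2_fsb_read(): temporary lookup, released after the config read */
    nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, sub5_device_id,
                                  PCI_ANY_ID, PCI_ANY_ID, NULL);
    ...
    pci_dev_put(nforce2_sub5);

    /* nforce2_detect_chipset(): reference stored in a global, so it must
     * stay held for the module lifetime and be dropped in nforce2_exit() */
    nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, nforce2_device_id,
                                 PCI_ANY_ID, PCI_ANY_ID, NULL);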
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/cpufreq/cpufreq-nforce2.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index fedad1081973..fbbbe501cf2d 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -145,6 +145,8 @@ static unsigned int nforce2_fsb_read(int bootfsb)
pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
fsb /= 1000000;
+ pci_dev_put(nforce2_sub5);
+
/* Check if PLL register is already set */
pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
@@ -426,6 +428,7 @@ static int __init nforce2_init(void)
static void __exit nforce2_exit(void)
{
cpufreq_unregister_driver(&nforce2_driver);
+ pci_dev_put(nforce2_dev);
}
module_init(nforce2_init);
--
2.39.5 (Apple Git-154)
F2FS can mount filesystems with corrupted directory depth values that
get runtime-clamped to MAX_DIR_HASH_DEPTH. When RENAME_WHITEOUT
operations are performed on such directories, f2fs_rename performs
directory modifications (updating target entry and deleting source
entry) before attempting to add the whiteout entry via f2fs_add_link.
If f2fs_add_link fails due to the corrupted directory structure, the
function returns an error to VFS, but the partial directory
modifications have already been committed to disk. VFS assumes the
entire rename operation failed and does not update the dentry cache,
leaving stale mappings.
In the error path, VFS does not call d_move() to update the dentry
cache, so new_dentry keeps pointing to the old inode (new_inode), whose
i_nlink has already been decremented to zero. Subsequent operations
therefore use cached dentry information that no longer matches the
on-disk state: when a second rename targets the same entry, VFS attempts
to decrement i_nlink on the stale inode, which may already be zero,
triggering a WARNING in drop_nlink().
Example sequence:
1. First rename (RENAME_WHITEOUT): file2 → file1
- f2fs updates file1 entry on disk (points to inode 8)
- f2fs deletes file2 entry on disk
- f2fs_add_link(whiteout) fails (corrupted directory)
- Returns error to VFS
- VFS does not call d_move() due to error
- VFS cache still has: file1 → inode 7 (stale!)
- inode 7 has i_nlink=0 (already decremented)
2. Second rename: file3 → file1
- VFS uses stale cache: file1 → inode 7
- Tries to drop_nlink on inode 7 (i_nlink already 0)
- WARNING in drop_nlink()
Fix this by explicitly invalidating old_dentry and new_dentry when
f2fs_add_link fails during whiteout creation. This forces VFS to
refresh from disk on subsequent operations, ensuring cache consistency
even when the rename partially succeeds.
Reproducer:
1. Mount F2FS image with corrupted i_current_depth
2. renameat2(file2, file1, RENAME_WHITEOUT)
3. renameat2(file3, file1, 0)
4. System triggers WARNING in drop_nlink()
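A userspace sketch of steps 2-4, assuming the corrupted image from step
1 is already mounted as the current directory and contains file1, file2
and file3 (paths and setup are illustrative):

    #include <fcntl.h>
    #include <linux/fs.h>      /* RENAME_WHITEOUT */
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        /* partially succeeds on disk, then returns an error to VFS */
        syscall(SYS_renameat2, AT_FDCWD, "file2", AT_FDCWD, "file1",
                RENAME_WHITEOUT);

        /* hits the stale dentry cache -> WARNING in drop_nlink() */
        syscall(SYS_renameat2, AT_FDCWD, "file3", AT_FDCWD, "file1", 0);
        return 0;
    }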
Fixes: 7e01e7ad746b ("f2fs: support RENAME_WHITEOUT")
Reported-by: syzbot+632cf32276a9a564188d(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=632cf32276a9a564188d
Suggested-by: Chao Yu <chao(a)kernel.org>
Link: https://lore.kernel.org/all/20251022233349.102728-1-kartikey406@gmail.com/ [v1]
Cc: stable(a)vger.kernel.org
Signed-off-by: Deepanshu Kartikey <kartikey406(a)gmail.com>
---
Changes in v2:
- Added detailed explanation about VFS not calling d_move() in error path,
resulting in new_dentry still pointing to inode with zeroed i_nlink
(suggested by Chao Yu)
- Added Fixes tag pointing to commit 7e01e7ad746b
- Added Cc: stable(a)vger.kernel.org for backporting to stable kernels
---
fs/f2fs/namei.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index b882771e4699..712479b7b93d 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1053,9 +1053,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (whiteout) {
set_inode_flag(whiteout, FI_INC_LINK);
err = f2fs_add_link(old_dentry, whiteout);
- if (err)
+ if (err) {
+ d_invalidate(old_dentry);
+ d_invalidate(new_dentry);
goto put_out_dir;
-
+ }
spin_lock(&whiteout->i_lock);
whiteout->i_state &= ~I_LINKABLE;
spin_unlock(&whiteout->i_lock);
--
2.43.0
The qm_get_qos_value() function calls bus_find_device_by_name(), which
increases the device reference count, but fails to call put_device()
to balance the reference count, leading to a device reference leak.
Add put_device() calls in both the error path and success path to
properly balance the reference count.
Found via static analysis.
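A trimmed sketch of the lookup/release pairing (variable names partly
assumed; not the full function):

    dev = bus_find_device_by_name(&pci_bus_type, NULL, pci_name_buf);
    if (!dev)
        return -ENODEV;

    pdev = container_of(dev, struct pci_dev, dev);
    if (pci_physfn(pdev) != qm->pdev) {
        put_device(dev);        /* error path: drop the lookup reference */
        return -EINVAL;
    }

    *fun_index = pdev->devfn;
    put_device(dev);            /* success path: reference no longer needed */
    return 0;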
Fixes: 22d7a6c39cab ("crypto: hisilicon/qm - add pci bdf number check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/crypto/hisilicon/qm.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index a5b96adf2d1e..3b391a146635 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3871,10 +3871,12 @@ static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf,
pdev = container_of(dev, struct pci_dev, dev);
if (pci_physfn(pdev) != qm->pdev) {
pci_err(qm->pdev, "the pdev input does not match the pf!\n");
+ put_device(dev);
return -EINVAL;
}
*fun_index = pdev->devfn;
+ put_device(dev);
return 0;
}
--
2.39.5 (Apple Git-154)
dclk_vop2_src currently has CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT
flags set, which is vastly different from dclk_vop0_src or dclk_vop1_src,
which have none of those.
With these flags in dclk_vop2_src, actually setting the clock then results
in a lot of other peripherals breaking, because setting the rate results
in the PLL source getting changed:
[ 14.898718] clk_core_set_rate_nolock: setting rate for dclk_vop2 to 152840000
[ 15.155017] clk_change_rate: setting rate for pll_gpll to 1680000000
[ clk adjusting every gpll user ]
This includes possibly the other vops, i2s, spdif and even the uarts.
Among other possible things, this breaks the uart console on a board
I use. Sometimes it recovers later on, but there will be a big block
of garbled output for a while at least.
Shared PLLs should not be changed by individual users, so drop these
flags from dclk_vop2_src and make the flags the same as on dclk_vop0_src
and dclk_vop1_src.
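For context, what the flag permits from a consumer's point of view (an
illustrative sketch; the clock handle lookup and rate are made up, not
actual VOP driver code):

    struct clk *dclk = devm_clk_get(dev, "dclk_vop2");

    /*
     * With CLK_SET_RATE_PARENT on dclk_vop2_src this request may
     * propagate up the tree and re-rate the shared GPLL, disturbing
     * every other GPLL consumer (uarts, i2s, the other VOPs).
     * Without the flag only dclk_vop2_src's own mux/divider is
     * adjusted towards the requested rate.
     */
    clk_set_rate(dclk, 152840000);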
Fixes: f1c506d152ff ("clk: rockchip: add clock controller for the RK3588")
Cc: stable(a)vger.kernel.org
Signed-off-by: Heiko Stuebner <heiko(a)sntech.de>
---
drivers/clk/rockchip/clk-rk3588.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c
index 1694223f4f84..cf83242d1726 100644
--- a/drivers/clk/rockchip/clk-rk3588.c
+++ b/drivers/clk/rockchip/clk-rk3588.c
@@ -2094,7 +2094,7 @@ static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
COMPOSITE(DCLK_VOP1_SRC, "dclk_vop1_src", gpll_cpll_v0pll_aupll_p, 0,
RK3588_CLKSEL_CON(111), 14, 2, MFLAGS, 9, 5, DFLAGS,
RK3588_CLKGATE_CON(52), 11, GFLAGS),
- COMPOSITE(DCLK_VOP2_SRC, "dclk_vop2_src", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+ COMPOSITE(DCLK_VOP2_SRC, "dclk_vop2_src", gpll_cpll_v0pll_aupll_p, 0,
RK3588_CLKSEL_CON(112), 5, 2, MFLAGS, 0, 5, DFLAGS,
RK3588_CLKGATE_CON(52), 12, GFLAGS),
COMPOSITE_NODIV(DCLK_VOP0, "dclk_vop0", dclk_vop0_p,
--
2.47.2
From: Sven Eckelmann <sven(a)narfation.org>
Trying to dump the originators or the neighbors via netlink for a meshif
with an inactive primary interface is not allowed. The dump functions
checked for this, but they didn't distinguish between a non-existing
primary interface and an existing but _inactive_ one.
(Primary) batadv_hard_ifaces hold a reference to a net_device. Accessing
them is only allowed either from within an RCU/spinlock protected
section or while holding a valid reference to them. The netlink
dump functions use the latter.
But because specific error handling for inactive primary interfaces was
missing, the reference was never dropped. This reference counting error
was only detected when the interface should have been removed from the
system:
unregister_netdevice: waiting for batadv_slave_0 to become free. Usage count = 2
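The acquisition/release contract in the dump functions, as a rough
sketch (helper names assumed from the surrounding batman-adv code, not
the exact dump code, which uses goto labels instead):

    primary_if = batadv_primary_if_get_selected(bat_priv); /* takes a reference */
    if (!primary_if)
        return -ENOENT;         /* nothing acquired, nothing to release */

    if (primary_if->if_status != BATADV_IF_ACTIVE) {
        batadv_hardif_put(primary_if);  /* must drop the reference here too */
        return -ENOENT;
    }

    /* ... do the dump ... */
    batadv_hardif_put(primary_if);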
Cc: stable(a)vger.kernel.org
Fixes: 6ecc4fd6c2f4 ("batman-adv: netlink: reduce duplicate code by returning interfaces")
Reported-by: syzbot+881d65229ca4f9ae8c84(a)syzkaller.appspotmail.com
Reported-by: Tetsuo Handa <penguin-kernel(a)i-love.sakura.ne.jp>
Signed-off-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: Simon Wunderlich <sw(a)simonwunderlich.de>
---
net/batman-adv/originator.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index a464ff96b9291..ed89d7fd1e7f4 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -764,11 +764,16 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
bat_priv = netdev_priv(mesh_iface);
primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ if (!primary_if) {
ret = -ENOENT;
goto out_put_mesh_iface;
}
+ if (primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out_put_primary_if;
+ }
+
hard_iface = batadv_netlink_get_hardif(bat_priv, cb);
if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) {
ret = PTR_ERR(hard_iface);
@@ -1333,11 +1338,16 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
bat_priv = netdev_priv(mesh_iface);
primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ if (!primary_if) {
ret = -ENOENT;
goto out_put_mesh_iface;
}
+ if (primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out_put_primary_if;
+ }
+
hard_iface = batadv_netlink_get_hardif(bat_priv, cb);
if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) {
ret = PTR_ERR(hard_iface);
--
2.47.3
The current code directly overwrites the scratch pointer with the
return value of kvrealloc(). If kvrealloc() fails and returns NULL,
the original buffer becomes unreachable, causing a memory leak.
Fix this by using a temporary variable to store kvrealloc()'s return
value and only update the scratch pointer on success.
Found via static analysis; this is similar to commit 42378a9ca553
("bpf, verifier: Fix memory leak in array reallocation for stack state")
Fixes: be17c0df6795 ("riscv: module: Optimize PLT/GOT entry counting")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
arch/riscv/kernel/module-sections.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c
index 75551ac6504c..1675cbad8619 100644
--- a/arch/riscv/kernel/module-sections.c
+++ b/arch/riscv/kernel/module-sections.c
@@ -119,6 +119,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned int num_plts = 0;
unsigned int num_gots = 0;
Elf_Rela *scratch = NULL;
+ Elf_Rela *new_scratch;
size_t scratch_size = 0;
int i;
@@ -168,9 +169,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch);
if (scratch_size_needed > scratch_size) {
scratch_size = scratch_size_needed;
- scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
- if (!scratch)
+ new_scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
+ if (!new_scratch) {
+ kvfree(scratch);
return -ENOMEM;
+ }
+ scratch = new_scratch;
}
for (size_t j = 0; j < num_relas; j++)
--
2.39.5 (Apple Git-154)
From: ZhangGuoDong <zhangguodong(a)kylinos.cn>
[ Upstream commit 6fced056d2cc8d01b326e6fcfabaacb9850b71a4 ]
A memory leak occurs when ksmbd_vfs_read() fails.
Fix this by adding the missing kvfree().
Co-developed-by: ChenXiaoSong <chenxiaosong(a)kylinos.cn>
Signed-off-by: ChenXiaoSong <chenxiaosong(a)kylinos.cn>
Signed-off-by: ZhangGuoDong <zhangguodong(a)kylinos.cn>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
## **Backport Status: YES**
### **Extensive Analysis**
Based on comprehensive semantic code analysis and repository
examination, this commit **should be backported** to stable kernel
trees.
---
## **1. Code Change Analysis**
The commit adds a single line to fix a memory leak in
`fs/smb/server/smb2pdu.c`:
```c
nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf);
if (nbytes < 0) {
+ kvfree(aux_payload_buf); // <-- Added line
err = nbytes;
goto out;
}
```
**What was wrong:** Memory allocated at line 6821 (`aux_payload_buf =
kvzalloc(ALIGN(length, 8), KSMBD_DEFAULT_GFP)`) was not freed when
`ksmbd_vfs_read()` fails, while all other error paths properly call
`kvfree()`.
---
## **2. Semantic Analysis Tools Used**
### **Tool 1: mcp__semcode__find_function**
- Located `smb2_read()` in `fs/smb/server/smb2pdu.c:6727-6895`
- Confirmed it's an SMB2 protocol handler (169 lines, 24 function calls)
- Return type: `int` (returns error codes)
### **Tool 2: mcp__semcode__find_callers**
- Result: No direct function callers
- However, cross-referenced with `smb2ops.c:183` showing `smb2_read` is
registered as a handler: `[SMB2_READ_HE] = { .proc = smb2_read }`
- **Conclusion:** This is a protocol handler invoked by the SMB2 message
dispatcher, meaning it's **directly user-triggerable** via network
requests
### **Tool 3: mcp__semcode__find_calls**
- Analyzed `ksmbd_vfs_read()` dependencies
- Found it can fail with multiple error codes: `-EISDIR`, `-EACCES`,
`-EAGAIN`, plus any errors from `kernel_read()`
- **All of these failure paths trigger the memory leak**
### **Tool 4: git blame & git log**
- Bug introduced: commit `e2f34481b24db2` (2021-03-16) - **4 years
old!**
- Recent modification: commit `06a025448b572c` (2024-11-30) changed
allocation to `ALIGN(length, 8)` but didn't fix the leak
- Found 15+ similar "memory leak" fixes in ksmbd history, indicating
active maintenance
---
## **3. Impact Scope Analysis**
### **User Exposure: CRITICAL**
- **Protocol Handler:** Any SMB client can trigger this by sending SMB2
READ requests
- **Network-facing:** ksmbd is a kernel SMB server exposed to network
clients
- **No authentication required to trigger:** The error path can be
reached even with permission errors
### **Trigger Conditions (from VFS analysis):**
1. **-EISDIR**: Client tries to read a directory
2. **-EACCES**: Permission denied (no FILE_READ_DATA or FILE_EXECUTE
access)
3. **-EAGAIN**: File is locked by another process
4. **kernel_read() failures**: Various VFS/filesystem errors
All of these are **easily triggerable** by malicious or misbehaving
clients.
### **Memory Leak Severity: HIGH**
- **Allocation size:** `ALIGN(length, 8)` where `length` is client-
controlled
- **Maximum per leak:** Up to `SMB3_MAX_IOSIZE` = **8 MB** (from
smb2pdu.h:28)
- **Default size:** `SMB21_DEFAULT_IOSIZE` = **1 MB** (from
smb2pdu.h:25)
- **Attack scenario:** An attacker could repeatedly:
1. Send READ requests for locked files (triggers -EAGAIN)
2. Each failed request leaks up to 8MB
3. 100 requests = 800MB leaked
4. Can exhaust server memory leading to **DoS**
---
## **4. Regression Risk Analysis**
### **Risk Level: VERY LOW**
- **Change size:** Single line addition
- **Operation:** Adding missing cleanup (defensive programming)
- **No behavior change:** Only affects error path that already returns
failure
- **Idempotent:** `kvfree()` is safe to call and simply frees allocated
memory
- **No dependencies:** No API changes or external impact
---
## **5. Stable Tree Compliance**
| Criterion | Status | Evidence |
|-----------|--------|----------|
| Fixes important bug | ✅ YES | Memory leak leading to potential DoS |
| Small and contained | ✅ YES | 1 line change, single function |
| No new features | ✅ YES | Pure bug fix |
| No architectural changes | ✅ YES | No structural modifications |
| Low regression risk | ✅ YES | Minimal, defensive change |
| Confined to subsystem | ✅ YES | Only affects ksmbd |
| User-facing impact | ✅ YES | Affects all ksmbd users |
---
## **6. Comparison with Similar Commits**
Recent ksmbd fixes in the repository show similar patterns:
- `379510a815cb2`: "fix possible refcount leak in smb2_sess_setup()"
- `5929e98f3bb76`: "fix potential double free on smb2_read_pipe() error
path"
- `e523a26c05672`: "fix passing freed memory 'aux_payload_buf'"
- `809d50d36e71b`: "fix memory leak in smb2_lock()"
**All these are typical stable tree backport candidates** with similar
characteristics (small resource leak fixes).
---
## **7. Missing Stable Tags**
**Notable:** The commit lacks explicit stable tree tags:
- ❌ No `Cc: stable(a)vger.kernel.org`
- ❌ No `Fixes:` tag pointing to the original buggy commit
However, this doesn't diminish backport worthiness - the technical
merits clearly justify backporting.
---
## **Conclusion**
This commit is an **excellent candidate for backporting** because:
1. **Security concern:** Remote memory exhaustion DoS vector
2. **Long-standing bug:** Present since ksmbd's introduction (4+ years)
3. **Wide impact:** Affects all ksmbd deployments
4. **Minimal risk:** Single-line defensive fix
5. **Clear bug fix:** Unambiguous resource leak on error path
6. **Pattern consistency:** Matches other successfully backported ksmbd
fixes
**Recommendation:** Backport to all active stable trees where ksmbd
exists (5.15+).
fs/smb/server/smb2pdu.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 287200d7c0764..409b85af82e1c 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -6826,6 +6826,7 @@ int smb2_read(struct ksmbd_work *work)
nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf);
if (nbytes < 0) {
+ kvfree(aux_payload_buf);
err = nbytes;
goto out;
}
--
2.51.0
From: Dave Vasilevsky <dave(a)vasilevsky.ca>
On 32-bit book3s with hash MMUs, tlb_flush() was a no-op. This went
unnoticed because, until recently, all uses were for unmaps and thus
handled by __tlb_remove_tlb_entry().
After commit 4a18419f71cd ("mm/mprotect: use mmu_gather") in kernel 5.19,
tlb_gather_mmu() started being used for mprotect as well. This caused
mprotect to simply not work on these machines:
int *ptr = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
*ptr = 1; // force HPTE to be created
mprotect(ptr, 4096, PROT_READ);
*ptr = 2; // should segfault, but succeeds
Fixed by making tlb_flush() actually flush TLB pages. This finally
agrees with the behaviour of book3s64's tlb_flush().
Fixes: 4a18419f71cd ("mm/mprotect: use mmu_gather")
Signed-off-by: Dave Vasilevsky <dave(a)vasilevsky.ca>
---
arch/powerpc/include/asm/book3s/32/tlbflush.h | 8 ++++++--
arch/powerpc/mm/book3s32/tlb.c | 6 ++++++
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
index e43534da5207aa3b0cb3c07b78e29b833c141f3f..b8c587ad2ea954f179246a57d6e86e45e91dcfdc 100644
--- a/arch/powerpc/include/asm/book3s/32/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -11,6 +11,7 @@
void hash__flush_tlb_mm(struct mm_struct *mm);
void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+void hash__flush_gather(struct mmu_gather *tlb);
#ifdef CONFIG_SMP
void _tlbie(unsigned long address);
@@ -28,9 +29,12 @@ void _tlbia(void);
*/
static inline void tlb_flush(struct mmu_gather *tlb)
{
- /* 603 needs to flush the whole TLB here since it doesn't use a hash table. */
- if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ hash__flush_gather(tlb);
+ } else {
+ /* 603 needs to flush the whole TLB here since it doesn't use a hash table. */
_tlbia();
+ }
}
static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c
index 9ad6b56bfec96e989b96f027d075ad5812500854..3da95ecfbbb296303082e378425e92a5fbdbfac8 100644
--- a/arch/powerpc/mm/book3s32/tlb.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -105,3 +105,9 @@ void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
}
EXPORT_SYMBOL(hash__flush_tlb_page);
+
+void hash__flush_gather(struct mmu_gather *tlb)
+{
+ hash__flush_range(tlb->mm, tlb->start, tlb->end);
+}
+EXPORT_SYMBOL(hash__flush_gather);
---
base-commit: dcb6fa37fd7bc9c3d2b066329b0d27dedf8becaa
change-id: 20251027-vasi-mprotect-g3-f8f5278d4140
Best regards,
--
Dave Vasilevsky <dave(a)vasilevsky.ca>