The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 87c4e1459e80bf65066f864c762ef4dc932fad4b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072854-unpledged-repaint-0eac@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 87c4e1459e80bf65066f864c762ef4dc932fad4b Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Fri, 20 Jun 2025 19:08:09 +0100
Subject: [PATCH] ARM: 9448/1: Use an absolute path to unified.h in
KBUILD_AFLAGS
After commit d5c8d6e0fa61 ("kbuild: Update assembler calls to use proper
flags and language target"), which updated as-instr to use the
'assembler-with-cpp' language option, the Kbuild version of as-instr
always fails internally for arch/arm with
<command-line>: fatal error: asm/unified.h: No such file or directory
compilation terminated.
because '-include' flags are now taken into account by the compiler
driver and as-instr does not have '$(LINUXINCLUDE)', so unified.h is not
found.
This went unnoticed at the time of the Kbuild change because the last
use of as-instr in Kbuild that arch/arm could reach was removed in 5.7
by commit 541ad0150ca4 ("arm: Remove 32bit KVM host support") but a
stable backport of the Kbuild change to before that point exposed this
potential issue if one were to be reintroduced.
Follow the general pattern of '-include' paths throughout the tree and
make unified.h absolute using '$(srctree)' to ensure KBUILD_AFLAGS can
be used independently.
Closes: https://lore.kernel.org/CACo-S-1qbCX4WAVFA63dWfHtrRHZBTyyr2js8Lx=Az03XHTTHg…
Cc: stable(a)vger.kernel.org
Fixes: d5c8d6e0fa61 ("kbuild: Update assembler calls to use proper flags and language target")
Reported-by: KernelCI bot <bot(a)kernelci.org>
Reviewed-by: Masahiro Yamada <masahiroy(a)kernel.org>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 4808d3ed98e4..e31e95ffd33f 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -149,7 +149,7 @@ endif
# Need -Uarm for gcc < 3.x
KBUILD_CPPFLAGS +=$(cpp-y)
KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
-KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include asm/unified.h -msoft-float
+KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include $(srctree)/arch/arm/include/asm/unified.h -msoft-float
KBUILD_RUSTFLAGS += --target=arm-unknown-linux-gnueabi
CHECKFLAGS += -D__arm__
[ Upstream commit 46d8c744136ce2454aa4c35c138cc06817f92b8e ]
Some Comedi subdevice instruction handlers are known to access
instruction data elements beyond the first `insn->n` elements in some
cases. The `do_insn_ioctl()` and `do_insnlist_ioctl()` functions
allocate at least `MIN_SAMPLES` (16) data elements to deal with this,
but they do not initialize all of that. For Comedi instruction codes
that write to the subdevice, the first `insn->n` data elements are
copied from user-space, but the remaining elements are left
uninitialized. That could be a problem if the subdevice instruction
handler reads the uninitialized data. Ensure that the first
`MIN_SAMPLES` elements are initialized before calling these instruction
handlers, filling the uncopied elements with 0. For
`do_insnlist_ioctl()`, the same data buffer elements are used for
handling a list of instructions, so ensure the first `MIN_SAMPLES`
elements are initialized for each instruction that writes to the
subdevice.
Fixes: ed9eccbe8970 ("Staging: add comedi core")
Cc: stable(a)vger.kernel.org # 5.13+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20250707161439.88385-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[ Reworked for before commit bac42fb21259 ("comedi: get rid of compat_alloc_user_space() mess in COMEDI_CMD{,TEST} compat") ]
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
---
drivers/staging/comedi/comedi_fops.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 413863bc929b..e4dfb5e7843d 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1544,7 +1544,7 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
if (copy_from_user(&insnlist, arg, sizeof(insnlist)))
return -EFAULT;
- ret = check_insnlist_len(dev, insnlist32.n_insns);
+ ret = check_insnlist_len(dev, insnlist.n_insns);
if (ret)
return ret;
insns = kcalloc(insnlist.n_insns, sizeof(*insns), GFP_KERNEL);
@@ -1580,21 +1580,27 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
}
for (i = 0; i < insnlist.n_insns; ++i) {
+ unsigned int n = insns[i].n;
+
if (insns[i].insn & INSN_MASK_WRITE) {
if (copy_from_user(data, insns[i].data,
- insns[i].n * sizeof(unsigned int))) {
+ n * sizeof(unsigned int))) {
dev_dbg(dev->class_dev,
"copy_from_user failed\n");
ret = -EFAULT;
goto error;
}
+ if (n < MIN_SAMPLES) {
+ memset(&data[n], 0, (MIN_SAMPLES - n) *
+ sizeof(unsigned int));
+ }
}
ret = parse_insn(dev, insns + i, data, file);
if (ret < 0)
goto error;
if (insns[i].insn & INSN_MASK_READ) {
if (copy_to_user(insns[i].data, data,
- insns[i].n * sizeof(unsigned int))) {
+ n * sizeof(unsigned int))) {
dev_dbg(dev->class_dev,
"copy_to_user failed\n");
ret = -EFAULT;
@@ -1661,6 +1667,10 @@ static int do_insn_ioctl(struct comedi_device *dev,
ret = -EFAULT;
goto error;
}
+ if (insn.n < MIN_SAMPLES) {
+ memset(&data[insn.n], 0,
+ (MIN_SAMPLES - insn.n) * sizeof(unsigned int));
+ }
}
ret = parse_insn(dev, &insn, data, file);
if (ret < 0)
--
2.47.2
Greg recently reported 3 patches that could not be applied without
conflicts in v6.6:
- f8a1d9b18c5e ("mptcp: make fallback action and fallback decision atomic")
- def5b7b2643e ("mptcp: plug races between subflow fail and subflow creation")
- da9b2fc7b73d ("mptcp: reset fallback status gracefully at disconnect() time")
Conflicts, if any, have been resolved, and documented in each patch.
Paolo Abeni (3):
mptcp: make fallback action and fallback decision atomic
mptcp: plug races between subflow fail and subflow creation
mptcp: reset fallback status gracefully at disconnect() time
net/mptcp/options.c | 3 ++-
net/mptcp/pm.c | 8 +++++-
net/mptcp/protocol.c | 58 +++++++++++++++++++++++++++++++++++++-------
net/mptcp/protocol.h | 27 ++++++++++++++++-----
net/mptcp/subflow.c | 30 ++++++++++++++---------
5 files changed, 98 insertions(+), 28 deletions(-)
--
2.50.0
From: Yang Yingliang <yangyingliang(a)huawei.com>
[ Upstream commit 4225fea1cb28370086e17e82c0f69bec2779dca0 ]
I got memory leak as follows when doing fault injection test:
unreferenced object 0xffff88800906c618 (size 8):
comm "i2c-idt82p33931", pid 4421, jiffies 4294948083 (age 13.188s)
hex dump (first 8 bytes):
70 74 70 30 00 00 00 00 ptp0....
backtrace:
[<00000000312ed458>] __kmalloc_track_caller+0x19f/0x3a0
[<0000000079f6e2ff>] kvasprintf+0xb5/0x150
[<0000000026aae54f>] kvasprintf_const+0x60/0x190
[<00000000f323a5f7>] kobject_set_name_vargs+0x56/0x150
[<000000004e35abdd>] dev_set_name+0xc0/0x100
[<00000000f20cfe25>] ptp_clock_register+0x9f4/0xd30 [ptp]
[<000000008bb9f0de>] idt82p33_probe.cold+0x8b6/0x1561 [ptp_idt82p33]
When posix_clock_register() returns an error, the name allocated
in dev_set_name() will be leaked, the put_device() should be used
to give up the device reference, then the name will be freed in
kobject_cleanup() and other memory will be freed in ptp_clock_release().
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Fixes: a33121e5487b ("ptp: fix the race between the release of ptp_clock and cdev")
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
[Shivani: Modified to apply on 5.10.y, Removed
kfree(ptp->vclock_index) in the ptach, since vclock_index is
introduced in later versions]
Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
---
drivers/ptp/ptp_clock.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index c895e26b1f17..869023f0987e 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -283,15 +283,20 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
/* Create a posix clock and link it to the device. */
err = posix_clock_register(&ptp->clock, &ptp->dev);
if (err) {
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+
+ if (ptp->kworker)
+ kthread_destroy_worker(ptp->kworker);
+
+ put_device(&ptp->dev);
+
pr_err("failed to create posix clock\n");
- goto no_clock;
+ return ERR_PTR(err);
}
return ptp;
-no_clock:
- if (ptp->pps_source)
- pps_unregister_source(ptp->pps_source);
no_pps:
ptp_cleanup_pin_groups(ptp);
no_pin_groups:
--
2.40.4
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 87c4e1459e80bf65066f864c762ef4dc932fad4b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072848-unwitting-glandular-25db@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 87c4e1459e80bf65066f864c762ef4dc932fad4b Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Fri, 20 Jun 2025 19:08:09 +0100
Subject: [PATCH] ARM: 9448/1: Use an absolute path to unified.h in
KBUILD_AFLAGS
After commit d5c8d6e0fa61 ("kbuild: Update assembler calls to use proper
flags and language target"), which updated as-instr to use the
'assembler-with-cpp' language option, the Kbuild version of as-instr
always fails internally for arch/arm with
<command-line>: fatal error: asm/unified.h: No such file or directory
compilation terminated.
because '-include' flags are now taken into account by the compiler
driver and as-instr does not have '$(LINUXINCLUDE)', so unified.h is not
found.
This went unnoticed at the time of the Kbuild change because the last
use of as-instr in Kbuild that arch/arm could reach was removed in 5.7
by commit 541ad0150ca4 ("arm: Remove 32bit KVM host support") but a
stable backport of the Kbuild change to before that point exposed this
potential issue if one were to be reintroduced.
Follow the general pattern of '-include' paths throughout the tree and
make unified.h absolute using '$(srctree)' to ensure KBUILD_AFLAGS can
be used independently.
Closes: https://lore.kernel.org/CACo-S-1qbCX4WAVFA63dWfHtrRHZBTyyr2js8Lx=Az03XHTTHg…
Cc: stable(a)vger.kernel.org
Fixes: d5c8d6e0fa61 ("kbuild: Update assembler calls to use proper flags and language target")
Reported-by: KernelCI bot <bot(a)kernelci.org>
Reviewed-by: Masahiro Yamada <masahiroy(a)kernel.org>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 4808d3ed98e4..e31e95ffd33f 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -149,7 +149,7 @@ endif
# Need -Uarm for gcc < 3.x
KBUILD_CPPFLAGS +=$(cpp-y)
KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
-KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include asm/unified.h -msoft-float
+KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include $(srctree)/arch/arm/include/asm/unified.h -msoft-float
KBUILD_RUSTFLAGS += --target=arm-unknown-linux-gnueabi
CHECKFLAGS += -D__arm__
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6d496e9569983a0d7a05be6661126d0702cf94f7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072830-reveler-drop-down-d571@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6d496e9569983a0d7a05be6661126d0702cf94f7 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Tue, 15 Jul 2025 17:58:16 +0200
Subject: [PATCH] Revert "drm/gem-shmem: Use dma_buf from GEM object instance"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit 1a148af06000e545e714fe3210af3d77ff903c11.
The dma_buf field in struct drm_gem_object is not stable over the
object instance's lifetime. The field becomes NULL when user space
releases the final GEM handle on the buffer object. This resulted
in a NULL-pointer deref.
Workarounds in commit 5307dce878d4 ("drm/gem: Acquire references on
GEM handles for framebuffers") and commit f6bfc9afc751 ("drm/framebuffer:
Acquire internal references on GEM handles") only solved the problem
partially. They especially don't work for buffer objects without a DRM
framebuffer associated.
Hence, this revert to going back to using .import_attach->dmabuf.
v3:
- cc stable
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Reviewed-by: Simona Vetter <simona.vetter(a)ffwll.ch>
Acked-by: Christian König <christian.koenig(a)amd.com>
Acked-by: Zack Rusin <zack.rusin(a)broadcom.com>
Cc: <stable(a)vger.kernel.org> # v6.15+
Link: https://lore.kernel.org/r/20250715155934.150656-7-tzimmermann@suse.de
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index aa43265f4f4f..a5dbee6974ab 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -349,7 +349,7 @@ int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem,
int ret = 0;
if (drm_gem_is_imported(obj)) {
- ret = dma_buf_vmap(obj->dma_buf, map);
+ ret = dma_buf_vmap(obj->import_attach->dmabuf, map);
} else {
pgprot_t prot = PAGE_KERNEL;
@@ -409,7 +409,7 @@ void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
struct drm_gem_object *obj = &shmem->base;
if (drm_gem_is_imported(obj)) {
- dma_buf_vunmap(obj->dma_buf, map);
+ dma_buf_vunmap(obj->import_attach->dmabuf, map);
} else {
dma_resv_assert_held(shmem->base.resv);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 4ff12d82dac119b4b99b5a78b5af3bf2474c0a36
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072801-earflap-sleet-7b13@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ff12d82dac119b4b99b5a78b5af3bf2474c0a36 Mon Sep 17 00:00:00 2001
From: Haoxiang Li <haoxiang_li2024(a)163.com>
Date: Thu, 3 Jul 2025 17:52:32 +0800
Subject: [PATCH] ice: Fix a null pointer dereference in
ice_copy_and_init_pkg()
Add check for the return value of devm_kmemdup()
to prevent potential null pointer dereference.
Fixes: c76488109616 ("ice: Implement Dynamic Device Personalization (DDP) download")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haoxiang Li <haoxiang_li2024(a)163.com>
Reviewed-by: Michal Swiatkowski <michal.swiatkowski(a)linux.intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Rinitha S <sx.rinitha(a)intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c
index 59323c019544..351824dc3c62 100644
--- a/drivers/net/ethernet/intel/ice/ice_ddp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ddp.c
@@ -2301,6 +2301,8 @@ enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf,
return ICE_DDP_PKG_ERR;
buf_copy = devm_kmemdup(ice_hw_to_dev(hw), buf, len, GFP_KERNEL);
+ if (!buf_copy)
+ return ICE_DDP_PKG_ERR;
state = ice_init_pkg(hw, buf_copy, len);
if (!ice_is_init_pkg_successful(state)) {
From: Victor Shih <victor.shih(a)genesyslogic.com.tw>
Due to a flaw in the hardware design, the GL9763e replay timer frequently
times out when ASPM is enabled. As a result, the warning messages will
often appear in the system log when the system accesses the GL9763e
PCI config. Therefore, the replay timer timeout must be masked.
Also rename the gli_set_gl9763e() to gl9763e_hw_setting() for consistency.
Signed-off-by: Victor Shih <victor.shih(a)genesyslogic.com.tw>
Cc: stable(a)vger.kernel.org
---
drivers/mmc/host/sdhci-pci-gli.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index 98ee3191b02f..7165dde9b6b8 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -1753,7 +1753,7 @@ static int gl9763e_add_host(struct sdhci_pci_slot *slot)
return ret;
}
-static void gli_set_gl9763e(struct sdhci_pci_slot *slot)
+static void gl9763e_hw_setting(struct sdhci_pci_slot *slot)
{
struct pci_dev *pdev = slot->chip->pdev;
u32 value;
@@ -1782,6 +1782,9 @@ static void gli_set_gl9763e(struct sdhci_pci_slot *slot)
value |= FIELD_PREP(GLI_9763E_HS400_RXDLY, GLI_9763E_HS400_RXDLY_5);
pci_write_config_dword(pdev, PCIE_GLI_9763E_CLKRXDLY, value);
+ /* mask the replay timer timeout of AER */
+ sdhci_gli_mask_replay_timer_timeout(pdev);
+
pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
value &= ~GLI_9763E_VHS_REV;
value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R);
@@ -1925,7 +1928,7 @@ static int gli_probe_slot_gl9763e(struct sdhci_pci_slot *slot)
gli_pcie_enable_msi(slot);
host->mmc_host_ops.hs400_enhanced_strobe =
gl9763e_hs400_enhanced_strobe;
- gli_set_gl9763e(slot);
+ gl9763e_hw_setting(slot);
sdhci_enable_v4_mode(host);
return 0;
--
2.43.0
From: Mingcong Bai <jeffbai(a)aosc.io>
As this component hooks into userspace API, it should be assumed that it
will play well with non-4KiB/64KiB pages.
Use `PAGE_SIZE' as the final reference for page alignment instead.
Cc: stable(a)vger.kernel.org
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Fixes: 801989b08aff ("drm/xe/uapi: Make constant comments visible in kernel doc")
Tested-by: Mingcong Bai <jeffbai(a)aosc.io>
Tested-by: Wenbin Fang <fangwenbin(a)vip.qq.com>
Tested-by: Haien Liang <27873200(a)qq.com>
Tested-by: Jianfeng Liu <liujianfeng1994(a)gmail.com>
Tested-by: Shirong Liu <lsr1024(a)qq.com>
Tested-by: Haofeng Wu <s2600cw2(a)126.com>
Link: https://github.com/FanFansfan/loongson-linux/commit/22c55ab3931c32410a077b3…
Link: https://t.me/c/1109254909/768552
Co-developed-by: Shang Yatsen <429839446(a)qq.com>
Signed-off-by: Shang Yatsen <429839446(a)qq.com>
Signed-off-by: Mingcong Bai <jeffbai(a)aosc.io>
---
drivers/gpu/drm/xe/xe_query.c | 2 +-
include/uapi/drm/xe_drm.h | 7 +++++--
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 44d44bbc71dc..f695d5d0610d 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -347,7 +347,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY;
config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
- xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+ xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : PAGE_SIZE;
config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
xe_exec_queue_device_get_max_priority(xe);
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index e2426413488f..5ba76b9369ba 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -397,8 +397,11 @@ struct drm_xe_query_mem_regions {
* has low latency hint support
* - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the
* device has CPU address mirroring support
- * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment
- * required by this device, typically SZ_4K or SZ_64K
+ * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment required
+ * by this device and the CPU. The minimum page size for the device is
+ * usually SZ_4K or SZ_64K, while for the CPU, it is PAGE_SIZE. This value
+ * is calculated by max(min_gpu_page_size, PAGE_SIZE). This alignment is
+ * enforced on buffer object allocations and VM binds.
* - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address
* - %DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY - Value of the highest
* available exec queue priority
--
2.47.2
Hi
The IPQ50xx chip integrates only a 2x2 2.4GHz Wi-Fi module. Through the high-performance NSS core and PCIe expansion and integration
the application range of the IPQ50xx chip ranges from Wi-Fi mesh node to Enterprise AP. OEM manufacturers can think in a unified way when
doing circuit design and PCB layout, and the same packaging can really save designers a lot of trouble.
.# Part Number Manufacturer Date Code Quantity Unit Price Lead Time Condition (PCS) USD/Each one 1 IPQ-5018-0-MRQFN232-TR-01-0 QUALCOMM 2022+ 30000pcs US$3.50/pcs 7days New & original - stock 2 QCN-6102-0-DRQFN116-TR-01-1 QUALCOMM 2022+ 30000pcs US$2.50/pcs 3 QCN-9024-0-MSP234-TR-01-0 QUALCOMM 2022+ 5000pcs US$3.50/pcs 4 QCN-6112-0-DRQFN116-TR-01-0 QUALCOMM 2023+ 15000pcs US$2.70/pcs 5 QCA-8337-AL3C-R QUALCOMM 2023+ 20000pcs US$1.30/pcs
The above are our company's current inventory, all of which are genuine and original packaging.
If you need anything, please feel free to contact me, thank you
Best Regards
Maintain your product-savvy edge with . Stay Updated on News
If you prefer to exit, choose Review Communication Options.
Memory hotunplug is done under the hotplug lock and ptdump walk is done
under the init_mm.mmap_lock. Therefore, ptdump and hotunplug can run
simultaneously without any synchronization. During hotunplug,
free_empty_tables() is ultimately called to free up the pagetables.
The following race can happen, where x denotes the level of the pagetable:
CPU1 CPU2
free_empty_pxd_table
ptdump_walk_pgd()
Get p(x+1)d table from pxd entry
pxd_clear
free_hotplug_pgtable_page(p(x+1)dp)
Still using the p(x+1)d table
which leads to a user-after-free.
To solve this, we need to synchronize ptdump_walk_pgd() with
free_hotplug_pgtable_page() in such a way that ptdump never takes a
reference on a freed pagetable.
Since this race is very unlikely to happen in practice, we do not want to
penalize other code paths taking the init_mm mmap_lock. Therefore, we use
static keys. ptdump will enable the static key - upon observing that,
the free_empty_pxd_table() functions will get patched in with an
mmap_read_lock/unlock sequence. A code comment explains in detail, how
a combination of acquire semantics of static_branch_enable() and the
barriers in __flush_tlb_kernel_pgtable() ensures that ptdump will never
get a hold on the address of a freed pagetable - either ptdump will block
the table freeing path due to write locking the mmap_lock, or, the nullity
of the pxd entry will be observed by ptdump, therefore having no access to
the isolated p(x+1)d pagetable.
This bug was found by code inspection, as a result of working on [1].
1. https://lore.kernel.org/all/20250723161827.15802-1-dev.jain@arm.com/
Cc: <stable(a)vger.kernel.org>
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Signed-off-by: Dev Jain <dev.jain(a)arm.com>
---
Rebased on Linux 6.16.
arch/arm64/include/asm/ptdump.h | 2 ++
arch/arm64/mm/mmu.c | 61 +++++++++++++++++++++++++++++++++
arch/arm64/mm/ptdump.c | 11 ++++--
3 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index fded5358641f..4760168cbd6e 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -7,6 +7,8 @@
#include <linux/ptdump.h>
+DECLARE_STATIC_KEY_FALSE(arm64_ptdump_key);
+
#ifdef CONFIG_PTDUMP
#include <linux/mm_types.h>
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 00ab1d648db6..d2feef270880 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -46,6 +46,8 @@
#define NO_CONT_MAPPINGS BIT(1)
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
+DEFINE_STATIC_KEY_FALSE(arm64_ptdump_key);
+
enum pgtable_type {
TABLE_PTE,
TABLE_PMD,
@@ -1002,6 +1004,61 @@ static void unmap_hotplug_range(unsigned long addr, unsigned long end,
} while (addr = next, addr < end);
}
+/*
+ * Our objective is to prevent ptdump from reading a pagetable which has
+ * been freed. Assume that ptdump_walk_pgd() (call this thread T1)
+ * executes completely on CPU1 and free_hotplug_pgtable_page() (call this
+ * thread T2) executes completely on CPU2. Let the region sandwiched by the
+ * mmap_write_lock/unlock in T1 be called CS (the critical section).
+ *
+ * Claim: The CS of T1 will never operate on a freed pagetable.
+ *
+ * Proof:
+ *
+ * Case 1: The static branch is visible to T2.
+ *
+ * Case 1 (a): T1 acquires the lock before T2 can.
+ * T2 will block until T1 drops the lock, so free_hotplug_pgtable_page() will
+ * only be executed after T1 exits CS.
+ *
+ * Case 1 (b): T2 acquires the lock before T1 can.
+ * The acquire semantics of mmap_read_lock() ensure that an empty pagetable
+ * entry (via pxd_clear()) is visible to T1 before T1 can enter CS, therefore
+ * it is impossible for the CS to get hold of the isolated level + 1 pagetable.
+ *
+ * Case 2: The static branch is not visible to T2.
+ *
+ * Since static_branch_enable() and mmap_write_lock() (via smp_mb()) have
+ * acquire semantics, it implies that the static branch will be visible to
+ * all CPUs before T1 can enter CS. The static branch not being visible to
+ * T2 therefore implies that T1 has not yet entered CS .... (i)
+ *
+ * The sequence of barriers via __flush_tlb_kernel_pgtable() in T2
+ * implies that if the invisibility of the static branch has been
+ * observed by T2 (i.e static_branch_unlikely() is observed as false),
+ * then all CPUs will have observed an empty pagetable entry via
+ * pxd_clear() ... (ii)
+ *
+ * Combining (i) and (ii), we conclude that T1 observes an empty pagetable
+ * entry before entering CS => it is impossible for the CS to get hold of
+ * the isolated level + 1 pagetable. Q.E.D
+ *
+ * We have proven that the claim is true on the assumption that
+ * there is no context switch for T1 and T2. Note that the reasoning
+ * of the proof uses barriers operating on the inner shareable domain,
+ * which means that they will affect all CPUs, and also a context switch
+ * will insert extra barriers into the code paths => the claim will
+ * stand true even if we drop the assumption.
+ */
+static void synchronize_with_ptdump(void)
+{
+ if (!static_branch_unlikely(&arm64_ptdump_key))
+ return;
+
+ mmap_read_lock(&init_mm);
+ mmap_read_unlock(&init_mm);
+}
+
static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned long floor,
unsigned long ceiling)
@@ -1036,6 +1093,7 @@ static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
pmd_clear(pmdp);
__flush_tlb_kernel_pgtable(start);
+ synchronize_with_ptdump();
free_hotplug_pgtable_page(virt_to_page(ptep));
}
@@ -1076,6 +1134,7 @@ static void free_empty_pmd_table(pud_t *pudp, unsigned long addr,
pud_clear(pudp);
__flush_tlb_kernel_pgtable(start);
+ synchronize_with_ptdump();
free_hotplug_pgtable_page(virt_to_page(pmdp));
}
@@ -1116,6 +1175,7 @@ static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
p4d_clear(p4dp);
__flush_tlb_kernel_pgtable(start);
+ synchronize_with_ptdump();
free_hotplug_pgtable_page(virt_to_page(pudp));
}
@@ -1156,6 +1216,7 @@ static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
pgd_clear(pgdp);
__flush_tlb_kernel_pgtable(start);
+ synchronize_with_ptdump();
free_hotplug_pgtable_page(virt_to_page(p4dp));
}
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 421a5de806c6..d543c9f8ffa8 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -283,6 +283,13 @@ void note_page_flush(struct ptdump_state *pt_st)
note_page(pt_st, 0, -1, pte_val(pte_zero));
}
+static void arm64_ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm)
+{
+ static_branch_enable(&arm64_ptdump_key);
+ ptdump_walk_pgd(st, mm, NULL);
+ static_branch_disable(&arm64_ptdump_key);
+}
+
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
unsigned long end = ~0UL;
@@ -311,7 +318,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
}
};
- ptdump_walk_pgd(&st.ptdump, info->mm, NULL);
+ arm64_ptdump_walk_pgd(&st.ptdump, info->mm);
}
static void __init ptdump_initialize(void)
@@ -353,7 +360,7 @@ bool ptdump_check_wx(void)
}
};
- ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+ arm64_ptdump_walk_pgd(&st.ptdump, &init_mm);
if (st.wx_pages || st.uxn_pages) {
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
--
2.30.2
Ever since commit c2ff29e99a76 ("siw: Inline do_tcp_sendpages()"),
we have been doing this:
static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
size_t size)
[...]
/* Calculate the number of bytes we need to push, for this page
* specifically */
size_t bytes = min_t(size_t, PAGE_SIZE - offset, size);
/* If we can't splice it, then copy it in, as normal */
if (!sendpage_ok(page[i]))
msg.msg_flags &= ~MSG_SPLICE_PAGES;
/* Set the bvec pointing to the page, with len $bytes */
bvec_set_page(&bvec, page[i], bytes, offset);
/* Set the iter to $size, aka the size of the whole sendpages (!!!) */
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
try_page_again:
lock_sock(sk);
/* Sendmsg with $size size (!!!) */
rv = tcp_sendmsg_locked(sk, &msg, size);
This means we've been sending oversized iov_iters and tcp_sendmsg calls
for a while. This has a been a benign bug because sendpage_ok() always
returned true. With the recent slab allocator changes being slowly
introduced into next (that disallow sendpage on large kmalloc
allocations), we have recently hit out-of-bounds crashes, due to slight
differences in iov_iter behavior between the MSG_SPLICE_PAGES and
"regular" copy paths:
(MSG_SPLICE_PAGES)
skb_splice_from_iter
iov_iter_extract_pages
iov_iter_extract_bvec_pages
uses i->nr_segs to correctly stop in its tracks before OoB'ing everywhere
skb_splice_from_iter gets a "short" read
(!MSG_SPLICE_PAGES)
skb_copy_to_page_nocache copy=iov_iter_count
[...]
copy_from_iter
/* this doesn't help */
if (unlikely(iter->count < len))
len = iter->count;
iterate_bvec
... and we run off the bvecs
Fix this by properly setting the iov_iter's byte count, plus sending the
correct byte count to tcp_sendmsg_locked.
Cc: stable(a)vger.kernel.org
Fixes: c2ff29e99a76 ("siw: Inline do_tcp_sendpages()")
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202507220801.50a7210-lkp@intel.com
Signed-off-by: Pedro Falcato <pfalcato(a)suse.de>
---
drivers/infiniband/sw/siw/siw_qp_tx.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 3a08f57d2211..9576a2b766c4 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -340,11 +340,11 @@ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
if (!sendpage_ok(page[i]))
msg.msg_flags &= ~MSG_SPLICE_PAGES;
bvec_set_page(&bvec, page[i], bytes, offset);
- iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, bytes);
try_page_again:
lock_sock(sk);
- rv = tcp_sendmsg_locked(sk, &msg, size);
+ rv = tcp_sendmsg_locked(sk, &msg, bytes);
release_sock(sk);
if (rv > 0) {
--
2.50.1
From: Edip Hazuri <edip(a)medip.dev>
The mute led on this laptop is using ALC245 but requires a quirk to work
This patch enables the existing quirk for the device.
Tested on Victus 16-r1xxx Laptop. The LED behaviour works
as intended.
v2:
- adapt the HD-audio code changes and rebase on for-next branch of tiwai/sound.git
- link to v1: https://lore.kernel.org/linux-sound/20250724210756.61453-2-edip@medip.dev/
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Edip Hazuri <edip(a)medip.dev>
---
sound/hda/codecs/realtek/alc269.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c
index 05019fa73..33ef08d25 100644
--- a/sound/hda/codecs/realtek/alc269.c
+++ b/sound/hda/codecs/realtek/alc269.c
@@ -6580,6 +6580,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c99, "HP Victus 16-r1xxx (MB 8C99)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
--
2.50.1
Since commit 3d1f08b032dc ("mtd: spinand: Use the external ECC engine
logic") the spinand_write_page() function ignores the errors returned
by spinand_wait(). Change the code to propagate those up to the stack
as it was done before the offending change.
Cc: stable(a)vger.kernel.org
Fixes: 3d1f08b032dc ("mtd: spinand: Use the external ECC engine logic")
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
---
drivers/mtd/nand/spi/core.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 7099db7a62be61f563380b724ac849057a834211..8cce63aef1b5ad7cda2f6ab28d29565aa979498f 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -688,7 +688,10 @@ int spinand_write_page(struct spinand_device *spinand,
SPINAND_WRITE_INITIAL_DELAY_US,
SPINAND_WRITE_POLL_DELAY_US,
&status);
- if (!ret && (status & STATUS_PROG_FAILED))
+ if (ret)
+ return ret;
+
+ if (status & STATUS_PROG_FAILED)
return -EIO;
return nand_ecc_finish_io_req(nand, (struct nand_page_io_req *)req);
---
base-commit: 19272b37aa4f83ca52bdf9c16d5d81bdd1354494
change-id: 20250708-spinand-propagate-wait-timeout-a0220ea7c322
Best regards,
--
Gabor Juhos <j4g8y7(a)gmail.com>
It seems like what was intended is to test if the dma_map of the
previous line failed but the wrong dma address was passed.
Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver")
Signed-off-by: Thomas Fourier <fourier.thomas(a)gmail.com>
---
v1 -> v2:
- Add stable(a)vger.kernel.org
- Fix subject prefix
drivers/mtd/nand/raw/atmel/nand-controller.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index dedcca87defc..84ab4a83cbd6 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -373,7 +373,7 @@ static int atmel_nand_dma_transfer(struct atmel_nand_controller *nc,
dma_cookie_t cookie;
buf_dma = dma_map_single(nc->dev, buf, len, dir);
- if (dma_mapping_error(nc->dev, dev_dma)) {
+ if (dma_mapping_error(nc->dev, buf_dma)) {
dev_err(nc->dev,
"Failed to prepare a buffer for DMA access\n");
goto err;
--
2.43.0
Before commit df6d7277e552 ("i2c: core: Do not dereference fwnode in struct
device"), i2c_unregister_device() only called fwnode_handle_put() on
of_node-s in the form of calling of_node_put(client->dev.of_node).
But after this commit the i2c_client's fwnode now unconditionally gets
fwnode_handle_put() on it.
When the i2c_client has no primary (ACPI / OF) fwnode but it does have
a software fwnode, the software-node will be the primary node and
fwnode_handle_put() will put() it.
But for the software fwnode device_remove_software_node() will also put()
it leading to a double free:
[ 82.665598] ------------[ cut here ]------------
[ 82.665609] refcount_t: underflow; use-after-free.
[ 82.665808] WARNING: CPU: 3 PID: 1502 at lib/refcount.c:28 refcount_warn_saturate+0xba/0x11
...
[ 82.666830] RIP: 0010:refcount_warn_saturate+0xba/0x110
...
[ 82.666962] <TASK>
[ 82.666971] i2c_unregister_device+0x60/0x90
Fix this by not calling fwnode_handle_put() when the primary fwnode is
a software-node.
Fixes: df6d7277e552 ("i2c: core: Do not dereference fwnode in struct device")
Cc: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hansg(a)kernel.org>
---
drivers/i2c/i2c-core-base.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 2ad2b1838f0f..0849aa44952d 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1066,7 +1066,13 @@ void i2c_unregister_device(struct i2c_client *client)
of_node_clear_flag(to_of_node(fwnode), OF_POPULATED);
else if (is_acpi_device_node(fwnode))
acpi_device_clear_enumerated(to_acpi_device_node(fwnode));
- fwnode_handle_put(fwnode);
+
+ /*
+ * If the primary fwnode is a software node it is free-ed by
+ * device_remove_software_node() below, avoid double-free.
+ */
+ if (!is_software_node(fwnode))
+ fwnode_handle_put(fwnode);
device_remove_software_node(&client->dev);
device_unregister(&client->dev);
--
2.49.0
From: Kairui Song <kasong(a)tencent.com>
The current swap-in code assumes that, when a swap entry in shmem mapping
is order 0, its cached folios (if present) must be order 0 too, which
turns out not always correct.
The problem is shmem_split_large_entry is called before verifying the
folio will eventually be swapped in, one possible race is:
CPU1 CPU2
shmem_swapin_folio
/* swap in of order > 0 swap entry S1 */
folio = swap_cache_get_folio
/* folio = NULL */
order = xa_get_order
/* order > 0 */
folio = shmem_swap_alloc_folio
/* mTHP alloc failure, folio = NULL */
<... Interrupted ...>
shmem_swapin_folio
/* S1 is swapped in */
shmem_writeout
/* S1 is swapped out, folio cached */
shmem_split_large_entry(..., S1)
/* S1 is split, but the folio covering it has order > 0 now */
Now any following swapin of S1 will hang: `xa_get_order` returns 0, and
folio lookup will return a folio with order > 0. The
`xa_get_order(&mapping->i_pages, index) != folio_order(folio)` will always
return false causing swap-in to return -EEXIST.
And this looks fragile. So fix this up by allowing seeing a larger folio
in swap cache, and check the whole shmem mapping range covered by the
swapin have the right swap value upon inserting the folio. And drop the
redundant tree walks before the insertion.
This will actually improve performance, as it avoids two redundant Xarray
tree walks in the hot path, and the only side effect is that in the
failure path, shmem may redundantly reallocate a few folios causing
temporary slight memory pressure.
And worth noting, it may seems the order and value check before inserting
might help reducing the lock contention, which is not true. The swap
cache layer ensures raced swapin will either see a swap cache folio or
failed to do a swapin (we have SWAP_HAS_CACHE bit even if swap cache is
bypassed), so holding the folio lock and checking the folio flag is
already good enough for avoiding the lock contention. The chance that a
folio passes the swap entry value check but the shmem mapping slot has
changed should be very low.
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Reviewed-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: <stable(a)vger.kernel.org>
---
mm/shmem.c | 39 ++++++++++++++++++++++++++++++---------
1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 7570a24e0ae4..1d0fd266c29b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -891,7 +891,9 @@ static int shmem_add_to_page_cache(struct folio *folio,
pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
- long nr = folio_nr_pages(folio);
+ unsigned long nr = folio_nr_pages(folio);
+ swp_entry_t iter, swap;
+ void *entry;
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -903,14 +905,25 @@ static int shmem_add_to_page_cache(struct folio *folio,
gfp &= GFP_RECLAIM_MASK;
folio_throttle_swaprate(folio, gfp);
+ swap = radix_to_swp_entry(expected);
do {
+ iter = swap;
xas_lock_irq(&xas);
- if (expected != xas_find_conflict(&xas)) {
- xas_set_err(&xas, -EEXIST);
- goto unlock;
+ xas_for_each_conflict(&xas, entry) {
+ /*
+ * The range must either be empty, or filled with
+ * expected swap entries. Shmem swap entries are never
+ * partially freed without split of both entry and
+ * folio, so there shouldn't be any holes.
+ */
+ if (!expected || entry != swp_to_radix_entry(iter)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ iter.val += 1 << xas_get_order(&xas);
}
- if (expected && xas_find_conflict(&xas)) {
+ if (expected && iter.val - nr != swap.val) {
xas_set_err(&xas, -EEXIST);
goto unlock;
}
@@ -2359,7 +2372,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -ENOMEM;
goto failed;
}
- } else if (order != folio_order(folio)) {
+ } else if (order > folio_order(folio)) {
/*
* Swap readahead may swap in order 0 folios into swapcache
* asynchronously, while the shmem mapping can still stores
@@ -2384,15 +2397,23 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
}
+ } else if (order < folio_order(folio)) {
+ swap.val = round_down(swap.val, 1 << folio_order(folio));
+ index = round_down(index, 1 << folio_order(folio));
}
alloced:
- /* We have to do this with folio locked to prevent races */
+ /*
+ * We have to do this with the folio locked to prevent races.
+ * The shmem_confirm_swap below only checks if the first swap
+ * entry matches the folio, that's enough to ensure the folio
+ * is not used outside of shmem, as shmem swap entries
+ * and swap cache folios are never partially freed.
+ */
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
- folio->swap.val != swap.val ||
!shmem_confirm_swap(mapping, index, swap) ||
- xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
+ folio->swap.val != swap.val) {
error = -EEXIST;
goto unlock;
}
--
2.50.1