The mhi_ep_read_channel function incorrectly assumes the End of Transfer
(EOT) bit is received with the doorbell in chained transactions, causing
it to advance mhi_chan->rd_offset beyond wr_offset during host-to-device
transfers when EOT has not yet arrived, leading to access of unmapped host
memory that causes IOMMU faults and processing of stale TREs.
Modify the loop condition to ensure mhi_queue is not empty, allowing the
function to process only valid TREs up to the current write pointer to
prevent premature reads and ensure safe traversal of chained TREs.
Remove buf_left from the while loop condition to avoid exiting prematurely
before reading the ring completely, and remove write_offset since it will
always be zero because the new cache buffer is allocated every time.
Fixes: 5301258899773 ("bus: mhi: ep: Add support for reading from the host")
Cc: stable(a)vger.kernel.org
Co-developed-by: Akhil Vinod <akhil.vinod(a)oss.qualcomm.com>
Signed-off-by: Akhil Vinod <akhil.vinod(a)oss.qualcomm.com>
Signed-off-by: Sumit Kumar <sumit.kumar(a)oss.qualcomm.com>
---
Changes in v3:
- Update commit message
- Migrated to new mail
- Link to v2: https://lore.kernel.org/r/20250822-chained_transfer-v2-1-7aeb5ac215b6@quici…
Changes in v2:
- Use mhi_ep_queue_is_empty in while loop (Mani).
- Remove do while loop in mhi_ep_process_ch_ring (Mani).
- Remove buf_left, wr_offset, tr_done.
- Haven't added Reviewed-by as there is change in logic.
- Link to v1: https://lore.kernel.org/r/20250709-chained_transfer-v1-1-2326a4605c9c@quici…
---
drivers/bus/mhi/ep/main.c | 37 ++++++++++++-------------------------
1 file changed, 12 insertions(+), 25 deletions(-)
diff --git a/drivers/bus/mhi/ep/main.c b/drivers/bus/mhi/ep/main.c
index b3eafcf2a2c50d95e3efd3afb27038ecf55552a5..cdea24e9291959ae0a92487c1b9698dc8164d2f1 100644
--- a/drivers/bus/mhi/ep/main.c
+++ b/drivers/bus/mhi/ep/main.c
@@ -403,17 +403,13 @@ static int mhi_ep_read_channel(struct mhi_ep_cntrl *mhi_cntrl,
{
struct mhi_ep_chan *mhi_chan = &mhi_cntrl->mhi_chan[ring->ch_id];
struct device *dev = &mhi_cntrl->mhi_dev->dev;
- size_t tr_len, read_offset, write_offset;
+ size_t tr_len, read_offset;
struct mhi_ep_buf_info buf_info = {};
u32 len = MHI_EP_DEFAULT_MTU;
struct mhi_ring_element *el;
- bool tr_done = false;
void *buf_addr;
- u32 buf_left;
int ret;
- buf_left = len;
-
do {
/* Don't process the transfer ring if the channel is not in RUNNING state */
if (mhi_chan->state != MHI_CH_STATE_RUNNING) {
@@ -426,24 +422,23 @@ static int mhi_ep_read_channel(struct mhi_ep_cntrl *mhi_cntrl,
/* Check if there is data pending to be read from previous read operation */
if (mhi_chan->tre_bytes_left) {
dev_dbg(dev, "TRE bytes remaining: %u\n", mhi_chan->tre_bytes_left);
- tr_len = min(buf_left, mhi_chan->tre_bytes_left);
+ tr_len = min(len, mhi_chan->tre_bytes_left);
} else {
mhi_chan->tre_loc = MHI_TRE_DATA_GET_PTR(el);
mhi_chan->tre_size = MHI_TRE_DATA_GET_LEN(el);
mhi_chan->tre_bytes_left = mhi_chan->tre_size;
- tr_len = min(buf_left, mhi_chan->tre_size);
+ tr_len = min(len, mhi_chan->tre_size);
}
read_offset = mhi_chan->tre_size - mhi_chan->tre_bytes_left;
- write_offset = len - buf_left;
buf_addr = kmem_cache_zalloc(mhi_cntrl->tre_buf_cache, GFP_KERNEL);
if (!buf_addr)
return -ENOMEM;
buf_info.host_addr = mhi_chan->tre_loc + read_offset;
- buf_info.dev_addr = buf_addr + write_offset;
+ buf_info.dev_addr = buf_addr;
buf_info.size = tr_len;
buf_info.cb = mhi_ep_read_completion;
buf_info.cb_buf = buf_addr;
@@ -459,16 +454,12 @@ static int mhi_ep_read_channel(struct mhi_ep_cntrl *mhi_cntrl,
goto err_free_buf_addr;
}
- buf_left -= tr_len;
mhi_chan->tre_bytes_left -= tr_len;
- if (!mhi_chan->tre_bytes_left) {
- if (MHI_TRE_DATA_GET_IEOT(el))
- tr_done = true;
-
+ if (!mhi_chan->tre_bytes_left)
mhi_chan->rd_offset = (mhi_chan->rd_offset + 1) % ring->ring_size;
- }
- } while (buf_left && !tr_done);
+ /* Read until the some buffer is left or the ring becomes not empty */
+ } while (!mhi_ep_queue_is_empty(mhi_chan->mhi_dev, DMA_TO_DEVICE));
return 0;
@@ -502,15 +493,11 @@ static int mhi_ep_process_ch_ring(struct mhi_ep_ring *ring)
mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
} else {
/* UL channel */
- do {
- ret = mhi_ep_read_channel(mhi_cntrl, ring);
- if (ret < 0) {
- dev_err(&mhi_chan->mhi_dev->dev, "Failed to read channel\n");
- return ret;
- }
-
- /* Read until the ring becomes empty */
- } while (!mhi_ep_queue_is_empty(mhi_chan->mhi_dev, DMA_TO_DEVICE));
+ ret = mhi_ep_read_channel(mhi_cntrl, ring);
+ if (ret < 0) {
+ dev_err(&mhi_chan->mhi_dev->dev, "Failed to read channel\n");
+ return ret;
+ }
}
return 0;
---
base-commit: 4c06e63b92038fadb566b652ec3ec04e228931e8
change-id: 20250910-final_chained-750c213725f1
Best regards,
--
Sumit Kumar <sumit.kumar(a)oss.qualcomm.com>
Some recent Lenovo and Inspur machines with Zhaoxin CPUs fail to create
/sys/class/backlight/acpi_video0 on v6.6 kernels, while the same hardware
works correctly on v5.4.
Our analysis shows that the current implementation assumes the presence of a
GPU. The backlight registration is only triggered if a GPU is detected, but on
these platforms the backlight is handled purely by the EC without any GPU.
As a result, the detection path does not create the expected backlight node.
To fix this, move the following logic:
/* Use ACPI video if available, except when native should be preferred. */
if ((video_caps & ACPI_VIDEO_BACKLIGHT) &&
!(native_available && prefer_native_over_acpi_video()))
return acpi_backlight_video;
above the if (auto_detect) *auto_detect = true; statement.
This ensures that the ACPI video backlight node is created even when no GPU is
present, restoring the correct behavior observed on older kernels.
Fixes: 78dfc9d1d1ab ("ACPI: video: Add auto_detect arg to __acpi_video_get_backlight_type()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Zihuan Zhang <zhangzihuan(a)kylinos.cn>
---
drivers/acpi/video_detect.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index d507d5e08435..c1bb22b57f56 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -1011,6 +1011,11 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto
if (acpi_backlight_dmi != acpi_backlight_undef)
return acpi_backlight_dmi;
+ /* Use ACPI video if available, except when native should be preferred. */
+ if ((video_caps & ACPI_VIDEO_BACKLIGHT) &&
+ !(native_available && prefer_native_over_acpi_video()))
+ return acpi_backlight_video;
+
if (auto_detect)
*auto_detect = true;
@@ -1024,11 +1029,6 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto
if (dell_uart_present)
return acpi_backlight_dell_uart;
- /* Use ACPI video if available, except when native should be preferred. */
- if ((video_caps & ACPI_VIDEO_BACKLIGHT) &&
- !(native_available && prefer_native_over_acpi_video()))
- return acpi_backlight_video;
-
/* Use native if available */
if (native_available)
return acpi_backlight_native;
--
2.25.1
If copy_from_user() fails, write() currently returns -EFAULT, but any
partially written data leaves the TX FIFO in an inconsistent state.
Subsequent write() calls then fail with "transmit length mismatch"
errors.
Once partial data is written to the hardware FIFO, it cannot be removed
without a TX reset. Commit c6e8d85fafa7 ("staging: axis-fifo: Remove
hardware resets for user errors") removed a full FIFO reset for this case,
which fixed a potential RX data loss, but introduced this TX issue.
Fix this by introducing a bounce buffer: copy the full packet from
userspace first, and write to the hardware FIFO only if the copy
was successful.
Fixes: c6e8d85fafa7 ("staging: axis-fifo: Remove hardware resets for user errors")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ovidiu Panait <ovidiu.panait.oss(a)gmail.com>
---
drivers/staging/axis-fifo/axis-fifo.c | 36 ++++++++-------------------
1 file changed, 10 insertions(+), 26 deletions(-)
diff --git a/drivers/staging/axis-fifo/axis-fifo.c b/drivers/staging/axis-fifo/axis-fifo.c
index f54614ba1aa8..c47c6a022402 100644
--- a/drivers/staging/axis-fifo/axis-fifo.c
+++ b/drivers/staging/axis-fifo/axis-fifo.c
@@ -43,7 +43,6 @@
#define DRIVER_NAME "axis_fifo"
#define READ_BUF_SIZE 128U /* read buffer length in words */
-#define WRITE_BUF_SIZE 128U /* write buffer length in words */
#define AXIS_FIFO_DEBUG_REG_NAME_MAX_LEN 4
@@ -305,11 +304,8 @@ static ssize_t axis_fifo_write(struct file *f, const char __user *buf,
{
struct axis_fifo *fifo = (struct axis_fifo *)f->private_data;
unsigned int words_to_write;
- unsigned int copied;
- unsigned int copy;
- unsigned int i;
+ u32 *txbuf;
int ret;
- u32 tmp_buf[WRITE_BUF_SIZE];
if (len % sizeof(u32)) {
dev_err(fifo->dt_device,
@@ -374,32 +370,20 @@ static ssize_t axis_fifo_write(struct file *f, const char __user *buf,
}
}
- /* write data from an intermediate buffer into the fifo IP, refilling
- * the buffer with userspace data as needed
- */
- copied = 0;
- while (words_to_write > 0) {
- copy = min(words_to_write, WRITE_BUF_SIZE);
-
- if (copy_from_user(tmp_buf, buf + copied * sizeof(u32),
- copy * sizeof(u32))) {
- ret = -EFAULT;
- goto end_unlock;
- }
-
- for (i = 0; i < copy; i++)
- iowrite32(tmp_buf[i], fifo->base_addr +
- XLLF_TDFD_OFFSET);
-
- copied += copy;
- words_to_write -= copy;
+ txbuf = vmemdup_user(buf, len);
+ if (IS_ERR(txbuf)) {
+ ret = PTR_ERR(txbuf);
+ goto end_unlock;
}
- ret = copied * sizeof(u32);
+ for (int i = 0; i < words_to_write; ++i)
+ iowrite32(txbuf[i], fifo->base_addr + XLLF_TDFD_OFFSET);
/* write packet size to fifo */
- iowrite32(ret, fifo->base_addr + XLLF_TLR_OFFSET);
+ iowrite32(len, fifo->base_addr + XLLF_TLR_OFFSET);
+ ret = len;
+ kvfree(txbuf);
end_unlock:
mutex_unlock(&fifo->write_lock);
--
2.50.0
Hello,
This series is based on commit
320475fbd590 Merge tag 'mtd/fixes-for-6.17-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux
of Mainline Linux.
The first patch in the series has been posted as a Fix in contrast to
its predecessor at:
https://lore.kernel.org/r/20250903124505.365913-10-s-vadapalli@ti.com/
based on the feedback provided by Jiri Slaby <jirislaby(a)kernel.org> at:
https://lore.kernel.org/r/3d3a4b52-e343-42f3-9d69-94c259812143@kernel.org/
Since the Fix is independent of enabling loadable module support for the
pci-keystone.c driver, it is being posted as a new patch.
Checking out at the commit of Mainline Linux which this series is based
on, I noticed an exception triggered by the pci-keystone.c driver during
its probe. Although this is not a fatal exception and Linux continues to
boot, the driver is non-functional. I root-caused the exception to
free_initmem() freeing the memory associated with the ks_pcie_host_init()
function in the driver before the driver's probe was invoked. This
appears to be a race condition but it is easily reproducible with the
Linux .config that I have used. The fix therefore is to remove the
__init macro which is implemented by the second patch in the series.
For reference, the logs for the case where Linux is built by checking
out at the base commit of Mainline Linux are:
https://gist.github.com/Siddharth-Vadapalli-at-TI/f4891b707921c53dfb464ad2f…
and the logs clearly prove that the print associated with free_initmem()
which is:
[ 2.446834] Freeing unused kernel memory: 4864K
is displayed prior to the prints associated with the pci-keystone.c
driver being probed which is:
[ 7.707103] keystone-pcie 5500000.pcie: host bridge /bus@100000/pcie@5500000 ranges:
Building Linux by applying both patches in the series on the base commit of
Mainline Linux, the driver probes successfully without any exceptions or
errors. This was tested on AM654-EVM with an NVMe SSD connected to the
PCIe Connector on the board. The NVMe SSD enumerates successfully.
Additionally, the 'hdparm' utility was used to read from the SSD
confirming that the SSD is functional. The logs corresponding to this are:
https://gist.github.com/Siddharth-Vadapalli-at-TI/1b09a12a53db4233e82c5bcfc…
Regards,
Siddharth.
Siddharth Vadapalli (2):
PCI: keystone: Use devm_request_irq() to free "ks-pcie-error-irq" on
exit
PCI: keystone: Remove the __init macro for the ks_pcie_host_init()
callback
drivers/pci/controller/dwc/pci-keystone.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--
2.43.0
A recent innocuous internal optimization change in LLVM [1] causes the
same issue that necessitated commit 660942f2441d ("drm: omapdrm: reduce
clang stack usage") to occur in dispc_runtime_suspend() from inlinling
dispc_save_context().
drivers/gpu/drm/omapdrm/dss/dispc.c:4720:27: error: stack frame size (2272) exceeds limit (2048) in 'dispc_runtime_suspend' [-Werror,-Wframe-larger-than]
4720 | static __maybe_unused int dispc_runtime_suspend(struct device *dev)
| ^
There is an unfortunate interaction between the inner loops of
dispc_save_context() getting unrolled and the calculation of the index
into the ctx array being spilled to the stack when sanitizers are
enabled [2].
While this should obviously be addressed on the LLVM side, such a fix
may not be easy to craft and it is simple enough to work around the
issue in the same manner as before by marking dispc_save_context() with
noinline_for_stack, which makes it use the same amount of stack as
dispc_restore_context() does after the same change.
Cc: stable(a)vger.kernel.org
Link: https://github.com/llvm/llvm-project/commit/055bfc027141bbfafd51fb43f5ab81b… [1]
Link: https://llvm.org/pr143908 [2]
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
drivers/gpu/drm/omapdrm/dss/dispc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index cf055815077cffad554a4ae58cfd7b81edcbb0d4..d079f557c8f24d1afd0bc182edd13165cb9c356c 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -417,7 +417,7 @@ static bool dispc_has_feature(struct dispc_device *dispc,
#define RR(dispc, reg) \
dispc_write_reg(dispc, DISPC_##reg, dispc->ctx[DISPC_##reg / sizeof(u32)])
-static void dispc_save_context(struct dispc_device *dispc)
+static noinline_for_stack void dispc_save_context(struct dispc_device *dispc)
{
int i, j;
---
base-commit: 76eeb9b8de9880ca38696b2fb56ac45ac0a25c6c
change-id: 20250911-omapdrm-reduce-clang-stack-usage-pt-2-9a9ae9263b91
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
The bug is a typo in the compatible string for the touchscreen node.
According to Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml,
the correct compatible is "focaltech,ft8719", but the device tree used
"focaltech,fts8719".
Fixes: 45882459159de (arm64: dts: qcom: sdm845: add device tree for SHIFT6mq)
Cc: stable(a)vger.kernel.org
Signed-off-by: Tamura Dai <kirinode0(a)gmail.com>
---
arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
index 2cf7b5e1243c..a0b288d6162f 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
@@ -432,7 +432,7 @@ &i2c5 {
status = "okay";
touchscreen@38 {
- compatible = "focaltech,fts8719";
+ compatible = "focaltech,ft8719";
reg = <0x38>;
wakeup-source;
interrupt-parent = <&tlmm>;
--
2.34.1
Merge a hibernation regression fix and an fix related to energy model
The bug is a typo in the compatible string for the touchscreen node.
According to Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml,
the correct compatible is "focaltech,ft8719", but the device tree used
"focaltech,fts8719".
Fixes: 45882459159de (arm64: dts: qcom: sdm845: add device tree for SHIFT6mq)
Cc: stable(a)vger.kernel.org
Signed-off-by: Tamura Dai <kirinode0(a)gmail.com>
---
arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
index 2cf7b5e1243c..a0b288d6162f 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
@@ -432,7 +432,7 @@ &i2c5 {
status = "okay";
touchscreen@38 {
- compatible = "focaltech,fts8719";
+ compatible = "focaltech,ft8719";
reg = <0x38>;
wakeup-source;
interrupt-parent = <&tlmm>;
--
2.34.1
It has been completely removed since v6.14-rc6 by
commit dd5bdaf2b72da81d57f4f99e518af80002b6562e
Author: Ingo Molnar <mingo(a)kernel.org>
AuthorDate: Mon Mar 17 11:42:54 2025 +0100
Commit: Ingo Molnar <mingo(a)kernel.org>
CommitDate: Wed Mar 19 22:20:53 2025 +0100
sched/debug: Make CONFIG_SCHED_DEBUG functionality unconditional
Fixes yocto meta-arm sbsa-ref kernel config warning which
uses kernel.org arm64 defconfig:
DEBUG: Executing python function do_kernel_configcheck
WARNING: [kernel config]: This BSP contains fragments with warnings:
[INFO]: the following symbols were not found in the active
configuration:
- CONFIG_SCHED_DEBUG
DEBUG: Python function do_kernel_configcheck finished
Fixes: dd5bdaf2b72d ("sched/debug: Make CONFIG_SCHED_DEBUG functionality unconditional")
Cc: <stable(a)vger.kernel.org>
Cc: Jon Mason <jon.mason(a)arm.com>
Cc: Ross Burton <ross.burton(a)arm.com>
Cc: bruce.ashfield(a)gmail.com
Signed-off-by: Mikko Rapeli <mikko.rapeli(a)linaro.org>
---
arch/arm64/configs/defconfig | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 58f87d09366cd..4126281665bf2 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1798,7 +1798,6 @@ CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_INFO_REDUCED=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-# CONFIG_SCHED_DEBUG is not set
# CONFIG_DEBUG_PREEMPT is not set
# CONFIG_FTRACE is not set
CONFIG_CORESIGHT=m
--
2.34.1
Hi Greg, sorry I mistyped the "stable" email address, so I'm
forwarding this patch to you.
Here's the original email:
https://lore.kernel.org/lkml/20250911222501.1417765-1-max.kellermann@ionos.…
---------- Forwarded message ---------
From: Max Kellermann <max.kellermann(a)ionos.com>
Date: Fri, Sep 12, 2025 at 12:25 AM
Subject: [PATCH] fs/netfs: fix reference leak
To: David Howells <dhowells(a)redhat.com>, Paulo Alcantara
<pc(a)manguebit.org>, Christian Brauner <brauner(a)kernel.org>,
<netfs(a)lists.linux.dev>, <linux-fsdevel(a)vger.kernel.org>,
<linux-kernel(a)vger.kernel.org>
Cc: Max Kellermann <max.kellermann(a)ionos.com>, <linux-stable(a)vger.kernel.org>
Commit 20d72b00ca81 ("netfs: Fix the request's work item to not
require a ref") modified netfs_alloc_request() to initialize the
reference counter to 2 instead of 1. The rationale was that the
requet's "work" would release the second reference after completion
(via netfs_{read,write}_collection_worker()). That works most of the
time if all goes well.
However, it leaks this additional reference if the request is released
before the I/O operation has been submitted: the error code path only
decrements the reference counter once and the work item will never be
queued because there will never be a completion.
This has caused outages of our whole server cluster today because
tasks were blocked in netfs_wait_for_outstanding_io(), leading to
deadlocks in Ceph (another bug that I will address soon in another
patch). This was caused by a netfs_pgpriv2_begin_copy_to_cache() call
which failed in fscache_begin_write_operation(). The leaked
netfs_io_request was never completed, leaving `netfs_inode.io_count`
with a positive value forever.
All of this is super-fragile code. Finding out which code paths will
lead to an eventual completion and which do not is hard to see:
- Some functions like netfs_create_write_req() allocate a request, but
will never submit any I/O.
- netfs_unbuffered_read_iter_locked() calls netfs_unbuffered_read()
and then netfs_put_request(); however, netfs_unbuffered_read() can
also fail early before submitting the I/O request, therefore another
netfs_put_request() call must be added there.
A rule of thumb is that functions that return a `netfs_io_request` do
not submit I/O, and all of their callers must be checked.
For my taste, the whole netfs code needs an overhaul to make reference
counting easier to understand and less fragile & obscure. But to fix
this bug here and now and produce a patch that is adequate for a
stable backport, I tried a minimal approach that quickly frees the
request object upon early failure.
I decided against adding a second netfs_put_request() each time
because that would cause code duplication which obscures the code
further. Instead, I added the function netfs_put_failed_request()
which frees such a failed request synchronously under the assumption
that the reference count is exactly 2 (as initially set by
netfs_alloc_request() and never touched), verified by a
WARN_ON_ONCE(). It then deinitializes the request object (without
going through the "cleanup_work" indirection) and frees the allocation
(without the "call_rcu" indirection). This should be safe because
this is the same context that allocated/initialized the request and
nobody else has a pointer to this object.
All code paths that fail early have been changed to call
netfs_put_failed_request() instead of netfs_put_request().
Additionally, I have added a netfs_put_request() call to
netfs_unbuffered_read() as explained above because the
netfs_put_failed_request() approach does not work there.
Fixes: 20d72b00ca81 ("netfs: Fix the request's work item to not require a ref")
Cc: linux-stable(a)vger.kernel.org
Signed-off-by: Max Kellermann <max.kellermann(a)ionos.com>
---
fs/netfs/buffered_read.c | 10 +++++-----
fs/netfs/direct_read.c | 7 ++++++-
fs/netfs/direct_write.c | 6 +++++-
fs/netfs/internal.h | 1 +
fs/netfs/objects.c | 32 +++++++++++++++++++++++++++++---
fs/netfs/read_pgpriv2.c | 2 +-
fs/netfs/read_single.c | 2 +-
fs/netfs/write_issue.c | 3 +--
8 files changed, 49 insertions(+), 14 deletions(-)
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 18b3dc74c70e..37ab6f28b5ad 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -369,7 +369,7 @@ void netfs_readahead(struct readahead_control *ractl)
return netfs_put_request(rreq, netfs_rreq_trace_put_return);
cleanup_free:
- return netfs_put_request(rreq, netfs_rreq_trace_put_failed);
+ return netfs_put_failed_request(rreq);
}
EXPORT_SYMBOL(netfs_readahead);
@@ -472,7 +472,7 @@ static int netfs_read_gaps(struct file *file,
struct folio *folio)
return ret < 0 ? ret : 0;
discard:
- netfs_put_request(rreq, netfs_rreq_trace_put_discard);
+ netfs_put_failed_request(rreq);
alloc_error:
folio_unlock(folio);
return ret;
@@ -532,7 +532,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
return ret < 0 ? ret : 0;
discard:
- netfs_put_request(rreq, netfs_rreq_trace_put_discard);
+ netfs_put_failed_request(rreq);
alloc_error:
folio_unlock(folio);
return ret;
@@ -699,7 +699,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
return 0;
error_put:
- netfs_put_request(rreq, netfs_rreq_trace_put_failed);
+ netfs_put_failed_request(rreq);
error:
if (folio) {
folio_unlock(folio);
@@ -754,7 +754,7 @@ int netfs_prefetch_for_write(struct file *file,
struct folio *folio,
return ret < 0 ? ret : 0;
error_put:
- netfs_put_request(rreq, netfs_rreq_trace_put_discard);
+ netfs_put_failed_request(rreq);
error:
_leave(" = %d", ret);
return ret;
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index a05e13472baf..a498ee8d6674 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -131,6 +131,7 @@ static ssize_t netfs_unbuffered_read(struct
netfs_io_request *rreq, bool sync)
if (rreq->len == 0) {
pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
return -EIO;
}
@@ -205,7 +206,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct
kiocb *iocb, struct iov_iter *i
if (user_backed_iter(iter)) {
ret = netfs_extract_user_iter(iter, rreq->len,
&rreq->buffer.iter, 0);
if (ret < 0)
- goto out;
+ goto error_put;
rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
rreq->direct_bv_count = ret;
rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
@@ -238,6 +239,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct
kiocb *iocb, struct iov_iter *i
if (ret > 0)
orig_count -= ret;
return ret;
+
+error_put:
+ netfs_put_failed_request(rreq);
+ return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index a16660ab7f83..a9d1c3b2c084 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -57,7 +57,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct
kiocb *iocb, struct iov_iter *
n = netfs_extract_user_iter(iter, len,
&wreq->buffer.iter, 0);
if (n < 0) {
ret = n;
- goto out;
+ goto error_put;
}
wreq->direct_bv = (struct bio_vec
*)wreq->buffer.iter.bvec;
wreq->direct_bv_count = n;
@@ -101,6 +101,10 @@ ssize_t netfs_unbuffered_write_iter_locked(struct
kiocb *iocb, struct iov_iter *
out:
netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
+
+error_put:
+ netfs_put_failed_request(wreq);
+ return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index d4f16fefd965..4319611f5354 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -87,6 +87,7 @@ struct netfs_io_request *netfs_alloc_request(struct
address_space *mapping,
void netfs_get_request(struct netfs_io_request *rreq, enum
netfs_rreq_ref_trace what);
void netfs_clear_subrequests(struct netfs_io_request *rreq);
void netfs_put_request(struct netfs_io_request *rreq, enum
netfs_rreq_ref_trace what);
+void netfs_put_failed_request(struct netfs_io_request *rreq);
struct netfs_io_subrequest *netfs_alloc_subrequest(struct
netfs_io_request *rreq);
static inline void netfs_see_request(struct netfs_io_request *rreq,
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index e8c99738b5bb..9a3fbb73325e 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -116,10 +116,8 @@ static void netfs_free_request_rcu(struct rcu_head *rcu)
netfs_stat_d(&netfs_n_rh_rreq);
}
-static void netfs_free_request(struct work_struct *work)
+static void netfs_deinit_request(struct netfs_io_request *rreq)
{
- struct netfs_io_request *rreq =
- container_of(work, struct netfs_io_request, cleanup_work);
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned int i;
@@ -149,6 +147,14 @@ static void netfs_free_request(struct work_struct *work)
if (atomic_dec_and_test(&ictx->io_count))
wake_up_var(&ictx->io_count);
+}
+
+static void netfs_free_request(struct work_struct *work)
+{
+ struct netfs_io_request *rreq =
+ container_of(work, struct netfs_io_request, cleanup_work);
+
+ netfs_deinit_request(rreq);
call_rcu(&rreq->rcu, netfs_free_request_rcu);
}
@@ -167,6 +173,26 @@ void netfs_put_request(struct netfs_io_request
*rreq, enum netfs_rreq_ref_trace
}
}
+/*
+ * Free a request (synchronously) that was just allocated but has
+ * failed before it could be submitted.
+ */
+void netfs_put_failed_request(struct netfs_io_request *rreq)
+{
+ /* new requests have two references (see
+ * netfs_alloc_request(), and this function is only allowed on
+ * new request objects
+ */
+ WARN_ON_ONCE(refcount_read(&rreq->ref) != 2);
+
+ trace_netfs_rreq_ref(rreq->debug_id, 0, netfs_rreq_trace_put_failed);
+
+ netfs_deinit_request(rreq);
+
+ mempool_free(rreq, rreq->netfs_ops->request_pool ?:
&netfs_request_pool);
+ netfs_stat_d(&netfs_n_rh_rreq);
+}
+
/*
* Allocate and partially initialise an I/O request structure.
*/
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index 8097bc069c1d..a1489aa29f78 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -118,7 +118,7 @@ static struct netfs_io_request
*netfs_pgpriv2_begin_copy_to_cache(
return creq;
cancel_put:
- netfs_put_request(creq, netfs_rreq_trace_put_return);
+ netfs_put_failed_request(creq);
cancel:
rreq->copy_to_cache = ERR_PTR(-ENOBUFS);
clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
index fa622a6cd56d..5c0dc4efc792 100644
--- a/fs/netfs/read_single.c
+++ b/fs/netfs/read_single.c
@@ -189,7 +189,7 @@ ssize_t netfs_read_single(struct inode *inode,
struct file *file, struct iov_ite
return ret;
cleanup_free:
- netfs_put_request(rreq, netfs_rreq_trace_put_failed);
+ netfs_put_failed_request(rreq);
return ret;
}
EXPORT_SYMBOL(netfs_read_single);
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 0584cba1a043..dd8743bc8d7f 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -133,8 +133,7 @@ struct netfs_io_request
*netfs_create_write_req(struct address_space *mapping,
return wreq;
nomem:
- wreq->error = -ENOMEM;
- netfs_put_request(wreq, netfs_rreq_trace_put_failed);
+ netfs_put_failed_request(wreq);
return ERR_PTR(-ENOMEM);
}
--
2.47.3
This patch series enables a future version of tune2fs to be able to
modify certain parts of the ext4 superblock without to write to the
block device.
The first patch fixes a potential buffer overrun caused by a
maliciously moified superblock. The second patch adds support for
32-bit uid and gid's which can have access to the reserved blocks pool.
The last patch adds the ioctl's which will be used by tune2fs.
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
---
Theodore Ts'o (3):
ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()
ext4: add support for 32-bit default reserved uid and gid values
ext4: implemet new ioctls to set and get superblock parameters
fs/ext4/ext4.h | 16 ++++-
fs/ext4/ioctl.c | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
fs/ext4/super.c | 25 +++-----
include/uapi/linux/ext4.h | 75 ++++++++++++++++++++++
4 files changed, 348 insertions(+), 24 deletions(-)
---
base-commit: b320789d6883cc00ac78ce83bccbfe7ed58afcf0
change-id: 20250830-tune2fs-3376beb72403
Best regards,
--
Theodore Ts'o <tytso(a)mit.edu>
Commit 88e6c42e40de ("io_uring/io-wq: add check free worker before
create new worker") reused the variable `do_create` for something
else, abusing it for the free worker check.
This caused the value to effectively always be `true` at the time
`nr_workers < max_workers` was checked, but it should really be
`false`. This means the `max_workers` setting was ignored, and worse:
if the limit had already been reached, incrementing `nr_workers` was
skipped even though another worker would be created.
When later lots of workers exit, the `nr_workers` field could easily
underflow, making the problem worse because more and more workers
would be created without incrementing `nr_workers`.
The simple solution is to use a different variable for the free worker
check instead of using one variable for two different things.
Cc: stable(a)vger.kernel.org
Fixes: 88e6c42e40de ("io_uring/io-wq: add check free worker before create new worker")
Signed-off-by: Max Kellermann <max.kellermann(a)ionos.com>
---
io_uring/io-wq.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 17dfaa0395c4..1d03b2fc4b25 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -352,16 +352,16 @@ static void create_worker_cb(struct callback_head *cb)
struct io_wq *wq;
struct io_wq_acct *acct;
- bool do_create = false;
+ bool activated_free_worker, do_create = false;
worker = container_of(cb, struct io_worker, create_work);
wq = worker->wq;
acct = worker->acct;
rcu_read_lock();
- do_create = !io_acct_activate_free_worker(acct);
+ activated_free_worker = io_acct_activate_free_worker(acct);
rcu_read_unlock();
- if (!do_create)
+ if (activated_free_worker)
goto no_need_create;
raw_spin_lock(&acct->workers_lock);
--
2.47.3
Running sha224_kunit on a KMSAN-enabled kernel results in a crash in
kmsan_internal_set_shadow_origin():
BUG: unable to handle page fault for address: ffffbc3840291000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 1810067 P4D 1810067 PUD 192d067 PMD 3c17067 PTE 0
Oops: 0000 [#1] SMP NOPTI
CPU: 0 UID: 0 PID: 81 Comm: kunit_try_catch Tainted: G N 6.17.0-rc3 #10 PREEMPT(voluntary)
Tainted: [N]=TEST
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
RIP: 0010:kmsan_internal_set_shadow_origin+0x91/0x100
[...]
Call Trace:
<TASK>
__msan_memset+0xee/0x1a0
sha224_final+0x9e/0x350
test_hash_buffer_overruns+0x46f/0x5f0
? kmsan_get_shadow_origin_ptr+0x46/0xa0
? __pfx_test_hash_buffer_overruns+0x10/0x10
kunit_try_run_case+0x198/0xa00
This occurs when memset() is called on a buffer that is not 4-byte
aligned and extends to the end of a guard page, i.e. the next page is
unmapped.
The bug is that the loop at the end of
kmsan_internal_set_shadow_origin() accesses the wrong shadow memory
bytes when the address is not 4-byte aligned. Since each 4 bytes are
associated with an origin, it rounds the address and size so that it can
access all the origins that contain the buffer. However, when it checks
the corresponding shadow bytes for a particular origin, it incorrectly
uses the original unrounded shadow address. This results in reads from
shadow memory beyond the end of the buffer's shadow memory, which
crashes when that memory is not mapped.
To fix this, correctly align the shadow address before accessing the 4
shadow bytes corresponding to each origin.
Fixes: 2ef3cec44c60 ("kmsan: do not wipe out origin when doing partial unpoisoning")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
v2: Added test case to kmsan_test.
mm/kmsan/core.c | 10 +++++++---
mm/kmsan/kmsan_test.c | 16 ++++++++++++++++
2 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c
index 1ea711786c522..8bca7fece47f0 100644
--- a/mm/kmsan/core.c
+++ b/mm/kmsan/core.c
@@ -193,11 +193,12 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id)
void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b,
u32 origin, bool checked)
{
u64 address = (u64)addr;
- u32 *shadow_start, *origin_start;
+ void *shadow_start;
+ u32 *aligned_shadow, *origin_start;
size_t pad = 0;
KMSAN_WARN_ON(!kmsan_metadata_is_contiguous(addr, size));
shadow_start = kmsan_get_metadata(addr, KMSAN_META_SHADOW);
if (!shadow_start) {
@@ -212,13 +213,16 @@ void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b,
}
return;
}
__memset(shadow_start, b, size);
- if (!IS_ALIGNED(address, KMSAN_ORIGIN_SIZE)) {
+ if (IS_ALIGNED(address, KMSAN_ORIGIN_SIZE)) {
+ aligned_shadow = shadow_start;
+ } else {
pad = address % KMSAN_ORIGIN_SIZE;
address -= pad;
+ aligned_shadow = shadow_start - pad;
size += pad;
}
size = ALIGN(size, KMSAN_ORIGIN_SIZE);
origin_start =
(u32 *)kmsan_get_metadata((void *)address, KMSAN_META_ORIGIN);
@@ -228,11 +232,11 @@ void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b,
* and unconditionally overwrite the old origin slot.
* If the new origin is zero, overwrite the old origin slot iff the
* corresponding shadow slot is zero.
*/
for (int i = 0; i < size / KMSAN_ORIGIN_SIZE; i++) {
- if (origin || !shadow_start[i])
+ if (origin || !aligned_shadow[i])
origin_start[i] = origin;
}
}
struct page *kmsan_vmalloc_to_page_or_null(void *vaddr)
diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c
index c6c5b2bbede0c..902ec48b1e3e6 100644
--- a/mm/kmsan/kmsan_test.c
+++ b/mm/kmsan/kmsan_test.c
@@ -554,10 +554,25 @@ static void test_memcpy_initialized_gap(struct kunit *test)
DEFINE_TEST_MEMSETXX(16)
DEFINE_TEST_MEMSETXX(32)
DEFINE_TEST_MEMSETXX(64)
+/* Test case: ensure that KMSAN does not access shadow memory out of bounds. */
+static void test_memset_on_guarded_buffer(struct kunit *test)
+{
+ void *buf = vmalloc(PAGE_SIZE);
+
+ kunit_info(test,
+ "memset() on ends of guarded buffer should not crash\n");
+
+ for (size_t size = 0; size <= 128; size++) {
+ memset(buf, 0xff, size);
+ memset(buf + PAGE_SIZE - size, 0xff, size);
+ }
+ vfree(buf);
+}
+
static noinline void fibonacci(int *array, int size, int start)
{
if (start < 2 || (start == size))
return;
array[start] = array[start - 1] + array[start - 2];
@@ -675,10 +690,11 @@ static struct kunit_case kmsan_test_cases[] = {
KUNIT_CASE(test_memcpy_aligned_to_unaligned),
KUNIT_CASE(test_memcpy_initialized_gap),
KUNIT_CASE(test_memset16),
KUNIT_CASE(test_memset32),
KUNIT_CASE(test_memset64),
+ KUNIT_CASE(test_memset_on_guarded_buffer),
KUNIT_CASE(test_long_origin_chain),
KUNIT_CASE(test_stackdepot_roundtrip),
KUNIT_CASE(test_unpoison_memory),
KUNIT_CASE(test_copy_from_kernel_nofault),
{},
base-commit: e59a039119c3ec241228adf12dca0dd4398104d0
--
2.51.0
Running sha224_kunit on a KMSAN-enabled kernel results in a crash in
kmsan_internal_set_shadow_origin():
BUG: unable to handle page fault for address: ffffbc3840291000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 1810067 P4D 1810067 PUD 192d067 PMD 3c17067 PTE 0
Oops: 0000 [#1] SMP NOPTI
CPU: 0 UID: 0 PID: 81 Comm: kunit_try_catch Tainted: G N 6.17.0-rc3 #10 PREEMPT(voluntary)
Tainted: [N]=TEST
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
RIP: 0010:kmsan_internal_set_shadow_origin+0x91/0x100
[...]
Call Trace:
<TASK>
__msan_memset+0xee/0x1a0
sha224_final+0x9e/0x350
test_hash_buffer_overruns+0x46f/0x5f0
? kmsan_get_shadow_origin_ptr+0x46/0xa0
? __pfx_test_hash_buffer_overruns+0x10/0x10
kunit_try_run_case+0x198/0xa00
This occurs when memset() is called on a buffer that is not 4-byte
aligned and extends to the end of a guard page, i.e. the next page is
unmapped.
The bug is that the loop at the end of
kmsan_internal_set_shadow_origin() accesses the wrong shadow memory
bytes when the address is not 4-byte aligned. Since each 4 bytes are
associated with an origin, it rounds the address and size so that it can
access all the origins that contain the buffer. However, when it checks
the corresponding shadow bytes for a particular origin, it incorrectly
uses the original unrounded shadow address. This results in reads from
shadow memory beyond the end of the buffer's shadow memory, which
crashes when that memory is not mapped.
To fix this, correctly align the shadow address before accessing the 4
shadow bytes corresponding to each origin.
Fixes: 2ef3cec44c60 ("kmsan: do not wipe out origin when doing partial unpoisoning")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
mm/kmsan/core.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c
index 1ea711786c522..8bca7fece47f0 100644
--- a/mm/kmsan/core.c
+++ b/mm/kmsan/core.c
@@ -193,11 +193,12 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id)
void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b,
u32 origin, bool checked)
{
u64 address = (u64)addr;
- u32 *shadow_start, *origin_start;
+ void *shadow_start;
+ u32 *aligned_shadow, *origin_start;
size_t pad = 0;
KMSAN_WARN_ON(!kmsan_metadata_is_contiguous(addr, size));
shadow_start = kmsan_get_metadata(addr, KMSAN_META_SHADOW);
if (!shadow_start) {
@@ -212,13 +213,16 @@ void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b,
}
return;
}
__memset(shadow_start, b, size);
- if (!IS_ALIGNED(address, KMSAN_ORIGIN_SIZE)) {
+ if (IS_ALIGNED(address, KMSAN_ORIGIN_SIZE)) {
+ aligned_shadow = shadow_start;
+ } else {
pad = address % KMSAN_ORIGIN_SIZE;
address -= pad;
+ aligned_shadow = shadow_start - pad;
size += pad;
}
size = ALIGN(size, KMSAN_ORIGIN_SIZE);
origin_start =
(u32 *)kmsan_get_metadata((void *)address, KMSAN_META_ORIGIN);
@@ -228,11 +232,11 @@ void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b,
* and unconditionally overwrite the old origin slot.
* If the new origin is zero, overwrite the old origin slot iff the
* corresponding shadow slot is zero.
*/
for (int i = 0; i < size / KMSAN_ORIGIN_SIZE; i++) {
- if (origin || !shadow_start[i])
+ if (origin || !aligned_shadow[i])
origin_start[i] = origin;
}
}
struct page *kmsan_vmalloc_to_page_or_null(void *vaddr)
base-commit: 1b237f190eb3d36f52dffe07a40b5eb210280e00
--
2.50.1
Hallo
6.16.7 does not compile here
config is the same as with 6.16.6
any advice ?
===
...
CC init/initramfs.o
AS arch/x86/realmode/rm/wakeup_asm.o
CC arch/x86/entry/vdso/vdso32/vclock_gettime.o
CC arch/x86/realmode/rm/wakemain.o
In file included from ./include/uapi/linux/posix_types.h:5,
from ./include/uapi/linux/types.h:14,
from ./include/linux/types.h:6,
from arch/x86/realmode/rm/wakeup.h:11,
from arch/x86/realmode/rm/wakemain.c:2:
./include/linux/stddef.h:11:9: error: cannot use keyword ‘false’ as
enumeration constant
11 | false = 0,
| ^~~~~
./include/linux/stddef.h:11:9: note: ‘false’ is a keyword with
‘-std=c23’ onwards
./include/linux/types.h:35:33: error: ‘bool’ cannot be defined via ‘typedef’
35 | typedef _Bool bool;
| ^~~~
./include/linux/types.h:35:33: note: ‘bool’ is a keyword with ‘-std=c23’
onwards
./include/linux/types.h:35:1: warning: useless type name in empty
declaration
35 | typedef _Bool bool;
| ^~~~~~~
CC init/calibrate.o
make[5]: *** [scripts/Makefile.build:243:
arch/x86/realmode/rm/wakemain.o] Error 1
...
CC kernel/bpf/trampoline.o
CC fs/smb/client/readdir.o
fs/btrfs/print-tree.c:29:49: warning: initializer-string for array of
‘char’ truncates NUL terminator but destination lacks ‘nonstring’
attribute (17 chars into 16 available)
[-Wunterminated-string-initialization]
29 | { BTRFS_BLOCK_GROUP_TREE_OBJECTID,
"BLOCK_GROUP_TREE" },
| ^~~~~~~~~~~~~~~~~~
CC fs/btrfs/root-tree.o
CC fs/efivarfs/inode.o
CC kernel/trace/trace_probe.o
CC [M] fs/pstore/ram_core.o
In file included from ./include/linux/string.h:294,
from ./include/linux/bitmap.h:11,
from ./include/linux/inetdevice.h:7,
from fs/smb/server/smb2pdu.c:7:
In function ‘fortify_memset_chk’,
inlined from ‘ntlm_negotiate’ at fs/smb/server/smb2pdu.c:1354:2,
inlined from ‘smb2_sess_setup’ at fs/smb/server/smb2pdu.c:1828:10:
./include/linux/fortify-string.h:493:25: warning: call to
‘__write_overflow_field’ declared with attribute warning: detected write
beyond size of field (1st parameter); maybe use struct_group()?
[-Wattribute-warning]
493 | __write_overflow_field(p_size_field, size);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CC fs/smb/server/ksmbd_spnego_negtokeninit.asn1.o
...
CC fs/btrfs/discard.o
In file included from ./include/linux/string.h:294,
from ./include/linux/bitmap.h:11,
from ./include/linux/cpumask.h:12,
from ./arch/x86/include/asm/cpumask.h:5,
from ./arch/x86/include/asm/msr.h:11,
from ./arch/x86/include/asm/processor.h:23,
from ./arch/x86/include/asm/cpufeature.h:5,
from ./arch/x86/include/asm/thread_info.h:53,
from ./include/linux/thread_info.h:60,
from ./arch/x86/include/asm/preempt.h:9,
from ./include/linux/preempt.h:79,
from ./include/linux/spinlock.h:56,
from ./include/linux/wait.h:9,
from ./include/linux/wait_bit.h:8,
from ./include/linux/fs.h:6,
from fs/smb/client/cifssmb.c:17:
In function ‘fortify_memcpy_chk’,
inlined from ‘CIFSSMBSetPathInfo’ at fs/smb/client/cifssmb.c:5375:2:
./include/linux/fortify-string.h:583:25: warning: call to
‘__write_overflow_field’ declared with attribute warning: detected write
beyond size of field (1st parameter); maybe use struct_group()?
[-Wattribute-warning]
583 | __write_overflow_field(p_size_field, size);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
AR fs/smb/client/built-in.a
AR fs/smb/built-in.a
CC fs/btrfs/reflink.o
CC fs/btrfs/subpage.o
CC fs/btrfs/tree-mod-log.o
CC fs/btrfs/extent-io-tree.o
CC fs/btrfs/fs.o
CC fs/btrfs/messages.o
CC fs/btrfs/bio.o
CC fs/btrfs/lru_cache.o
CC fs/btrfs/acl.o
CC fs/btrfs/zoned.o
CC fs/btrfs/verity.o
fs/btrfs/send.c: In function ‘btrfs_ioctl_send’:
fs/btrfs/send.c:8208:44: warning: ‘kvcalloc’ sizes specified with
‘sizeof’ in the earlier argument and not in the later argument
[-Wcalloc-transposed-args]
8208 | sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots),
| ^
fs/btrfs/send.c:8208:44: note: earlier argument should specify number of
elements, later size of each element
AR fs/btrfs/built-in.a
AR fs/built-in.a
make[1]: *** [/home/DATA/DEVEL/linux-6.6.17/Makefile:1913: .] Error 2
make: *** [Makefile:234: __sub-make] Error 2
error in make
Hi,
This series fixes the long standing issue with ACS in DT platforms. There are
two fixes in this series, both fixing independent issues on their own, but both
are needed to properly enable ACS on DT platforms (well, patch 1 is only needed
for Juno board, but that was a blocker for patch 2, more below...).
Issue(s) background
===================
Back in 2024, Xingang Wang first noted a failure in attaching the HiSilicon SEC
device to QEMU ARM64 pci-root-port device [1]. He then tracked down the issue to
ACS not being enabled for the QEMU Root Port device and he proposed a patch to
fix it [2].
Once the patch got applied, people reported PCIe issues with linux-next on the
ARM Juno Development boards, where they saw failure in enumerating the endpoint
devices [3][4]. So soon, the patch got dropped, but the actual issue with the
ARM Juno boards was left behind.
Fast forward to 2024, Pavan resubmitted the same fix [5] for his own usecase,
hoping that someone in the community would fix the issue with ARM Juno boards.
But the patch was rightly rejected, as a patch that was known to cause issues
should not be merged to the kernel. But again, no one investigated the Juno
issue and it was left behind again.
Now it ended up in my plate and I managed to track down the issue with the help
of Naresh who got access to the Juno boards in LKFT. The Juno issue is with the
PCIe switch from Microsemi/IDT, which triggers ACS Source Validation error on
Completions received for the Configuration Read Request from a device connected
to the downstream port that has not yet captured the PCIe bus number. As per the
PCIe spec r6.0 sec 2.2.6.2, "Functions must capture the Bus and Device Numbers
supplied with all Type 0 Configuration Write Requests completed by the Function
and supply these numbers in the Bus and Device Number fields of the Requester ID
for all Requests". So during the first Configuration Read Request issued by the
switch downstream port during enumeration (for reading Vendor ID), Bus and
Device numbers will be unknown to the device. So it responds to the Read Request
with Completion having Bus and Device number as 0. The switch interprets the
Completion as an ACS Source Validation error and drops the completion, leading
to the failure in detecting the endpoint device. Though the PCIe spec r6.0, sec
6.12.1.1, states that "Completions are never affected by ACS Source Validation".
This behavior is in violation of the spec.
This issue was already found and addressed with a quirk for a different device
from Microsemi with 'commit, aa667c6408d2 ("PCI: Workaround IDT switch ACS
Source Validation erratum")'. Apparently, this issue seems to be documented in
the erratum #36 of IDT 89H32H8G3-YC, which is not publicly available.
Solution for Juno issue
=======================
To fix this issue, I've extended the quirk to the Device ID of the switch
found in Juno R2 boards. I believe the same switch is also present in Juno R1
board as well.
With Patch 1, the Juno R2 boards can now detect the endpoints even with ACS
enabled for the Switch downstream ports. Finally, I added patch 2 that properly
enables ACS for all the PCI devices on DT platforms.
It should be noted that even without patch 2 which enables ACS for the Root
Port, the Juno boards were failing since 'commit, bcb81ac6ae3c ("iommu: Get
DT/ACPI parsing into the proper probe path")' as reported in LKFT [6]. I
believe, this commit made sure pci_request_acs() gets called before the
enumeration of the switch downstream ports. The LKFT team ended up disabling
ACS using cmdline param 'pci=config_acs=000000@pci:0:0'. So I added the above
mentioned commit as a Fixes tag for patch 1.
Also, to mitigate this issue, one could enumerate all the PCIe devices in
bootloader without enabling ACS (as also noted by Robin in the LKFT thread).
This will make sure that the endpoint device has a valid bus number when it
responds to the first Configuration Read Request from the switch downstream
port. So the ACS Source Validation error doesn't get triggered.
Solution for ACS issue
======================
To fix this issue, I've kept the patch from Xingang as is (with rewording of the
patch subject/description). This patch moves the pci_request_acs() call to
devm_of_pci_bridge_init(), which gets called during the host bridge
registration. This makes sure that the 'pci_acs_enable' flag set by
pci_request_acs() is getting set before the enumeration of the Root Port device.
So now, ACS will be enabled for all ACS capable devices of DT platforms.
[1] https://lore.kernel.org/all/038397a6-57e2-b6fc-6e1c-7c03b7be9d96@huawei.com
[2] https://lore.kernel.org/all/1621566204-37456-1-git-send-email-wangxingang5@…
[3] https://lore.kernel.org/all/01314d70-41e6-70f9-e496-84091948701a@samsung.com
[4] https://lore.kernel.org/all/CADYN=9JWU3CMLzMEcD5MSQGnaLyDRSKc5SofBFHUax6YuT…
[5] https://lore.kernel.org/linux-pci/20241107-pci_acs_fix-v1-1-185a2462a571@qu…
[6] https://lists.linaro.org/archives/list/lkft-triage@lists.linaro.org/message…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
---
Manivannan Sadhasivam (1):
PCI: Extend pci_idt_bus_quirk() for IDT switch with Device ID 0x8090
Xingang Wang (1):
iommu/of: Call pci_request_acs() before enumerating the Root Port device
drivers/iommu/of_iommu.c | 1 -
drivers/pci/of.c | 8 +++++++-
drivers/pci/probe.c | 2 +-
3 files changed, 8 insertions(+), 3 deletions(-)
---
base-commit: 8f5ae30d69d7543eee0d70083daf4de8fe15d585
change-id: 20250910-pci-acs-cb4fa3983a2c
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
I'm announcing the release of the 6.16.7 kernel.
All users of the 6.16 kernel series must upgrade.
The updated 6.16.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.16.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/ABI/testing/sysfs-devices-system-cpu | 1
Documentation/admin-guide/hw-vuln/index.rst | 1
Documentation/admin-guide/hw-vuln/vmscape.rst | 110 ++++++++
Documentation/admin-guide/kernel-parameters.txt | 11
Makefile | 2
arch/x86/Kconfig | 9
arch/x86/include/asm/cpufeatures.h | 2
arch/x86/include/asm/entry-common.h | 7
arch/x86/include/asm/nospec-branch.h | 2
arch/x86/kernel/cpu/bugs.c | 285 ++++++++++++++-------
arch/x86/kernel/cpu/common.c | 86 ++++--
arch/x86/kvm/x86.c | 9
drivers/base/cpu.c | 3
include/linux/cpu.h | 1
14 files changed, 415 insertions(+), 114 deletions(-)
Greg Kroah-Hartman (1):
Linux 6.16.7
Pawan Gupta (7):
Documentation/hw-vuln: Add VMSCAPE documentation
x86/vmscape: Enumerate VMSCAPE bug
x86/vmscape: Add conditional IBPB mitigation
x86/vmscape: Enable the mitigation
x86/bugs: Move cpu_bugs_smt_update() down
x86/vmscape: Warn when STIBP is disabled with SMT
x86/vmscape: Add old Intel CPUs to affected list
I'm announcing the release of the 6.12.47 kernel.
All users of the 6.12 kernel series must upgrade.
The updated 6.12.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.12.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/ABI/testing/sysfs-devices-system-cpu | 1
Documentation/admin-guide/hw-vuln/index.rst | 1
Documentation/admin-guide/hw-vuln/vmscape.rst | 110 ++++++++
Documentation/admin-guide/kernel-parameters.txt | 11
Makefile | 2
arch/x86/Kconfig | 9
arch/x86/include/asm/cpufeatures.h | 2
arch/x86/include/asm/entry-common.h | 7
arch/x86/include/asm/nospec-branch.h | 2
arch/x86/kernel/cpu/bugs.c | 257 ++++++++++++++-------
arch/x86/kernel/cpu/common.c | 84 ++++--
arch/x86/kvm/x86.c | 9
drivers/base/cpu.c | 3
include/linux/cpu.h | 1
14 files changed, 394 insertions(+), 105 deletions(-)
Greg Kroah-Hartman (1):
Linux 6.12.47
Pawan Gupta (7):
Documentation/hw-vuln: Add VMSCAPE documentation
x86/vmscape: Enumerate VMSCAPE bug
x86/vmscape: Add conditional IBPB mitigation
x86/vmscape: Enable the mitigation
x86/bugs: Move cpu_bugs_smt_update() down
x86/vmscape: Warn when STIBP is disabled with SMT
x86/vmscape: Add old Intel CPUs to affected list
I'm announcing the release of the 6.6.106 kernel.
All users of the 6.6 kernel series must upgrade.
The updated 6.6.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.6.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/ABI/testing/sysfs-devices-system-cpu | 1
Documentation/admin-guide/hw-vuln/index.rst | 1
Documentation/admin-guide/hw-vuln/vmscape.rst | 110 ++++++++
Documentation/admin-guide/kernel-parameters.txt | 11
Makefile | 2
arch/x86/Kconfig | 9
arch/x86/include/asm/cpufeatures.h | 2
arch/x86/include/asm/entry-common.h | 7
arch/x86/include/asm/nospec-branch.h | 2
arch/x86/kernel/cpu/bugs.c | 257 ++++++++++++++-------
arch/x86/kernel/cpu/common.c | 82 ++++--
arch/x86/kvm/x86.c | 9
drivers/base/cpu.c | 3
include/linux/cpu.h | 1
14 files changed, 392 insertions(+), 105 deletions(-)
Greg Kroah-Hartman (1):
Linux 6.6.106
Pawan Gupta (7):
Documentation/hw-vuln: Add VMSCAPE documentation
x86/vmscape: Enumerate VMSCAPE bug
x86/vmscape: Add conditional IBPB mitigation
x86/vmscape: Enable the mitigation
x86/bugs: Move cpu_bugs_smt_update() down
x86/vmscape: Warn when STIBP is disabled with SMT
x86/vmscape: Add old Intel CPUs to affected list
I'm announcing the release of the 6.1.152 kernel.
All users of the 6.1 kernel series must upgrade.
The updated 6.1.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.1.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/ABI/testing/sysfs-devices-system-cpu | 1
Documentation/admin-guide/hw-vuln/index.rst | 1
Documentation/admin-guide/hw-vuln/vmscape.rst | 110 ++++++++
Documentation/admin-guide/kernel-parameters.txt | 11
Makefile | 2
arch/x86/Kconfig | 9
arch/x86/include/asm/cpufeatures.h | 2
arch/x86/include/asm/entry-common.h | 7
arch/x86/include/asm/nospec-branch.h | 2
arch/x86/kernel/cpu/bugs.c | 257 ++++++++++++++-------
arch/x86/kernel/cpu/common.c | 80 ++++--
arch/x86/kvm/x86.c | 9
drivers/base/cpu.c | 7
include/linux/cpu.h | 1
14 files changed, 394 insertions(+), 105 deletions(-)
Greg Kroah-Hartman (1):
Linux 6.1.152
Pawan Gupta (7):
Documentation/hw-vuln: Add VMSCAPE documentation
x86/vmscape: Enumerate VMSCAPE bug
x86/vmscape: Add conditional IBPB mitigation
x86/vmscape: Enable the mitigation
x86/bugs: Move cpu_bugs_smt_update() down
x86/vmscape: Warn when STIBP is disabled with SMT
x86/vmscape: Add old Intel CPUs to affected list
I'm announcing the release of the 5.15.193 kernel.
All users of the 5.15 kernel series must upgrade.
The updated 5.15.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.15.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/ABI/testing/sysfs-devices-system-cpu | 1
Documentation/admin-guide/hw-vuln/index.rst | 1
Documentation/admin-guide/hw-vuln/vmscape.rst | 110 ++++++++
Documentation/admin-guide/kernel-parameters.txt | 11
Makefile | 2
arch/x86/Kconfig | 9
arch/x86/include/asm/cpufeatures.h | 2
arch/x86/include/asm/entry-common.h | 7
arch/x86/include/asm/nospec-branch.h | 2
arch/x86/kernel/cpu/bugs.c | 264 ++++++++++++++-------
arch/x86/kernel/cpu/common.c | 77 +++---
arch/x86/kvm/x86.c | 9
drivers/base/cpu.c | 6
include/linux/cpu.h | 1
14 files changed, 393 insertions(+), 109 deletions(-)
Greg Kroah-Hartman (1):
Linux 5.15.193
Pawan Gupta (7):
Documentation/hw-vuln: Add VMSCAPE documentation
x86/vmscape: Enumerate VMSCAPE bug
x86/vmscape: Add conditional IBPB mitigation
x86/vmscape: Enable the mitigation
x86/bugs: Move cpu_bugs_smt_update() down
x86/vmscape: Warn when STIBP is disabled with SMT
x86/vmscape: Add old Intel CPUs to affected list
I'm announcing the release of the 5.10.244 kernel.
All users of the 5.10 kernel series must upgrade.
The updated 5.10.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.10.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/ABI/testing/sysfs-devices-system-cpu | 1
Documentation/admin-guide/hw-vuln/index.rst | 1
Documentation/admin-guide/hw-vuln/vmscape.rst | 110 ++++++++
Documentation/admin-guide/kernel-parameters.txt | 11
Makefile | 2
arch/x86/Kconfig | 9
arch/x86/include/asm/cpufeatures.h | 2
arch/x86/include/asm/entry-common.h | 7
arch/x86/include/asm/nospec-branch.h | 2
arch/x86/kernel/cpu/bugs.c | 264 ++++++++++++++-------
arch/x86/kernel/cpu/common.c | 74 +++--
arch/x86/kvm/x86.c | 9
drivers/base/cpu.c | 6
include/linux/cpu.h | 1
14 files changed, 390 insertions(+), 109 deletions(-)
Greg Kroah-Hartman (1):
Linux 5.10.244
Pawan Gupta (7):
Documentation/hw-vuln: Add VMSCAPE documentation
x86/vmscape: Enumerate VMSCAPE bug
x86/vmscape: Add conditional IBPB mitigation
x86/vmscape: Enable the mitigation
x86/bugs: Move cpu_bugs_smt_update() down
x86/vmscape: Warn when STIBP is disabled with SMT
x86/vmscape: Add old Intel CPUs to affected list