From: Pengyu Luo mitltlatltl@gmail.com
[ Upstream commit fc5414a4774e14e51a93499a6adfdc45f2de82e0 ]
SM8650 is already supported by the qcom-cpufreq-hw driver, but it was never added to cpufreq-dt-platdev. This makes noise:
[ 0.388525] cpufreq-dt cpufreq-dt: failed register driver: -17
[ 0.388537] cpufreq-dt cpufreq-dt: probe with driver cpufreq-dt failed with error -17
Add it to the cpufreq-dt-platdev driver's blocklist to fix this.
Signed-off-by: Pengyu Luo mitltlatltl@gmail.com Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2aa00769cf09d..a010da0f6337f 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -175,6 +175,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm8350", }, { .compatible = "qcom,sm8450", }, { .compatible = "qcom,sm8550", }, + { .compatible = "qcom,sm8650", },
{ .compatible = "st,stih407", }, { .compatible = "st,stih410", },
From: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org
[ Upstream commit d4f610a9bafdec8e3210789aa19335367da696ea ]
Enabling the compile test should not cause automatic enabling of all drivers.
Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/cpufreq/Kconfig.arm | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 4f9cb943d945c..d4d625ded285f 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ config ARM_BRCMSTB_AVS_CPUFREQ tristate "Broadcom STB AVS CPUfreq driver" depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST - default y + default ARCH_BRCMSTB help Some Broadcom STB SoCs use a co-processor running proprietary firmware ("AVS") to handle voltage and frequency scaling. This driver provides @@ -181,7 +181,7 @@ config ARM_RASPBERRYPI_CPUFREQ config ARM_S3C64XX_CPUFREQ bool "Samsung S3C64XX" depends on CPU_S3C6410 || COMPILE_TEST - default y + default CPU_S3C6410 help This adds the CPUFreq driver for Samsung S3C6410 SoC.
@@ -190,7 +190,7 @@ config ARM_S3C64XX_CPUFREQ config ARM_S5PV210_CPUFREQ bool "Samsung S5PV210 and S5PC110" depends on CPU_S5PV210 || COMPILE_TEST - default y + default CPU_S5PV210 help This adds the CPUFreq driver for Samsung S5PV210 and S5PC110 SoCs. @@ -214,7 +214,7 @@ config ARM_SCMI_CPUFREQ config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" depends on PLAT_SPEAR || COMPILE_TEST - default y + default PLAT_SPEAR help This adds the CPUFreq driver support for SPEAr SOCs.
@@ -233,7 +233,7 @@ config ARM_TEGRA20_CPUFREQ tristate "Tegra20/30 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra20/30 SOCs.
@@ -241,7 +241,7 @@ config ARM_TEGRA124_CPUFREQ bool "Tegra124 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra124 SOCs.
@@ -256,14 +256,14 @@ config ARM_TEGRA194_CPUFREQ tristate "Tegra194 CPUFreq support" depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST) depends on TEGRA_BPMP - default y + default ARCH_TEGRA help This adds CPU frequency driver support for Tegra194 SOCs.
config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST - default y + default ARCH_OMAP2PLUS || ARCH_K3 help This driver enables valid OPPs on the running platform based on values contained within the SoC in use. Enable this in order to
From: Heiko Stuebner heiko@sntech.de
[ Upstream commit 6907e8093b3070d877ee607e5ceede60cfd08bde ]
The RK3588 has an offset into the OTP area where the readable area begins, and this offset is automatically added to the start address. Other variants are very similar to the rk3588, just with a different offset, so move that value into the variant data.
To match the size in bytes, store this value also in bytes and not in number of blocks.
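As a quick sanity check of the unit change (values taken from the diff below, illustration only): the rk3588 read offset of 0xc00 bytes divided by the 4-byte block size recovers the 0x300-block value that RK3588_NO_SECURE_OFFSET used to hard-code:

    /* rk3588_otp_read() now derives the block offset from the byte value: */
    addr_start += otp->data->read_offset / RK3588_NBYTES;  /* 0xc00 / 4 == 0x300 */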
Signed-off-by: Heiko Stuebner heiko@sntech.de Tested-by: Nicolas Frattaroli nicolas.frattaroli@collabora.com Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Link: https://lore.kernel.org/r/20250411112251.68002-2-srinivas.kandagatla@linaro.... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvmem/rockchip-otp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/nvmem/rockchip-otp.c b/drivers/nvmem/rockchip-otp.c index ebc3f0b24166b..3edfbfc2d7220 100644 --- a/drivers/nvmem/rockchip-otp.c +++ b/drivers/nvmem/rockchip-otp.c @@ -59,7 +59,6 @@ #define RK3588_OTPC_AUTO_EN 0x08 #define RK3588_OTPC_INT_ST 0x84 #define RK3588_OTPC_DOUT0 0x20 -#define RK3588_NO_SECURE_OFFSET 0x300 #define RK3588_NBYTES 4 #define RK3588_BURST_NUM 1 #define RK3588_BURST_SHIFT 8 @@ -69,6 +68,7 @@
struct rockchip_data { int size; + int read_offset; const char * const *clks; int num_clks; nvmem_reg_read_t reg_read; @@ -196,7 +196,7 @@ static int rk3588_otp_read(void *context, unsigned int offset, addr_start = round_down(offset, RK3588_NBYTES) / RK3588_NBYTES; addr_end = round_up(offset + bytes, RK3588_NBYTES) / RK3588_NBYTES; addr_len = addr_end - addr_start; - addr_start += RK3588_NO_SECURE_OFFSET; + addr_start += otp->data->read_offset / RK3588_NBYTES;
buf = kzalloc(array_size(addr_len, RK3588_NBYTES), GFP_KERNEL); if (!buf) @@ -280,6 +280,7 @@ static const char * const rk3588_otp_clocks[] = {
static const struct rockchip_data rk3588_data = { .size = 0x400, + .read_offset = 0xc00, .clks = rk3588_otp_clocks, .num_clks = ARRAY_SIZE(rk3588_otp_clocks), .reg_read = rk3588_otp_read,
From: Heiko Stuebner heiko@sntech.de
[ Upstream commit 50d75a13a9ce880a5ef07a4ccc63ba561cc2e69a ]
The variant works very similarly to the rk3588, just with a different read offset and size.
Signed-off-by: Heiko Stuebner heiko@sntech.de Tested-by: Nicolas Frattaroli nicolas.frattaroli@collabora.com Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Link: https://lore.kernel.org/r/20250411112251.68002-5-srinivas.kandagatla@linaro.... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvmem/rockchip-otp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/nvmem/rockchip-otp.c b/drivers/nvmem/rockchip-otp.c index 3edfbfc2d7220..d88f12c532426 100644 --- a/drivers/nvmem/rockchip-otp.c +++ b/drivers/nvmem/rockchip-otp.c @@ -274,6 +274,14 @@ static const struct rockchip_data px30_data = { .reg_read = px30_otp_read, };
+static const struct rockchip_data rk3576_data = { + .size = 0x100, + .read_offset = 0x700, + .clks = px30_otp_clocks, + .num_clks = ARRAY_SIZE(px30_otp_clocks), + .reg_read = rk3588_otp_read, +}; + static const char * const rk3588_otp_clocks[] = { "otp", "apb_pclk", "phy", "arb", }; @@ -295,6 +303,10 @@ static const struct of_device_id rockchip_otp_match[] = { .compatible = "rockchip,rk3308-otp", .data = &px30_data, }, + { + .compatible = "rockchip,rk3576-otp", + .data = &rk3576_data, + }, { .compatible = "rockchip,rk3588-otp", .data = &rk3588_data,
From: Dmitry Baryshkov dmitry.baryshkov@linaro.org
[ Upstream commit 7a06ef75107799675ea6e4d73b9df37e18e352a8 ]
If the NVMEM device specifies a stride for accessing data, reading a particular cell might require a bit offset that is bigger than one byte. Rework the NVMEM core code to support bit offsets of more than 8 bits.
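As a rough illustration (a standalone sketch, not the kernel helper itself): a cell described with, say, bits = <12 4> in DT now has its offset split into a whole-byte skip plus a residual shift:

    unsigned int bit_offset = 12;                          /* hypothetical "bits = <12 4>" cell */
    unsigned int byte_skip  = bit_offset / BITS_PER_BYTE;  /* 1: start reading one byte in      */
    unsigned int shift      = bit_offset % BITS_PER_BYTE;  /* 4: remaining shift within a byte  */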
Signed-off-by: Dmitry Baryshkov dmitry.baryshkov@linaro.org Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Link: https://lore.kernel.org/r/20250411112251.68002-9-srinivas.kandagatla@linaro.... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvmem/core.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-)
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index fff85bbf0ecd0..7872903c08a11 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -837,7 +837,9 @@ static int nvmem_add_cells_from_dt(struct nvmem_device *nvmem, struct device_nod if (addr && len == (2 * sizeof(u32))) { info.bit_offset = be32_to_cpup(addr++); info.nbits = be32_to_cpup(addr); - if (info.bit_offset >= BITS_PER_BYTE || info.nbits < 1) { + if (info.bit_offset >= BITS_PER_BYTE * info.bytes || + info.nbits < 1 || + info.bit_offset + info.nbits > BITS_PER_BYTE * info.bytes) { dev_err(dev, "nvmem: invalid bits on %pOF\n", child); of_node_put(child); return -EINVAL; @@ -1630,21 +1632,29 @@ EXPORT_SYMBOL_GPL(nvmem_cell_put); static void nvmem_shift_read_buffer_in_place(struct nvmem_cell_entry *cell, void *buf) { u8 *p, *b; - int i, extra, bit_offset = cell->bit_offset; + int i, extra, bytes_offset; + int bit_offset = cell->bit_offset;
p = b = buf; - if (bit_offset) { + + bytes_offset = bit_offset / BITS_PER_BYTE; + b += bytes_offset; + bit_offset %= BITS_PER_BYTE; + + if (bit_offset % BITS_PER_BYTE) { /* First shift */ - *b++ >>= bit_offset; + *p = *b++ >> bit_offset;
/* setup rest of the bytes if any */ for (i = 1; i < cell->bytes; i++) { /* Get bits from next byte and shift them towards msb */ - *p |= *b << (BITS_PER_BYTE - bit_offset); + *p++ |= *b << (BITS_PER_BYTE - bit_offset);
- p = b; - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; } + } else if (p != b) { + memmove(p, b, cell->bytes - bytes_offset); + p += cell->bytes - 1; } else { /* point to the msb */ p += cell->bytes - 1;
From: Dmitry Baryshkov dmitry.baryshkov@linaro.org
[ Upstream commit 13bcd440f2ff38cd7e42a179c223d4b833158b33 ]
Check that the NVMEM cell's raw_len is aligned to word_size. Otherwise drivers might face an incomplete read while accessing the last part of the NVMEM cell.
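A worked example with illustrative numbers: with word_size = 4 and a cell whose raw_len is 6, a word-based reg_read() cannot return the trailing two bytes as a full word, so the core now rounds the length up:

    cell->raw_len = ALIGN(6, 4);  /* 8: two full words instead of one and a half */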
Signed-off-by: Dmitry Baryshkov dmitry.baryshkov@linaro.org Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Link: https://lore.kernel.org/r/20250411112251.68002-10-srinivas.kandagatla@linaro... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvmem/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 7872903c08a11..7b8c85f9e035c 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -605,6 +605,18 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, return -EINVAL; }
+ if (!IS_ALIGNED(cell->raw_len, nvmem->word_size)) { + dev_err(&nvmem->dev, + "cell %s raw len %zd unaligned to nvmem word size %d\n", + cell->name ?: "<unknown>", cell->raw_len, + nvmem->word_size); + + if (info->raw_len) + return -EINVAL; + + cell->raw_len = ALIGN(cell->raw_len, nvmem->word_size); + } + return 0; }
From: Dmitry Baryshkov dmitry.baryshkov@linaro.org
[ Upstream commit 6786484223d5705bf7f919c1e5055d478ebeec32 ]
If an NVMEM cell uses a bit offset or specifies bit truncation, update raw_len manually (following the cell->bytes update) to ensure that the NVMEM access is still word-aligned.
Signed-off-by: Dmitry Baryshkov dmitry.baryshkov@linaro.org Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Link: https://lore.kernel.org/r/20250411112251.68002-11-srinivas.kandagatla@linaro... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvmem/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 7b8c85f9e035c..e206efc29a004 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -594,9 +594,11 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, cell->nbits = info->nbits; cell->np = info->np;
- if (cell->nbits) + if (cell->nbits) { cell->bytes = DIV_ROUND_UP(cell->nbits + cell->bit_offset, BITS_PER_BYTE); + cell->raw_len = ALIGN(cell->bytes, nvmem->word_size); + }
if (!IS_ALIGNED(cell->offset, nvmem->stride)) { dev_err(&nvmem->dev,
From: Dmitry Baryshkov dmitry.baryshkov@linaro.org
[ Upstream commit 3566a737db87a9bf360c2fd36433c5149f805f2e ]
All platforms since Snapdragon 8 Gen1 (SM8450) require 4-byte reads to access QFPROM data. While older platforms were more than happy with 1-byte reads, change the qfprom driver to use 4-byte reads for all platforms, specifying a stride and word size of 4 bytes. To retain compatibility with the existing DT and to simplify porting data from vendor kernels, use fixup_dt_cell_info in order to bump the alignment requirements.
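To illustrate the fixup with hypothetical cell values: a legacy 2-byte cell at byte offset 0x2 is rewritten so the underlying access stays word aligned:

    /* qfprom_fixup_dt_cell_info() on a cell with offset 0x2, bytes 2: */
    byte_offset       = 0x2 % sizeof(u32);            /* 2                 */
    cell->offset     -= byte_offset;                  /* 0x0, word aligned */
    cell->bit_offset += byte_offset * BITS_PER_BYTE;  /* +16 bits          */
    cell->nbits       = 2 * BITS_PER_BYTE;            /* 16 bits wide      */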
Signed-off-by: Dmitry Baryshkov dmitry.baryshkov@linaro.org Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Link: https://lore.kernel.org/r/20250411112251.68002-12-srinivas.kandagatla@linaro... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvmem/qfprom.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c index 116a39e804c70..a872c640b8c5a 100644 --- a/drivers/nvmem/qfprom.c +++ b/drivers/nvmem/qfprom.c @@ -321,19 +321,32 @@ static int qfprom_reg_read(void *context, unsigned int reg, void *_val, size_t bytes) { struct qfprom_priv *priv = context; - u8 *val = _val; - int i = 0, words = bytes; + u32 *val = _val; void __iomem *base = priv->qfpcorrected; + int words = DIV_ROUND_UP(bytes, sizeof(u32)); + int i;
if (read_raw_data && priv->qfpraw) base = priv->qfpraw;
- while (words--) - *val++ = readb(base + reg + i++); + for (i = 0; i < words; i++) + *val++ = readl(base + reg + i * sizeof(u32));
return 0; }
+/* Align reads to word boundary */ +static void qfprom_fixup_dt_cell_info(struct nvmem_device *nvmem, + struct nvmem_cell_info *cell) +{ + unsigned int byte_offset = cell->offset % sizeof(u32); + + cell->bit_offset += byte_offset * BITS_PER_BYTE; + cell->offset -= byte_offset; + if (byte_offset && !cell->nbits) + cell->nbits = cell->bytes * BITS_PER_BYTE; +} + static void qfprom_runtime_disable(void *data) { pm_runtime_disable(data); @@ -358,10 +371,11 @@ static int qfprom_probe(struct platform_device *pdev) struct nvmem_config econfig = { .name = "qfprom", .add_legacy_fixed_of_cells = true, - .stride = 1, - .word_size = 1, + .stride = 4, + .word_size = 4, .id = NVMEM_DEVID_AUTO, .reg_read = qfprom_reg_read, + .fixup_dt_cell_info = qfprom_fixup_dt_cell_info, }; struct device *dev = &pdev->dev; struct resource *res;
From: Dmitry Bogdanov d.bogdanov@yadro.com
[ Upstream commit 7f533cc5ee4c4436cee51dc58e81dfd9c3384418 ]
The NOPIN response timer may expire on a deleted connection and crash with logs such as:
Did not receive response to NOPIN on CID: 0, failing connection for I_T Nexus (null),i,0x00023d000125,iqn.2017-01.com.iscsi.target,t,0x3d
BUG: Kernel NULL pointer dereference on read at 0x00000000
NIP strlcpy+0x8/0xb0
LR iscsit_fill_cxn_timeout_err_stats+0x5c/0xc0 [iscsi_target_mod]
Call Trace:
  iscsit_handle_nopin_response_timeout+0xfc/0x120 [iscsi_target_mod]
  call_timer_fn+0x58/0x1f0
  run_timer_softirq+0x740/0x860
  __do_softirq+0x16c/0x420
  irq_exit+0x188/0x1c0
  timer_interrupt+0x184/0x410
That is because the NOPIN response timer may be re-started upon NOPIN timer expiration.
Stop the NOPIN timer before stopping the NOPIN response timer, to be sure that neither of them will be re-started.
Signed-off-by: Dmitry Bogdanov d.bogdanov@yadro.com Link: https://lore.kernel.org/r/20241224101757.32300-1-d.bogdanov@yadro.com Reviewed-by: Maurizio Lombardi mlombard@redhat.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/target/iscsi/iscsi_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 1244ef3aa86c1..620ba6e0ab075 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4263,8 +4263,8 @@ int iscsit_close_connection( spin_unlock(&iscsit_global->ts_bitmap_lock);
iscsit_stop_timers_for_cmds(conn); - iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + iscsit_stop_nopin_response_timer(conn);
if (conn->conn_transport->iscsit_wait_conn) conn->conn_transport->iscsit_wait_conn(conn);
From: Manish Pandey quic_mapa@quicinc.com
[ Upstream commit 569330a34a31a52c904239439984a59972c11d28 ]
Samsung UFS devices require additional time in hibern8 mode before exiting, beyond the negotiated handshaking phase between the host and device. Introduce a quirk to increase the PA_HIBERN8TIME parameter by 100 µs, a value derived from experiments, to ensure a proper hibernation process.
Signed-off-by: Manish Pandey quic_mapa@quicinc.com Link: https://lore.kernel.org/r/20250411121630.21330-3-quic_mapa@quicinc.com Reviewed-by: Bean Huo beanhuo@micron.com Reviewed-by: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ufs/core/ufshcd.c | 29 +++++++++++++++++++++++++++++ include/ufs/ufs_quirks.h | 6 ++++++ 2 files changed, 35 insertions(+)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 464f13da259aa..cdd4fd9bb2d15 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -278,6 +278,7 @@ static const struct ufs_dev_quirk ufs_fixups[] = { .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE | + UFS_DEVICE_QUIRK_PA_HIBER8TIME | UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS }, { .wmanufacturerid = UFS_VENDOR_SKHYNIX, .model = UFS_ANY_MODEL, @@ -8384,6 +8385,31 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba) return ret; }
+/** + * ufshcd_quirk_override_pa_h8time - Ensures proper adjustment of PA_HIBERN8TIME. + * @hba: per-adapter instance + * + * Some UFS devices require specific adjustments to the PA_HIBERN8TIME parameter + * to ensure proper hibernation timing. This function retrieves the current + * PA_HIBERN8TIME value and increments it by 100us. + */ +static void ufshcd_quirk_override_pa_h8time(struct ufs_hba *hba) +{ + u32 pa_h8time; + int ret; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_HIBERN8TIME), &pa_h8time); + if (ret) { + dev_err(hba->dev, "Failed to get PA_HIBERN8TIME: %d\n", ret); + return; + } + + /* Increment by 1 to increase hibernation time by 100 µs */ + ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HIBERN8TIME), pa_h8time + 1); + if (ret) + dev_err(hba->dev, "Failed updating PA_HIBERN8TIME: %d\n", ret); +} + static void ufshcd_tune_unipro_params(struct ufs_hba *hba) { ufshcd_vops_apply_dev_quirks(hba); @@ -8394,6 +8420,9 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba)
if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE) ufshcd_quirk_tune_host_pa_tactivate(hba); + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_HIBER8TIME) + ufshcd_quirk_override_pa_h8time(hba); }
static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) diff --git a/include/ufs/ufs_quirks.h b/include/ufs/ufs_quirks.h index 41ff44dfa1db3..f52de5ed1b3b6 100644 --- a/include/ufs/ufs_quirks.h +++ b/include/ufs/ufs_quirks.h @@ -107,4 +107,10 @@ struct ufs_dev_quirk { */ #define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM (1 << 11)
+/* + * Some ufs devices may need more time to be in hibern8 before exiting. + * Enable this quirk to give it an additional 100us. + */ +#define UFS_DEVICE_QUIRK_PA_HIBER8TIME (1 << 12) + #endif /* UFS_QUIRKS_H_ */
From: Zhongqiu Han quic_zhonhan@quicinc.com
[ Upstream commit 2e2f925fe737576df2373931c95e1a2b66efdfef ]
syzbot reports a data race when accessing event_triggered; here is the simplified stack trace from when the issue occurred:
==================================================================
BUG: KCSAN: data-race in virtqueue_disable_cb / virtqueue_enable_cb_delayed

write to 0xffff8881025bc452 of 1 bytes by task 3288 on cpu 0:
 virtqueue_enable_cb_delayed+0x42/0x3c0 drivers/virtio/virtio_ring.c:2653
 start_xmit+0x230/0x1310 drivers/net/virtio_net.c:3264
 __netdev_start_xmit include/linux/netdevice.h:5151 [inline]
 netdev_start_xmit include/linux/netdevice.h:5160 [inline]
 xmit_one net/core/dev.c:3800 [inline]

read to 0xffff8881025bc452 of 1 bytes by interrupt on cpu 1:
 virtqueue_disable_cb_split drivers/virtio/virtio_ring.c:880 [inline]
 virtqueue_disable_cb+0x92/0x180 drivers/virtio/virtio_ring.c:2566
 skb_xmit_done+0x5f/0x140 drivers/net/virtio_net.c:777
 vring_interrupt+0x161/0x190 drivers/virtio/virtio_ring.c:2715
 __handle_irq_event_percpu+0x95/0x490 kernel/irq/handle.c:158
 handle_irq_event_percpu kernel/irq/handle.c:193 [inline]

value changed: 0x01 -> 0x00
==================================================================
When the data race occurs, the function virtqueue_enable_cb_delayed() sets event_triggered to false, and virtqueue_disable_cb_split/packed() reads it as false due to the race condition. Since event_triggered is an unreliable hint used for optimization, this should only cause the driver to temporarily suggest that the device not send an interrupt notification when the event index is used.
Fix this KCSAN-reported data race by explicitly tagging the access with data_race().
Reported-by: syzbot+efe683d57990864b8c8e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67c7761a.050a0220.15b4b9.0018.GAE@google.com/ Signed-off-by: Zhongqiu Han quic_zhonhan@quicinc.com Message-Id: 20250312130412.3516307-1-quic_zhonhan@quicinc.com Signed-off-by: Michael S. Tsirkin mst@redhat.com Acked-by: Jason Wang jasowang@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fdd2d2b07b5a2..b784aab668670 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2650,7 +2650,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq);
if (vq->event_triggered) - vq->event_triggered = false; + data_race(vq->event_triggered = false);
return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : virtqueue_enable_cb_delayed_split(_vq);
From: Balbir Singh balbirs@nvidia.com
[ Upstream commit 2042c352e21d19eaf5f9e22fb6afce72293ef28c ]
During the debugging and resolution of an issue involving forced use of bounce buffers (commit 7170130e4c72 ("x86/mm/init: Handle the special case of device private pages in add_pages(), to not increase max_pfn and trigger dma_addressing_limited() bounce buffers")), it would have been easier to debug the issue if dma_addressing_limited() had reported that the device is not able to address all of memory and is thus forcing all accesses through a bounce buffer. Please see [2].
Add a dev_dbg() to report the potential use of bounce buffers when we hit the condition. When swiotlb is used, dma_addressing_limited() is used to determine the maximum DMA buffer size in dma_direct_max_mapping_size(); the debug print can be triggered from that check as well (when enabled).
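For context, a hypothetical caller (names are illustrative) that would now also emit the new dev_dbg() message whenever the limitation is detected:

    if (dma_addressing_limited(&pdev->dev))  /* now prints "device is DMA addressing limited" via dev_dbg() */
        dev_warn(&pdev->dev, "bounce buffering likely, expect reduced throughput\n");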
Link: https://lore.kernel.org/lkml/20250401000752.249348-1-balbirs@nvidia.com/ [1] Link: https://lore.kernel.org/lkml/20250310112206.4168-1-spasswolf@web.de/ [2]
Cc: Marek Szyprowski m.szyprowski@samsung.com Cc: Robin Murphy robin.murphy@arm.com Cc: "Christian König" christian.koenig@amd.com Cc: Ingo Molnar mingo@kernel.org Cc: Kees Cook kees@kernel.org Cc: Bjorn Helgaas bhelgaas@google.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Andy Lutomirski luto@kernel.org Cc: Alex Deucher alexander.deucher@amd.com Cc: Bert Karwatzki spasswolf@web.de Cc: Christoph Hellwig hch@infradead.org
Signed-off-by: Balbir Singh balbirs@nvidia.com Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Link: https://lore.kernel.org/r/20250414113752.3298276-1-balbirs@nvidia.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/dma/mapping.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index cda127027e48a..67da08fa67237 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -918,7 +918,7 @@ EXPORT_SYMBOL(dma_set_coherent_mask); * the system, else %false. Lack of addressing bits is the prime reason for * bounce buffering, but might not be the only one. */ -bool dma_addressing_limited(struct device *dev) +static bool __dma_addressing_limited(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -930,6 +930,15 @@ bool dma_addressing_limited(struct device *dev) return false; return !dma_direct_all_ram_mapped(dev); } + +bool dma_addressing_limited(struct device *dev) +{ + if (!__dma_addressing_limited(dev)) + return false; + + dev_dbg(dev, "device is DMA addressing limited\n"); + return true; +} EXPORT_SYMBOL_GPL(dma_addressing_limited);
size_t dma_max_mapping_size(struct device *dev)
From: Lorenzo Stoakes lorenzo.stoakes@oracle.com
[ Upstream commit 8e553520596bbd5ce832e26e9d721e6a0c797b8b ]
The struct page->mapping and page->index fields are deprecated and will soon be available only as part of a folio.
It is likely that the intel_th code which sets page->mapping and page->index was implemented out of concern that some aspect of the page fault logic might encounter unexpected problems should it not.
However, the appropriate interface for inserting kernel-allocated memory is vm_insert_page() in a VM_MIXEDMAP. By using the helper function vmf_insert_mixed() we can do this with minimal churn in the existing fault handler.
By doing so, we bypass the remainder of the faulting logic. The pages are still pinned so there is no possibility of anything unexpected being done with the pages once established.
It would also be reasonable to pre-map everything on fault, however to minimise churn we retain the fault handler.
We also eliminate all code which clears page->mapping on teardown as this has now become unnecessary.
The MSU code relies on faulting to function correctly, so is by definition dependent on CONFIG_MMU. We avoid spurious reports about compilation failure for unsupported platforms by making this requirement explicit in Kconfig as part of this change too.
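For comparison, the pre-map-everything alternative mentioned above might look roughly like the sketch below; msc_mmap_premap() is a hypothetical helper and not part of this patch:

    static int msc_mmap_premap(struct msc *msc, struct vm_area_struct *vma)
    {
        unsigned long pg;
        int ret;

        for (pg = 0; pg < msc->nr_pages; pg++) {
            struct page *page = msc_buffer_get_page(msc, pg);

            if (!page)
                return -EINVAL;

            /* vm_insert_page() installs the kernel-allocated page into the VMA */
            ret = vm_insert_page(vma, vma->vm_start + (pg << PAGE_SHIFT), page);
            if (ret)
                return ret;
        }

        return 0;
    }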
Signed-off-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com Acked-by: Alexander Shishkin alexander.shishkin@linux.intel.com Link: https://lore.kernel.org/r/20250331125608.60300-1-lorenzo.stoakes@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hwtracing/intel_th/Kconfig | 1 + drivers/hwtracing/intel_th/msu.c | 31 +++++++----------------------- 2 files changed, 8 insertions(+), 24 deletions(-)
diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig index 4b6359326ede9..4f7d2b6d79e29 100644 --- a/drivers/hwtracing/intel_th/Kconfig +++ b/drivers/hwtracing/intel_th/Kconfig @@ -60,6 +60,7 @@ config INTEL_TH_STH
config INTEL_TH_MSU tristate "Intel(R) Trace Hub Memory Storage Unit" + depends on MMU help Memory Storage Unit (MSU) trace output device enables storing STP traces to system memory. It supports single diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index bf99d79a41920..7163950eb3719 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -19,6 +19,7 @@ #include <linux/io.h> #include <linux/workqueue.h> #include <linux/dma-mapping.h> +#include <linux/pfn_t.h>
#ifdef CONFIG_X86 #include <asm/set_memory.h> @@ -976,7 +977,6 @@ static void msc_buffer_contig_free(struct msc *msc) for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) { struct page *page = virt_to_page(msc->base + off);
- page->mapping = NULL; __free_page(page); }
@@ -1158,9 +1158,6 @@ static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win) int i;
for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) { - struct page *page = msc_sg_page(sg); - - page->mapping = NULL; dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE, sg_virt(sg), sg_dma_address(sg)); } @@ -1601,22 +1598,10 @@ static void msc_mmap_close(struct vm_area_struct *vma) { struct msc_iter *iter = vma->vm_file->private_data; struct msc *msc = iter->msc; - unsigned long pg;
if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex)) return;
- /* drop page _refcounts */ - for (pg = 0; pg < msc->nr_pages; pg++) { - struct page *page = msc_buffer_get_page(msc, pg); - - if (WARN_ON_ONCE(!page)) - continue; - - if (page->mapping) - page->mapping = NULL; - } - /* last mapping -- drop user_count */ atomic_dec(&msc->user_count); mutex_unlock(&msc->buf_mutex); @@ -1626,16 +1611,14 @@ static vm_fault_t msc_mmap_fault(struct vm_fault *vmf) { struct msc_iter *iter = vmf->vma->vm_file->private_data; struct msc *msc = iter->msc; + struct page *page;
- vmf->page = msc_buffer_get_page(msc, vmf->pgoff); - if (!vmf->page) + page = msc_buffer_get_page(msc, vmf->pgoff); + if (!page) return VM_FAULT_SIGBUS;
- get_page(vmf->page); - vmf->page->mapping = vmf->vma->vm_file->f_mapping; - vmf->page->index = vmf->pgoff; - - return 0; + get_page(page); + return vmf_insert_mixed(vmf->vma, vmf->address, page_to_pfn_t(page)); }
static const struct vm_operations_struct msc_mmap_ops = { @@ -1676,7 +1659,7 @@ static int intel_th_msc_mmap(struct file *file, struct vm_area_struct *vma) atomic_dec(&msc->user_count);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY); + vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP); vma->vm_ops = &msc_mmap_ops; return ret; }
From: Dmitry Torokhov dmitry.torokhov@gmail.com
[ Upstream commit 04d3e5461c1f5cf8eec964ab64948ebed826e95e ]
In preparation for closing a race when reading the driver pointer in the dev_uevent() code, introduce a device_set_driver() helper instead of setting the device->driver pointer directly.
Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Reviewed-by: Masami Hiramatsu (Google) mhiramat@kernel.org Link: https://lore.kernel.org/r/20250311052417.1846985-2-dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/base/base.h | 6 ++++++ drivers/base/core.c | 2 +- drivers/base/dd.c | 7 +++---- 3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/base/base.h b/drivers/base/base.h index 0042e4774b0ce..eb203cf8370bc 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -180,6 +180,12 @@ int driver_add_groups(const struct device_driver *drv, const struct attribute_gr void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev);
+static inline void device_set_driver(struct device *dev, const struct device_driver *drv) +{ + // FIXME - this cast should not be needed "soon" + dev->driver = (struct device_driver *)drv; +} + int devres_release_all(struct device *dev); void device_block_probing(void); void device_unblock_probing(void); diff --git a/drivers/base/core.c b/drivers/base/core.c index 2fde698430dff..4a23dc8e2cdaf 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3700,7 +3700,7 @@ int device_add(struct device *dev) device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: - dev->driver = NULL; + device_set_driver(dev, NULL); bus_remove_device(dev); BusError: device_remove_attrs(dev); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0e4b4aba885c..b526e0e0f52d7 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -550,7 +550,7 @@ static void device_unbind_cleanup(struct device *dev) arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; - dev->driver = NULL; + device_set_driver(dev, NULL); dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); @@ -629,8 +629,7 @@ static int really_probe(struct device *dev, const struct device_driver *drv) }
re_probe: - // FIXME - this cast should not be needed "soon" - dev->driver = (struct device_driver *)drv; + device_set_driver(dev, drv);
/* If using pinctrl, bind pins now before probing */ ret = pinctrl_bind_pins(dev); @@ -1014,7 +1013,7 @@ static int __device_attach(struct device *dev, bool allow_async) if (ret == 0) ret = 1; else { - dev->driver = NULL; + device_set_driver(dev, NULL); ret = 0; } } else {
From: Dmitry Torokhov dmitry.torokhov@gmail.com
[ Upstream commit 18daa52418e7e4629ed1703b64777294209d2622 ]
If userspace reads the "uevent" device attribute at the same time as another thread unbinds the device from its driver, the change of dev->driver from a valid pointer to NULL may result in a crash. Fix this by using READ_ONCE() when fetching the pointer, and by taking the bus' drivers klist lock to make sure the driver instance will not disappear while we access it.
Use WRITE_ONCE() when setting the driver pointer to ensure there is no tearing.
Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Reviewed-by: Masami Hiramatsu (Google) mhiramat@kernel.org Link: https://lore.kernel.org/r/20250311052417.1846985-3-dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/base/base.h | 13 ++++++++++++- drivers/base/bus.c | 2 +- drivers/base/core.c | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 4 deletions(-)
diff --git a/drivers/base/base.h b/drivers/base/base.h index eb203cf8370bc..123031a757d91 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -73,6 +73,7 @@ static inline void subsys_put(struct subsys_private *sp) kset_put(&sp->subsys); }
+struct subsys_private *bus_to_subsys(const struct bus_type *bus); struct subsys_private *class_to_subsys(const struct class *class);
struct driver_private { @@ -182,8 +183,18 @@ void device_driver_detach(struct device *dev);
static inline void device_set_driver(struct device *dev, const struct device_driver *drv) { + /* + * Majority (all?) read accesses to dev->driver happens either + * while holding device lock or in bus/driver code that is only + * invoked when the device is bound to a driver and there is no + * concern of the pointer being changed while it is being read. + * However when reading device's uevent file we read driver pointer + * without taking device lock (so we do not block there for + * arbitrary amount of time). We use WRITE_ONCE() here to prevent + * tearing so that READ_ONCE() can safely be used in uevent code. + */ // FIXME - this cast should not be needed "soon" - dev->driver = (struct device_driver *)drv; + WRITE_ONCE(dev->driver, (struct device_driver *)drv); }
int devres_release_all(struct device *dev); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 6b9e65a42cd2e..c8c7e08040249 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -57,7 +57,7 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ -static struct subsys_private *bus_to_subsys(const struct bus_type *bus) +struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; diff --git a/drivers/base/core.c b/drivers/base/core.c index 4a23dc8e2cdaf..e65728e99ae42 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2624,6 +2624,35 @@ static const char *dev_uevent_name(const struct kobject *kobj) return NULL; }
+/* + * Try filling "DRIVER=<name>" uevent variable for a device. Because this + * function may race with binding and unbinding the device from a driver, + * we need to be careful. Binding is generally safe, at worst we miss the + * fact that the device is already bound to a driver (but the driver + * information that is delivered through uevents is best-effort, it may + * become obsolete as soon as it is generated anyways). Unbinding is more + * risky as driver pointer is transitioning to NULL, so READ_ONCE() should + * be used to make sure we are dealing with the same pointer, and to + * ensure that driver structure is not going to disappear from under us + * we take bus' drivers klist lock. The assumption that only registered + * driver can be bound to a device, and to unregister a driver bus code + * will take the same lock. + */ +static void dev_driver_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (sp) { + scoped_guard(spinlock, &sp->klist_drivers.k_lock) { + struct device_driver *drv = READ_ONCE(dev->driver); + if (drv) + add_uevent_var(env, "DRIVER=%s", drv->name); + } + + subsys_put(sp); + } +} + static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); @@ -2655,8 +2684,8 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name);
- if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Add "DRIVER=%s" variable if the device is bound to a driver */ + dev_driver_uevent(dev, env);
/* Add common DT information about the device */ of_device_uevent(dev, env);
From: Hans de Goede hdegoede@redhat.com
[ Upstream commit f88c0c72ffb014e5eba676ee337c4eb3b1d6a119 ]
vsc_tp.tx_buf and vsc_tp.rx_buf point to a struct vsc_tp_packet; use the correct type instead of "void *" and use sizeof(*ptr) when allocating memory for these buffers.
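The general shape of the idiom (a sketch): sizing the allocation from the pointee keeps it correct if the struct ever changes:

    struct vsc_tp_packet *buf = devm_kzalloc(dev, sizeof(*buf), GFP_KERNEL);
    if (!buf)
        return -ENOMEM;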
Signed-off-by: Hans de Goede hdegoede@redhat.com Reviewed-by: Alexander Usyskin alexander.usyskin@intel.com Reviewed-by: Sakari Ailus sakari.ailus@linux.intel.com Link: https://lore.kernel.org/r/20250318141203.94342-3-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/misc/mei/vsc-tp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 7be1649b19725..40d99ad52210d 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -67,8 +67,8 @@ struct vsc_tp { u32 seq;
/* command buffer */ - void *tx_buf; - void *rx_buf; + struct vsc_tp_packet *tx_buf; + struct vsc_tp_packet *rx_buf;
atomic_t assert_cnt; wait_queue_head_t xfer_wait; @@ -160,7 +160,7 @@ static int vsc_tp_xfer_helper(struct vsc_tp *tp, struct vsc_tp_packet *pkt, { int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet); int next_xfer_len = VSC_TP_PACKET_SIZE(pkt) + VSC_TP_XFER_TIMEOUT_BYTES; - u8 *src, *crc_src, *rx_buf = tp->rx_buf; + u8 *src, *crc_src, *rx_buf = (u8 *)tp->rx_buf; int count_down = VSC_TP_MAX_XFER_COUNT; u32 recv_crc = 0, crc = ~0; struct vsc_tp_packet ack; @@ -320,7 +320,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) guard(mutex)(&tp->mutex);
/* rom xfer is big endian */ - cpu_to_be32_array(tp->tx_buf, obuf, words); + cpu_to_be32_array((u32 *)tp->tx_buf, obuf, words);
ret = read_poll_timeout(gpiod_get_value_cansleep, ret, !ret, VSC_TP_ROM_XFER_POLL_DELAY_US, @@ -336,7 +336,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) return ret;
if (ibuf) - be32_to_cpu_array(ibuf, tp->rx_buf, words); + be32_to_cpu_array(ibuf, (u32 *)tp->rx_buf, words);
return ret; } @@ -490,11 +490,11 @@ static int vsc_tp_probe(struct spi_device *spi) if (!tp) return -ENOMEM;
- tp->tx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->tx_buf = devm_kzalloc(dev, sizeof(*tp->tx_buf), GFP_KERNEL); if (!tp->tx_buf) return -ENOMEM;
- tp->rx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->rx_buf = devm_kzalloc(dev, sizeof(*tp->rx_buf), GFP_KERNEL); if (!tp->rx_buf) return -ENOMEM;
From: Marek Szyprowski m.szyprowski@samsung.com
[ Upstream commit c9b19ea63036fc537a69265acea1b18dabd1cbd3 ]
When CONFIG_NEED_DMA_MAP_STATE is not defined, dma-mapping clients might hit unused-data compilation warnings for dma_unmap_*() call arguments. Redefine the macros for those calls to let the compiler know that it is okay when the provided arguments are not used.
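An illustrative client with hypothetical names, showing how such a warning could arise: with CONFIG_NEED_DMA_MAP_STATE=n the old stubs dropped the PTR argument entirely, so 'desc' below was referenced only by macros expanding to nothing and got flagged as unused; the new stubs still evaluate it:

    struct my_desc {
        DEFINE_DMA_UNMAP_ADDR(addr);
        DEFINE_DMA_UNMAP_LEN(len);
    };

    static void my_unmap_one(struct device *dev, struct my_desc *descs, int i)
    {
        struct my_desc *desc = &descs[i];

        dma_unmap_single(dev, dma_unmap_addr(desc, addr),
                         dma_unmap_len(desc, len), DMA_TO_DEVICE);
    }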
Reported-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Suggested-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Tested-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Link: https://lore.kernel.org/r/20250415075659.428549-1-m.szyprowski@samsung.com Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/dma-mapping.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c4333..85ab710ec0e72 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,10 +629,14 @@ static inline int dma_mmap_wc(struct device *dev, #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) -#define dma_unmap_addr(PTR, ADDR_NAME) (0) -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define dma_unmap_len(PTR, LEN_NAME) (0) -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define dma_unmap_addr(PTR, ADDR_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) +#define dma_unmap_len(PTR, LEN_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) #endif
#endif /* _LINUX_DMA_MAPPING_H */
From: David Sterba dsterba@suse.com
[ Upstream commit f1ab0171e9be96fd530329fa54761cff5e09ea95 ]
The whole tree checker returns EUCLEAN, except the one check in btrfs_verify_level_key(). This was inherited from the function that was moved from disk-io.c in 2cac5af16537 ("btrfs: move btrfs_verify_level_key into tree-checker.c") but this should be unified with the rest.
Reviewed-by: Qu Wenruo wqu@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 43979891f7c89..2b66a6130269a 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -2235,7 +2235,7 @@ int btrfs_verify_level_key(struct extent_buffer *eb, btrfs_err(fs_info, "tree level mismatch detected, bytenr=%llu level expected=%u has=%u", eb->start, check->level, found_level); - return -EIO; + return -EUCLEAN; }
if (!check->has_first_key)
From: gaoxu gaoxu2@honor.com
[ Upstream commit 87c259a7a359e73e6c52c68fcbec79988999b4e6 ]
When adding a folio_memcg() call in the zram module for Android16-6.12, the following error occurs during compilation:

ERROR: modpost: "cgroup_mutex" [../soc-repo/zram.ko] undefined!
This error is caused by the indirect call to lockdep_is_held(&cgroup_mutex) within folio_memcg. The export setting for cgroup_mutex is controlled by the CONFIG_PROVE_RCU macro. If CONFIG_LOCKDEP is enabled while CONFIG_PROVE_RCU is not, this compilation error will occur.
To resolve this issue, add a parallel CONFIG_LOCKDEP check so that cgroup_mutex is properly exported when needed.
Signed-off-by: gao xu gaoxu2@honor.com Acked-by: Michal Koutný mkoutny@suse.com Signed-off-by: Tejun Heo tj@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/cgroup/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 81f078c059e86..d1b4409ed1723 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -90,7 +90,7 @@ DEFINE_MUTEX(cgroup_mutex); DEFINE_SPINLOCK(css_set_lock);
-#ifdef CONFIG_PROVE_RCU +#if (defined CONFIG_PROVE_RCU || defined CONFIG_LOCKDEP) EXPORT_SYMBOL_GPL(cgroup_mutex); EXPORT_SYMBOL_GPL(css_set_lock); #endif
From: Stefano Garzarella sgarzare@redhat.com
[ Upstream commit fec0abf52609c20279243699d08b660c142ce0aa ]
Commit cb380909ae3b ("vhost: return task creation error instead of NULL") changed the return value of vhost_task_create(), but did not update the documentation.
Reflect the change in the documentation: on an error, vhost_task_create() returns an ERR_PTR() and no longer NULL.
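The calling convention implied by the corrected comment, as a sketch (variable and argument names are illustrative; the point is checking for ERR_PTR() rather than NULL):

    vtsk = vhost_task_create(fn, handle_sigkill, data, "vhost-worker");
    if (IS_ERR(vtsk))
        return PTR_ERR(vtsk);  /* creation error, no longer NULL */

    vhost_task_start(vtsk);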
Signed-off-by: Stefano Garzarella sgarzare@redhat.com Message-Id: 20250327124435.142831-1-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin mst@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/vhost_task.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 2ef2e1b800916..2f844c279a3e0 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(vhost_task_stop); * @arg: data to be passed to fn and handled_kill * @name: the thread's name * - * This returns a specialized task for use by the vhost layer or NULL on + * This returns a specialized task for use by the vhost layer or ERR_PTR() on * failure. The returned task is inactive, and the caller must fire it up * through vhost_task_start(). */
From: Dongli Zhang dongli.zhang@oracle.com
[ Upstream commit f591cf9fce724e5075cc67488c43c6e39e8cbe27 ]
The vhost-scsi completion path may access vq->log_base when vq->log_used is already set to false.
vhost-thread                           QEMU-thread

vhost_scsi_complete_cmd_work()
-> vhost_add_used()
   -> vhost_add_used_n()
      if (unlikely(vq->log_used))
                                       QEMU disables vq->log_used
                                       via VHOST_SET_VRING_ADDR.
                                       mutex_lock(&vq->mutex);
                                       vq->log_used = false now!
                                       mutex_unlock(&vq->mutex);

                                       QEMU gfree(vq->log_base)
      log_used()
      -> log_write(vq->log_base)
Assuming the VMM is QEMU: vq->log_base comes from QEMU userspace and can be reclaimed via gfree(). As a result, this causes invalid memory writes to QEMU userspace.
The control queue path has the same issue.
Signed-off-by: Dongli Zhang dongli.zhang@oracle.com Acked-by: Jason Wang jasowang@redhat.com Reviewed-by: Mike Christie michael.christie@oracle.com Message-Id: 20250403063028.16045-2-dongli.zhang@oracle.com Signed-off-by: Michael S. Tsirkin mst@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/vhost/scsi.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 7aeff435c1d87..a71ad7353341e 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -571,6 +571,9 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) int ret;
llnode = llist_del_all(&svq->completion_list); + + mutex_lock(&svq->vq.mutex); + llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { se_cmd = &cmd->tvc_se_cmd;
@@ -604,6 +607,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vhost_scsi_release_cmd_res(se_cmd); }
+ mutex_unlock(&svq->vq.mutex); + if (signal) vhost_signal(&svq->vs->dev, &svq->vq); } @@ -1297,8 +1302,11 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work) else resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+ mutex_lock(&tmf->svq->vq.mutex); vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs, tmf->vq_desc, &tmf->resp_iov, resp_code); + mutex_unlock(&tmf->svq->vq.mutex); + vhost_scsi_release_tmf_res(tmf); }
From: Ranjan Kumar ranjan.kumar@broadcom.com
[ Upstream commit b0b7ee3b574a72283399b9232f6190be07f220c0 ]
Ensure event logs are only generated when the debug logging level MPI3_DEBUG_EVENT is enabled. This prevents unnecessary logging.
Signed-off-by: Ranjan Kumar ranjan.kumar@broadcom.com Link: https://lore.kernel.org/r/20250415101546.204018-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index ec5b1ab287177..1cf5a517b4758 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -174,6 +174,9 @@ static void mpi3mr_print_event_data(struct mpi3mr_ioc *mrioc, char *desc = NULL; u16 event;
+ if (!(mrioc->logging_level & MPI3_DEBUG_EVENT)) + return; + event = event_reply->event;
switch (event) {
From: Vladimir Oltean vladimir.oltean@nxp.com
[ Upstream commit 1d587faa5be7e9785b682cc5f58ba8f4100c13ea ]
This small snippet of code ensures that we do something with the array of RX software buffer descriptor elements after passing the skb to the stack. In this case, we see if the other half of the page is reusable, and if so, we "turn around" the buffers, making them directly usable by enetc_refill_rx_ring() without going to enetc_new_page().
We will need to perform this kind of buffer flipping from a new code path, i.e. from XDP_PASS. Currently, enetc_build_skb() does it there buffer by buffer, but in a subsequent change we will stop using enetc_build_skb() for XDP_PASS.
Signed-off-by: Vladimir Oltean vladimir.oltean@nxp.com Reviewed-by: Wei Fang wei.fang@nxp.com Link: https://patch.msgid.link/20250417120005.3288549-3-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/freescale/enetc/enetc.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 2106861463e40..15f510d0913d3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1850,6 +1850,16 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, } }
+static void enetc_bulk_flip_buff(struct enetc_bdr *rx_ring, int rx_ring_first, + int rx_ring_last) +{ + while (rx_ring_first != rx_ring_last) { + enetc_flip_rx_buff(rx_ring, + &rx_ring->rx_swbd[rx_ring_first]); + enetc_bdr_idx_inc(rx_ring, &rx_ring_first); + } +} + static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, struct napi_struct *napi, int work_limit, struct bpf_prog *prog) @@ -1965,11 +1975,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_xdp_drop(rx_ring, orig_i, i); rx_ring->stats.xdp_redirect_failures++; } else { - while (orig_i != i) { - enetc_flip_rx_buff(rx_ring, - &rx_ring->rx_swbd[orig_i]); - enetc_bdr_idx_inc(rx_ring, &orig_i); - } + enetc_bulk_flip_buff(rx_ring, orig_i, i); xdp_redirect_frm_cnt++; rx_ring->stats.xdp_redirect++; }
From: Balbir Singh balbirs@nvidia.com
[ Upstream commit cae5572ec9261f752af834cdaaf5a0ba0afcf256 ]
lkp reported a warning about a missing prototype for a recent patch.
The kernel-doc style comments are out of sync; move them to the right function.
Cc: Marek Szyprowski m.szyprowski@samsung.com Cc: Christoph Hellwig hch@lst.de
Reported-by: kernel test robot lkp@intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202504190615.g9fANxHw-lkp@intel.com/
Signed-off-by: Balbir Singh balbirs@nvidia.com [mszyprow: reformatted subject] Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Link: https://lore.kernel.org/r/20250422114034.3535515-1-balbirs@nvidia.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/dma/mapping.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 67da08fa67237..051a32988040f 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -910,14 +910,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask);
-/** - * dma_addressing_limited - return if the device is addressing limited - * @dev: device to check - * - * Return %true if the devices DMA mask is too small to address all memory in - * the system, else %false. Lack of addressing bits is the prime reason for - * bounce buffering, but might not be the only one. - */ static bool __dma_addressing_limited(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -931,6 +923,14 @@ static bool __dma_addressing_limited(struct device *dev) return !dma_direct_all_ram_mapped(dev); }
+/** + * dma_addressing_limited - return if the device is addressing limited + * @dev: device to check + * + * Return %true if the devices DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one. + */ bool dma_addressing_limited(struct device *dev) { if (!__dma_addressing_limited(dev))
From: Frederick Lawler fred@cloudflare.com
[ Upstream commit 30d68cb0c37ebe2dc63aa1d46a28b9163e61caa2 ]
On IMA policy update, if a measure rule exists in the policy, IMA_MEASURE is set in ima_policy_flags, which makes the violation_check variable always true. Coupled with a no-action on MAY_READ for a FILE_CHECK call, we are always taking the inode_lock().
This becomes a performance problem for extremely heavy read-only workloads. Therefore, avoid taking the lock in the case where there is no action to be taken.
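Reading the new condition for a concrete case (illustrative values): a read-only FILE_CHECK open that matched no measure rule now skips the lock:

    violation_check = ((func == FILE_CHECK || ...) &&
                       (ima_policy_flag & IMA_MEASURE) &&  /* still set policy-wide */
                       ((action & IMA_MEASURE) ||          /* 0: no rule matched    */
                        (file->f_mode & FMODE_WRITE)));    /* 0: opened read-only   */
    /* violation_check is false, so "if (!action && !violation_check) return 0;"
     * fires before the inode lock is ever taken. */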
Signed-off-by: Frederick Lawler fred@cloudflare.com Acked-by: Roberto Sassu roberto.sassu@huawei.com Signed-off-by: Mimi Zohar zohar@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- security/integrity/ima/ima_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f3e7ac513db3f..f99ab1a3b0f09 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -245,7 +245,9 @@ static int process_measurement(struct file *file, const struct cred *cred, &allowed_algos); violation_check = ((func == FILE_CHECK || func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && - (ima_policy_flag & IMA_MEASURE)); + (ima_policy_flag & IMA_MEASURE) && + ((action & IMA_MEASURE) || + (file->f_mode & FMODE_WRITE))); if (!action && !violation_check) return 0;
From: Davidlohr Bueso dave@stgolabs.net
[ Upstream commit 7ffe3de53a885dbb5836541c2178bd07d1bad7df ]
Callers of __find_get_block() may or may not allow for blocking semantics, and it is currently assumed that they will not. Lay out two paths based on this. The private_lock scheme will continue to be used for atomic contexts. Otherwise take the folio lock instead, which protects the buffers, for example against migration and try_to_free_buffers().
Per the "hack idea", the latter can alleviate contention on the private_lock for bdev mappings. For reasons of determinism and avoid making bugs hard to reproduce, the trylocking is not attempted.
No change in semantics. All lookup users still take the spinlock.
Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Davidlohr Bueso dave@stgolabs.net Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-2-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/buffer.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c index cc8452f602516..a03c245022dcf 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) } EXPORT_SYMBOL(end_buffer_write_sync);
-/* - * Various filesystems appear to want __find_get_block to be non-blocking. - * But it's the page lock which protects the buffers. To get around this, - * we get exclusion from try_to_free_buffers with the blockdev mapping's - * i_private_lock. - * - * Hack idea: for the blockdev mapping, i_private_lock contention - * may be quite high. This code could TryLock the page, and if that - * succeeds, there is no need to take i_private_lock. - */ static struct buffer_head * -__find_get_block_slow(struct block_device *bdev, sector_t block) +__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) { struct address_space *bd_mapping = bdev->bd_mapping; const int blkbits = bd_mapping->host->i_blkbits; @@ -204,7 +194,16 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) if (IS_ERR(folio)) goto out;
- spin_lock(&bd_mapping->i_private_lock); + /* + * Folio lock protects the buffers. Callers that cannot block + * will fallback to serializing vs try_to_free_buffers() via + * the i_private_lock. + */ + if (atomic) + spin_lock(&bd_mapping->i_private_lock); + else + folio_lock(folio); + head = folio_buffers(folio); if (!head) goto out_unlock; @@ -236,7 +235,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) 1 << blkbits); } out_unlock: - spin_unlock(&bd_mapping->i_private_lock); + if (atomic) + spin_unlock(&bd_mapping->i_private_lock); + else + folio_unlock(folio); folio_put(folio); out: return ret; @@ -1388,14 +1390,15 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) * it in the LRU and mark it as accessed. If it is not present then return * NULL */ -struct buffer_head * -__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +static struct buffer_head * +find_get_block_common(struct block_device *bdev, sector_t block, + unsigned size, bool atomic) { struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
if (bh == NULL) { /* __find_get_block_slow will mark the page accessed */ - bh = __find_get_block_slow(bdev, block); + bh = __find_get_block_slow(bdev, block, atomic); if (bh) bh_lru_install(bh); } else @@ -1403,6 +1406,12 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
return bh; } + +struct buffer_head * +__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +{ + return find_get_block_common(bdev, block, size, true); +} EXPORT_SYMBOL(__find_get_block);
/**
On Tue, Apr 29, 2025 at 07:49:53PM -0400, Sasha Levin wrote:
From: Davidlohr Bueso dave@stgolabs.net
[ Upstream commit 7ffe3de53a885dbb5836541c2178bd07d1bad7df ]
Callers of __find_get_block() may or may not allow for blocking semantics, and it is currently assumed that they will not. Lay out two paths based on this. The private_lock scheme will continue to be used for atomic contexts. Otherwise, take the folio lock instead, which protects the buffers, such as against migration and try_to_free_buffers().
Per the "hack idea", the latter can alleviate contention on the private_lock for bdev mappings. For reasons of determinism and avoid making bugs hard to reproduce, the trylocking is not attempted.
No change in semantics. All lookup users still take the spinlock.
This is pushing it. I would not expect this to go to stable. At all.
BTW, a while ago you had mentioned that the code for auto-sel would be published; is it available anywhere?
Luis
From: Davidlohr Bueso dave@stgolabs.net
[ Upstream commit 2814a7d3d2ff5d2cdd22936f641f758fdb971fa0 ]
Add __find_get_block_nonatomic() and sb_find_get_block_nonatomic() calls, to which users will be converted where safe. These versions take the folio lock instead of the mapping's private_lock.
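As a usage sketch only (hypothetical caller, not part of the patch), a filesystem path that is allowed to sleep would use the new helper instead of sb_find_get_block():

        struct buffer_head *bh;

        /* process context, sleeping allowed: prefer the folio-lock variant */
        bh = sb_find_get_block_nonatomic(sb, block);
        if (bh) {
                /* ... inspect or forget the buffer ... */
                brelse(bh);
        }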
Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Davidlohr Bueso dave@stgolabs.net Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-3-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/buffer.c | 9 +++++++++ include/linux/buffer_head.h | 8 ++++++++ 2 files changed, 17 insertions(+)
diff --git a/fs/buffer.c b/fs/buffer.c index a03c245022dcf..7981097c846d4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1414,6 +1414,15 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) } EXPORT_SYMBOL(__find_get_block);
+/* same as __find_get_block() but allows sleeping contexts */ +struct buffer_head * +__find_get_block_nonatomic(struct block_device *bdev, sector_t block, + unsigned size) +{ + return find_get_block_common(bdev, block, size, false); +} +EXPORT_SYMBOL(__find_get_block_nonatomic); + /** * bdev_getblk - Get a buffer_head in a block device's buffer cache. * @bdev: The block device. diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 932139c5d46f5..ffcd76d977703 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -223,6 +223,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *__find_get_block_nonatomic(struct block_device *bdev, + sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); @@ -398,6 +400,12 @@ sb_find_get_block(struct super_block *sb, sector_t block) return __find_get_block(sb->s_bdev, block, sb->s_blocksize); }
+static inline struct buffer_head * +sb_find_get_block_nonatomic(struct super_block *sb, sector_t block) +{ + return __find_get_block_nonatomic(sb->s_bdev, block, sb->s_blocksize); +} + static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) {
From: Davidlohr Bueso dave@stgolabs.net
[ Upstream commit 5b67d43976828dea2394eae2556b369bb7a61f64 ]
Convert to the new nonatomic flavor to benefit from potential performance gains, and to adapt to future changes around migration while keeping the same semantics.
Convert write_boundary_block(), which already takes the buffer lock, as well as bdev_getblk() depending on the respective gfp flags. There are no changes in semantics.
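A small illustration of what the gfp-based split means for bdev_getblk() callers (illustrative fragment, not from the patch):

        struct buffer_head *bh;

        /* GFP_KERNEL allows blocking: the lookup may take the folio lock */
        bh = bdev_getblk(bdev, block, size, GFP_KERNEL);

        /* GFP_NOWAIT callers stay on the atomic i_private_lock path */
        bh = bdev_getblk(bdev, block, size, GFP_NOWAIT);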
Suggested-by: Jan Kara jack@suse.cz Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Davidlohr Bueso dave@stgolabs.net Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-4-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev # [0] [1] Reviewed-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/buffer.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c index 7981097c846d4..2494fe3a5e69e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -658,7 +658,9 @@ EXPORT_SYMBOL(generic_buffers_fsync); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize) { - struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); + struct buffer_head *bh; + + bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) write_dirty_buffer(bh, 0); @@ -1440,7 +1442,12 @@ EXPORT_SYMBOL(__find_get_block_nonatomic); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - struct buffer_head *bh = __find_get_block(bdev, block, size); + struct buffer_head *bh; + + if (gfpflags_allow_blocking(gfp)) + bh = __find_get_block_nonatomic(bdev, block, size); + else + bh = __find_get_block(bdev, block, size);
might_alloc(gfp); if (bh)
From: Davidlohr Bueso dave@stgolabs.net
[ Upstream commit a0b5ff07491010789fcb012bc8f9dad9d26f9a8b ]
This is a path that allows for blocking as it does IO. Convert to the new nonatomic flavor to benefit from potential performance gains, and to adapt to future changes around migration while keeping the same semantics.
Suggested-by: Jan Kara jack@suse.cz Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Davidlohr Bueso dave@stgolabs.net Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-5-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ocfs2/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f1b4b3e611cb9..c7a9729dc9d08 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1249,7 +1249,7 @@ static int ocfs2_force_read_journal(struct inode *inode) }
for (i = 0; i < p_blocks; i++, p_blkno++) { - bh = __find_get_block(osb->sb->s_bdev, p_blkno, + bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno, osb->sb->s_blocksize); /* block not cached. */ if (!bh)
From: Davidlohr Bueso dave@stgolabs.net
[ Upstream commit f76d4c28a46a9260d85e00dafc8f46d369365d33 ]
Convert to the new nonatomic flavor to benefit from potential performance gains, and to adapt to future changes around migration while keeping the same semantics.
- jbd2_journal_revoke(): can sleep (has might_sleep() in the beginning)
- jbd2_journal_cancel_revoke(): only used from do_get_write_access() and do_get_create_access() which do sleep. So can sleep.
- jbd2_clear_buffer_revoked_flags() - only called from journal commit code which sleeps. So can sleep.
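To make the sleepability reasoning concrete, here is the first conversion site condensed from the diff below (a fragment, not a literal excerpt):

        might_sleep();          /* jbd2_journal_revoke() already asserts it may block */

        if (!bh) {
                /* sleeping allowed, so the folio-lock lookup variant is safe */
                bh = __find_get_block_nonatomic(bdev, blocknr,
                                                journal->j_blocksize);
                if (bh)
                        BUFFER_TRACE(bh, "found on hash");
        }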
Suggested-by: Jan Kara jack@suse.cz Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Davidlohr Bueso dave@stgolabs.net Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-6-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/jbd2/revoke.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index ce63d5fde9c3a..f68fc8c255f00 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -345,7 +345,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, bh = bh_in;
if (!bh) { - bh = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh) BUFFER_TRACE(bh, "found on hash"); } @@ -355,7 +356,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
/* If there is a different buffer_head lying around in * memory anywhere... */ - bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh2 = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh2) { /* ... and it has RevokeValid status... */ if (bh2 != bh && buffer_revokevalid(bh2)) @@ -466,7 +468,8 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) * state machine will get very upset later on. */ if (need_cancel) { struct buffer_head *bh2; - bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); + bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, + bh->b_size); if (bh2) { if (bh2 != bh) clear_buffer_revoked(bh2); @@ -495,9 +498,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal) struct jbd2_revoke_record_s *record; struct buffer_head *bh; record = (struct jbd2_revoke_record_s *)list_entry; - bh = __find_get_block(journal->j_fs_dev, - record->blocknr, - journal->j_blocksize); + bh = __find_get_block_nonatomic(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); if (bh) { clear_buffer_revoked(bh); __brelse(bh);
From: Davidlohr Bueso dave@stgolabs.net
[ Upstream commit 6e8f57fd09c9fb569d10b2ccc3878155b702591a ]
Enable ext4_free_blocks() to use it; this path has a cond_resched() to begin with. Convert to the new nonatomic flavor to benefit from potential performance gains, and to adapt to future changes around migration while keeping the same semantics.
Suggested-by: Jan Kara jack@suse.cz Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Davidlohr Bueso dave@stgolabs.net Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-7-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/mballoc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b25a27c866969..d6f1e61c6dc82 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6644,7 +6644,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, for (i = 0; i < count; i++) { cond_resched(); if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); + bh = sb_find_get_block_nonatomic(inode->i_sb, + block + i); ext4_forget(handle, is_metadata, inode, bh, block + i); } }
From: Nicholas Susanto nsusanto@amd.com
[ Upstream commit 756c85e4d0ddc497b4ad5b1f41ad54e838e06188 ]
[Why]
Urgent latency adjustment was disabled on DCN35 due to issues with P0 enablement on some platforms. Without urgent latency, underflows occur when doing certain high timing configurations. After testing, we found that re-enabling urgent latency didn't reintroduce the P0 enablement issues on multiple platforms.
[How]
Re-enable urgent latency on DCN35 and set it to 3000 MHz.
This reverts commit 3412860cc4c0c484f53f91b371483e6e4440c3e5.
Reviewed-by: Charlene Liu charlene.liu@amd.com Signed-off-by: Nicholas Susanto nsusanto@amd.com Signed-off-by: Zaeem Mohamed zaeem.mohamed@amd.com Tested-by: Mark Broadworth mark.broadworth@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com (cherry picked from commit cd74ce1f0cddffb3f36d0995d0f61e89f0010738) Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 47d785204f29c..beed7adbbd43e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 0, + .do_urgent_latency_adjustment = 1, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, };
void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr)
From: Felix Kuehling felix.kuehling@amd.com
[ Upstream commit a92741e72f91b904c1d8c3d409ed8dbe9c1f2b26 ]
If peer memory is accessible through XGMI, allow leaving it in VRAM rather than forcing its migration to GTT on DMABuf attachment.
Signed-off-by: Felix Kuehling felix.kuehling@amd.com Tested-by: Hao (Claire) Zhou hao.zhou@amd.com Reviewed-by: Christian König christian.koenig@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com (cherry picked from commit 372c8d72c3680fdea3fbb2d6b089f76b4a6d596a) Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index c9842a0e2a1cd..cb043296f9aec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,6 +43,29 @@ #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h>
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + +/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. + */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -54,11 +77,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false;
amdgpu_vm_bo_update_shared(bo); @@ -459,6 +484,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj;
+ if (!adev) + return false; + if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf;
From: Ihor Solodrai ihor.solodrai@linux.dev
[ Upstream commit f2858f308131a09e33afb766cd70119b5b900569 ]
"sockmap_ktls disconnect_after_delete" test has been failing on BPF CI after recent merges from netdev: * https://github.com/kernel-patches/bpf/actions/runs/14458537639 * https://github.com/kernel-patches/bpf/actions/runs/14457178732
It happens because disconnect has been disabled for TLS [1], and it renders the test case invalid.
Removing all the test code creates a conflict between bpf and bpf-next, so for now only remove the offending assert [2].
The test will be removed later on bpf-next.
[1] https://lore.kernel.org/netdev/20250404180334.3224206-1-kuba@kernel.org/ [2] https://lore.kernel.org/bpf/cfc371285323e1a3f3b006bfcf74e6cf7ad65258@linux.d...
Signed-off-by: Ihor Solodrai ihor.solodrai@linux.dev Signed-off-by: Andrii Nakryiko andrii@kernel.org Reviewed-by: Jiayuan Chen jiayuan.chen@linux.dev Link: https://lore.kernel.org/bpf/20250416170246.2438524-1-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862a..0a99fd404f6dc 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) goto close_cli;
err = disconnect(cli); - ASSERT_OK(err, "disconnect");
close_cli: close(cli);
From: "Darrick J. Wong" djwong@kernel.org
[ Upstream commit c0e473a0d226479e8e925d5ba93f751d8df628e9 ]
With the new large sector size support, it's now the case that set_blocksize can change i_blksize and the folio order in a manner that conflicts with a concurrent reader and causes a kernel crash.
Specifically, let's say that udev-worker calls libblkid to detect the labels on a block device. The read call can create an order-0 folio to read the first 4096 bytes from the disk. But then udev is preempted.
Next, someone tries to mount an 8k-sectorsize filesystem from the same block device. The filesystem calls set_blocksize, which sets i_blksize to 8192 and the minimum folio order to 1.
Now udev resumes, still holding the order-0 folio it allocated. It then tries to schedule a read bio, and do_mpage_readahead tries to create bufferheads for the folio. Unfortunately, blocks_per_folio == 0 because the page size is 4096 but the blocksize is 8192, so no bufferheads are attached and the bh walk never sets bdev. We then submit the bio with a NULL block device and crash.
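A tiny worked illustration of the failing arithmetic (values taken from the scenario above, not code from the patch):

        unsigned int blkbits = blksize_bits(8192);              /* 13 after set_blocksize */
        size_t folio_size = 4096;                               /* stale order-0 folio    */
        unsigned int blocks_per_folio = folio_size >> blkbits;  /* 4096 >> 13 == 0        */
        /* zero buffers per folio: no bufferheads are attached, bh->b_bdev is never set   */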
Therefore, truncate the page cache after flushing but before updating i_blksize. However, that's not enough -- we also need to lock out file IO and page faults during the update. Take both the i_rwsem and the invalidate_lock in exclusive mode for invalidations, and in shared mode for read/write operations.
I don't know if this is the correct fix, but xfs/259 found it.
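For readers of the flattened diff below, the resulting lock ordering on the set_blocksize() side, condensed from the patch (the read/write paths take the i_rwsem in shared mode):

        inode_lock(inode);                              /* i_rwsem, exclusive */
        filemap_invalidate_lock(inode->i_mapping);      /* invalidate_lock    */

        sync_blockdev(bdev);
        kill_bdev(bdev);

        inode->i_blkbits = blksize_bits(size);          /* geometry change    */
        kill_bdev(bdev);

        filemap_invalidate_unlock(inode->i_mapping);
        inode_unlock(inode);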
Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de Reviewed-by: Luis Chamberlain mcgrof@kernel.org Tested-by: Shin'ichiro Kawasaki shinichiro.kawasaki@wdc.com Link: https://lore.kernel.org/r/174543795699.4139148.2086129139322431423.stgit@fro... Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/bdev.c | 17 +++++++++++++++++ block/blk-zoned.c | 5 ++++- block/fops.c | 16 ++++++++++++++++ block/ioctl.c | 6 ++++++ 4 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/block/bdev.c b/block/bdev.c index 9d73a8fbf7f99..06b8cab31d759 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -167,9 +167,26 @@ int set_blocksize(struct file *file, int size)
/* Don't change the size if it is same as current */ if (inode->i_blkbits != blksize_bits(size)) { + /* + * Flush and truncate the pagecache before we reconfigure the + * mapping geometry because folio sizes are variable now. If a + * reader has already allocated a folio whose size is smaller + * than the new min_order but invokes readahead after the new + * min_order becomes visible, readahead will think there are + * "zero" blocks per folio and crash. Take the inode and + * invalidation locks to avoid racing with + * read/write/fallocate. + */ + inode_lock(inode); + filemap_invalidate_lock(inode->i_mapping); + sync_blockdev(bdev); + kill_bdev(bdev); + inode->i_blkbits = blksize_bits(size); kill_bdev(bdev); + filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); } return 0; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 0c77244a35c92..8f15d1aa6eb89 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, op = REQ_OP_ZONE_RESET;
/* Invalidate the page cache, including dirty pages. */ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); ret = blkdev_truncate_zone_range(bdev, mode, &zrange); if (ret) @@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
fail: - if (cmd == BLKRESETZONE) + if (cmd == BLKRESETZONE) { filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); + }
return ret; } diff --git a/block/fops.c b/block/fops.c index be9f1dbea9ce0..e221fdcaa8aaf 100644 --- a/block/fops.c +++ b/block/fops.c @@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = direct_write_fallback(iocb, from, ret, blkdev_buffered_write(iocb, from)); } else { + /* + * Take i_rwsem and invalidate_lock to avoid racing with + * set_blocksize changing i_blkbits/folio order and punching + * out the pagecache. + */ + inode_lock_shared(bd_inode); ret = blkdev_buffered_write(iocb, from); + inode_unlock_shared(bd_inode); }
if (ret > 0) @@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct inode *bd_inode = bdev_file_inode(iocb->ki_filp); struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); loff_t size = bdev_nr_bytes(bdev); loff_t pos = iocb->ki_pos; @@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) goto reexpand; }
+ /* + * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize + * changing i_blkbits/folio order and punching out the pagecache. + */ + inode_lock_shared(bd_inode); ret = filemap_read(iocb, to, ret); + inode_unlock_shared(bd_inode);
reexpand: if (unlikely(shorted)) @@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, if ((start | len) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL;
+ inode_lock(inode); filemap_invalidate_lock(inode->i_mapping);
/* @@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
fail: filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); return error; }
diff --git a/block/ioctl.c b/block/ioctl.c index 6554b728bae6a..919066b4bb49c 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -141,6 +141,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, if (err) return err;
+ inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) @@ -173,6 +174,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, blk_finish_plug(&plug); fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; }
@@ -198,12 +200,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, end > bdev_nr_bytes(bdev)) return -EINVAL;
+ inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; }
@@ -235,6 +239,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, return -EINVAL;
/* Invalidate the page cache, including dirty pages */ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end); if (err) @@ -245,6 +250,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; }
From: "Darrick J. Wong" djwong@kernel.org
[ Upstream commit e03463d247ddac66e71143468373df3d74a3a6bd ]
Hoist the block size validation code to bdev_validate_blocksize so that we can call it from filesystems that don't care about the bdev pagecache manipulations of set_blocksize.
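A hedged usage sketch from a hypothetical filesystem caller that only needs the validation, not the bdev pagecache manipulation (names below are illustrative, not from the patch):

        int ret;

        /* sanity-check the requested block size against the device limits
         * without calling set_blocksize() on the bdev pagecache */
        ret = bdev_validate_blocksize(sb->s_bdev, blocksize);
        if (ret)
                return ret;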
Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Luis Chamberlain mcgrof@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de Link: https://lore.kernel.org/r/174543795720.4139148.840349813093799165.stgit@frog... Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/bdev.c | 33 +++++++++++++++++++++++++++------ include/linux/blkdev.h | 1 + 2 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/block/bdev.c b/block/bdev.c index 06b8cab31d759..e5af18bc43082 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -150,18 +150,39 @@ static void set_init_blocksize(struct block_device *bdev) BD_INODE(bdev)->i_blkbits = blksize_bits(bsize); }
-int set_blocksize(struct file *file, int size) +/** + * bdev_validate_blocksize - check that this block size is acceptable + * @bdev: blockdevice to check + * @block_size: block size to check + * + * For block device users that do not use buffer heads or the block device + * page cache, make sure that this block size can be used with the device. + * + * Return: On success zero is returned, negative error code on failure. + */ +int bdev_validate_blocksize(struct block_device *bdev, int block_size) { - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = I_BDEV(inode); - - if (blk_validate_block_size(size)) + if (blk_validate_block_size(block_size)) return -EINVAL;
/* Size cannot be smaller than the size supported by the device */ - if (size < bdev_logical_block_size(bdev)) + if (block_size < bdev_logical_block_size(bdev)) return -EINVAL;
+ return 0; +} +EXPORT_SYMBOL_GPL(bdev_validate_blocksize); + +int set_blocksize(struct file *file, int size) +{ + struct inode *inode = file->f_mapping->host; + struct block_device *bdev = I_BDEV(inode); + int ret; + + ret = bdev_validate_blocksize(bdev, size); + if (ret) + return ret; + if (!file->private_data) return -EINVAL;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d37751789bf58..ef98bcca7f5f7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1593,6 +1593,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); }
+int bdev_validate_blocksize(struct block_device *bdev, int block_size); int set_blocksize(struct file *file, int size);
int lookup_bdev(const char *pathname, dev_t *dev);
From: Pavel Begunkov asml.silence@gmail.com
[ Upstream commit 5e16f1a68d28965c12b6fa227a306fef8a680f84 ]
io_req_post_cqe() sets submit_state.cq_flush so that *flush_completions() can take care of batch committing CQEs. Don't commit it twice by using __io_cq_unlock_post().
Signed-off-by: Pavel Begunkov asml.silence@gmail.com Link: https://lore.kernel.org/r/41c416660c509cee676b6cad96081274bcb459f3.174549386... Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- io_uring/io_uring.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7370f763346f4..1421ada5b0330 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -877,10 +877,15 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock);
- __io_cq_lock(ctx); - posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + if (!ctx->lockless_cq) { + spin_lock(&ctx->completion_lock); + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + spin_unlock(&ctx->completion_lock); + } else { + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + } + ctx->submit_state.cq_flush = true; - __io_cq_unlock_post(ctx); return posted; }
From: Brandon Kammerdiener brandon.kammerdiener@intel.com
[ Upstream commit 75673fda0c557ae26078177dd14d4857afbf128d ]
The _safe variant used here gets the next element before running the callback, avoiding the endless loop condition.
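A generic, simplified sketch of why the _safe pattern avoids the loop (illustration only; the kernel code uses hlist_nulls_for_each_entry_safe(), shown in the diff below):

        struct node { struct node *next; /* ... payload ... */ };
        struct node *elem, *next;

        for (elem = head; elem; elem = next) {
                next = elem->next;      /* fetch the successor before the callback */
                callback(elem);         /* the callback may delete or re-link elem */
        }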
Signed-off-by: Brandon Kammerdiener brandon.kammerdiener@intel.com Link: https://lore.kernel.org/r/20250424153246.141677-2-brandon.kammerdiener@intel... Signed-off-by: Alexei Starovoitov ast@kernel.org Acked-by: Hou Tao houtao1@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/bpf/hashtab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 4a9eeb7aef855..43574b0495c30 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2224,7 +2224,7 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_ b = &htab->buckets[i]; rcu_read_lock(); head = &b->head; - hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { + hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) { key = elem->key; if (is_percpu) { /* current cpu value for percpu map */
From: Haoran Jiang jianghaoran@kylinos.cn
[ Upstream commit 548762f05d19c5542db7590bcdfb9be1fb928376 ]
When building the latest samples/bpf on LoongArch Fedora
make M=samples/bpf
There are compilation errors as follows:
In file included from ./linux/samples/bpf/sockex2_kern.c:2: In file included from ./include/uapi/linux/in.h:25: In file included from ./include/linux/socket.h:8: In file included from ./include/linux/uio.h:9: In file included from ./include/linux/thread_info.h:60: In file included from ./arch/loongarch/include/asm/thread_info.h:15: In file included from ./arch/loongarch/include/asm/processor.h:13: In file included from ./arch/loongarch/include/asm/cpu-info.h:11: ./arch/loongarch/include/asm/loongarch.h:13:10: fatal error: 'larchintrin.h' file not found ^~~~~~~~~~~~~~~ 1 error generated.
larchintrin.h lives in /usr/lib64/clang/14.0.6/include, so the header file location has to be specified at compile time.
Test on LoongArch Fedora: https://github.com/fedora-remix-loongarch/releases-info
Signed-off-by: Haoran Jiang jianghaoran@kylinos.cn Signed-off-by: zhangxi zhangxi@kylinos.cn Signed-off-by: Andrii Nakryiko andrii@kernel.org Link: https://lore.kernel.org/bpf/20250425095042.838824-1-jianghaoran@kylinos.cn Signed-off-by: Sasha Levin sashal@kernel.org --- samples/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5b632635e00dd..95a4fa1f1e447 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -376,7 +376,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \