We found that some discrete AMD graphics devices hide function 0, and the
whole device is not supposed to be probed.
Since our original purpose is to allow integrated devices (on bus 0) to
be probed without function 0, we can limit isolated function probing
to bus 0 only.
Cc: stable(a)vger.kernel.org
Fixes: a02fd05661d73a8 ("PCI: Extend isolated function probing to LoongArch")
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
Resend to correct the subject line.
drivers/pci/probe.c | 2 +-
include/linux/hypervisor.h | 8 +++++---
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c83e75a0ec12..da6a2aef823a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2883,7 +2883,7 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
* a hypervisor that passes through individual PCI
* functions.
*/
- if (!hypervisor_isolated_pci_functions())
+ if (!hypervisor_isolated_pci_functions(bus->number))
break;
}
fn = next_fn(bus, dev, fn);
diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h
index be5417303ecf..30ece04a16d9 100644
--- a/include/linux/hypervisor.h
+++ b/include/linux/hypervisor.h
@@ -32,13 +32,15 @@ static inline bool jailhouse_paravirt(void)
#endif /* !CONFIG_X86 */
-static inline bool hypervisor_isolated_pci_functions(void)
+static inline bool hypervisor_isolated_pci_functions(int bus)
{
if (IS_ENABLED(CONFIG_S390))
return true;
- if (IS_ENABLED(CONFIG_LOONGARCH))
- return true;
+ if (IS_ENABLED(CONFIG_LOONGARCH)) {
+ if (bus == 0)
+ return true;
+ }
return jailhouse_paravirt();
}
--
2.47.3
Each damon_ctx maintains callback requests using a linked list
(damon_ctx->call_controls). When a new callback request is received via
damon_call(), the new request should be added to the list. However, the
function passes the arguments to the list_add_tail() invocation in the
opposite order: the new item to add and the list head to add it before
are swapped. Because of how the linked list manipulation is implemented,
the new request can still be reached from the context's list head, but
the list items that were added before the new request are dropped from
the list.
As a result, the callbacks are unexpectedly not invoked. Worse yet,
if the dropped callback requests were dynamically allocated, the memory
is leaked. Actually, the DAMON sysfs interface uses a dynamically
allocated repeat-mode callback request for automatic essential stats
updates. And because the online DAMON parameters commit uses
a non-repeat-mode callback request, the issue can easily be reproduced,
like below.
# damo start --damos_action stat --refresh_stat 1s
# damo tune --damos_action stat --refresh_stat 1s
The first command dynamically allocates the repeat-mode callback request
for automatic essential stat update. Users can see the essential stats
are automatically updated every second, using the sysfs interface.
The second command calls damon_commit() with a new callback request that
was made for the commit. As a result, the previously added repeat-mode
callback request is dropped from the list. The automatic stats refresh
stops working, and the memory for the repeat-mode callback request is
leaked. It can be confirmed using kmemleak.
Fix the mistake on the list_add_tail() call.
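For reference, list_add_tail() takes the new entry first and the list
head second. A minimal sketch of the intended call, matching the fix
below:
  /* include/linux/list.h: insert @new before @head, i.e. at the tail */
  void list_add_tail(struct list_head *new, struct list_head *head);
  /* the control is the new entry; the context holds the list head */
  list_add_tail(&control->list, &ctx->call_controls);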
Fixes: 004ded6bee11 ("mm/damon: accept parallel damon_call() requests")
Cc: <stable(a)vger.kernel.org> # 6.17.x
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
mm/damon/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 417f33a7868e..109b050c795a 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1453,7 +1453,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
INIT_LIST_HEAD(&control->list);
mutex_lock(&ctx->call_controls_lock);
- list_add_tail(&ctx->call_controls, &control->list);
+ list_add_tail(&control->list, &ctx->call_controls);
mutex_unlock(&ctx->call_controls_lock);
if (!damon_is_running(ctx))
return -EINVAL;
base-commit: 49926cfb24ad064c8c26f8652e8a61bbcde37701
--
2.47.3
When building for 32-bit platforms, for which 'size_t' is
'unsigned int', there are a couple of instances of -Wformat:
fs/smb/client/misc.c:922:25: error: format specifies type 'unsigned long' but the argument has type 'unsigned int' [-Werror,-Wformat]
921 | "%s: header is malformed (size is %u, must be %lu)\n",
| ~~~
| %u
922 | __func__, rsp_size, sizeof(*rsp));
| ^~~~~~~~~~~~
fs/smb/client/misc.c:940:5: error: format specifies type 'unsigned long' but the argument has type 'unsigned int' [-Werror,-Wformat]
938 | "%s: malformed buffer (size is %u, must be at least %lu)\n",
| ~~~
| %u
939 | __func__, rsp_size,
940 | sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3));
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use the proper 'size_t' format specifier, '%zu', to clear up these
warnings.
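As a standalone illustration of the mismatch and the fix (the struct
here is made up for the example, not taken from the driver):
  #include <stdio.h>
  int main(void)
  {
          struct rsp { char hdr[17]; } r;
          /*
           * On 32-bit targets size_t is 'unsigned int', so '%lu' would
           * trigger -Wformat:
           *
           *   printf("must be %lu\n", sizeof(r));
           *
           * '%zu' matches size_t on every target:
           */
          printf("must be %zu\n", sizeof(r));
          return 0;
  }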
Cc: stable(a)vger.kernel.org
Fixes: c1047752ed9f ("cifs: parse_dfs_referrals: prevent oob on malformed input")
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
Feel free to squash this into the original change to make backporting
easier. I included the tags in case rebasing was not an option.
---
fs/smb/client/misc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 987f0ca73123..e10123d8cd7d 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -918,7 +918,7 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
if (rsp_size < sizeof(*rsp)) {
cifs_dbg(VFS | ONCE,
- "%s: header is malformed (size is %u, must be %lu)\n",
+ "%s: header is malformed (size is %u, must be %zu)\n",
__func__, rsp_size, sizeof(*rsp));
rc = -EINVAL;
goto parse_DFS_referrals_exit;
@@ -935,7 +935,7 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
if (sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3) > rsp_size) {
cifs_dbg(VFS | ONCE,
- "%s: malformed buffer (size is %u, must be at least %lu)\n",
+ "%s: malformed buffer (size is %u, must be at least %zu)\n",
__func__, rsp_size,
sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3));
rc = -EINVAL;
---
base-commit: 4e47319b091f90d5776efe96d6c198c139f34883
change-id: 20251014-smb-client-fix-wformat-32b-parse_dfs_referrals-189b8c6fdf75
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3d3c4cd5c62f24bb3cb4511b7a95df707635e00a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101349-reptile-seldom-427d@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3d3c4cd5c62f24bb3cb4511b7a95df707635e00a Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel(a)pengutronix.de>
Date: Sun, 5 Oct 2025 10:12:03 +0200
Subject: [PATCH] net: usb: asix: hold PM usage ref to avoid PM/MDIO + RTNL
deadlock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Prevent USB runtime PM (autosuspend) for AX88772* in bind.
usbnet enables runtime PM (autosuspend) by default, so disabling it via
the usb_driver flag is ineffective. On AX88772B, autosuspend shows no
measurable power saving with current driver (no link partner, admin
up/down). The ~0.453 W -> ~0.248 W drop on v6.1 comes from phylib powering
the PHY off on admin-down, not from USB autosuspend.
The real hazard is that with runtime PM enabled, ndo_open() (under RTNL)
may synchronously trigger autoresume (usb_autopm_get_interface()) into
asix_resume() while the USB PM lock is held. Resume paths then invoke
phylink/phylib and MDIO, which also expect RTNL, leading to possible
deadlocks or PM lock vs MDIO wake issues.
To avoid this, keep the device runtime-PM active by taking a usage
reference in ax88772_bind() and dropping it in unbind(). A non-zero PM
usage count blocks runtime suspend regardless of userspace policy
(.../power/control - pm_runtime_allow/forbid), making this approach
robust against sysfs overrides.
Holding a runtime-PM usage ref does not affect system-wide suspend;
system sleep/resume callbacks continue to run as before.
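The pattern in isolation, as a rough sketch (it mirrors the hunks below;
error handling elided):
  /* in bind: pin the interface runtime-active without waking it */
  pm_runtime_get_noresume(&intf->dev); /* usage count > 0 blocks suspend */
  /* in unbind: drop the reference taken in bind */
  pm_runtime_put(&intf->dev);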
Fixes: 4a2c7217cd5a ("net: usb: asix: ax88772: manage PHY PM from MAC")
Reported-by: Hubert Wiśniewski <hubert.wisniewski.25632(a)gmail.com>
Closes: https://lore.kernel.org/all/DCGHG5UJT9G3.2K1GHFZ3H87T0@gmail.com
Tested-by: Hubert Wiśniewski <hubert.wisniewski.25632(a)gmail.com>
Reported-by: Marek Szyprowski <m.szyprowski(a)samsung.com>
Closes: https://lore.kernel.org/all/b5ea8296-f981-445d-a09a-2f389d7f6fdd@samsung.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
Link: https://patch.msgid.link/20251005081203.3067982-1-o.rempel@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 792ddda1ad49..85bd5d845409 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -625,6 +625,21 @@ static void ax88772_suspend(struct usbnet *dev)
asix_read_medium_status(dev, 1));
}
+/* Notes on PM callbacks and locking context:
+ *
+ * - asix_suspend()/asix_resume() are invoked for both runtime PM and
+ * system-wide suspend/resume. For struct usb_driver the ->resume()
+ * callback does not receive pm_message_t, so the resume type cannot
+ * be distinguished here.
+ *
+ * - The MAC driver must hold RTNL when calling phylink interfaces such as
+ * phylink_suspend()/resume(). Those calls will also perform MDIO I/O.
+ *
+ * - Taking RTNL and doing MDIO from a runtime-PM resume callback (while
+ * the USB PM lock is held) is fragile. Since autosuspend brings no
+ * measurable power saving here, we block it by holding a PM usage
+ * reference in ax88772_bind().
+ */
static int asix_suspend(struct usb_interface *intf, pm_message_t message)
{
struct usbnet *dev = usb_get_intfdata(intf);
@@ -919,6 +934,13 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
if (ret)
goto initphy_err;
+ /* Keep this interface runtime-PM active by taking a usage ref.
+ * Prevents runtime suspend while bound and avoids resume paths
+ * that could deadlock (autoresume under RTNL while USB PM lock
+ * is held, phylink/MDIO wants RTNL).
+ */
+ pm_runtime_get_noresume(&intf->dev);
+
return 0;
initphy_err:
@@ -948,6 +970,8 @@ static void ax88772_unbind(struct usbnet *dev, struct usb_interface *intf)
phylink_destroy(priv->phylink);
ax88772_mdio_unregister(priv);
asix_rx_fixup_common_free(dev->driver_priv);
+ /* Drop the PM usage ref taken in bind() */
+ pm_runtime_put(&intf->dev);
}
static void ax88178_unbind(struct usbnet *dev, struct usb_interface *intf)
@@ -1600,6 +1624,11 @@ static struct usb_driver asix_driver = {
.resume = asix_resume,
.reset_resume = asix_resume,
.disconnect = usbnet_disconnect,
+ /* usbnet enables autosuspend by default (supports_autosuspend=1).
+ * We keep runtime-PM active for AX88772* by taking a PM usage
+ * reference in ax88772_bind() (pm_runtime_get_noresume()) and
+ * dropping it in unbind(), which effectively blocks autosuspend.
+ */
.supports_autosuspend = 1,
.disable_hub_initiated_lpm = 1,
};
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 85afa9ea122dd9d4a2ead104a951d318975dcd25
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101341-gallon-ungloved-bc19@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 85afa9ea122dd9d4a2ead104a951d318975dcd25 Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
Date: Tue, 16 Sep 2025 11:57:56 +0900
Subject: [PATCH] PCI: endpoint: pci-epf-test: Add NULL check for DMA channels
before release
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The fields dma_chan_tx and dma_chan_rx of the struct pci_epf_test can be
NULL even after EPF initialization. It is therefore prudent to check that
they have non-NULL values before releasing the channels. Add the checks
in pci_epf_test_clean_dma_chan().
Without the checks, NULL pointer dereferences happen and they can lead
to a kernel panic in some cases:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050
Call trace:
dma_release_channel+0x2c/0x120 (P)
pci_epf_test_epc_deinit+0x94/0xc0 [pci_epf_test]
pci_epc_deinit_notify+0x74/0xc0
tegra_pcie_ep_pex_rst_irq+0x250/0x5d8
irq_thread_fn+0x34/0xb8
irq_thread+0x18c/0x2e8
kthread+0x14c/0x210
ret_from_fork+0x10/0x20
Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities")
Fixes: 5ebf3fc59bd2 ("PCI: endpoint: functions/pci-epf-test: Add DMA support to transfer data")
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
[mani: trimmed the stack trace]
Signed-off-by: Manivannan Sadhasivam <mani(a)kernel.org>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Reviewed-by: Krzysztof Wilczyński <kwilczynski(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250916025756.34807-1-shinichiro.kawasaki@wdc.com
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index 09e1b8b46b55..31617772ad51 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -301,15 +301,20 @@ static void pci_epf_test_clean_dma_chan(struct pci_epf_test *epf_test)
if (!epf_test->dma_supported)
return;
- dma_release_channel(epf_test->dma_chan_tx);
- if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) {
+ if (epf_test->dma_chan_tx) {
+ dma_release_channel(epf_test->dma_chan_tx);
+ if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) {
+ epf_test->dma_chan_tx = NULL;
+ epf_test->dma_chan_rx = NULL;
+ return;
+ }
epf_test->dma_chan_tx = NULL;
- epf_test->dma_chan_rx = NULL;
- return;
}
- dma_release_channel(epf_test->dma_chan_rx);
- epf_test->dma_chan_rx = NULL;
+ if (epf_test->dma_chan_rx) {
+ dma_release_channel(epf_test->dma_chan_rx);
+ epf_test->dma_chan_rx = NULL;
+ }
}
static void pci_epf_test_print_rate(struct pci_epf_test *epf_test,
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 85afa9ea122dd9d4a2ead104a951d318975dcd25
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101340-passably-rounding-59df@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 85afa9ea122dd9d4a2ead104a951d318975dcd25 Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
Date: Tue, 16 Sep 2025 11:57:56 +0900
Subject: [PATCH] PCI: endpoint: pci-epf-test: Add NULL check for DMA channels
before release
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The fields dma_chan_tx and dma_chan_rx of the struct pci_epf_test can be
NULL even after EPF initialization. It is therefore prudent to check that
they have non-NULL values before releasing the channels. Add the checks
in pci_epf_test_clean_dma_chan().
Without the checks, NULL pointer dereferences happen and they can lead
to a kernel panic in some cases:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050
Call trace:
dma_release_channel+0x2c/0x120 (P)
pci_epf_test_epc_deinit+0x94/0xc0 [pci_epf_test]
pci_epc_deinit_notify+0x74/0xc0
tegra_pcie_ep_pex_rst_irq+0x250/0x5d8
irq_thread_fn+0x34/0xb8
irq_thread+0x18c/0x2e8
kthread+0x14c/0x210
ret_from_fork+0x10/0x20
Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities")
Fixes: 5ebf3fc59bd2 ("PCI: endpoint: functions/pci-epf-test: Add DMA support to transfer data")
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
[mani: trimmed the stack trace]
Signed-off-by: Manivannan Sadhasivam <mani(a)kernel.org>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Reviewed-by: Krzysztof Wilczyński <kwilczynski(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250916025756.34807-1-shinichiro.kawasaki@wdc.com
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index 09e1b8b46b55..31617772ad51 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -301,15 +301,20 @@ static void pci_epf_test_clean_dma_chan(struct pci_epf_test *epf_test)
if (!epf_test->dma_supported)
return;
- dma_release_channel(epf_test->dma_chan_tx);
- if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) {
+ if (epf_test->dma_chan_tx) {
+ dma_release_channel(epf_test->dma_chan_tx);
+ if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) {
+ epf_test->dma_chan_tx = NULL;
+ epf_test->dma_chan_rx = NULL;
+ return;
+ }
epf_test->dma_chan_tx = NULL;
- epf_test->dma_chan_rx = NULL;
- return;
}
- dma_release_channel(epf_test->dma_chan_rx);
- epf_test->dma_chan_rx = NULL;
+ if (epf_test->dma_chan_rx) {
+ dma_release_channel(epf_test->dma_chan_rx);
+ epf_test->dma_chan_rx = NULL;
+ }
}
static void pci_epf_test_print_rate(struct pci_epf_test *epf_test,
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 85afa9ea122dd9d4a2ead104a951d318975dcd25
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101340-marbled-uneven-a896@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 85afa9ea122dd9d4a2ead104a951d318975dcd25 Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
Date: Tue, 16 Sep 2025 11:57:56 +0900
Subject: [PATCH] PCI: endpoint: pci-epf-test: Add NULL check for DMA channels
before release
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The fields dma_chan_tx and dma_chan_rx of the struct pci_epf_test can be
NULL even after EPF initialization. It is therefore prudent to check that
they have non-NULL values before releasing the channels. Add the checks
in pci_epf_test_clean_dma_chan().
Without the checks, NULL pointer dereferences happen and they can lead
to a kernel panic in some cases:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050
Call trace:
dma_release_channel+0x2c/0x120 (P)
pci_epf_test_epc_deinit+0x94/0xc0 [pci_epf_test]
pci_epc_deinit_notify+0x74/0xc0
tegra_pcie_ep_pex_rst_irq+0x250/0x5d8
irq_thread_fn+0x34/0xb8
irq_thread+0x18c/0x2e8
kthread+0x14c/0x210
ret_from_fork+0x10/0x20
Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities")
Fixes: 5ebf3fc59bd2 ("PCI: endpoint: functions/pci-epf-test: Add DMA support to transfer data")
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
[mani: trimmed the stack trace]
Signed-off-by: Manivannan Sadhasivam <mani(a)kernel.org>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Reviewed-by: Krzysztof Wilczyński <kwilczynski(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250916025756.34807-1-shinichiro.kawasaki@wdc.com
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index 09e1b8b46b55..31617772ad51 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -301,15 +301,20 @@ static void pci_epf_test_clean_dma_chan(struct pci_epf_test *epf_test)
if (!epf_test->dma_supported)
return;
- dma_release_channel(epf_test->dma_chan_tx);
- if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) {
+ if (epf_test->dma_chan_tx) {
+ dma_release_channel(epf_test->dma_chan_tx);
+ if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) {
+ epf_test->dma_chan_tx = NULL;
+ epf_test->dma_chan_rx = NULL;
+ return;
+ }
epf_test->dma_chan_tx = NULL;
- epf_test->dma_chan_rx = NULL;
- return;
}
- dma_release_channel(epf_test->dma_chan_rx);
- epf_test->dma_chan_rx = NULL;
+ if (epf_test->dma_chan_rx) {
+ dma_release_channel(epf_test->dma_chan_rx);
+ epf_test->dma_chan_rx = NULL;
+ }
}
static void pci_epf_test_print_rate(struct pci_epf_test *epf_test,
When fsl_edma_alloc_chan_resources() fails after clk_prepare_enable(),
the error paths only free IRQs and destroy the TCD pool, but forget to
call clk_disable_unprepare(). This causes the channel clock to remain
enabled, leaking power and resources.
Fix it by disabling the channel clock in the error unwind path.
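As a schematic (not the driver's exact code; 'handler' is a placeholder
name), the unwind releases resources in the reverse order of acquisition:
  ret = clk_prepare_enable(fsl_chan->clk);  /* acquired first */
  if (ret)
          return ret;
  ret = request_irq(fsl_chan->txirq, handler, 0, "fsl-edma", fsl_chan);
  if (ret)
          goto err_clk;
  return 0;
  err_clk:
          clk_disable_unprepare(fsl_chan->clk);  /* undone on the way out */
          return ret;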
Fixes: d8d4355861d8 ("dmaengine: fsl-edma: add i.MX8ULP edma support")
Cc: stable(a)vger.kernel.org
Suggested-by: Frank Li <Frank.Li(a)nxp.com>
Signed-off-by: Zhen Ni <zhen.ni(a)easystack.cn>
---
Changes in v2:
- Remove FSL_EDMA_DRV_HAS_CHCLK check
Changes in v3:
- Remove cleanup
Changes in v4:
- Re-send as a new thread
---
drivers/dma/fsl-edma-common.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c
index 4976d7dde080..11655dcc4d6c 100644
--- a/drivers/dma/fsl-edma-common.c
+++ b/drivers/dma/fsl-edma-common.c
@@ -852,6 +852,7 @@ int fsl_edma_alloc_chan_resources(struct dma_chan *chan)
free_irq(fsl_chan->txirq, fsl_chan);
err_txirq:
dma_pool_destroy(fsl_chan->tcd_pool);
+ clk_disable_unprepare(fsl_chan->clk);
return ret;
}
--
2.20.1
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 4d6fc29f36341d7795db1d1819b4c15fe9be7b23
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101300-backspace-pulmonary-5620@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4d6fc29f36341d7795db1d1819b4c15fe9be7b23 Mon Sep 17 00:00:00 2001
From: Donet Tom <donettom(a)linux.ibm.com>
Date: Wed, 24 Sep 2025 00:16:59 +0530
Subject: [PATCH] mm/ksm: fix incorrect KSM counter handling in mm_struct
during fork
Patch series "mm/ksm: Fix incorrect accounting of KSM counters during
fork", v3.
The first patch in this series fixes the incorrect accounting of KSM
counters such as ksm_merging_pages, ksm_rmap_items, and the global
ksm_zero_pages during fork.
The following patch adds a selftest to verify that the ksm_merging_pages
counter is updated correctly during fork.
Test Results
============
Without the first patch
-----------------------
# [RUN] test_fork_ksm_merging_page_count
not ok 10 ksm_merging_page in child: 32
With the first patch
--------------------
# [RUN] test_fork_ksm_merging_page_count
ok 10 ksm_merging_pages is not inherited after fork
This patch (of 2):
Currently, the KSM-related counters in `mm_struct`, such as
`ksm_merging_pages`, `ksm_rmap_items`, and `ksm_zero_pages`, are inherited
by the child process during fork. This results in inconsistent
accounting.
When a process uses KSM, identical pages are merged and an rmap item is
created for each merged page. The `ksm_merging_pages` and
`ksm_rmap_items` counters are updated accordingly. However, after a fork,
these counters are copied to the child while the corresponding rmap items
are not. As a result, when the child later triggers an unmerge, there are
no rmap items present in the child, so the counters remain stale, leading
to incorrect accounting.
A similar issue exists with `ksm_zero_pages`, which maintains both a
global counter and a per-process counter. During fork, the per-process
counter is inherited by the child, but the global counter is not
incremented. Since the child also references zero pages, the global
counter should be updated as well. Otherwise, during zero-page unmerge,
both the global and per-process counters are decremented, causing the
global counter to become inconsistent.
To fix this, ksm_merging_pages and ksm_rmap_items are reset to 0 during
fork, and the global ksm_zero_pages counter is updated with the
per-process ksm_zero_pages value inherited by the child. This ensures
that KSM statistics remain accurate and reflect the activity of each
process correctly.
Link: https://lkml.kernel.org/r/cover.1758648700.git.donettom@linux.ibm.com
Link: https://lkml.kernel.org/r/7b9870eb67ccc0d79593940d9dbd4a0b39b5d396.17586487…
Fixes: 7609385337a4 ("ksm: count ksm merging pages for each process")
Fixes: cb4df4cae4f2 ("ksm: count allocated ksm rmap_items for each process")
Fixes: e2942062e01d ("ksm: count all zero pages placed by KSM")
Signed-off-by: Donet Tom <donettom(a)linux.ibm.com>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Aboorva Devarajan <aboorvad(a)linux.ibm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Donet Tom <donettom(a)linux.ibm.com>
Cc: "Ritesh Harjani (IBM)" <ritesh.list(a)gmail.com>
Cc: Wei Yang <richard.weiyang(a)gmail.com>
Cc: xu xin <xu.xin16(a)zte.com.cn>
Cc: <stable(a)vger.kernel.org> [6.6+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 22e67ca7cba3..067538fc4d58 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -56,8 +56,14 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm)
static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
/* Adding mm to ksm is best effort on fork. */
- if (mm_flags_test(MMF_VM_MERGEABLE, oldmm))
+ if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) {
+ long nr_ksm_zero_pages = atomic_long_read(&mm->ksm_zero_pages);
+
+ mm->ksm_merging_pages = 0;
+ mm->ksm_rmap_items = 0;
+ atomic_long_add(nr_ksm_zero_pages, &ksm_zero_pages);
__ksm_enter(mm);
+ }
}
static inline int ksm_execve(struct mm_struct *mm)
fsnotify_mmap_perm() requires a byte offset for the file about to be
mmap'ed. But it is called from vm_mmap_pgoff(), which has a page offset.
Previously the conversion was done incorrectly, so let's fix it, being
careful not to overflow on 32-bit platforms.
Discovered during code review.
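Concretely, 'pgoff' is an unsigned long, which is 32 bits wide on these
platforms, so the shift must happen after widening. A small sketch
(assuming PAGE_SHIFT == 12), mirroring the hunk below:
  unsigned long pgoff = 0x100000;  /* page offset of the 4 GiB boundary */
  /* Wrong: the shift is evaluated in 32 bits and truncates to 0
   * before the assignment widens the result.
   */
  loff_t bad = pgoff << PAGE_SHIFT;
  /* Right: widen to the 64-bit loff_t first, then shift. */
  loff_t off = (loff_t)pgoff << PAGE_SHIFT;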
Cc: <stable(a)vger.kernel.org>
Fixes: 066e053fe208 ("fsnotify: add pre-content hooks on mmap()")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
---
Applies against today's mm-unstable (aa05a436eca8).
Thanks,
Ryan
mm/util.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/util.c b/mm/util.c
index 6c1d64ed0221..8989d5767528 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -566,6 +566,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flag, unsigned long pgoff)
{
+ loff_t off = (loff_t)pgoff << PAGE_SHIFT;
unsigned long ret;
struct mm_struct *mm = current->mm;
unsigned long populate;
@@ -573,7 +574,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
ret = security_mmap_file(file, prot, flag);
if (!ret)
- ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len);
+ ret = fsnotify_mmap_perm(file, prot, off, len);
if (!ret) {
if (mmap_write_lock_killable(mm))
return -EINTR;
--
2.43.0
Since commits
7b9eb53e8591 ("media: cx18: Access v4l2_fh from file")
9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file")
all the ioctl handlers access their private data structures from the
file * argument.
The ivtv and cx18 drivers call the ioctl handlers from their
DVB layer without a valid file *, causing invalid memory access.
The issue has been reported by smatch in
"[bug report] media: cx18: Access v4l2_fh from file"
Fix this by providing wrappers for the ioctl handlers to be
used by the DVB layer that do not require a valid file *.
Signed-off-by: Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
---
Changes in v5:
- Remove stray link from cx18 patch
- Add Hans' tags
- Link to v4: https://lore.kernel.org/r/20250819-cx18-v4l2-fh-v4-0-9db1635d6787@ideasonbo…
Changes in v4:
- Slightly adjust commit messages
- Link to v3: https://lore.kernel.org/r/20250818-cx18-v4l2-fh-v3-0-5e2f08f3cadc@ideasonbo…
Changes in v3:
- Change helpers to accept the type they're going to operate on instead
of using the open_id wrapper type as suggested by Laurent
- Link to v2: https://lore.kernel.org/r/20250818-cx18-v4l2-fh-v2-0-3f53ce423663@ideasonbo…
Changes in v2:
- Add Cc: stable(a)vger.kernel.org per-patch
---
Jacopo Mondi (2):
media: cx18: Fix invalid access to file *
media: ivtv: Fix invalid access to file *
drivers/media/pci/cx18/cx18-driver.c | 9 +++------
drivers/media/pci/cx18/cx18-ioctl.c | 30 +++++++++++++++++++-----------
drivers/media/pci/cx18/cx18-ioctl.h | 8 +++++---
drivers/media/pci/ivtv/ivtv-driver.c | 11 ++++-------
drivers/media/pci/ivtv/ivtv-ioctl.c | 22 +++++++++++++++++-----
drivers/media/pci/ivtv/ivtv-ioctl.h | 6 ++++--
6 files changed, 52 insertions(+), 34 deletions(-)
---
base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
change-id: 20250818-cx18-v4l2-fh-7eaa6199fdde
Best regards,
--
Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
In the IOMMU Shared Virtual Addressing (SVA) context, the IOMMU hardware
shares and walks the CPU's page tables. The x86 architecture maps the
kernel's virtual address space into the upper portion of every process's
page table. Consequently, in an SVA context, the IOMMU hardware can walk
and cache kernel page table entries.
The Linux kernel currently lacks a notification mechanism for kernel page
table changes, specifically when page table pages are freed and reused.
The IOMMU driver is only notified of changes to user virtual address
mappings. This can cause the IOMMU's internal caches to retain stale
entries for kernel VA.
A Use-After-Free (UAF) and Write-After-Free (WAF) condition arises when
kernel page table pages are freed and later reallocated. The IOMMU could
misinterpret the new data as valid page table entries. The IOMMU might
then walk into attacker-controlled memory, leading to arbitrary physical
memory DMA access or privilege escalation. The Write-After-Free aspect
arises because the IOMMU will potentially continue to write Accessed and
Dirty bits to the freed memory while attempting to walk the stale page
tables.
Currently, SVA contexts are unprivileged and cannot access kernel
mappings. However, the IOMMU will still walk kernel-only page tables
all the way down to the leaf entries, where it realizes the mapping
is for the kernel and errors out. This means the IOMMU still caches
these intermediate page table entries, making the described vulnerability
a real concern.
To mitigate this, a new IOMMU interface is introduced to flush IOTLB
entries for the kernel address space. This interface is invoked from the
x86 architecture code that manages combined user and kernel page tables,
specifically before any kernel page table page is freed and reused.
This addresses the main issue with vfree() which is a common occurrence
and can be triggered by unprivileged users. While this resolves the
primary problem, it doesn't address an extremely rare case related to
memory unplug of memory that was present as reserved memory at boot,
which cannot be triggered by unprivileged users. The discussion can be
found at the link below.
Fixes: 26b25a2b98e4 ("iommu: Bind process address spaces to devices")
Cc: stable(a)vger.kernel.org
Suggested-by: Jann Horn <jannh(a)google.com>
Co-developed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Link: https://lore.kernel.org/linux-iommu/04983c62-3b1d-40d4-93ae-34ca04b827e5@in…
---
include/linux/iommu.h | 4 ++++
drivers/iommu/iommu-sva.c | 29 ++++++++++++++++++++++++++++-
mm/pgtable-generic.c | 2 ++
3 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c30d12e16473..66e4abb2df0d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1134,7 +1134,9 @@ struct iommu_sva {
struct iommu_mm_data {
u32 pasid;
+ struct mm_struct *mm;
struct list_head sva_domains;
+ struct list_head mm_list_elm;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode);
@@ -1615,6 +1617,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
struct mm_struct *mm);
void iommu_sva_unbind_device(struct iommu_sva *handle);
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end);
#else
static inline struct iommu_sva *
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
@@ -1639,6 +1642,7 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm)
}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
+static inline void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end) {}
#endif /* CONFIG_IOMMU_SVA */
#ifdef CONFIG_IOMMU_IOPF
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 1a51cfd82808..d236aef80a8d 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -10,6 +10,8 @@
#include "iommu-priv.h"
static DEFINE_MUTEX(iommu_sva_lock);
+static bool iommu_sva_present;
+static LIST_HEAD(iommu_sva_mms);
static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
struct mm_struct *mm);
@@ -42,6 +44,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
return ERR_PTR(-ENOSPC);
}
iommu_mm->pasid = pasid;
+ iommu_mm->mm = mm;
INIT_LIST_HEAD(&iommu_mm->sva_domains);
/*
* Make sure the write to mm->iommu_mm is not reordered in front of
@@ -132,8 +135,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
if (ret)
goto out_free_domain;
domain->users = 1;
- list_add(&domain->next, &mm->iommu_mm->sva_domains);
+ if (list_empty(&iommu_mm->sva_domains)) {
+ if (list_empty(&iommu_sva_mms))
+ iommu_sva_present = true;
+ list_add(&iommu_mm->mm_list_elm, &iommu_sva_mms);
+ }
+ list_add(&domain->next, &iommu_mm->sva_domains);
out:
refcount_set(&handle->users, 1);
mutex_unlock(&iommu_sva_lock);
@@ -175,6 +183,13 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
list_del(&domain->next);
iommu_domain_free(domain);
}
+
+ if (list_empty(&iommu_mm->sva_domains)) {
+ list_del(&iommu_mm->mm_list_elm);
+ if (list_empty(&iommu_sva_mms))
+ iommu_sva_present = false;
+ }
+
mutex_unlock(&iommu_sva_lock);
kfree(handle);
}
@@ -312,3 +327,15 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
return domain;
}
+
+void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end)
+{
+ struct iommu_mm_data *iommu_mm;
+
+ guard(mutex)(&iommu_sva_lock);
+ if (!iommu_sva_present)
+ return;
+
+ list_for_each_entry(iommu_mm, &iommu_sva_mms, mm_list_elm)
+ mmu_notifier_arch_invalidate_secondary_tlbs(iommu_mm->mm, start, end);
+}
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 1c7caa8ef164..8c22be79b734 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -13,6 +13,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mm_inline.h>
+#include <linux/iommu.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -430,6 +431,7 @@ static void kernel_pgtable_work_func(struct work_struct *work)
list_splice_tail_init(&kernel_pgtable_work.list, &page_list);
spin_unlock(&kernel_pgtable_work.lock);
+ iommu_sva_invalidate_kva_range(PAGE_OFFSET, TLB_FLUSH_ALL);
list_for_each_entry_safe(pt, next, &page_list, pt_list)
__pagetable_free(pt);
}
--
2.43.0
Since commits
7b9eb53e8591 ("media: cx18: Access v4l2_fh from file")
9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file")
all the ioctl handlers access their private data structures from the
file * argument.
The ivtv and cx18 drivers call the ioctl handlers from their
DVB layer without a valid file *, causing invalid memory access.
The issue has been reported by smatch in
"[bug report] media: cx18: Access v4l2_fh from file"
Fix this by providing wrappers for the ioctl handlers to be
used by the DVB layer that do not require a valid file *.
Signed-off-by: Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
---
Changes in v4:
- Slightly adjust commit messages
- Link to v3: https://lore.kernel.org/r/20250818-cx18-v4l2-fh-v3-0-5e2f08f3cadc@ideasonbo…
Changes in v3:
- Change helpers to accept the type they're going to operate on instead
of using the open_id wrapper type as suggested by Laurent
- Link to v2: https://lore.kernel.org/r/20250818-cx18-v4l2-fh-v2-0-3f53ce423663@ideasonbo…
Changes in v2:
- Add Cc: stable(a)vger.kernel.org per-patch
---
Jacopo Mondi (2):
media: cx18: Fix invalid access to file *
media: ivtv: Fix invalid access to file *
drivers/media/pci/cx18/cx18-driver.c | 9 +++------
drivers/media/pci/cx18/cx18-ioctl.c | 30 +++++++++++++++++++-----------
drivers/media/pci/cx18/cx18-ioctl.h | 8 +++++---
drivers/media/pci/ivtv/ivtv-driver.c | 11 ++++-------
drivers/media/pci/ivtv/ivtv-ioctl.c | 22 +++++++++++++++++-----
drivers/media/pci/ivtv/ivtv-ioctl.h | 6 ++++--
6 files changed, 52 insertions(+), 34 deletions(-)
---
base-commit: a75b8d198c55e9eb5feb6f6e155496305caba2dc
change-id: 20250818-cx18-v4l2-fh-7eaa6199fdde
Best regards,
--
Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 88daf2f448aad05a2e6df738d66fe8b0cf85cee0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101324-echo-cardinal-3043@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 88daf2f448aad05a2e6df738d66fe8b0cf85cee0 Mon Sep 17 00:00:00 2001
From: Matvey Kovalev <matvey.kovalev(a)ispras.ru>
Date: Thu, 25 Sep 2025 15:12:34 +0300
Subject: [PATCH] ksmbd: fix error code overwriting in
smb2_get_info_filesystem()
If the client doesn't negotiate SMB3.1.1 POSIX Extensions,
the proper error code won't be returned, because it is overwritten.
Return the error immediately.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: e2f34481b24db ("cifsd: add server-side procedures for SMB3")
Cc: stable(a)vger.kernel.org
Signed-off-by: Matvey Kovalev <matvey.kovalev(a)ispras.ru>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 0c069eff80b7..133ca5beb7cf 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -5629,7 +5629,8 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
if (!work->tcon->posix_extensions) {
pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
- rc = -EOPNOTSUPP;
+ path_put(&path);
+ return -EOPNOTSUPP;
} else {
info = (struct filesystem_posix_info *)(rsp->Buffer);
info->OptimalTransferSize = cpu_to_le32(stfs.f_bsize);
When migrating a balloon page, we first deflate the old page to then
inflate the new page.
However, if inflating the new page succeeded, we effectively deflated
the old page, reducing the balloon size.
In that case, the migration actually worked: it is similar to migrating
and immediately deflating the new page. The old page will be freed back to
the buddy.
Right now, the core will leave the page marked as isolated (as
we returned an error). When later trying to put back that page, we will
run into the WARN_ON_ONCE() in balloon_page_putback().
That handling was changed in commit 3544c4faccb8 ("mm/balloon_compaction:
stop using __ClearPageMovable()"); before that change, we would have
tolerated that way of handling it.
To fix it, let's just return 0 in that case, making the core effectively
just clear the "isolated" flag + freeing it back to the buddy as if the
migration succeeded. Note that the new page will also get freed when the
core puts the last reference.
Note that this also makes things more consistent: we will no longer
unisolate the page in the balloon driver while keeping it marked as
isolated in the migration core.
This was found by code inspection.
Fixes: 3544c4faccb8 ("mm/balloon_compaction: stop using __ClearPageMovable()")
Cc: <stable(a)vger.kernel.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Jerrin Shaji George <jerrin.shaji-george(a)broadcom.com>
Cc: Broadcom internal kernel review list <bcm-kernel-feedback-list(a)broadcom.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
I have no easy way to test this, and I assume it happens very very rarely
(inflation during migration failing).
I would prefer this to go through the MM tree, as I have some follow-up
balloon_compaction reworks that also mess with this code.
---
drivers/misc/vmw_balloon.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 6df51ee8db621..cc1d18b3df5ca 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -1737,7 +1737,7 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
{
unsigned long status, flags;
struct vmballoon *b;
- int ret;
+ int ret = 0;
b = container_of(b_dev_info, struct vmballoon, b_dev_info);
@@ -1796,17 +1796,15 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
* A failure happened. While we can deflate the page we just
* inflated, this deflation can also encounter an error. Instead
* we will decrease the size of the balloon to reflect the
- * change and report failure.
+ * change.
*/
atomic64_dec(&b->size);
- ret = -EBUSY;
} else {
/*
* Success. Take a reference for the page, and we will add it to
* the list after acquiring the lock.
*/
get_page(newpage);
- ret = 0;
}
/* Update the balloon list under the @pages_lock */
@@ -1817,7 +1815,7 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
* If we succeed just insert it to the list and update the statistics
* under the lock.
*/
- if (!ret) {
+ if (status == VMW_BALLOON_SUCCESS) {
balloon_page_insert(&b->b_dev_info, newpage);
__count_vm_event(BALLOON_MIGRATE);
}
base-commit: 1c58c31dc83e39f4790ae778626b6b8b59bc0db8
--
2.51.0
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 64cf7d058a005c5c31eb8a0b741f35dc12915d18
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101354-eject-groove-319c@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 64cf7d058a005c5c31eb8a0b741f35dc12915d18 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Wed, 8 Oct 2025 12:45:10 -0400
Subject: [PATCH] tracing: Have trace_marker use per-cpu data to read user
space
It was reported that using __copy_from_user_inatomic() can actually
schedule, which is bad when preemption is disabled. There is logic to
check whether in_atomic() is set, but that is a nop when the kernel is
configured with PREEMPT_NONE. Due to page faulting, the code could
schedule with preemption disabled.
Link: https://lore.kernel.org/all/20250819105152.2766363-1-luogengkun@huaweicloud…
The solution was to change the __copy_from_user_inatomic() to
copy_from_user_nofault(). But then it was reported that this caused a
regression in Android. There's several applications writing into
trace_marker() in Android, but now instead of showing the expected data,
it is showing:
tracing_mark_write: <faulted>
After reverting the conversion to copy_from_user_nofault(), Android was
able to get the data again.
Writes to the trace_marker file are a way to efficiently and quickly enter data
into the Linux tracing buffer. It takes no locks and was designed to be as
non-intrusive as possible. This means it cannot allocate memory, and must
use pre-allocated data.
A method that is actively being worked on to have faultable system call
tracepoints read user space data is to allocate per CPU buffers, and use
them in the callback. The method uses a technique similar to seqcount.
That is something like this:
preempt_disable();
cpu = smp_processor_id();
buffer = per_cpu_ptr(&pre_allocated_cpu_buffers, cpu);
do {
cnt = nr_context_switches_cpu(cpu);
migrate_disable();
preempt_enable();
ret = copy_from_user(buffer, ptr, size);
preempt_disable();
migrate_enable();
} while (!ret && cnt != nr_context_switches_cpu(cpu));
if (!ret)
ring_buffer_write(buffer);
preempt_enable();
It's a little more involved than that, but the above is the basic logic.
The idea is to acquire the current CPU buffer, disable migration, and then
enable preemption. At this moment, it can safely use copy_from_user().
After reading the data from user space, it disables preemption again. It
then checks to see if there was any new scheduling on this CPU. If there
was, it must assume that the buffer was corrupted by another task. If
there wasn't, then the buffer is still valid as only tasks in preemptable
context can write to this buffer and only those that are running on the
CPU.
By using this method, where trace_marker open allocates the per CPU
buffers, trace_marker writes can access user space and even fault it in,
without having to allocate or take any locks of its own.
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Luo Gengkun <luogengkun(a)huaweicloud.com>
Cc: Wattson CI <wattson-external(a)google.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Link: https://lore.kernel.org/20251008124510.6dba541a@gandalf.local.home
Fixes: 3d62ab32df065 ("tracing: Fix tracing_marker may trigger page fault during preempt_disable")
Reported-by: Runping Lai <runpinglai(a)google.com>
Tested-by: Runping Lai <runpinglai(a)google.com>
Closes: https://lore.kernel.org/linux-trace-kernel/20251007003417.3470979-2-runping…
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b3c94fbaf002..0fd582651293 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4791,12 +4791,6 @@ int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
return single_release(inode, filp);
}
-static int tracing_mark_open(struct inode *inode, struct file *filp)
-{
- stream_open(inode, filp);
- return tracing_open_generic_tr(inode, filp);
-}
-
static int tracing_release(struct inode *inode, struct file *file)
{
struct trace_array *tr = inode->i_private;
@@ -7163,7 +7157,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
#define TRACE_MARKER_MAX_SIZE 4096
-static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
+static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
size_t cnt, unsigned long ip)
{
struct ring_buffer_event *event;
@@ -7173,20 +7167,11 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user
int meta_size;
ssize_t written;
size_t size;
- int len;
-
-/* Used in tracing_mark_raw_write() as well */
-#define FAULTED_STR "<faulted>"
-#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
again:
size = cnt + meta_size;
- /* If less than "<faulted>", then make sure we can still add that */
- if (cnt < FAULTED_SIZE)
- size += FAULTED_SIZE - cnt;
-
buffer = tr->array_buffer.buffer;
event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
tracing_gen_ctx());
@@ -7196,9 +7181,6 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user
* make it smaller and try again.
*/
if (size > ring_buffer_max_event_size(buffer)) {
- /* cnt < FAULTED size should never be bigger than max */
- if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
- return -EBADF;
cnt = ring_buffer_max_event_size(buffer) - meta_size;
/* The above should only happen once */
if (WARN_ON_ONCE(cnt + meta_size == size))
@@ -7212,14 +7194,8 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user
entry = ring_buffer_event_data(event);
entry->ip = ip;
-
- len = copy_from_user_nofault(&entry->buf, ubuf, cnt);
- if (len) {
- memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
- cnt = FAULTED_SIZE;
- written = -EFAULT;
- } else
- written = cnt;
+ memcpy(&entry->buf, buf, cnt);
+ written = cnt;
if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
/* do not add \n before testing triggers, but add \0 */
@@ -7243,6 +7219,169 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user
return written;
}
+struct trace_user_buf {
+ char *buf;
+};
+
+struct trace_user_buf_info {
+ struct trace_user_buf __percpu *tbuf;
+ int ref;
+};
+
+
+static DEFINE_MUTEX(trace_user_buffer_mutex);
+static struct trace_user_buf_info *trace_user_buffer;
+
+static void trace_user_fault_buffer_free(struct trace_user_buf_info *tinfo)
+{
+ char *buf;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
+ kfree(buf);
+ }
+ free_percpu(tinfo->tbuf);
+ kfree(tinfo);
+}
+
+static int trace_user_fault_buffer_enable(void)
+{
+ struct trace_user_buf_info *tinfo;
+ char *buf;
+ int cpu;
+
+ guard(mutex)(&trace_user_buffer_mutex);
+
+ if (trace_user_buffer) {
+ trace_user_buffer->ref++;
+ return 0;
+ }
+
+ tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+ if (!tinfo)
+ return -ENOMEM;
+
+ tinfo->tbuf = alloc_percpu(struct trace_user_buf);
+ if (!tinfo->tbuf) {
+ kfree(tinfo);
+ return -ENOMEM;
+ }
+
+ tinfo->ref = 1;
+
+ /* Clear each buffer in case of error */
+ for_each_possible_cpu(cpu) {
+ per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
+ }
+
+ for_each_possible_cpu(cpu) {
+ buf = kmalloc_node(TRACE_MARKER_MAX_SIZE, GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!buf) {
+ trace_user_fault_buffer_free(tinfo);
+ return -ENOMEM;
+ }
+ per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
+ }
+
+ trace_user_buffer = tinfo;
+
+ return 0;
+}
+
+static void trace_user_fault_buffer_disable(void)
+{
+ struct trace_user_buf_info *tinfo;
+
+ guard(mutex)(&trace_user_buffer_mutex);
+
+ tinfo = trace_user_buffer;
+
+ if (WARN_ON_ONCE(!tinfo))
+ return;
+
+ if (--tinfo->ref)
+ return;
+
+ trace_user_fault_buffer_free(tinfo);
+ trace_user_buffer = NULL;
+}
+
+/* Must be called with preemption disabled */
+static char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
+ const char __user *ptr, size_t size,
+ size_t *read_size)
+{
+ int cpu = smp_processor_id();
+ char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
+ unsigned int cnt;
+ int trys = 0;
+ int ret;
+
+ if (size > TRACE_MARKER_MAX_SIZE)
+ size = TRACE_MARKER_MAX_SIZE;
+ *read_size = 0;
+
+ /*
+ * This acts similar to a seqcount. The per CPU context switches are
+ * recorded, migration is disabled and preemption is enabled. The
+ * read of the user space memory is copied into the per CPU buffer.
+ * Preemption is disabled again, and if the per CPU context switches count
+ * is still the same, it means the buffer has not been corrupted.
+ * If the count is different, it is assumed the buffer is corrupted
+ * and reading must be tried again.
+ */
+
+ do {
+ /*
+ * If for some reason, copy_from_user() always causes a context
+ * switch, this would then cause an infinite loop.
+ * If this task is preempted by another user space task, it
+ * will cause this task to try again. But just in case something
+ * changes where the copying from user space causes another task
+ * to run, prevent this from going into an infinite loop.
+ * 100 tries should be plenty.
+ */
+ if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
+ return NULL;
+
+ /* Read the current CPU context switch counter */
+ cnt = nr_context_switches_cpu(cpu);
+
+ /*
+ * Preemption is going to be enabled, but this task must
+ * remain on this CPU.
+ */
+ migrate_disable();
+
+ /*
+	 * Now preemption is being enabled and another task can come in
+ * and use the same buffer and corrupt our data.
+ */
+ preempt_enable_notrace();
+
+ ret = __copy_from_user(buffer, ptr, size);
+
+ preempt_disable_notrace();
+ migrate_enable();
+
+ /* if it faulted, no need to test if the buffer was corrupted */
+ if (ret)
+ return NULL;
+
+ /*
+ * Preemption is disabled again, now check the per CPU context
+ * switch counter. If it doesn't match, then another user space
+ * process may have schedule in and corrupted our buffer. In that
+ * case the copying must be retried.
+ */
+ } while (nr_context_switches_cpu(cpu) != cnt);
+
+ *read_size = size;
+ return buffer;
+}
+
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
@@ -7250,6 +7389,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
struct trace_array *tr = filp->private_data;
ssize_t written = -ENODEV;
unsigned long ip;
+ size_t size;
+ char *buf;
if (tracing_disabled)
return -EINVAL;
@@ -7263,6 +7404,16 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (cnt > TRACE_MARKER_MAX_SIZE)
cnt = TRACE_MARKER_MAX_SIZE;
+ /* Must have preemption disabled while having access to the buffer */
+ guard(preempt_notrace)();
+
+ buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, &size);
+ if (!buf)
+ return -EFAULT;
+
+ if (cnt > size)
+ cnt = size;
+
/* The selftests expect this function to be the IP address */
ip = _THIS_IP_;
@@ -7270,32 +7421,27 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (tr == &global_trace) {
guard(rcu)();
list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
- written = write_marker_to_buffer(tr, ubuf, cnt, ip);
+ written = write_marker_to_buffer(tr, buf, cnt, ip);
if (written < 0)
break;
}
} else {
- written = write_marker_to_buffer(tr, ubuf, cnt, ip);
+ written = write_marker_to_buffer(tr, buf, cnt, ip);
}
return written;
}
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
- const char __user *ubuf, size_t cnt)
+ const char *buf, size_t cnt)
{
struct ring_buffer_event *event;
struct trace_buffer *buffer;
struct raw_data_entry *entry;
ssize_t written;
- int size;
- int len;
-
-#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+ size_t size;
size = sizeof(*entry) + cnt;
- if (cnt < FAULT_SIZE_ID)
- size += FAULT_SIZE_ID - cnt;
buffer = tr->array_buffer.buffer;
@@ -7309,14 +7455,8 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
return -EBADF;
entry = ring_buffer_event_data(event);
-
- len = copy_from_user_nofault(&entry->id, ubuf, cnt);
- if (len) {
- entry->id = -1;
- memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
- written = -EFAULT;
- } else
- written = cnt;
+ memcpy(&entry->id, buf, cnt);
+ written = cnt;
__buffer_unlock_commit(buffer, event);
@@ -7329,8 +7469,8 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
{
struct trace_array *tr = filp->private_data;
ssize_t written = -ENODEV;
-
-#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+ size_t size;
+ char *buf;
if (tracing_disabled)
return -EINVAL;
@@ -7342,6 +7482,17 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
if (cnt < sizeof(unsigned int))
return -EINVAL;
+ /* Must have preemption disabled while having access to the buffer */
+ guard(preempt_notrace)();
+
+ buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, &size);
+ if (!buf)
+ return -EFAULT;
+
+ /* raw write is all or nothing */
+ if (cnt > size)
+ return -EINVAL;
+
/* The global trace_marker_raw can go to multiple instances */
if (tr == &global_trace) {
guard(rcu)();
@@ -7357,6 +7508,27 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
return written;
}
+static int tracing_mark_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+
+ ret = trace_user_fault_buffer_enable();
+ if (ret < 0)
+ return ret;
+
+ stream_open(inode, filp);
+ ret = tracing_open_generic_tr(inode, filp);
+ if (ret < 0)
+ trace_user_fault_buffer_disable();
+ return ret;
+}
+
+static int tracing_mark_release(struct inode *inode, struct file *file)
+{
+ trace_user_fault_buffer_disable();
+ return tracing_release_generic_tr(inode, file);
+}
+
static int tracing_clock_show(struct seq_file *m, void *v)
{
struct trace_array *tr = m->private;
@@ -7764,13 +7936,13 @@ static const struct file_operations tracing_free_buffer_fops = {
static const struct file_operations tracing_mark_fops = {
.open = tracing_mark_open,
.write = tracing_mark_write,
- .release = tracing_release_generic_tr,
+ .release = tracing_mark_release,
};
static const struct file_operations tracing_mark_raw_fops = {
.open = tracing_mark_open,
.write = tracing_mark_raw_write,
- .release = tracing_release_generic_tr,
+ .release = tracing_mark_release,
};
static const struct file_operations trace_clock_fops = {