From: Benjamin Berg <benjamin.berg(a)intel.com>
The normal timer mechanism assume that timeout further in the future
need a lower accuracy. As an example, the granularity for a timer
scheduled 4096 ms in the future on a 1000 Hz system is already 512 ms.
This granularity is perfectly sufficient for e.g. timeouts, but there
are other types of events that will happen at a future point in time and
require a higher accuracy.
Add a new wiphy_hrtimer_work type that uses an hrtimer internally. The
API is almost identical to the existing wiphy_delayed_work and it can be
used as a drop-in replacement after minor adjustments. The work will be
scheduled relative to the current time with a slack of 1 millisecond.
CC: stable(a)vger.kernel.org # 6.4+
Signed-off-by: Benjamin Berg <benjamin.berg(a)intel.com>
Reviewed-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit(a)intel.com>
---
include/net/cfg80211.h | 78 ++++++++++++++++++++++++++++++++++++++++++
net/wireless/core.c | 56 ++++++++++++++++++++++++++++++
net/wireless/trace.h | 21 ++++++++++++
3 files changed, 155 insertions(+)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 781624f5913a..820e299f06b5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6435,6 +6435,11 @@ static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork,
* after wiphy_lock() was called. Therefore, wiphy_cancel_work() can
* use just cancel_work() instead of cancel_work_sync(), it requires
* being in a section protected by wiphy_lock().
+ *
+ * Note that these are scheduled with a timer where the accuracy
+ * becomes less the longer in the future the scheduled timer is. Use
+ * wiphy_hrtimer_work_queue() if the timer must be not be late by more
+ * than approximately 10 percent.
*/
void wiphy_delayed_work_queue(struct wiphy *wiphy,
struct wiphy_delayed_work *dwork,
@@ -6506,6 +6511,79 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
bool wiphy_delayed_work_pending(struct wiphy *wiphy,
struct wiphy_delayed_work *dwork);
+struct wiphy_hrtimer_work {
+ struct wiphy_work work;
+ struct wiphy *wiphy;
+ struct hrtimer timer;
+};
+
+enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t);
+
+static inline void wiphy_hrtimer_work_init(struct wiphy_hrtimer_work *hrwork,
+ wiphy_work_func_t func)
+{
+ hrtimer_setup(&hrwork->timer, wiphy_hrtimer_work_timer,
+ CLOCK_BOOTTIME, HRTIMER_MODE_REL);
+ wiphy_work_init(&hrwork->work, func);
+}
+
+/**
+ * wiphy_hrtimer_work_queue - queue hrtimer work for the wiphy
+ * @wiphy: the wiphy to queue for
+ * @hrwork: the high resolution timer worker
+ * @delay: the delay given as a ktime_t
+ *
+ * Please refer to wiphy_delayed_work_queue(). The difference is that
+ * the hrtimer work uses a high resolution timer for scheduling. This
+ * may be needed if timeouts might be scheduled further in the future
+ * and the accuracy of the normal timer is not sufficient.
+ *
+ * Expect a delay of a few milliseconds as the timer is scheduled
+ * with some slack and some more time may pass between queueing the
+ * work and its start.
+ */
+void wiphy_hrtimer_work_queue(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork,
+ ktime_t delay);
+
+/**
+ * wiphy_hrtimer_work_cancel - cancel previously queued hrtimer work
+ * @wiphy: the wiphy, for debug purposes
+ * @hrtimer: the hrtimer work to cancel
+ *
+ * Cancel the work *without* waiting for it, this assumes being
+ * called under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_hrtimer_work_cancel(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrtimer);
+
+/**
+ * wiphy_hrtimer_work_flush - flush previously queued hrtimer work
+ * @wiphy: the wiphy, for debug purposes
+ * @hrwork: the hrtimer work to flush
+ *
+ * Flush the work (i.e. run it if pending). This must be called
+ * under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_hrtimer_work_flush(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork);
+
+/**
+ * wiphy_hrtimer_work_pending - Find out whether a wiphy hrtimer
+ * work item is currently pending.
+ *
+ * @wiphy: the wiphy, for debug purposes
+ * @hrwork: the hrtimer work in question
+ *
+ * Return: true if timer is pending, false otherwise
+ *
+ * Please refer to the wiphy_delayed_work_pending() documentation as
+ * this is the equivalent function for hrtimer based delayed work
+ * items.
+ */
+bool wiphy_hrtimer_work_pending(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork);
+
/**
* enum ieee80211_ap_reg_power - regulatory power for an Access Point
*
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 797f9f2004a6..54a34d8d356e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1787,6 +1787,62 @@ bool wiphy_delayed_work_pending(struct wiphy *wiphy,
}
EXPORT_SYMBOL_GPL(wiphy_delayed_work_pending);
+enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t)
+{
+ struct wiphy_hrtimer_work *hrwork =
+ container_of(t, struct wiphy_hrtimer_work, timer);
+
+ wiphy_work_queue(hrwork->wiphy, &hrwork->work);
+
+ return HRTIMER_NORESTART;
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_timer);
+
+void wiphy_hrtimer_work_queue(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork,
+ ktime_t delay)
+{
+ trace_wiphy_hrtimer_work_queue(wiphy, &hrwork->work, delay);
+
+ if (!delay) {
+ hrtimer_cancel(&hrwork->timer);
+ wiphy_work_queue(wiphy, &hrwork->work);
+ return;
+ }
+
+ hrwork->wiphy = wiphy;
+ hrtimer_start_range_ns(&hrwork->timer, delay,
+ 1000 * NSEC_PER_USEC, HRTIMER_MODE_REL);
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_queue);
+
+void wiphy_hrtimer_work_cancel(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork)
+{
+ lockdep_assert_held(&wiphy->mtx);
+
+ hrtimer_cancel(&hrwork->timer);
+ wiphy_work_cancel(wiphy, &hrwork->work);
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_cancel);
+
+void wiphy_hrtimer_work_flush(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork)
+{
+ lockdep_assert_held(&wiphy->mtx);
+
+ hrtimer_cancel(&hrwork->timer);
+ wiphy_work_flush(wiphy, &hrwork->work);
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_flush);
+
+bool wiphy_hrtimer_work_pending(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork)
+{
+ return hrtimer_is_queued(&hrwork->timer);
+}
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_pending);
+
static int __init cfg80211_init(void)
{
int err;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 8a4c34112eb5..2b71f1d867a0 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -304,6 +304,27 @@ TRACE_EVENT(wiphy_delayed_work_queue,
__entry->delay)
);
+TRACE_EVENT(wiphy_hrtimer_work_queue,
+ TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work,
+ ktime_t delay),
+ TP_ARGS(wiphy, work, delay),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(void *, instance)
+ __field(void *, func)
+ __field(ktime_t, delay)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->instance = work;
+ __entry->func = work->func;
+ __entry->delay = delay;
+ ),
+ TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%llu",
+ WIPHY_PR_ARG, __entry->instance, __entry->func,
+ __entry->delay)
+);
+
TRACE_EVENT(wiphy_work_worker_start,
TP_PROTO(struct wiphy *wiphy),
TP_ARGS(wiphy),
--
2.34.1
In omap_usb2_probe(), of_parse_phandle() returns a device node with its
reference count incremented. The caller is responsible for releasing this
reference when the node is no longer needed.
Add of_node_put(control_node) after usage to fix the
reference leak.
Found via static analysis.
Fixes: 478b6c7436c2 ("usb: phy: omap-usb2: Don't use omap_get_control_dev()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/phy/ti/phy-omap-usb2.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c
index 1eb252604441..660df3181e4f 100644
--- a/drivers/phy/ti/phy-omap-usb2.c
+++ b/drivers/phy/ti/phy-omap-usb2.c
@@ -426,6 +426,7 @@ static int omap_usb2_probe(struct platform_device *pdev)
}
control_pdev = of_find_device_by_node(control_node);
+ of_node_put(control_node);
if (!control_pdev) {
dev_err(&pdev->dev, "Failed to get control device\n");
return -EINVAL;
--
2.39.5 (Apple Git-154)
The driver_find_device_by_of_node() function calls driver_find_device
and returns a device with its reference count incremented.
Add the missing put_device() call to
release this reference after the device is used.
Found via static analysis.
Fixes: 0b7c6075022c ("soc: samsung: exynos-pmu: Add regmap support for SoCs that protect PMU regs")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/soc/samsung/exynos-pmu.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/soc/samsung/exynos-pmu.c b/drivers/soc/samsung/exynos-pmu.c
index 22c50ca2aa79..a53c1f882e1a 100644
--- a/drivers/soc/samsung/exynos-pmu.c
+++ b/drivers/soc/samsung/exynos-pmu.c
@@ -346,6 +346,7 @@ struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np,
if (!dev)
return ERR_PTR(-EPROBE_DEFER);
+ put_device(dev);
return syscon_node_to_regmap(pmu_np);
}
EXPORT_SYMBOL_GPL(exynos_get_pmu_regmap_by_phandle);
--
2.39.5 (Apple Git-154)
There are two reference count leaks in this driver:
1. In nforce2_fsb_read(): pci_get_subsys() increases the reference count
of the PCI device, but pci_dev_put() is never called to release it,
thus leaking the reference.
2. In nforce2_detect_chipset(): pci_get_subsys() gets a reference to the
nforce2_dev which is stored in a global variable, but the reference
is never released when the module is unloaded.
Fix both by:
- Adding pci_dev_put(nforce2_sub5) in nforce2_fsb_read() after reading
the configuration.
- Adding pci_dev_put(nforce2_dev) in nforce2_exit() to release the
global device reference.
Found via static analysis.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/cpufreq/cpufreq-nforce2.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index fedad1081973..fbbbe501cf2d 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -145,6 +145,8 @@ static unsigned int nforce2_fsb_read(int bootfsb)
pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
fsb /= 1000000;
+ pci_dev_put(nforce2_sub5);
+
/* Check if PLL register is already set */
pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
@@ -426,6 +428,7 @@ static int __init nforce2_init(void)
static void __exit nforce2_exit(void)
{
cpufreq_unregister_driver(&nforce2_driver);
+ pci_dev_put(nforce2_dev);
}
module_init(nforce2_init);
--
2.39.5 (Apple Git-154)
F2FS can mount filesystems with corrupted directory depth values that
get runtime-clamped to MAX_DIR_HASH_DEPTH. When RENAME_WHITEOUT
operations are performed on such directories, f2fs_rename performs
directory modifications (updating target entry and deleting source
entry) before attempting to add the whiteout entry via f2fs_add_link.
If f2fs_add_link fails due to the corrupted directory structure, the
function returns an error to VFS, but the partial directory
modifications have already been committed to disk. VFS assumes the
entire rename operation failed and does not update the dentry cache,
leaving stale mappings.
In the error path, VFS does not call d_move() to update the dentry
cache. This results in new_dentry still pointing to the old inode
(new_inode) which has already had its i_nlink decremented to zero.
The stale cache causes subsequent operations to incorrectly reference
the freed inode.
This causes subsequent operations to use cached dentry information that
no longer matches the on-disk state. When a second rename targets the
same entry, VFS attempts to decrement i_nlink on the stale inode, which
may already have i_nlink=0, triggering a WARNING in drop_nlink().
Example sequence:
1. First rename (RENAME_WHITEOUT): file2 → file1
- f2fs updates file1 entry on disk (points to inode 8)
- f2fs deletes file2 entry on disk
- f2fs_add_link(whiteout) fails (corrupted directory)
- Returns error to VFS
- VFS does not call d_move() due to error
- VFS cache still has: file1 → inode 7 (stale!)
- inode 7 has i_nlink=0 (already decremented)
2. Second rename: file3 → file1
- VFS uses stale cache: file1 → inode 7
- Tries to drop_nlink on inode 7 (i_nlink already 0)
- WARNING in drop_nlink()
Fix this by explicitly invalidating old_dentry and new_dentry when
f2fs_add_link fails during whiteout creation. This forces VFS to
refresh from disk on subsequent operations, ensuring cache consistency
even when the rename partially succeeds.
Reproducer:
1. Mount F2FS image with corrupted i_current_depth
2. renameat2(file2, file1, RENAME_WHITEOUT)
3. renameat2(file3, file1, 0)
4. System triggers WARNING in drop_nlink()
Fixes: 7e01e7ad746b ("f2fs: support RENAME_WHITEOUT")
Reported-by: syzbot+632cf32276a9a564188d(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=632cf32276a9a564188d
Suggested-by: Chao Yu <chao(a)kernel.org>
Link: https://lore.kernel.org/all/20251022233349.102728-1-kartikey406@gmail.com/ [v1]
Cc: stable(a)vger.kernel.org
Signed-off-by: Deepanshu Kartikey <kartikey406(a)gmail.com>
---
Changes in v2:
- Added detailed explanation about VFS not calling d_move() in error path,
resulting in new_dentry still pointing to inode with zeroed i_nlink
(suggested by Chao Yu)
- Added Fixes tag pointing to commit 7e01e7ad746b
- Added Cc: stable(a)vger.kernel.org for backporting to stable kernels
---
fs/f2fs/namei.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index b882771e4699..712479b7b93d 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1053,9 +1053,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (whiteout) {
set_inode_flag(whiteout, FI_INC_LINK);
err = f2fs_add_link(old_dentry, whiteout);
- if (err)
+ if (err) {
+ d_invalidate(old_dentry);
+ d_invalidate(new_dentry);
goto put_out_dir;
-
+ }
spin_lock(&whiteout->i_lock);
whiteout->i_state &= ~I_LINKABLE;
spin_unlock(&whiteout->i_lock);
--
2.43.0
The qm_get_qos_value() function calls bus_find_device_by_name() which
increases the device reference count, but fails to call put_device()
to balance the reference count and lead to a device reference leak.
Add put_device() calls in both the error path and success path to
properly balance the reference count.
Found via static analysis.
Fixes: 22d7a6c39cab ("crypto: hisilicon/qm - add pci bdf number check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/crypto/hisilicon/qm.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index a5b96adf2d1e..3b391a146635 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3871,10 +3871,12 @@ static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf,
pdev = container_of(dev, struct pci_dev, dev);
if (pci_physfn(pdev) != qm->pdev) {
pci_err(qm->pdev, "the pdev input does not match the pf!\n");
+ put_device(dev);
return -EINVAL;
}
*fun_index = pdev->devfn;
+ put_device(dev);
return 0;
}
--
2.39.5 (Apple Git-154)
dclk_vop2_src currently has CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT
flags set, which is vastly different than dclk_vop0_src or dclk_vop1_src,
which have none of those.
With these flags in dclk_vop2_src, actually setting the clock then results
in a lot of other peripherals breaking, because setting the rate results
in the PLL source getting changed:
[ 14.898718] clk_core_set_rate_nolock: setting rate for dclk_vop2 to 152840000
[ 15.155017] clk_change_rate: setting rate for pll_gpll to 1680000000
[ clk adjusting every gpll user ]
This includes possibly the other vops, i2s, spdif and even the uarts.
Among other possible things, this breaks the uart console on a board
I use. Sometimes it recovers later on, but there will be a big block
of garbled output for a while at least.
Shared PLLs should not be changed by individual users, so drop these
flags from dclk_vop2_src and make the flags the same as on dclk_vop0
and dclk_vop1.
Fixes: f1c506d152ff ("clk: rockchip: add clock controller for the RK3588")
Cc: stable(a)vger.kernel.org
Signed-off-by: Heiko Stuebner <heiko(a)sntech.de>
---
drivers/clk/rockchip/clk-rk3588.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c
index 1694223f4f84..cf83242d1726 100644
--- a/drivers/clk/rockchip/clk-rk3588.c
+++ b/drivers/clk/rockchip/clk-rk3588.c
@@ -2094,7 +2094,7 @@ static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
COMPOSITE(DCLK_VOP1_SRC, "dclk_vop1_src", gpll_cpll_v0pll_aupll_p, 0,
RK3588_CLKSEL_CON(111), 14, 2, MFLAGS, 9, 5, DFLAGS,
RK3588_CLKGATE_CON(52), 11, GFLAGS),
- COMPOSITE(DCLK_VOP2_SRC, "dclk_vop2_src", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+ COMPOSITE(DCLK_VOP2_SRC, "dclk_vop2_src", gpll_cpll_v0pll_aupll_p, 0,
RK3588_CLKSEL_CON(112), 5, 2, MFLAGS, 0, 5, DFLAGS,
RK3588_CLKGATE_CON(52), 12, GFLAGS),
COMPOSITE_NODIV(DCLK_VOP0, "dclk_vop0", dclk_vop0_p,
--
2.47.2
From: Sven Eckelmann <sven(a)narfation.org>
Trying to dump the originators or the neighbors via netlink for a meshif
with an inactive primary interface is not allowed. The dump functions were
checking this correctly but they didn't handle non-existing primary
interfaces and existing _inactive_ interfaces differently.
(Primary) batadv_hard_ifaces hold a references to a net_device. And
accessing them is only allowed when either being in a RCU/spinlock
protected section or when holding a valid reference to them. The netlink
dump functions use the latter.
But because the missing specific error handling for inactive primary
interfaces, the reference was never dropped. This reference counting error
was only detected when the interface should have been removed from the
system:
unregister_netdevice: waiting for batadv_slave_0 to become free. Usage count = 2
Cc: stable(a)vger.kernel.org
Fixes: 6ecc4fd6c2f4 ("batman-adv: netlink: reduce duplicate code by returning interfaces")
Reported-by: syzbot+881d65229ca4f9ae8c84(a)syzkaller.appspotmail.com
Reported-by: Tetsuo Handa <penguin-kernel(a)i-love.sakura.ne.jp>
Signed-off-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: Simon Wunderlich <sw(a)simonwunderlich.de>
---
net/batman-adv/originator.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index a464ff96b9291..ed89d7fd1e7f4 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -764,11 +764,16 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
bat_priv = netdev_priv(mesh_iface);
primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ if (!primary_if) {
ret = -ENOENT;
goto out_put_mesh_iface;
}
+ if (primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out_put_primary_if;
+ }
+
hard_iface = batadv_netlink_get_hardif(bat_priv, cb);
if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) {
ret = PTR_ERR(hard_iface);
@@ -1333,11 +1338,16 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
bat_priv = netdev_priv(mesh_iface);
primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ if (!primary_if) {
ret = -ENOENT;
goto out_put_mesh_iface;
}
+ if (primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out_put_primary_if;
+ }
+
hard_iface = batadv_netlink_get_hardif(bat_priv, cb);
if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) {
ret = PTR_ERR(hard_iface);
--
2.47.3
The current code directly overwrites the scratch pointer with the
return value of kvrealloc(). If kvrealloc() fails and returns NULL,
the original buffer becomes unreachable, causing a memory leak.
Fix this by using a temporary variable to store kvrealloc()'s return
value and only update the scratch pointer on success.
Found via static anlaysis and this is similar to commit 42378a9ca553
("bpf, verifier: Fix memory leak in array reallocation for stack state")
Fixes: be17c0df6795 ("riscv: module: Optimize PLT/GOT entry counting")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
arch/riscv/kernel/module-sections.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c
index 75551ac6504c..1675cbad8619 100644
--- a/arch/riscv/kernel/module-sections.c
+++ b/arch/riscv/kernel/module-sections.c
@@ -119,6 +119,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned int num_plts = 0;
unsigned int num_gots = 0;
Elf_Rela *scratch = NULL;
+ Elf_Rela *new_scratch;
size_t scratch_size = 0;
int i;
@@ -168,9 +169,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch);
if (scratch_size_needed > scratch_size) {
scratch_size = scratch_size_needed;
- scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
- if (!scratch)
+ new_scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
+ if (!new_scratch) {
+ kvfree(scratch);
return -ENOMEM;
+ }
+ scratch = new_scratch;
}
for (size_t j = 0; j < num_relas; j++)
--
2.39.5 (Apple Git-154)
From: ZhangGuoDong <zhangguodong(a)kylinos.cn>
[ Upstream commit 6fced056d2cc8d01b326e6fcfabaacb9850b71a4 ]
Memory leak occurs when ksmbd_vfs_read() fails.
Fix this by adding the missing kvfree().
Co-developed-by: ChenXiaoSong <chenxiaosong(a)kylinos.cn>
Signed-off-by: ChenXiaoSong <chenxiaosong(a)kylinos.cn>
Signed-off-by: ZhangGuoDong <zhangguodong(a)kylinos.cn>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
## **Backport Status: YES**
### **Extensive Analysis**
Based on comprehensive semantic code analysis and repository
examination, this commit **should be backported** to stable kernel
trees.
---
## **1. Code Change Analysis**
The commit adds a single line to fix a memory leak in
`fs/smb/server/smb2pdu.c`:
```c
nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf);
if (nbytes < 0) {
+ kvfree(aux_payload_buf); // <-- Added line
err = nbytes;
goto out;
}
```
**What was wrong:** Memory allocated at line 6821 (`aux_payload_buf =
kvzalloc(ALIGN(length, 8), KSMBD_DEFAULT_GFP)`) was not freed when
`ksmbd_vfs_read()` fails, while all other error paths properly call
`kvfree()`.
---
## **2. Semantic Analysis Tools Used**
### **Tool 1: mcp__semcode__find_function**
- Located `smb2_read()` in `fs/smb/server/smb2pdu.c:6727-6895`
- Confirmed it's an SMB2 protocol handler (169 lines, 24 function calls)
- Return type: `int` (returns error codes)
### **Tool 2: mcp__semcode__find_callers**
- Result: No direct function callers
- However, cross-referenced with `smb2ops.c:183` showing `smb2_read` is
registered as a handler: `[SMB2_READ_HE] = { .proc = smb2_read }`
- **Conclusion:** This is a protocol handler invoked by the SMB2 message
dispatcher, meaning it's **directly user-triggerable** via network
requests
### **Tool 3: mcp__semcode__find_calls**
- Analyzed `ksmbd_vfs_read()` dependencies
- Found it can fail with multiple error codes: `-EISDIR`, `-EACCES`,
`-EAGAIN`, plus any errors from `kernel_read()`
- **All of these failure paths trigger the memory leak**
### **Tool 4: git blame & git log**
- Bug introduced: commit `e2f34481b24db2` (2021-03-16) - **4 years
old!**
- Recent modification: commit `06a025448b572c` (2024-11-30) changed
allocation to `ALIGN(length, 8)` but didn't fix the leak
- Found 15+ similar "memory leak" fixes in ksmbd history, indicating
active maintenance
---
## **3. Impact Scope Analysis**
### **User Exposure: CRITICAL**
- **Protocol Handler:** Any SMB client can trigger this by sending SMB2
READ requests
- **Network-facing:** ksmbd is a kernel SMB server exposed to network
clients
- **No authentication required to trigger:** The error path can be
reached even with permission errors
### **Trigger Conditions (from VFS analysis):**
1. **-EISDIR**: Client tries to read a directory
2. **-EACCES**: Permission denied (no FILE_READ_DATA or FILE_EXECUTE
access)
3. **-EAGAIN**: File is locked by another process
4. **kernel_read() failures**: Various VFS/filesystem errors
All of these are **easily triggerable** by malicious or misbehaving
clients.
### **Memory Leak Severity: HIGH**
- **Allocation size:** `ALIGN(length, 8)` where `length` is client-
controlled
- **Maximum per leak:** Up to `SMB3_MAX_IOSIZE` = **8 MB** (from
smb2pdu.h:28)
- **Default size:** `SMB21_DEFAULT_IOSIZE` = **1 MB** (from
smb2pdu.h:25)
- **Attack scenario:** An attacker could repeatedly:
1. Send READ requests for locked files (triggers -EAGAIN)
2. Each failed request leaks up to 8MB
3. 100 requests = 800MB leaked
4. Can exhaust server memory leading to **DoS**
---
## **4. Regression Risk Analysis**
### **Risk Level: VERY LOW**
- **Change size:** Single line addition
- **Operation:** Adding missing cleanup (defensive programming)
- **No behavior change:** Only affects error path that already returns
failure
- **Idempotent:** `kvfree()` is safe to call and simply frees allocated
memory
- **No dependencies:** No API changes or external impact
---
## **5. Stable Tree Compliance**
| Criterion | Status | Evidence |
|-----------|--------|----------|
| Fixes important bug | ✅ YES | Memory leak leading to potential DoS |
| Small and contained | ✅ YES | 1 line change, single function |
| No new features | ✅ YES | Pure bug fix |
| No architectural changes | ✅ YES | No structural modifications |
| Low regression risk | ✅ YES | Minimal, defensive change |
| Confined to subsystem | ✅ YES | Only affects ksmbd |
| User-facing impact | ✅ YES | Affects all ksmbd users |
---
## **6. Comparison with Similar Commits**
Recent ksmbd fixes in the repository show similar patterns:
- `379510a815cb2`: "fix possible refcount leak in smb2_sess_setup()"
- `5929e98f3bb76`: "fix potential double free on smb2_read_pipe() error
path"
- `e523a26c05672`: "fix passing freed memory 'aux_payload_buf'"
- `809d50d36e71b`: "fix memory leak in smb2_lock()"
**All these are typical stable tree backport candidates** with similar
characteristics (small resource leak fixes).
---
## **7. Missing Stable Tags**
**Notable:** The commit lacks explicit stable tree tags:
- ❌ No `Cc: stable(a)vger.kernel.org`
- ❌ No `Fixes:` tag pointing to the original buggy commit
However, this doesn't diminish backport worthiness - the technical
merits clearly justify backporting.
---
## **Conclusion**
This commit is an **excellent candidate for backporting** because:
1. **Security concern:** Remote memory exhaustion DoS vector
2. **Long-standing bug:** Present since ksmbd's introduction (4+ years)
3. **Wide impact:** Affects all ksmbd deployments
4. **Minimal risk:** Single-line defensive fix
5. **Clear bug fix:** Unambiguous resource leak on error path
6. **Pattern consistency:** Matches other successfully backported ksmbd
fixes
**Recommendation:** Backport to all active stable trees where ksmbd
exists (5.15+).
fs/smb/server/smb2pdu.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 287200d7c0764..409b85af82e1c 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -6826,6 +6826,7 @@ int smb2_read(struct ksmbd_work *work)
nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf);
if (nbytes < 0) {
+ kvfree(aux_payload_buf);
err = nbytes;
goto out;
}
--
2.51.0
From: Dave Vasilevsky <dave(a)vasilevsky.ca>
On 32-bit book3s with hash-MMUs, tlb_flush() was a no-op. This was
unnoticed because all uses until recently were for unmaps, and thus
handled by __tlb_remove_tlb_entry().
After commit 4a18419f71cd ("mm/mprotect: use mmu_gather") in kernel 5.19,
tlb_gather_mmu() started being used for mprotect as well. This caused
mprotect to simply not work on these machines:
int *ptr = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
*ptr = 1; // force HPTE to be created
mprotect(ptr, 4096, PROT_READ);
*ptr = 2; // should segfault, but succeeds
Fixed by making tlb_flush() actually flush TLB pages. This finally
agrees with the behaviour of boot3s64's tlb_flush().
Fixes: 4a18419f71cd ("mm/mprotect: use mmu_gather")
Signed-off-by: Dave Vasilevsky <dave(a)vasilevsky.ca>
---
arch/powerpc/include/asm/book3s/32/tlbflush.h | 8 ++++++--
arch/powerpc/mm/book3s32/tlb.c | 6 ++++++
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
index e43534da5207aa3b0cb3c07b78e29b833c141f3f..b8c587ad2ea954f179246a57d6e86e45e91dcfdc 100644
--- a/arch/powerpc/include/asm/book3s/32/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -11,6 +11,7 @@
void hash__flush_tlb_mm(struct mm_struct *mm);
void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+void hash__flush_gather(struct mmu_gather *tlb);
#ifdef CONFIG_SMP
void _tlbie(unsigned long address);
@@ -28,9 +29,12 @@ void _tlbia(void);
*/
static inline void tlb_flush(struct mmu_gather *tlb)
{
- /* 603 needs to flush the whole TLB here since it doesn't use a hash table. */
- if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ hash__flush_gather(tlb);
+ } else {
+ /* 603 needs to flush the whole TLB here since it doesn't use a hash table. */
_tlbia();
+ }
}
static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c
index 9ad6b56bfec96e989b96f027d075ad5812500854..3da95ecfbbb296303082e378425e92a5fbdbfac8 100644
--- a/arch/powerpc/mm/book3s32/tlb.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -105,3 +105,9 @@ void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
}
EXPORT_SYMBOL(hash__flush_tlb_page);
+
+void hash__flush_gather(struct mmu_gather *tlb)
+{
+ hash__flush_range(tlb->mm, tlb->start, tlb->end);
+}
+EXPORT_SYMBOL(hash__flush_gather);
---
base-commit: dcb6fa37fd7bc9c3d2b066329b0d27dedf8becaa
change-id: 20251027-vasi-mprotect-g3-f8f5278d4140
Best regards,
--
Dave Vasilevsky <dave(a)vasilevsky.ca>
Fix the order of the freq-table-hz property, then convert to OPP tables
and add interconnect support for UFS for the SM6350 SoC.
Signed-off-by: Luca Weiss <luca.weiss(a)fairphone.com>
---
Changes in v3:
- Actually pick up tags, sorry about that
- Link to v2: https://lore.kernel.org/r/20251023-sm6350-ufs-things-v2-0-799d59178713@fair…
Changes in v2:
- Resend, pick up tags
- Link to v1: https://lore.kernel.org/r/20250314-sm6350-ufs-things-v1-0-3600362cc52c@fair…
---
Luca Weiss (3):
arm64: dts: qcom: sm6350: Fix wrong order of freq-table-hz for UFS
arm64: dts: qcom: sm6350: Add OPP table support to UFSHC
arm64: dts: qcom: sm6350: Add interconnect support to UFS
arch/arm64/boot/dts/qcom/sm6350.dtsi | 49 ++++++++++++++++++++++++++++--------
1 file changed, 39 insertions(+), 10 deletions(-)
---
base-commit: a92c761bcac3d5042559107fa7679470727a4bcb
change-id: 20250314-sm6350-ufs-things-53c5de9fec5e
Best regards,
--
Luca Weiss <luca.weiss(a)fairphone.com>
The patch titled
Subject: mm/truncate: unmap large folio on split failure
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-truncate-unmap-large-folio-on-split-failure.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kiryl Shutsemau <kas(a)kernel.org>
Subject: mm/truncate: unmap large folio on split failure
Date: Mon, 27 Oct 2025 11:56:36 +0000
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
This behavior might not be respected on truncation.
During truncation, the kernel splits a large folio in order to reclaim
memory. As a side effect, it unmaps the folio and destroys PMD mappings
of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
are preserved.
However, if the split fails, PMD mappings are preserved and the user will
not receive SIGBUS on any accesses within the PMD.
Unmap the folio on split failure. It will lead to refault as PTEs and
preserve SIGBUS semantics.
Make an exception for shmem/tmpfs that for long time intentionally mapped
with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/truncate.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)
--- a/mm/truncate.c~mm-truncate-unmap-large-folio-on-split-failure
+++ a/mm/truncate.c
@@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_
return 0;
}
+static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at,
+ unsigned long min_order)
+{
+ enum ttu_flags ttu_flags =
+ TTU_SYNC |
+ TTU_SPLIT_HUGE_PMD |
+ TTU_IGNORE_MLOCK;
+ int ret;
+
+ ret = try_folio_split_to_order(folio, split_at, min_order);
+
+ /*
+ * If the split fails, unmap the folio, so it will be refaulted
+ * with PTEs to respect SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ if (ret && !shmem_mapping(folio->mapping)) {
+ try_to_unmap(folio, ttu_flags);
+ WARN_ON(folio_mapped(folio));
+ }
+
+ return ret;
+}
+
/*
* Handle partial folios. The folio may be entirely within the
* range if a split has raced with us. If not, we zero the part of the
@@ -226,7 +252,7 @@ bool truncate_inode_partial_folio(struct
min_order = mapping_min_folio_order(folio->mapping);
split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
- if (!try_folio_split_to_order(folio, split_at, min_order)) {
+ if (!try_folio_split_or_unmap(folio, split_at, min_order)) {
/*
* try to split at offset + length to make sure folios within
* the range can be dropped, especially to avoid memory waste
@@ -250,13 +276,10 @@ bool truncate_inode_partial_folio(struct
if (!folio_trylock(folio2))
goto out;
- /*
- * make sure folio2 is large and does not change its mapping.
- * Its split result does not matter here.
- */
+ /* make sure folio2 is large and does not change its mapping */
if (folio_test_large(folio2) &&
folio2->mapping == folio->mapping)
- try_folio_split_to_order(folio2, split_at2, min_order);
+ try_folio_split_or_unmap(folio2, split_at2, min_order);
folio_unlock(folio2);
out:
_
Patches currently in -mm which might be from kas(a)kernel.org are
mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
mm-truncate-unmap-large-folio-on-split-failure.patch
The patch titled
Subject: mm/memory: do not populate page table entries beyond i_size
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kiryl Shutsemau <kas(a)kernel.org>
Subject: mm/memory: do not populate page table entries beyond i_size
Date: Mon, 27 Oct 2025 11:56:35 +0000
Patch series "Fix SIGBUS semantics with large folios", v3.
Accessing memory within a VMA, but beyond i_size rounded up to the next
page size, is supposed to generate SIGBUS.
Darrick reported[1] an xfstests regression in v6.18-rc1. generic/749
failed due to missing SIGBUS. This was caused by my recent changes that
try to fault in the whole folio where possible:
19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
357b92761d94 ("mm/filemap: map entire large folio faultaround")
These changes did not consider i_size when setting up PTEs, leading to
xfstest breakage.
However, the problem has been present in the kernel for a long time -
since huge tmpfs was introduced in 2016. The kernel happily maps
PMD-sized folios as PMD without checking i_size. And huge=always tmpfs
allocates PMD-size folios on any writes.
I considered this corner case when I implemented a large tmpfs, and my
conclusion was that no one in their right mind should rely on receiving a
SIGBUS signal when accessing beyond i_size. I cannot imagine how it could
be useful for the workload.
But apparently filesystem folks care a lot about preserving strict SIGBUS
semantics.
Generic/749 was introduced last year with reference to POSIX, but no real
workloads were mentioned. It also acknowledged the tmpfs deviation from
the test case.
POSIX indeed says[3]:
References within the address range starting at pa and
continuing for len bytes to whole pages following the end of an
object shall result in delivery of a SIGBUS signal.
The patchset fixes the regression introduced by recent changes as well as
more subtle SIGBUS breakage due to split failure on truncation.
This patch (of 2):
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
Recent changes attempted to fault in full folio where possible. They did
not respect i_size, which led to populating PTEs beyond i_size and
breaking SIGBUS semantics.
Darrick reported generic/749 breakage because of this.
However, the problem existed before the recent changes. With huge=always
tmpfs, any write to a file leads to PMD-size allocation. Following the
fault-in of the folio will install PMD mapping regardless of i_size.
Fix filemap_map_pages() and finish_fault() to not install:
- PTEs beyond i_size;
- PMD mappings across i_size;
Make an exception for shmem/tmpfs that for long time intentionally
mapped with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Fixes: 19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
Fixes: 357b92761d94 ("mm/filemap: map entire large folio faultaround")
Fixes: 01c70267053d ("fs: add a filesystem flag for THPs")
Reported-by: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/filemap.c | 28 ++++++++++++++++++++--------
mm/memory.c | 20 +++++++++++++++++++-
2 files changed, 39 insertions(+), 9 deletions(-)
--- a/mm/filemap.c~mm-memory-do-not-populate-page-table-entries-beyond-i_size
+++ a/mm/filemap.c
@@ -3681,7 +3681,8 @@ skip:
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
unsigned long addr, unsigned int nr_pages,
- unsigned long *rss, unsigned short *mmap_miss)
+ unsigned long *rss, unsigned short *mmap_miss,
+ bool can_map_large)
{
unsigned int ref_from_caller = 1;
vm_fault_t ret = 0;
@@ -3696,7 +3697,7 @@ static vm_fault_t filemap_map_folio_rang
* The folio must not cross VMA or page table boundary.
*/
addr0 = addr - start * PAGE_SIZE;
- if (folio_within_vma(folio, vmf->vma) &&
+ if (can_map_large && folio_within_vma(folio, vmf->vma) &&
(addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
vmf->pte -= start;
page -= start;
@@ -3811,13 +3812,27 @@ vm_fault_t filemap_map_pages(struct vm_f
unsigned long rss = 0;
unsigned int nr_pages = 0, folio_type;
unsigned short mmap_miss = 0, mmap_miss_saved;
+ bool can_map_large;
rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
if (!folio)
goto out;
- if (filemap_map_pmd(vmf, folio, start_pgoff)) {
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+ end_pgoff = min(end_pgoff, file_end);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ can_map_large = shmem_mapping(mapping) ||
+ file_end >= folio_next_index(folio);
+
+ if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3830,10 +3845,6 @@ vm_fault_t filemap_map_pages(struct vm_f
goto out;
}
- file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
- if (end_pgoff > file_end)
- end_pgoff = file_end;
-
folio_type = mm_counter_file(folio);
do {
unsigned long end;
@@ -3850,7 +3861,8 @@ vm_fault_t filemap_map_pages(struct vm_f
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &rss, &mmap_miss);
+ nr_pages, &rss, &mmap_miss,
+ can_map_large);
folio_unlock(folio);
} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
--- a/mm/memory.c~mm-memory-do-not-populate-page-table-entries-beyond-i_size
+++ a/mm/memory.c
@@ -65,6 +65,7 @@
#include <linux/gfp.h>
#include <linux/migrate.h>
#include <linux/string.h>
+#include <linux/shmem_fs.h>
#include <linux/memory-tiers.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
@@ -5501,8 +5502,25 @@ fallback:
return ret;
}
+ if (!needs_fallback && vma->vm_file) {
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ pgoff_t file_end;
+
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ needs_fallback = !shmem_mapping(mapping) &&
+ file_end < folio_next_index(folio);
+ }
+
if (pmd_none(*vmf->pmd)) {
- if (folio_test_pmd_mappable(folio)) {
+ if (!needs_fallback && folio_test_pmd_mappable(folio)) {
ret = do_set_pmd(vmf, folio, page);
if (ret != VM_FAULT_FALLBACK)
return ret;
_
Patches currently in -mm which might be from kas(a)kernel.org are
mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
mm-truncate-unmap-large-folio-on-split-failure.patch
The function calls of_parse_phandle() to get a
device node, which increments the reference count of the node. However,
the function fails to call of_node_put() to decrement the reference.
This leads to a reference count leak.
This is found by static analysis and similar to the commit a508e33956b5
("ipmi:ipmb: Fix refcount leak in ipmi_ipmb_probe")
Fixes: 423de5b5bc5b ("thermal/of: Fix cdev lookup in thermal_of_should_bind()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/thermal/thermal_of.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index 1a51a4d240ff..932291648683 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -284,8 +284,12 @@ static bool thermal_of_cm_lookup(struct device_node *cm_np,
int count, i;
tr_np = of_parse_phandle(child, "trip", 0);
- if (tr_np != trip->priv)
+ if (tr_np != trip->priv) {
+ of_node_put(tr_np);
continue;
+ }
+
+ of_node_put(tr_np);
/* The trip has been found, look up the cdev. */
count = of_count_phandle_with_args(child, "cooling-device",
--
2.39.5 (Apple Git-154)
From: Emanuele Ghidoli <emanuele.ghidoli(a)toradex.com>
While the DP83867 PHYs report EEE capability through their feature
registers, the actual hardware does not support EEE (see Links).
When the connected MAC enables EEE, it causes link instability and
communication failures.
The issue is reproducible with a iMX8MP and relevant stmmac ethernet port.
Since the introduction of phylink-managed EEE support in the stmmac driver,
EEE is now enabled by default, leading to issues on systems using the
DP83867 PHY.
Call phy_disable_eee during phy initialization to prevent EEE from being
enabled on DP83867 PHYs.
Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/1445…
Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/6586…
Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy")
Cc: stable(a)vger.kernel.org
Signed-off-by: Emanuele Ghidoli <emanuele.ghidoli(a)toradex.com>
---
drivers/net/phy/dp83867.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index deeefb962566..36a0c1b7f59c 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -738,6 +738,12 @@ static int dp83867_config_init(struct phy_device *phydev)
return ret;
}
+ /* Although the DP83867 reports EEE capability through the
+ * MDIO_PCS_EEE_ABLE and MDIO_AN_EEE_ADV registers, the feature
+ * is not actually implemented in hardware.
+ */
+ phy_disable_eee(phydev);
+
if (phy_interface_is_rgmii(phydev) ||
phydev->interface == PHY_INTERFACE_MODE_SGMII) {
val = phy_read(phydev, MII_DP83867_PHYCTRL);
--
2.43.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x daeb4037adf7d3349b4a1fb792f4bc9824686a4b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102701-scabby-entrust-a162@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From daeb4037adf7d3349b4a1fb792f4bc9824686a4b Mon Sep 17 00:00:00 2001
From: Artem Shimko <a.shimko.dev(a)gmail.com>
Date: Sun, 19 Oct 2025 12:51:31 +0300
Subject: [PATCH] serial: 8250_dw: handle reset control deassert error
Check the return value of reset_control_deassert() in the probe
function to prevent continuing probe when reset deassertion fails.
Previously, reset_control_deassert() was called without checking its
return value, which could lead to probe continuing even when the
device reset wasn't properly deasserted.
The fix checks the return value and returns an error with dev_err_probe()
if reset deassertion fails, providing better error handling and
diagnostics.
Fixes: acbdad8dd1ab ("serial: 8250_dw: simplify optional reset handling")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Artem Shimko <a.shimko.dev(a)gmail.com>
Link: https://patch.msgid.link/20251019095131.252848-1-a.shimko.dev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index a53ba04d9770..710ae4d40aec 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -635,7 +635,9 @@ static int dw8250_probe(struct platform_device *pdev)
if (IS_ERR(data->rst))
return PTR_ERR(data->rst);
- reset_control_deassert(data->rst);
+ err = reset_control_deassert(data->rst);
+ if (err)
+ return dev_err_probe(dev, err, "failed to deassert resets\n");
err = devm_add_action_or_reset(dev, dw8250_reset_control_assert, data->rst);
if (err)