Some DAMON sysfs directory setup functions generates its sub and sub-sub
directories. For example, 'monitoring_attrs/' directory setup creates
'intervals/' and 'intervals/intervals_goal/' directories under
'monitoring_attrs/' directory. When such sub-sub directories are
successfully made but followup setup is failed, the setup function
should recursively clean up the subdirectories.
However, such setup functions are only dereferencing sub directory
reference counters. As a result, under certain setup failures, the
sub-sub directories keep having non-zero reference counters. It means
the directories cannot be removed like zombies, and the memory for the
directories cannot be freed.
The user impact of this issue is limited due to the following reasons.
When the issue happens, the zombie directories are still taking the
path. Hence attempts to generate the directories again will fail,
without additional memory leak. This means the upper bound memory leak
is limited. Nonetheless this also implies controlling DAMON with a
feature that requires the setup-failed sysfs files will be impossible
until the system reboots.
Also, the setup operations are quite simple. The certain failures would
hence only rarely happen, and are difficult to artificially trigger.
SeongJae Park (4):
mm/damon/sysfs: cleanup intervals subdirs on attrs dir setup failure
mm/damon/sysfs: cleanup attrs subdirs on context dir setup failure
mm/damon/sysfs-scheme: cleanup quotas subdirs on scheme dir setup
failure
mm/damon/sysfs-scheme: cleanup access_pattern subdirs on scheme dir
setup failure
mm/damon/sysfs-schemes.c | 10 ++++++----
mm/damon/sysfs.c | 9 ++++++---
2 files changed, 12 insertions(+), 7 deletions(-)
base-commit: 6d039da6a260dd7919bebc70ebb65d250bb9c24e
--
2.47.3
When we fail to refill the receive buffers, we schedule a delayed worker
to retry later. However, this worker creates some concurrency issues
such as races and deadlocks. To simplify the logic and avoid further
problems, we will instead retry refilling in the next NAPI poll.
Fixes: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx")
Reported-by: Paolo Abeni <pabeni(a)redhat.com>
Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/drv-hw-dbg/results/400961/3-…
Cc: stable(a)vger.kernel.org
Suggested-by: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Signed-off-by: Bui Quang Minh <minhquangbui99(a)gmail.com>
---
drivers/net/virtio_net.c | 55 ++++++++++++++++++++++------------------
1 file changed, 30 insertions(+), 25 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1bb3aeca66c6..ac514c9383ae 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3035,7 +3035,7 @@ static int virtnet_receive_packets(struct virtnet_info *vi,
}
static int virtnet_receive(struct receive_queue *rq, int budget,
- unsigned int *xdp_xmit)
+ unsigned int *xdp_xmit, bool *retry_refill)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
struct virtnet_rq_stats stats = {};
@@ -3047,12 +3047,8 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
- if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
- spin_lock(&vi->refill_lock);
- if (vi->refill_enabled)
- schedule_delayed_work(&vi->refill, 0);
- spin_unlock(&vi->refill_lock);
- }
+ if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+ *retry_refill = true;
}
u64_stats_set(&stats.packets, packets);
@@ -3129,18 +3125,18 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct send_queue *sq;
unsigned int received;
unsigned int xdp_xmit = 0;
- bool napi_complete;
+ bool napi_complete, retry_refill = false;
virtnet_poll_cleantx(rq, budget);
- received = virtnet_receive(rq, budget, &xdp_xmit);
+ received = virtnet_receive(rq, budget, &xdp_xmit, &retry_refill);
rq->packets_in_napi += received;
if (xdp_xmit & VIRTIO_XDP_REDIR)
xdp_do_flush();
/* Out of packets? */
- if (received < budget) {
+ if (received < budget && !retry_refill) {
napi_complete = virtqueue_napi_complete(napi, rq->vq, received);
/* Intentionally not taking dim_lock here. This may result in a
* spurious net_dim call. But if that happens virtnet_rx_dim_work
@@ -3160,7 +3156,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
virtnet_xdp_put_sq(vi, sq);
}
- return received;
+ return retry_refill ? budget : received;
}
static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
@@ -3230,9 +3226,11 @@ static int virtnet_open(struct net_device *dev)
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
- /* Make sure we have some buffers: if oom use wq. */
- if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
- schedule_delayed_work(&vi->refill, 0);
+ /* If this fails, we will retry later in
+ * NAPI poll, which is scheduled in the below
+ * virtnet_enable_queue_pair
+ */
+ try_fill_recv(vi, &vi->rq[i], GFP_KERNEL);
err = virtnet_enable_queue_pair(vi, i);
if (err < 0)
@@ -3473,15 +3471,15 @@ static void __virtnet_rx_resume(struct virtnet_info *vi,
bool refill)
{
bool running = netif_running(vi->dev);
- bool schedule_refill = false;
- if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
- schedule_refill = true;
+ if (refill)
+ /* If this fails, we will retry later in NAPI poll, which is
+ * scheduled in the below virtnet_napi_enable
+ */
+ try_fill_recv(vi, rq, GFP_KERNEL);
+
if (running)
virtnet_napi_enable(rq);
-
- if (schedule_refill)
- schedule_delayed_work(&vi->refill, 0);
}
static void virtnet_rx_resume_all(struct virtnet_info *vi)
@@ -3777,6 +3775,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
struct virtio_net_rss_config_trailer old_rss_trailer;
struct net_device *dev = vi->dev;
struct scatterlist sg;
+ int i;
if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
return 0;
@@ -3829,11 +3828,17 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
}
succ:
vi->curr_queue_pairs = queue_pairs;
- /* virtnet_open() will refill when device is going to up. */
- spin_lock_bh(&vi->refill_lock);
- if (dev->flags & IFF_UP && vi->refill_enabled)
- schedule_delayed_work(&vi->refill, 0);
- spin_unlock_bh(&vi->refill_lock);
+ if (dev->flags & IFF_UP) {
+ /* Let the NAPI poll refill the receive buffer for us. We can't
+ * safely call try_fill_recv() here because the NAPI might be
+ * enabled already.
+ */
+ local_bh_disable();
+ for (i = 0; i < vi->curr_queue_pairs; i++)
+ virtqueue_napi_schedule(&vi->rq[i].napi, vi->rq[i].vq);
+
+ local_bh_enable();
+ }
return 0;
}
--
2.43.0
On Fri, 2 Jan 2026, at 3:52 PM, Icenowy Zheng wrote:
> Currently the LS7A GMAC device tree node lacks a proper phy-handle
> property pointing to the PHY node.
>
> In addition, the phy-mode property specifies "rgmii" without any
> internal delay information, which means the board trace needs to add 2ns
> delay to the RGMII data lines; but that isn't known to happen on any
> Loongson board. The ACPI-based initialization codepath, which is used on
> LoongArch-based 3A5000 + 7A1000 hardwares, specifies "rgmii-id" phy
> mode, which should be the one we are using.
>
> Add the lacking phy-handle property and set proper phy-mode.
>
> Tested on a LS3A4000_7A1000_NUC_BOARD_V2.1 board with YT8521S PHY.
>
> Signed-off-by: Icenowy Zheng <zhengxingda(a)iscas.ac.cn>
Good catch! This with fine with realtek phy chips but YT8521S seems
to be picky.
Reviewed-by: Jiaxun Yang <jiaxun.yang(a)flygoat.com>
Also maybe:
Cc: stable(a)vger.kernel.org
Given those boards rely on built-in DT.
Thanks
> ---
> arch/mips/boot/dts/loongson/ls7a-pch.dtsi | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
> b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
> index ee71045883e7e..6dee85909f5a6 100644
> --- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
> +++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
> @@ -199,7 +199,8 @@ gmac@3,0 {
> <13 IRQ_TYPE_LEVEL_HIGH>;
> interrupt-names = "macirq", "eth_lpi";
> interrupt-parent = <&pic>;
> - phy-mode = "rgmii";
> + phy-mode = "rgmii-id";
> + phy-handle = <&phy0>;
> mdio {
> #address-cells = <1>;
> #size-cells = <0>;
> @@ -222,7 +223,8 @@ gmac@3,1 {
> <15 IRQ_TYPE_LEVEL_HIGH>;
> interrupt-names = "macirq", "eth_lpi";
> interrupt-parent = <&pic>;
> - phy-mode = "rgmii";
> + phy-mode = "rgmii-id";
> + phy-handle = <&phy1>;
> mdio {
> #address-cells = <1>;
> #size-cells = <0>;
> --
> 2.52.0
--
- Jiaxun
Synopsys renamed DWC_usb32 IP to DWC_usb4 as of IP version 1.30. No
functional change except checking for the IP_NAME here. The driver will
treat the new IP_NAME as if it's DWC_usb32. Additional features for USB4
will be introduced and checked separately.
Cc: stable(a)vger.kernel.org
Signed-off-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
---
drivers/usb/dwc3/core.c | 2 ++
drivers/usb/dwc3/core.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 96f85eada047..f71b75465a60 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -993,6 +993,8 @@ static bool dwc3_core_is_valid(struct dwc3 *dwc)
reg = dwc3_readl(dwc->regs, DWC3_GSNPSID);
dwc->ip = DWC3_GSNPS_ID(reg);
+ if (dwc->ip == DWC4_IP)
+ dwc->ip = DWC32_IP;
/* This should read as U3 followed by revision number */
if (DWC3_IP_IS(DWC3)) {
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index a5fc92c4ffa3..45757169b672 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1265,6 +1265,7 @@ struct dwc3 {
#define DWC3_IP 0x5533
#define DWC31_IP 0x3331
#define DWC32_IP 0x3332
+#define DWC4_IP 0x3430
u32 revision;
base-commit: 18514fd70ea4ca9de137bb3bceeac1bac4bcad75
--
2.28.0
Hi,
syzbot reported a circular locking dependency in the NET/ROM routing
code involving nr_neigh_list_lock, nr_node_list_lock and
nr_node->node_lock when nr_rt_device_down() interacts with the
ioctl path. This series fixes that deadlock and also addresses a
long-standing reference count leak found while auditing the same
code.
Patch 1/2 refactors nr_rt_device_down() to avoid nested locking
between nr_neigh_list_lock and nr_node_list_lock by doing two
separate passes over nodes and neighbours, and adjusts nr_rt_free()
to follow the same lock ordering.
Patch 2/2 fixes a per-route reference count leak by dropping
nr_neigh->count and calling nr_neigh_put() when removing routes
from nr_rt_device_down(), mirroring the behaviour of
nr_dec_obs()/nr_del_node().
[1] https://syzkaller.appspot.com/bug?extid=14afda08dc3484d5db82
Thanks,
Junjie
Hi stable maintainers,
I have tried backporting some fixes to stable kernel 6.12.y which also
have CVE numbers and are fixing commits in 6.12.y.
I am not a subsystem expert and have only done overall testing that we
do for stable release candidate testing and not any patch specific testing.
Note: All these patches are present backports from upstream.
PATCH 1: The broken commit is in 6.12.y, and the fix is a clean
cherry-pick and addresses CVE-2025-68206
PATCH 2: The broken commit is present in 6.12.y and the fix is a clean
cherry-pick and addresses CVE-2025-40325.
PATCH 3: The broken commit is present in 6.12.y and backport needed a
minor conflict resolution due to missing commit fe69a3918084
("drm/panthor: Fix UAF in panthor_gem_create_with_handle() debugfs
in 6.12.y
PATCH 4,5,6: Patch 4 and 5 are pulled in as prerequisites for PATCH 6 which
is a fix for CVE-2025-40170 and needed a minor conflict resolution due to missing
commit: 22d6c9eebf2e ("net: Unexport shared functions for DCCP.") in 6.12.y
PATCH 7: The broken commit in present in 6.12.y and the backport of the
fix needed a minor conflict resolution due to missing commit in 6.12.y.
This is fix for CVE-2025-40164.
Please let me know if there are any comments.
Regards,
Harshit
Andrii Melnychenko (1):
netfilter: nft_ct: add seqadj extension for natted connections
Boris Brezillon (1):
drm/panthor: Flush shmem writes before mapping buffers CPU-uncached
Eric Dumazet (2):
ipv6: adopt dst_dev() helper
net: use dst_dev_rcu() in sk_setup_caps()
Justin Iurman (1):
net: ipv6: ioam6: use consistent dst names
Xiao Ni (1):
md/raid10: wait barrier before returning discard request with
REQ_NOWAIT
Zqiang (1):
usbnet: Fix using smp_processor_id() in preemptible code warnings
drivers/gpu/drm/panthor/panthor_gem.c | 18 +++++++++++++
drivers/md/raid10.c | 3 +--
drivers/net/usb/usbnet.c | 2 ++
include/net/ip.h | 6 +++--
include/net/ip6_route.h | 4 +--
include/net/route.h | 2 +-
net/core/sock.c | 16 +++++++-----
net/ipv6/exthdrs.c | 2 +-
net/ipv6/icmp.c | 4 ++-
net/ipv6/ila/ila_lwt.c | 2 +-
net/ipv6/ioam6_iptunnel.c | 37 ++++++++++++++-------------
net/ipv6/ip6_gre.c | 8 +++---
net/ipv6/ip6_output.c | 19 +++++++-------
net/ipv6/ip6_tunnel.c | 4 +--
net/ipv6/ip6_udp_tunnel.c | 2 +-
net/ipv6/ip6_vti.c | 2 +-
net/ipv6/ndisc.c | 6 +++--
net/ipv6/netfilter/nf_dup_ipv6.c | 2 +-
net/ipv6/output_core.c | 2 +-
net/ipv6/route.c | 20 +++++++++------
net/ipv6/rpl_iptunnel.c | 4 +--
net/ipv6/seg6_iptunnel.c | 20 ++++++++-------
net/ipv6/seg6_local.c | 2 +-
net/netfilter/nft_ct.c | 5 ++++
24 files changed, 118 insertions(+), 74 deletions(-)
--
2.50.1
On x86_64:
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>" we observe a pafe fault that happens.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) – not-present page
This happens on x86_64 only, as this is already fixed in aarch64 in
commit: cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer
against memory bounds")
V1: https://lore.kernel.org/all/20251112193005.3772542-1-harshit.m.mogalapalli@…
V1 attempted to do a similar sanity check in x86_64. Borislav suggested
to add a generic helper ima_validate_range() which could then be used
for both OF based and x86_64.
Testing information:
--------------------
On x86_64: With latest 6.19-rc2 based, we could reproduce the issue, and
patched kernel works fine. (with mem=8G on a 16G memory machine)
Thanks to Yifei for finding enabling IMA_KEXEC is the cause.
Thanks for the reviews on V1.
V1 -> V2:
- Patch 1: Add a generic helper "ima_validate_range()"
- Patch 2: Use this new helper in drivers/of/kexec.c -> No functional
change.
- Patch 3: Fix the page fault by doing sanity check with
"ima_validate_range()"
V2: https://lore.kernel.org/all/20251229081523.622515-1-harshit.m.mogalapalli@o…
V2 -> V3:
Update subject of Patch 1 to more appropriate one (Suggested by Mimi
Zohar)
Thanks,
Harshit
Harshit Mogalapalli (3):
ima: verify the previous kernel's IMA buffer lies in addressable RAM
of/kexec: refactor ima_get_kexec_buffer() to use ima_validate_range()
x86/kexec: Add a sanity check on previous kernel's ima kexec buffer
arch/x86/kernel/setup.c | 6 +++++
drivers/of/kexec.c | 15 +++----------
include/linux/ima.h | 1 +
security/integrity/ima/ima_kexec.c | 35 ++++++++++++++++++++++++++++++
4 files changed, 45 insertions(+), 12 deletions(-)
--
2.50.1
Hi all,
Roland Schwarzkopf reported to the Debian mailing list a problem which
he encountered once updating in Debian from 5.10.244 to 5.10.247. The
report is quoted below and found in
https://lists.debian.org/debian-kernel/2025/12/msg00223.html
Roland did bisect the changes between 5.10.244 to 5.10.247 and found
that the issue is introduced with 1550f3673972 ("net: rtnetlink: add
bulk delete support flag") which is the backport to the 5.10.y series.
On Thu, Dec 18, 2025 at 02:59:55PM +0100, Roland Schwarzkopf wrote:
> Hi Salvatore,
>
> On 12/17/25 20:28, Salvatore Bonaccorso wrote:
> > Hi Roland,
> >
> > I'm CC'ing Ben Hutchings directly as well as he takes care of the
> > Debian LTS kernel updates. Idellly we make this as well a proper bug
> > for easier tracking.
> >
> > On Wed, Dec 17, 2025 at 01:35:54PM +0100, Roland Schwarzkopf wrote:
> > > Hi there,
> > >
> > > after upgrading to the latest kernel on Debian 11
> > > (linux-image-5.10.0-37-amd64) I have an issue using libvirt with qemu/kvm
> > > virtual machines and macvtap networking. When a machine is shut down,
> > > libvirt can not delete the corresponding macvtap device. Thus, starting the
> > > machine again is not possible. After manually removing the macvtap device
> > > using `ip link delete` the vm can be started again.
> > >
> > > In the journal the following message is shown:
> > >
> > > Dec 17 13:19:27 iblis libvirtd[535]: error destroying network device macvtap0: Operation not supported
> > >
> > > After downgrading the kernel to linux-image-5.10.0-36-amd64, the problem
> > > disappears. I tested this on a fresh minimal install of Debian 11 - to
> > > exclude that anything else on my production machines is causing this issue.
> > >
> > > Since the older kernel does not have this issue, I assume this is related to
> > > the kernel and not to libvirt?
> > >
> > > I tried to check for bug reports of the kernel package, but the bug tracker
> > > finds no reports and even states that the package does not exist (I used the
> > > "Bug reports" link on
> > > https://packages.debian.org/bullseye/linux-image-5.10.0-37-amd64). This left
> > > me a bit puzzled. Since I don't have experience with the debian bug
> > > reporting process, I had no other idea than writing to this list.
> > You would need to search for inhttps://bugs.debian.org/src:linux ,
> > but that said I'm not aware of any bug reports in that direction.
> >
> > Would you be in the position of bisecting the problem as you can say
> > that 5.10.244 is good and 5.10.247 is bad and regressed? If you can do
> > that that would involve compiling a couple of kernels to narrow down
> > where the problem is introduced:
> >
> > git clone --single-branch -b linux-5.10.yhttps://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-st…
> > cd linux-stable
> > git checkout v5.10.244
> > cp /boot/config-$(uname -r) .config
> > yes '' | make localmodconfig
> > make savedefconfig
> > mv defconfig arch/x86/configs/my_defconfig
> >
> > # test 5.10.244 to ensure this is "good"
> > make my_defconfig
> > make -j $(nproc) bindeb-pkg
> > ... install the resulting .deb package and confirm it successfully boots / problem does not exist
> >
> > # test 5.10.247 to ensure this is "bad"
> > git checkout v5.10.247
> > make my_defconfig
> > make -j $(nproc) bindeb-pkg
> > ... install the resulting .deb package and confirm it fails to boot / problem exists
> >
> > With that confirmed, the bisection can start:
> >
> > git bisect start
> > git bisect good v5.10.244
> > git bisect bad v5.10.247
> >
> > In each bisection step git checks out a state between the oldest
> > known-bad and the newest known-good commit. In each step test using:
> >
> > make my_defconfig
> > make -j $(nproc) bindeb-pkg
> > ... install, try to boot / verify if problem exists
> >
> > and if the problem is hit run:
> >
> > git bisect bad
> >
> > and if the problem doesn't trigger run:
> >
> > git bisect good
> >
> > . Please pay attention to always select the just built kernel for
> > booting, it won't always be the default kernel picked up by grub.
> >
> > Iterate until git announces to have identified the first bad commit.
> >
> > Then provide the output of
> >
> > git bisect log
> >
> > In the course of the bisection you might have to uninstall previous
> > kernels again to not exhaust the disk space in /boot. Also in the end
> > uninstall all self-built kernels again.
>
> I just did my first bisection \o/ (sorry)
>
> Here are the results:
>
> git bisect start
> # bad: [f964b940099f9982d723d4c77988d4b0dda9c165] Linux 5.10.247
> git bisect bad f964b940099f9982d723d4c77988d4b0dda9c165
> # good: [863b76df7d1e327979946a2d3893479c3275bfa4] Linux 5.10.244
> git bisect good f52ee6ea810273e527a5d319e5f400be8c8424c1
> # good: [dc9fdb7586b90e33c766eac52b6f3d1c9ec365a1] net: usb: lan78xx: Add error handling to lan78xx_init_mac_address
> git bisect good dc9fdb7586b90e33c766eac52b6f3d1c9ec365a1
> # bad: [2272d5757ce5d3fb416d9f2497b015678eb85c0d] phy: cadence: cdns-dphy: Enable lower resolutions in dphy
> git bisect bad 2272d5757ce5d3fb416d9f2497b015678eb85c0d
> # bad: [547539f08b9e3629ce68479889813e58c8087e70] ALSA: usb-audio: fix control pipe direction
> git bisect bad 547539f08b9e3629ce68479889813e58c8087e70
> # bad: [3509c748e79435d09e730673c8c100b7f0ebc87c] most: usb: hdm_probe: Fix calling put_device() before device initialization
> git bisect bad 3509c748e79435d09e730673c8c100b7f0ebc87c
> # bad: [a6ebcafc2f5ff7f0d1ce0c6dc38ac09a16a56ec0] net: add ndo_fdb_del_bulk
> git bisect bad a6ebcafc2f5ff7f0d1ce0c6dc38ac09a16a56ec0
> # good: [b8a72692aa42b7dcd179a96b90bc2763ac74576a] hfsplus: fix KMSAN uninit-value issue in __hfsplus_ext_cache_extent()
> git bisect good b8a72692aa42b7dcd179a96b90bc2763ac74576a
> # good: [2b42a595863556b394bd702d46f4a9d0d2985aaa] m68k: bitops: Fix find_*_bit() signatures
> git bisect good 2b42a595863556b394bd702d46f4a9d0d2985aaa
> # good: [9d9f7d71d46cff3491a443a3cf452cecf87d51ef] net: rtnetlink: use BIT for flag values
> git bisect good 9d9f7d71d46cff3491a443a3cf452cecf87d51ef
> # bad: [1550f3673972c5cfba714135f8bf26784e6f2b0f] net: rtnetlink: add bulk delete support flag
> git bisect bad 1550f3673972c5cfba714135f8bf26784e6f2b0f
> # good: [c8879afa24169e504f78c9ca43a4d0d7397049eb] net: netlink: add NLM_F_BULK delete request modifier
> git bisect good c8879afa24169e504f78c9ca43a4d0d7397049eb
> # first bad commit: [1550f3673972c5cfba714135f8bf26784e6f2b0f] net: rtnetlink: add bulk delete support flag
>
> Is there anything else I can do to help?
Is there soemthing missing?
Roland I think it would be helpful if you can test as well more recent
stable series versions to confirm if the issue is present there as
well or not, which might indicate a 5.10.y specific backporting
problem.
#regzbot introduced: 1550f3673972c5cfba714135f8bf26784e6f2b0f
Regards,
Salvatore
When vm.dirtytime_expire_seconds is set to 0, wakeup_dirtytime_writeback()
schedules delayed work with a delay of 0, causing immediate execution.
The function then reschedules itself with 0 delay again, creating an
infinite busy loop that causes 100% kworker CPU usage.
Fix by:
- Only scheduling delayed work in wakeup_dirtytime_writeback() when
dirtytime_expire_interval is non-zero
- Cancelling the delayed work in dirtytime_interval_handler() when
the interval is set to 0
- Adding a guard in start_dirtytime_writeback() for defensive coding
Tested by booting kernel in QEMU with virtme-ng:
- Before fix: kworker CPU spikes to ~73%
- After fix: CPU remains at normal levels
- Setting interval back to non-zero correctly resumes writeback
Fixes: a2f4870697a5 ("fs: make sure the timestamps for lazytime inodes eventually get written")
Cc: stable(a)vger.kernel.org
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220227
Signed-off-by: Laveesh Bansal <laveeshb(a)laveeshbansal.com>
---
fs/fs-writeback.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6800886c4d10..cd21c74cd0e5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2492,7 +2492,8 @@ static void wakeup_dirtytime_writeback(struct work_struct *w)
wb_wakeup(wb);
}
rcu_read_unlock();
- schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+ if (dirtytime_expire_interval)
+ schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
}
static int dirtytime_interval_handler(const struct ctl_table *table, int write,
@@ -2501,8 +2502,12 @@ static int dirtytime_interval_handler(const struct ctl_table *table, int write,
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (ret == 0 && write)
- mod_delayed_work(system_percpu_wq, &dirtytime_work, 0);
+ if (ret == 0 && write) {
+ if (dirtytime_expire_interval)
+ mod_delayed_work(system_percpu_wq, &dirtytime_work, 0);
+ else
+ cancel_delayed_work_sync(&dirtytime_work);
+ }
return ret;
}
@@ -2519,7 +2524,8 @@ static const struct ctl_table vm_fs_writeback_table[] = {
static int __init start_dirtytime_writeback(void)
{
- schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+ if (dirtytime_expire_interval)
+ schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
register_sysctl_init("vm", vm_fs_writeback_table);
return 0;
}
--
2.43.0
When vm.dirtytime_expire_seconds is set to 0, wakeup_dirtytime_writeback()
schedules delayed work with a delay of 0, causing immediate execution.
The function then reschedules itself with 0 delay again, creating an
infinite busy loop that causes 100% kworker CPU usage.
Fix by:
- Only scheduling delayed work in wakeup_dirtytime_writeback() when
dirtytime_expire_interval is non-zero
- Cancelling the delayed work in dirtytime_interval_handler() when
the interval is set to 0
- Adding a guard in start_dirtytime_writeback() for defensive coding
Tested by booting kernel in QEMU with virtme-ng:
- Before fix: kworker CPU spikes to ~73%
- After fix: CPU remains at normal levels
- Setting interval back to non-zero correctly resumes writeback
Fixes: a2f4870697a5 ("fs: make sure the timestamps for lazytime inodes eventually get written")
Cc: stable(a)vger.kernel.org
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220227
Signed-off-by: Laveesh Bansal <laveeshb(a)laveeshbansal.com>
---
fs/fs-writeback.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6800886c4d10..cd21c74cd0e5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2492,7 +2492,8 @@ static void wakeup_dirtytime_writeback(struct work_struct *w)
wb_wakeup(wb);
}
rcu_read_unlock();
- schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+ if (dirtytime_expire_interval)
+ schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
}
static int dirtytime_interval_handler(const struct ctl_table *table, int write,
@@ -2501,8 +2502,12 @@ static int dirtytime_interval_handler(const struct ctl_table *table, int write,
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (ret == 0 && write)
- mod_delayed_work(system_percpu_wq, &dirtytime_work, 0);
+ if (ret == 0 && write) {
+ if (dirtytime_expire_interval)
+ mod_delayed_work(system_percpu_wq, &dirtytime_work, 0);
+ else
+ cancel_delayed_work_sync(&dirtytime_work);
+ }
return ret;
}
@@ -2519,7 +2524,8 @@ static const struct ctl_table vm_fs_writeback_table[] = {
static int __init start_dirtytime_writeback(void)
{
- schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+ if (dirtytime_expire_interval)
+ schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
register_sysctl_init("vm", vm_fs_writeback_table);
return 0;
}
--
2.43.0
From: Laveesh Bansal <laveeshbansal(a)gmail.com>
When vm.dirtytime_expire_seconds is set to 0, wakeup_dirtytime_writeback()
schedules delayed work with a delay of 0, causing immediate execution.
The function then reschedules itself with 0 delay again, creating an
infinite busy loop that causes 100% kworker CPU usage.
Fix by:
- Only scheduling delayed work in wakeup_dirtytime_writeback() when
dirtytime_expire_interval is non-zero
- Cancelling the delayed work in dirtytime_interval_handler() when
the interval is set to 0
- Adding a guard in start_dirtytime_writeback() for defensive coding
Tested by booting kernel in QEMU with virtme-ng:
- Before fix: kworker CPU spikes to ~73%
- After fix: CPU remains at normal levels
- Setting interval back to non-zero correctly resumes writeback
Fixes: a2f4870697a5 ("fs: make sure the timestamps for lazytime inodes eventually get written")
Cc: stable(a)vger.kernel.org
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220227
Signed-off-by: Laveesh Bansal <laveeshbansal(a)gmail.com>
---
fs/fs-writeback.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6800886c4d10..cd21c74cd0e5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2492,7 +2492,8 @@ static void wakeup_dirtytime_writeback(struct work_struct *w)
wb_wakeup(wb);
}
rcu_read_unlock();
- schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+ if (dirtytime_expire_interval)
+ schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
}
static int dirtytime_interval_handler(const struct ctl_table *table, int write,
@@ -2501,8 +2502,12 @@ static int dirtytime_interval_handler(const struct ctl_table *table, int write,
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (ret == 0 && write)
- mod_delayed_work(system_percpu_wq, &dirtytime_work, 0);
+ if (ret == 0 && write) {
+ if (dirtytime_expire_interval)
+ mod_delayed_work(system_percpu_wq, &dirtytime_work, 0);
+ else
+ cancel_delayed_work_sync(&dirtytime_work);
+ }
return ret;
}
@@ -2519,7 +2524,8 @@ static const struct ctl_table vm_fs_writeback_table[] = {
static int __init start_dirtytime_writeback(void)
{
- schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+ if (dirtytime_expire_interval)
+ schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
register_sysctl_init("vm", vm_fs_writeback_table);
return 0;
}
--
2.43.0
Hi,
I wanted to see if you had time to review the email I sent earlier.
Should you need any other details, don't hesitate to reach out.
Best
Amy Giles
Please reply with REMOVE if you don't wish to receive further emails
-----Original Message-----
Hi,
Hope you're having a great day!
Are you looking for leads from NRF 2026 Retail's Big Show?
Attendees count: 30,000 Leads
Data Fields: Company Name, Web URL, Contact Name, Title, Direct Email, Phone Number, Mailing Address, Industry, Employee Size, Annual Sales.
If you're interested in these leads, I'd be glad to share the pricing. Let me know!
Thanks for the quick reply. I'm excited to get your thoughts.
Best
Amy Giles
Demand Generation Manager
Zoom Connect, Inc
Please reply with REMOVE if you don't wish to receive further emails
fw_token is used for DT/ACPI systems to identify CPUs sharing caches.
For DT based systems, fw_token is set to a pointer to a DT node.
Commit 22def0b492e6 ("arch_topology: Build cacheinfo from primary CPU")
removed clearing of per_cpu_cacheinfo(cpu), which leads to reference
underrun in cache_shared_cpu_map_remove() during repeated cpu
offline/online as the reference is no longer incremented, because
allocate_cache_info() is now skipped in the online path.
The same problem existed on upstream but had a different root cause,
see 2613cc29c572 ("cacheinfo: Remove of_node_put() for fw_token").
Fixes the following splat:
OF: ERROR: Bad of_node_put() on /cpus/l2-cache0
CPU: 3 PID: 29 Comm: cpuhp/3 Tainted: G O 6.1.159-arm64 #1
Hardware name: AXM56xx Victoria (DT)
Call trace:
dump_backtrace+0xd8/0x12c
show_stack+0x1c/0x34
dump_stack_lvl+0x70/0x88
dump_stack+0x14/0x2c
of_node_release+0x134/0x138
kobject_put+0xa8/0x21c
of_node_put+0x1c/0x28
cache_shared_cpu_map_remove+0x19c/0x220
cacheinfo_cpu_pre_down+0x60/0xa0
cpuhp_invoke_callback+0x140/0x570
cpuhp_thread_fun+0xc8/0x19c
smpboot_thread_fn+0x218/0x23c
kthread+0x108/0x118
ret_from_fork+0x10/0x20
Fixes: 22def0b492e6 ("arch_topology: Build cacheinfo from primary CPU")
Signed-off-by: Petr Malat <oss(a)malat.biz>
---
drivers/base/cacheinfo.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 9e11d42b0d64..b57f64725f25 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -404,8 +404,6 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
}
}
}
- if (of_have_populated_dt())
- of_node_put(this_leaf->fw_token);
}
/* cpu is no longer populated in the shared map */
--
2.39.5
This patch reverts fuse back to its original behavior of sync being a no-op.
This fixes the userspace regression reported by Athul and J. upstream in
[1][2] where if there is a bug in a fuse server that causes the server to
never complete writeback, it will make wait_sb_inodes() wait forever.
Thanks,
Joanne
[1] https://lore.kernel.org/regressions/CAJnrk1ZjQ8W8NzojsvJPRXiv9TuYPNdj8Ye7=C…
[2] https://lore.kernel.org/linux-fsdevel/aT7JRqhUvZvfUQlV@eldamar.lan/
Changelog:
v1: https://lore.kernel.org/linux-mm/20251120184211.2379439-1-joannelkoong@gmai…
* Change AS_WRITEBACK_MAY_HANG to AS_NO_DATA_INTEGRITY and keep
AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM as is.
Joanne Koong (1):
fs/writeback: skip AS_NO_DATA_INTEGRITY mappings in wait_sb_inodes()
fs/fs-writeback.c | 3 ++-
fs/fuse/file.c | 4 +++-
include/linux/pagemap.h | 11 +++++++++++
3 files changed, 16 insertions(+), 2 deletions(-)
--
2.47.3
Hi ,
Just touching base to see if you're available to discuss this further.
If you have any uncertainties, I'm happy to help.
Regards
Jessica
Marketing Manager
Campaign Data Leads.,
Please reply with REMOVE if you don't wish to receive further emails
-----Original Message-----
From: Jessica Garcia
Subject: CES 2026: Audience Breakdown & Leads
Hi ,
Trust you're in good spirits.
Is the idea of buying the CES 2026 attendees' details for your marketing efforts something you'd like to explore?
Expo Name: Consumer Electronics Show 2026
Total Number of records: 40,000 records
List includes: Company Name, Contact Name, Job Title, Mailing Address, Phone, Emails, etc.
Best chance to connect with participants
Feel free to contact me if you are interested in acquiring this list so that I can share pricing information with you
Hoping for your prompt feedback.
Regards
Jessica
Marketing Manager
Campaign Data Leads.,
Please reply with REMOVE if you don't wish to receive further emails
crypto_alloc_acomp_node() may return ERR_PTR(), but the fail path checks
only for NULL and can pass an error pointer to crypto_free_acomp().
Use IS_ERR_OR_NULL() to only free valid acomp instances.
Fixes: 779b9955f643 ("mm: zswap: move allocations during CPU init outside the lock")
Cc: stable(a)vger.kernel.org
Signed-off-by: Pavel Butsykin <pbutsykin(a)cloudlinux.com>
---
mm/zswap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 5d0f8b13a958..ac9b7a60736b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -787,7 +787,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
return 0;
fail:
- if (acomp)
+ if (!IS_ERR_OR_NULL(acomp))
crypto_free_acomp(acomp);
kfree(buffer);
return ret;
--
2.52.0
Add two flags for KVM_CAP_X2APIC_API to allow userspace to control support
for Suppress EOI Broadcasts, which KVM completely mishandles. When x2APIC
support was first added, KVM incorrectly advertised and "enabled" Suppress
EOI Broadcast, without fully supporting the I/O APIC side of the equation,
i.e. without adding directed EOI to KVM's in-kernel I/O APIC.
That flaw was carried over to split IRQCHIP support, i.e. KVM advertised
support for Suppress EOI Broadcasts irrespective of whether or not the
userspace I/O APIC implementation supported directed EOIs. Even worse,
KVM didn't actually suppress EOI broadcasts, i.e. userspace VMMs without
support for directed EOI came to rely on the "spurious" broadcasts.
KVM "fixed" the in-kernel I/O APIC implementation by completely disabling
support for Suppress EOI Broadcasts in commit 0bcc3fb95b97 ("KVM: lapic:
stop advertising DIRECTED_EOI when in-kernel IOAPIC is in use"), but
didn't do anything to remedy userspace I/O APIC implementations.
KVM's bogus handling of Suppress EOI Broadcast is problematic when the
guest relies on interrupts being masked in the I/O APIC until well after
the initial local APIC EOI. E.g. Windows with Credential Guard enabled
handles interrupts in the following order:
1. Interrupt for L2 arrives.
2. L1 APIC EOIs the interrupt.
3. L1 resumes L2 and injects the interrupt.
4. L2 EOIs after servicing.
5. L1 performs the I/O APIC EOI.
Because KVM EOIs the I/O APIC at step #2, the guest can get an interrupt
storm, e.g. if the IRQ line is still asserted and userspace reacts to the
EOI by re-injecting the IRQ, because the guest doesn't de-assert the line
until step #4, and doesn't expect the interrupt to be re-enabled until
step #5.
Unfortunately, simply "fixing" the bug isn't an option, as KVM has no way
of knowing if the userspace I/O APIC supports directed EOIs, i.e.
suppressing EOI broadcasts would result in interrupts being stuck masked
in the userspace I/O APIC due to step #5 being ignored by userspace. And
fully disabling support for Suppress EOI Broadcast is also undesirable, as
picking up the fix would require a guest reboot, *and* more importantly
would change the virtual CPU model exposed to the guest without any buy-in
from userspace.
Add KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST and
KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST flags to allow userspace to
explicitly enable or disable support for Suppress EOI Broadcasts. This
gives userspace control over the virtual CPU model exposed to the guest,
as KVM should never have enabled support for Suppress EOI Broadcast without
userspace opt-in. Not setting either flag will result in legacy quirky
behavior for backward compatibility.
When KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST is set and using in-kernel
IRQCHIP mode, KVM will use I/O APIC version 0x20, which includes support
for the EOI Register.
Note, Suppress EOI Broadcasts is defined only in Intel's SDM, not in AMD's
APM. But the bit is writable on some AMD CPUs, e.g. Turin, and KVM's ABI
is to support Directed EOI (KVM's name) irrespective of guest CPU vendor.
Fixes: 7543a635aa09 ("KVM: x86: Add KVM exit for IOAPIC EOIs")
Closes: https://lore.kernel.org/kvm/7D497EF1-607D-4D37-98E7-DAF95F099342@nutanix.com
Cc: stable(a)vger.kernel.org
Suggested-by: David Woodhouse <dwmw2(a)infradead.org>
Co-developed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Khushit Shah <khushit.shah(a)nutanix.com>
---
Documentation/virt/kvm/api.rst | 28 +++++++++++++--
arch/x86/include/asm/kvm_host.h | 7 ++++
arch/x86/include/uapi/asm/kvm.h | 6 ++--
arch/x86/kvm/lapic.c | 64 ++++++++++++++++++++++-----------
arch/x86/kvm/x86.c | 15 ++++++--
5 files changed, 93 insertions(+), 27 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 57061fa29e6a..ad15ca519afc 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7800,8 +7800,10 @@ Will return -EBUSY if a VCPU has already been created.
Valid feature flags in args[0] are::
- #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
- #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+ #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+ #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+ #define KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST (1ULL << 2)
+ #define KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST (1ULL << 3)
Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
@@ -7814,6 +7816,28 @@ as a broadcast even in x2APIC mode in order to support physical x2APIC
without interrupt remapping. This is undesirable in logical mode,
where 0xff represents CPUs 0-7 in cluster 0.
+Setting KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST instructs KVM to enable
+Suppress EOI Broadcasts. KVM will advertise support for Suppress EOI
+Broadcast to the guest and suppress LAPIC EOI broadcasts when the guest
+sets the Suppress EOI Broadcast bit in the SPIV register. When using
+in-kernel IRQCHIP mode, enabling this capability will cause KVM to use
+I/O APIC version 0x20, which includes support for the EOI Register for
+directed EOI.
+
+Setting KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST disables support for
+Suppress EOI Broadcasts entirely, i.e. instructs KVM to NOT advertise
+support to the guest.
+
+Modern VMMs should either enable KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST
+or KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST. If not, legacy quirky
+behavior will be used by KVM: in split IRQCHIP mode, KVM will advertise
+support for Suppress EOI Broadcasts but not actually suppress EOI
+broadcasts; for in-kernel IRQCHIP mode, KVM will not advertise support for
+Suppress EOI Broadcasts.
+
+Setting both KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST and
+KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST will fail with an EINVAL error.
+
7.8 KVM_CAP_S390_USER_INSTR0
----------------------------
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48598d017d6f..4a6d94dc7a2a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1229,6 +1229,12 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
+enum kvm_suppress_eoi_broadcast_mode {
+ KVM_SUPPRESS_EOI_BROADCAST_QUIRKED, /* Legacy behavior */
+ KVM_SUPPRESS_EOI_BROADCAST_ENABLED, /* Enable Suppress EOI broadcast */
+ KVM_SUPPRESS_EOI_BROADCAST_DISABLED /* Disable Suppress EOI broadcast */
+};
+
struct kvm_x86_msr_filter {
u8 count;
bool default_allow:1;
@@ -1480,6 +1486,7 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+ enum kvm_suppress_eoi_broadcast_mode suppress_eoi_broadcast_mode;
bool has_mapped_host_mmio;
bool guest_can_read_msr_platform_info;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d420c9c066d4..d30241429fa8 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -913,8 +913,10 @@ struct kvm_sev_snp_launch_finish {
__u64 pad1[4];
};
-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+#define KVM_X2APIC_API_USE_32BIT_IDS (_BITULL(0))
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (_BITULL(1))
+#define KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST (_BITULL(2))
+#define KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST (_BITULL(3))
struct kvm_hyperv_eventfd {
__u32 conn_id;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2c24fd8d815f..36a5af218802 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -107,21 +107,31 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
bool kvm_lapic_advertise_suppress_eoi_broadcast(struct kvm *kvm)
{
- /*
- * The default in-kernel I/O APIC emulates the 82093AA and does not
- * implement an EOI register. Some guests (e.g. Windows with the
- * Hyper-V role enabled) disable LAPIC EOI broadcast without checking
- * the I/O APIC version, which can cause level-triggered interrupts to
- * never be EOI'd.
- *
- * To avoid this, KVM must not advertise Suppress EOI Broadcast support
- * when using the default in-kernel I/O APIC.
- *
- * Historically, in split IRQCHIP mode, KVM always advertised Suppress
- * EOI Broadcast support but did not actually suppress EOIs, resulting
- * in quirky behavior.
- */
- return !ioapic_in_kernel(kvm);
+ switch (kvm->arch.suppress_eoi_broadcast_mode) {
+ case KVM_SUPPRESS_EOI_BROADCAST_ENABLED:
+ return true;
+ case KVM_SUPPRESS_EOI_BROADCAST_DISABLED:
+ return false;
+ case KVM_SUPPRESS_EOI_BROADCAST_QUIRKED:
+ /*
+ * The default in-kernel I/O APIC emulates the 82093AA and does not
+ * implement an EOI register. Some guests (e.g. Windows with the
+ * Hyper-V role enabled) disable LAPIC EOI broadcast without
+ * checking the I/O APIC version, which can cause level-triggered
+ * interrupts to never be EOI'd.
+ *
+ * To avoid this, KVM must not advertise Suppress EOI Broadcast
+ * support when using the default in-kernel I/O APIC.
+ *
+ * Historically, in split IRQCHIP mode, KVM always advertised
+ * Suppress EOI Broadcast support but did not actually suppress
+ * EOIs, resulting in quirky behavior.
+ */
+ return !ioapic_in_kernel(kvm);
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
}
bool kvm_lapic_respect_suppress_eoi_broadcast(struct kvm *kvm)
@@ -129,13 +139,25 @@ bool kvm_lapic_respect_suppress_eoi_broadcast(struct kvm *kvm)
/*
* Returns true if KVM should honor the guest's request to suppress EOI
* broadcasts, i.e. actually implement Suppress EOI Broadcast.
- *
- * Historically, in split IRQCHIP mode, KVM ignored the suppress EOI
- * broadcast bit set by the guest and broadcasts EOIs to the userspace
- * I/O APIC. For In-kernel I/O APIC, the support itself is not
- * advertised, but if bit was set by the guest, it was respected.
*/
- return ioapic_in_kernel(kvm);
+ switch (kvm->arch.suppress_eoi_broadcast_mode) {
+ case KVM_SUPPRESS_EOI_BROADCAST_ENABLED:
+ return true;
+ case KVM_SUPPRESS_EOI_BROADCAST_DISABLED:
+ return false;
+ case KVM_SUPPRESS_EOI_BROADCAST_QUIRKED:
+ /*
+ * Historically, in split IRQCHIP mode, KVM ignored the suppress
+ * EOI broadcast bit set by the guest and broadcasts EOIs to the
+ * userspace I/O APIC. For In-kernel I/O APIC, the support itself
+ * is not advertised, but if bit was set by the guest, it was
+ * respected.
+ */
+ return ioapic_in_kernel(kvm);
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
}
__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9c2aa6f4705..5d56b0384dcc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -121,8 +121,10 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE
-#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
- KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
+ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK | \
+ KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST | \
+ KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST)
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
@@ -6778,11 +6780,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
break;
+ if ((cap->args[0] & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST) &&
+ (cap->args[0] & KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST))
+ break;
+
if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
kvm->arch.x2apic_format = true;
if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
kvm->arch.x2apic_broadcast_quirk_disabled = true;
+ if (cap->args[0] & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST)
+ kvm->arch.suppress_eoi_broadcast_mode = KVM_SUPPRESS_EOI_BROADCAST_ENABLED;
+ if (cap->args[0] & KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST)
+ kvm->arch.suppress_eoi_broadcast_mode = KVM_SUPPRESS_EOI_BROADCAST_DISABLED;
+
r = 0;
break;
case KVM_CAP_X86_DISABLE_EXITS:
--
2.39.3
Hello stable maintainers,
Several Debian users reported a regression after updating to kernel
version 5.10.247.
Commit f0982400648a ("fbdev: Add bounds checking in bit_putcs to fix
vmalloc-out-of-bounds"), a backport of upstream commit 3637d34b35b2,
depends on vc_data::vc_font.charcount being initialised correctly.
However, before commit a1ac250a82a5 ("fbcon: Avoid using FNTCHARCNT()
and hard-coded built-in font charcount") in 5.11, this member was set
to 256 for VTs initially created with a built-in font and 0 for VTs
initially created with a user font.
Since Debian normally sets a user font before creating VTs 2 and up,
those additional VTs became unusable. VT 1 also doesn't work correctly
if the user font has > 256 characters, and the bounds check is
ineffective if it has < 256 characters.
This can be fixed by backporting the following commits from 5.11:
7a089ec7d77f console: Delete unused con_font_copy() callback implementations
259a252c1f4e console: Delete dummy con_font_set() and con_font_default() callback implementations
4ee573086bd8 Fonts: Add charcount field to font_desc
4497364e5f61 parisc/sticore: Avoid hard-coding built-in font charcount
a1ac250a82a5 fbcon: Avoid using FNTCHARCNT() and hard-coded built-in font charcount
These all apply without fuzz and builds cleanly for x86_64 and parisc64.
I tested on x86_64 that:
- VT 2 works again
- bit_putcs_aligned() is setting charcnt = 256
- After loading a font with 512 characters, bit_putcs_aligned() sets
charcnt = 512 and is able to display characters at positions >= 256
Ben.
--
Ben Hutchings
Man invented language to satisfy his deep need to complain.
- Lily Tomlin
With PWRSTS_OFF_ON, PCIe GDSCs are turned off during gdsc_disable(). This
can happen during scenarios such as system suspend and breaks the resume
of PCIe controllers from suspend.
So use PWRSTS_RET_ON to indicate the GDSC driver to not turn off the GDSCs
during gdsc_disable() and allow the hardware to transition the GDSCs to
retention when the parent domain enters low power state during system
suspend.
Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru(a)oss.qualcomm.com>
---
Krishna Chaitanya Chundru (7):
clk: qcom: gcc-sc7280: Do not turn off PCIe GDSCs during gdsc_disable()
clk: qcom: gcc-sa8775p: Do not turn off PCIe GDSCs during gdsc_disable()
clk: qcom: gcc-sm8750: Do not turn off PCIe GDSCs during gdsc_disable()
clk: qcom: gcc-glymur: Do not turn off PCIe GDSCs during gdsc_disable()
clk: qcom: gcc-qcs8300: Do not turn off PCIe GDSCs during gdsc_disable()
clk: qcom: gcc-x1e80100: Do not turn off PCIe GDSCs during gdsc_disable()
clk: qcom: gcc-kaanapali: Do not turn off PCIe GDSCs during gdsc_disable()
drivers/clk/qcom/gcc-glymur.c | 16 ++++++++--------
drivers/clk/qcom/gcc-kaanapali.c | 2 +-
drivers/clk/qcom/gcc-qcs8300.c | 4 ++--
drivers/clk/qcom/gcc-sa8775p.c | 4 ++--
drivers/clk/qcom/gcc-sc7280.c | 2 +-
drivers/clk/qcom/gcc-sm8750.c | 2 +-
drivers/clk/qcom/gcc-x1e80100.c | 16 ++++++++--------
7 files changed, 23 insertions(+), 23 deletions(-)
---
base-commit: 98e506ee7d10390b527aeddee7bbeaf667129646
change-id: 20260102-pci_gdsc_fix-1dcf08223922
Best regards,
--
Krishna Chaitanya Chundru <krishna.chundru(a)oss.qualcomm.com>
When bnxt_init_one() fails during initialization (e.g.,
bnxt_init_int_mode returns -ENODEV), the error path calls
bnxt_free_hwrm_resources() which destroys the DMA pool and sets
bp->hwrm_dma_pool to NULL. Subsequently, bnxt_ptp_clear() is called,
which invokes ptp_clock_unregister().
Since commit a60fc3294a37 ("ptp: rework ptp_clock_unregister() to
disable events"), ptp_clock_unregister() now calls
ptp_disable_all_events(), which in turn invokes the driver's .enable()
callback (bnxt_ptp_enable()) to disable PTP events before completing the
unregistration.
bnxt_ptp_enable() attempts to send HWRM commands via bnxt_ptp_cfg_pin()
and bnxt_ptp_cfg_event(), both of which call hwrm_req_init(). This
function tries to allocate from bp->hwrm_dma_pool, causing a NULL
pointer dereference:
bnxt_en 0000:01:00.0 (unnamed net_device) (uninitialized): bnxt_init_int_mode err: ffffffed
KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
Call Trace:
__hwrm_req_init (drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c:72)
bnxt_ptp_enable (drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:323 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:517)
ptp_disable_all_events (drivers/ptp/ptp_chardev.c:66)
ptp_clock_unregister (drivers/ptp/ptp_clock.c:518)
bnxt_ptp_clear (drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:1134)
bnxt_init_one (drivers/net/ethernet/broadcom/bnxt/bnxt.c:16889)
Lines are against commit f8f9c1f4d0c7 ("Linux 6.19-rc3")
Fix this by checking if bp->hwrm_dma_pool is NULL at the start of
bnxt_ptp_enable(). During error/cleanup paths when HWRM resources have
been freed, return success without attempting to send commands since the
hardware is being torn down anyway.
During normal operation, the DMA pool is always valid so PTP
functionality is unaffected.
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Fixes: a60fc3294a37 ("ptp: rework ptp_clock_unregister() to disable events")
Cc: stable(a)vger.kernel.org
---
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index a8a74f07bb54..a749bbfa398e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -482,6 +482,13 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info,
int pin_id;
int rc;
+ /* Return success if HWRM resources are not available.
+ * This can happen during error/cleanup paths when DMA pool has been
+ * freed.
+ */
+ if (!bp->hwrm_dma_pool)
+ return 0;
+
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
/* Configure an External PPS IN */
---
base-commit: 80380f6ce46f38a0d1200caade2f03de4b6c1d27
change-id: 20251231-bnxt-c54d317d8bfe
Best regards,
--
Breno Leitao <leitao(a)debian.org>
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 79f3f9bedd149ea438aaeb0fb6a083637affe205
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122903-sterile-from-4520@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 79f3f9bedd149ea438aaeb0fb6a083637affe205 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz(a)infradead.org>
Date: Wed, 2 Apr 2025 20:07:34 +0200
Subject: [PATCH] sched/eevdf: Fix min_vruntime vs avg_vruntime
Basically, from the constraint that the sum of lag is zero, you can
infer that the 0-lag point is the weighted average of the individual
vruntime, which is what we're trying to compute:
\Sum w_i * v_i
avg = --------------
\Sum w_i
Now, since vruntime takes the whole u64 (worse, it wraps), this
multiplication term in the numerator is not something we can compute;
instead we do the min_vruntime (v0 henceforth) thing like:
v_i = (v_i - v0) + v0
This does two things:
- it keeps the key: (v_i - v0) 'small';
- it creates a relative 0-point in the modular space.
If you do that subtitution and work it all out, you end up with:
\Sum w_i * (v_i - v0)
avg = --------------------- + v0
\Sum w_i
Since you cannot very well track a ratio like that (and not suffer
terrible numerical problems) we simpy track the numerator and
denominator individually and only perform the division when strictly
needed.
Notably, the numerator lives in cfs_rq->avg_vruntime and the denominator
lives in cfs_rq->avg_load.
The one extra 'funny' is that these numbers track the entities in the
tree, and current is typically outside of the tree, so avg_vruntime()
adds current when needed before doing the division.
(vruntime_eligible() elides the division by cross-wise multiplication)
Anyway, as mentioned above, we currently use the CFS era min_vruntime
for this purpose. However, this thing can only move forward, while the
above avg can in fact move backward (when a non-eligible task leaves,
the average becomes smaller), this can cause trouble when through
happenstance (or construction) these values drift far enough apart to
wreck the game.
Replace cfs_rq::min_vruntime with cfs_rq::zero_vruntime which is kept
near/at avg_vruntime, following its motion.
The down-side is that this requires computing the avg more often.
Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Reported-by: Zicheng Qu <quzicheng(a)huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Link: https://patch.msgid.link/20251106111741.GC4068168@noisy.programming.kicks-a…
Cc: stable(a)vger.kernel.org
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 02e16b70a790..41caa22e0680 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -796,7 +796,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
- s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, left_deadline = -1, spread;
+ s64 left_vruntime = -1, zero_vruntime, right_vruntime = -1, left_deadline = -1, spread;
struct sched_entity *last, *first, *root;
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
@@ -819,15 +819,15 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
last = __pick_last_entity(cfs_rq);
if (last)
right_vruntime = last->vruntime;
- min_vruntime = cfs_rq->min_vruntime;
+ zero_vruntime = cfs_rq->zero_vruntime;
raw_spin_rq_unlock_irqrestore(rq, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_deadline",
SPLIT_NS(left_deadline));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_vruntime",
SPLIT_NS(left_vruntime));
- SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
- SPLIT_NS(min_vruntime));
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "zero_vruntime",
+ SPLIT_NS(zero_vruntime));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "avg_vruntime",
SPLIT_NS(avg_vruntime(cfs_rq)));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "right_vruntime",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4a11a832d63e..8d971d48669f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -554,7 +554,7 @@ static inline bool entity_before(const struct sched_entity *a,
static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- return (s64)(se->vruntime - cfs_rq->min_vruntime);
+ return (s64)(se->vruntime - cfs_rq->zero_vruntime);
}
#define __node_2_se(node) \
@@ -606,13 +606,13 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* Which we track using:
*
- * v0 := cfs_rq->min_vruntime
+ * v0 := cfs_rq->zero_vruntime
* \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
* \Sum w_i := cfs_rq->avg_load
*
- * Since min_vruntime is a monotonic increasing variable that closely tracks
- * the per-task service, these deltas: (v_i - v), will be in the order of the
- * maximal (virtual) lag induced in the system due to quantisation.
+ * Since zero_vruntime closely tracks the per-task service, these
+ * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
+ * induced in the system due to quantisation.
*
* Also, we use scale_load_down() to reduce the size.
*
@@ -671,7 +671,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
avg = div_s64(avg, load);
}
- return cfs_rq->min_vruntime + avg;
+ return cfs_rq->zero_vruntime + avg;
}
/*
@@ -732,7 +732,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
load += weight;
}
- return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+ return avg >= (s64)(vruntime - cfs_rq->zero_vruntime) * load;
}
int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -740,42 +740,14 @@ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
return vruntime_eligible(cfs_rq, se->vruntime);
}
-static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
+static void update_zero_vruntime(struct cfs_rq *cfs_rq)
{
- u64 min_vruntime = cfs_rq->min_vruntime;
- /*
- * open coded max_vruntime() to allow updating avg_vruntime
- */
- s64 delta = (s64)(vruntime - min_vruntime);
- if (delta > 0) {
- avg_vruntime_update(cfs_rq, delta);
- min_vruntime = vruntime;
- }
- return min_vruntime;
-}
+ u64 vruntime = avg_vruntime(cfs_rq);
+ s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime);
-static void update_min_vruntime(struct cfs_rq *cfs_rq)
-{
- struct sched_entity *se = __pick_root_entity(cfs_rq);
- struct sched_entity *curr = cfs_rq->curr;
- u64 vruntime = cfs_rq->min_vruntime;
+ avg_vruntime_update(cfs_rq, delta);
- if (curr) {
- if (curr->on_rq)
- vruntime = curr->vruntime;
- else
- curr = NULL;
- }
-
- if (se) {
- if (!curr)
- vruntime = se->min_vruntime;
- else
- vruntime = min_vruntime(vruntime, se->min_vruntime);
- }
-
- /* ensure we never gain time by being placed backwards. */
- cfs_rq->min_vruntime = __update_min_vruntime(cfs_rq, vruntime);
+ cfs_rq->zero_vruntime = vruntime;
}
static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq)
@@ -848,6 +820,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
avg_vruntime_add(cfs_rq, se);
+ update_zero_vruntime(cfs_rq);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -859,6 +832,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
avg_vruntime_sub(cfs_rq, se);
+ update_zero_vruntime(cfs_rq);
}
struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1226,7 +1200,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
curr->vruntime += calc_delta_fair(delta_exec, curr);
resched = update_deadline(cfs_rq, curr);
- update_min_vruntime(cfs_rq);
if (entity_is_task(curr)) {
/*
@@ -3808,15 +3781,6 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
if (!curr)
__enqueue_entity(cfs_rq, se);
cfs_rq->nr_queued++;
-
- /*
- * The entity's vruntime has been adjusted, so let's check
- * whether the rq-wide min_vruntime needs updated too. Since
- * the calculations above require stable min_vruntime rather
- * than up-to-date one, we do the update at the end of the
- * reweight process.
- */
- update_min_vruntime(cfs_rq);
}
}
@@ -5429,15 +5393,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_cfs_group(se);
- /*
- * Now advance min_vruntime if @se was the entity holding it back,
- * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
- * put back on, and if we advance min_vruntime, we'll be placed back
- * further than we started -- i.e. we'll be penalized.
- */
- if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
- update_min_vruntime(cfs_rq);
-
if (flags & DEQUEUE_DELAYED)
finish_delayed_dequeue_entity(se);
@@ -9015,7 +8970,6 @@ static void yield_task_fair(struct rq *rq)
if (entity_eligible(cfs_rq, se)) {
se->vruntime = se->deadline;
se->deadline += calc_delta_fair(se->slice, se);
- update_min_vruntime(cfs_rq);
}
}
@@ -13078,23 +13032,6 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
* Which shows that S and s_i transform alike (which makes perfect sense
* given that S is basically the (weighted) average of s_i).
*
- * Then:
- *
- * x -> s_min := min{s_i} (8)
- *
- * to obtain:
- *
- * \Sum_i w_i (s_i - s_min)
- * S = s_min + ------------------------ (9)
- * \Sum_i w_i
- *
- * Which already looks familiar, and is the basis for our current
- * approximation:
- *
- * S ~= s_min (10)
- *
- * Now, obviously, (10) is absolute crap :-), but it sorta works.
- *
* So the thing to remember is that the above is strictly UP. It is
* possible to generalize to multiple runqueues -- however it gets really
* yuck when you have to add affinity support, as illustrated by our very
@@ -13116,23 +13053,23 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
* Let, for our runqueue 'k':
*
* T_k = \Sum_i w_i s_i
- * W_k = \Sum_i w_i ; for all i of k (11)
+ * W_k = \Sum_i w_i ; for all i of k (8)
*
* Then we can write (6) like:
*
* T_k
- * S_k = --- (12)
+ * S_k = --- (9)
* W_k
*
* From which immediately follows that:
*
* T_k + T_l
- * S_k+l = --------- (13)
+ * S_k+l = --------- (10)
* W_k + W_l
*
* On which we can define a combined lag:
*
- * lag_k+l(i) := S_k+l - s_i (14)
+ * lag_k+l(i) := S_k+l - s_i (11)
*
* And that gives us the tools to compare tasks across a combined runqueue.
*
@@ -13143,7 +13080,7 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
* using (7); this only requires storing single 'time'-stamps.
*
* b) when comparing tasks between 2 runqueues of which one is forced-idle,
- * compare the combined lag, per (14).
+ * compare the combined lag, per (11).
*
* Now, of course cgroups (I so hate them) make this more interesting in
* that a) seems to suggest we need to iterate all cgroup on a CPU at such
@@ -13191,12 +13128,11 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
* every tick. This limits the observed divergence due to the work
* conservancy.
*
- * On top of that, we can improve upon things by moving away from our
- * horrible (10) hack and moving to (9) and employing (13) here.
+ * On top of that, we can improve upon things by employing (10) here.
*/
/*
- * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed.
+ * se_fi_update - Update the cfs_rq->zero_vruntime_fi in a CFS hierarchy if needed.
*/
static void se_fi_update(const struct sched_entity *se, unsigned int fi_seq,
bool forceidle)
@@ -13210,7 +13146,7 @@ static void se_fi_update(const struct sched_entity *se, unsigned int fi_seq,
cfs_rq->forceidle_seq = fi_seq;
}
- cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime;
+ cfs_rq->zero_vruntime_fi = cfs_rq->zero_vruntime;
}
}
@@ -13263,11 +13199,11 @@ bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
/*
* Find delta after normalizing se's vruntime with its cfs_rq's
- * min_vruntime_fi, which would have been updated in prior calls
+ * zero_vruntime_fi, which would have been updated in prior calls
* to se_fi_update().
*/
delta = (s64)(sea->vruntime - seb->vruntime) +
- (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
+ (s64)(cfs_rqb->zero_vruntime_fi - cfs_rqa->zero_vruntime_fi);
return delta > 0;
}
@@ -13513,7 +13449,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
void init_cfs_rq(struct cfs_rq *cfs_rq)
{
cfs_rq->tasks_timeline = RB_ROOT_CACHED;
- cfs_rq->min_vruntime = (u64)(-(1LL << 20));
+ cfs_rq->zero_vruntime = (u64)(-(1LL << 20));
raw_spin_lock_init(&cfs_rq->removed.lock);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 82e74e8ca2ea..5a3cf81c27be 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -681,10 +681,10 @@ struct cfs_rq {
s64 avg_vruntime;
u64 avg_load;
- u64 min_vruntime;
+ u64 zero_vruntime;
#ifdef CONFIG_SCHED_CORE
unsigned int forceidle_seq;
- u64 min_vruntime_fi;
+ u64 zero_vruntime_fi;
#endif
struct rb_root_cached tasks_timeline;
Commit 9beeee6584b9aa4f ("USB: EHCI: log a warning if ehci-hcd is not
loaded first") said that ehci-hcd should be loaded before ohci-hcd and
uhci-hcd. However, commit 05c92da0c52494ca ("usb: ohci/uhci - add soft
dependencies on ehci_pci") only makes ohci-pci/uhci-pci depend on ehci-
pci, which is not enough and we may still see the warnings in boot log.
So fix it by also making ohci-hcd/uhci-hcd depend on ehci-hcd.
Cc: stable(a)vger.kernel.org
Reported-by: Shengwen Xiao <atzlinux(a)sina.com>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
drivers/usb/host/ohci-hcd.c | 1 +
drivers/usb/host/uhci-hcd.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 9c7f3008646e..549c965b7fbe 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -1355,4 +1355,5 @@ static void __exit ohci_hcd_mod_exit(void)
clear_bit(USB_OHCI_LOADED, &usb_hcds_loaded);
}
module_exit(ohci_hcd_mod_exit);
+MODULE_SOFTDEP("pre: ehci_hcd");
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index 14e6dfef16c6..e1a27d01bdbc 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -939,3 +939,4 @@ module_exit(uhci_hcd_cleanup);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: ehci_hcd");
--
2.47.3
Currently pivot_root() doesnt't work on the real rootfs because it
cannot be unmounted. Userspace has to do a recursive removal of the
initramfs contents manually before continuing the boot.
Really all we want from the real rootfs is to serve as the parent mount
for anything that is actually useful such as the tmpfs or ramfs for
initramfs unpacking or the rootfs itself. There's no need for the real
rootfs to actually be anything meaningful or useful. Add a immutable
rootfs that can be selected via the "immutable_rootfs" kernel command
line option.
The kernel will mount a tmpfs/ramfs on top of it, unpack the initramfs
and fire up userspace which mounts the rootfs and can then just do:
chdir(rootfs);
pivot_root(".", ".");
umount2(".", MNT_DETACH);
and be done with it. (Ofc, userspace can also choose to retain the
initramfs contents by using something like pivot_root(".", "/initramfs")
without unmounting it.)
Technically this also means that the rootfs mount in unprivileged
namespaces doesn't need to become MNT_LOCKED anymore as it's guaranteed
that the immutable rootfs remains permanently empty so there cannot be
anything revealed by unmounting the covering mount.
In the future this will also allow us to create completely empty mount
namespaces without risking to leak anything.
systemd already handles this all correctly as it tries to pivot_root()
first and falls back to MS_MOVE only when that fails.
This goes back to various discussion in previous years and a LPC 2024
presentation about this very topic.
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
---
Christian Brauner (3):
fs: ensure that internal tmpfs mount gets mount id zero
fs: add init_pivot_root()
fs: add immutable rootfs
fs/Makefile | 2 +-
fs/init.c | 17 ++++
fs/internal.h | 1 +
fs/mount.h | 1 +
fs/namespace.c | 181 +++++++++++++++++++++++++++++-------------
fs/rootfs.c | 65 +++++++++++++++
include/linux/init_syscalls.h | 1 +
include/uapi/linux/magic.h | 1 +
init/do_mounts.c | 13 ++-
init/do_mounts.h | 1 +
10 files changed, 223 insertions(+), 60 deletions(-)
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20260102-work-immutable-rootfs-b5f23e0f5a27
kstack_offset was previously maintained per-cpu, but this caused a
couple of issues. So let's instead make it per-task.
Issue 1: add_random_kstack_offset() and choose_random_kstack_offset()
expected and required to be called with interrupts and preemption
disabled so that it could manipulate per-cpu state. But arm64, loongarch
and risc-v are calling them with interrupts and preemption enabled. I
don't _think_ this causes any functional issues, but it's certainly
unexpected and could lead to manipulating the wrong cpu's state, which
could cause a minor performance degradation due to bouncing the cache
lines. By maintaining the state per-task those functions can safely be
called in preemptible context.
Issue 2: add_random_kstack_offset() is called before executing the
syscall and expands the stack using a previously chosen rnadom offset.
choose_random_kstack_offset() is called after executing the syscall and
chooses and stores a new random offset for the next syscall. With
per-cpu storage for this offset, an attacker could force cpu migration
during the execution of the syscall and prevent the offset from being
updated for the original cpu such that it is predictable for the next
syscall on that cpu. By maintaining the state per-task, this problem
goes away because the per-task random offset is updated after the
syscall regardless of which cpu it is executing on.
Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/
Cc: stable(a)vger.kernel.org
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
---
include/linux/randomize_kstack.h | 26 +++++++++++++++-----------
include/linux/sched.h | 4 ++++
init/main.c | 1 -
kernel/fork.c | 2 ++
4 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 1d982dbdd0d0..5d3916ca747c 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -9,7 +9,6 @@
DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
randomize_kstack_offset);
-DECLARE_PER_CPU(u32, kstack_offset);
/*
* Do not use this anywhere else in the kernel. This is used here because
@@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
* add_random_kstack_offset - Increase stack utilization by previously
* chosen random offset
*
- * This should be used in the syscall entry path when interrupts and
- * preempt are disabled, and after user registers have been stored to
- * the stack. For testing the resulting entropy, please see:
- * tools/testing/selftests/lkdtm/stack-entropy.sh
+ * This should be used in the syscall entry path after user registers have been
+ * stored to the stack. Preemption may be enabled. For testing the resulting
+ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
*/
#define add_random_kstack_offset() do { \
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
- u32 offset = raw_cpu_read(kstack_offset); \
+ u32 offset = current->kstack_offset; \
u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \
/* Keep allocation even after "ptr" loses scope. */ \
asm volatile("" :: "r"(ptr) : "memory"); \
@@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset);
* choose_random_kstack_offset - Choose the random offset for the next
* add_random_kstack_offset()
*
- * This should only be used during syscall exit when interrupts and
- * preempt are disabled. This position in the syscall flow is done to
- * frustrate attacks from userspace attempting to learn the next offset:
+ * This should only be used during syscall exit. Preemption may be enabled. This
+ * position in the syscall flow is done to frustrate attacks from userspace
+ * attempting to learn the next offset:
* - Maximize the timing uncertainty visible from userspace: if the
* offset is chosen at syscall entry, userspace has much more control
* over the timing between choosing offsets. "How long will we be in
@@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
#define choose_random_kstack_offset(rand) do { \
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
- u32 offset = raw_cpu_read(kstack_offset); \
+ u32 offset = current->kstack_offset; \
offset = ror32(offset, 5) ^ (rand); \
- raw_cpu_write(kstack_offset, offset); \
+ current->kstack_offset = offset; \
} \
} while (0)
+
+static inline void random_kstack_task_init(struct task_struct *tsk)
+{
+ tsk->kstack_offset = 0;
+}
#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
#define add_random_kstack_offset() do { } while (0)
#define choose_random_kstack_offset(rand) do { } while (0)
+#define random_kstack_task_init(tsk) do { } while (0)
#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d395f2810fac..9e0080ed1484 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1591,6 +1591,10 @@ struct task_struct {
unsigned long prev_lowest_stack;
#endif
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+ u32 kstack_offset;
+#endif
+
#ifdef CONFIG_X86_MCE
void __user *mce_vaddr;
__u64 mce_kflags;
diff --git a/init/main.c b/init/main.c
index b84818ad9685..27fcbbde933e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -830,7 +830,6 @@ static inline void initcall_debug_enable(void)
#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
randomize_kstack_offset);
-DEFINE_PER_CPU(u32, kstack_offset);
static int __init early_randomize_kstack_offset(char *buf)
{
diff --git a/kernel/fork.c b/kernel/fork.c
index b1f3915d5f8e..b061e1edbc43 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -95,6 +95,7 @@
#include <linux/thread_info.h>
#include <linux/kstack_erase.h>
#include <linux/kasan.h>
+#include <linux/randomize_kstack.h>
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
@@ -2231,6 +2232,7 @@ __latent_entropy struct task_struct *copy_process(
if (retval)
goto bad_fork_cleanup_io;
+ random_kstack_task_init(p);
stackleak_task_init(p);
if (pid != &init_struct_pid) {
--
2.43.0
Commit 7f9ab862e05c ("leds: spi-byte: Call of_node_put() on error path")
was merged in 6.11 and then backported to stable trees through 5.10. It
relocates the line that initializes the variable 'child' to a later
point in spi_byte_probe().
Versions < 6.9 do not have commit ccc35ff2fd29 ("leds: spi-byte: Use
devm_led_classdev_register_ext()"), which removes a line that reads a
property from 'child' before its new initialization point. Consequently,
spi_byte_probe() reads from an uninitialized device node in stable
kernels 6.6-5.10.
Initialize 'child' before it is first accessed.
Fixes: 7f9ab862e05c ("leds: spi-byte: Call of_node_put() on error path")
Signed-off-by: Tiffany Yang <ynaffit(a)google.com>
---
As an alternative to moving the initialization of 'child' up,
Fedor Pchelkin proposed [1] backporting the change that removes the
intermediate access.
[1] https://lore.kernel.org/stable/20241029204128.527033-1-pchelkin@ispras.ru/
---
drivers/leds/leds-spi-byte.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/leds/leds-spi-byte.c b/drivers/leds/leds-spi-byte.c
index afe9bff7c7c1..4520df1e2341 100644
--- a/drivers/leds/leds-spi-byte.c
+++ b/drivers/leds/leds-spi-byte.c
@@ -96,6 +96,7 @@ static int spi_byte_probe(struct spi_device *spi)
if (!led)
return -ENOMEM;
+ child = of_get_next_available_child(dev_of_node(dev), NULL);
of_property_read_string(child, "label", &name);
strscpy(led->name, name, sizeof(led->name));
led->spi = spi;
@@ -106,7 +107,6 @@ static int spi_byte_probe(struct spi_device *spi)
led->ldev.max_brightness = led->cdef->max_value - led->cdef->off_value;
led->ldev.brightness_set_blocking = spi_byte_brightness_set_blocking;
- child = of_get_next_available_child(dev_of_node(dev), NULL);
state = of_get_property(child, "default-state", NULL);
if (state) {
if (!strcmp(state, "on")) {
--
2.52.0.351.gbe84eed79e-goog
The soundwire update_status() callback may be called multiple times with
the same ATTACHED status but initialisation should only be done when
transitioning from UNATTACHED to ATTACHED.
Fix the inverted hw_init flag which was set to false instead of true
after initialisation which defeats its purpose and may result in
repeated unnecessary initialisation.
Similarly, the initial state of the flag was also inverted so that the
codec would only be initialised and brought out of regmap cache only
mode if its status first transitions to UNATTACHED.
Fixes: aa21a7d4f68a ("ASoC: codecs: wsa884x: Add WSA884x family of speakers")
Cc: stable(a)vger.kernel.org # 6.5
Cc: Krzysztof Kozlowski <krzk(a)kernel.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
sound/soc/codecs/wsa884x.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c
index 887edd2be705..6c6b497657d0 100644
--- a/sound/soc/codecs/wsa884x.c
+++ b/sound/soc/codecs/wsa884x.c
@@ -1534,7 +1534,7 @@ static void wsa884x_init(struct wsa884x_priv *wsa884x)
wsa884x_set_gain_parameters(wsa884x);
- wsa884x->hw_init = false;
+ wsa884x->hw_init = true;
}
static int wsa884x_update_status(struct sdw_slave *slave,
@@ -2109,7 +2109,6 @@ static int wsa884x_probe(struct sdw_slave *pdev,
/* Start in cache-only until device is enumerated */
regcache_cache_only(wsa884x->regmap, true);
- wsa884x->hw_init = true;
if (IS_REACHABLE(CONFIG_HWMON)) {
struct device *hwmon;
--
2.51.2
The patch below does not apply to the 6.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.18.y
git checkout FETCH_HEAD
git cherry-pick -x 8a0e4bdddd1c998b894d879a1d22f1e745606215
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122925-victory-numeral-2346@gregkh' --subject-prefix 'PATCH 6.18.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8a0e4bdddd1c998b894d879a1d22f1e745606215 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang(a)gmail.com>
Date: Thu, 6 Nov 2025 03:41:55 +0000
Subject: [PATCH] mm/huge_memory: merge uniform_split_supported() and
non_uniform_split_supported()
uniform_split_supported() and non_uniform_split_supported() share
significantly similar logic.
The only functional difference is that uniform_split_supported() includes
an additional check on the requested @new_order.
The reason for this check comes from the following two aspects:
* some file system or swap cache just supports order-0 folio
* the behavioral difference between uniform/non-uniform split
The behavioral difference between uniform split and non-uniform:
* uniform split splits folio directly to @new_order
* non-uniform split creates after-split folios with orders from
folio_order(folio) - 1 to new_order.
This means for non-uniform split or !new_order split we should check the
file system and swap cache respectively.
This commit unifies the logic and merge the two functions into a single
combined helper, removing redundant code and simplifying the split
support checking mechanism.
Link: https://lkml.kernel.org/r/20251106034155.21398-3-richard.weiyang@gmail.com
Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
Signed-off-by: Wei Yang <richard.weiyang(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: "David Hildenbrand (Red Hat)" <david(a)kernel.org>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Lance Yang <lance.yang(a)linux.dev>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Nico Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b74708dc5b5f..19d4a5f52ca2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -374,10 +374,8 @@ int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list
unsigned int new_order, bool unmapped);
int min_order_for_split(struct folio *folio);
int split_folio_to_list(struct folio *folio, struct list_head *list);
-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns);
-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns);
+bool folio_split_supported(struct folio *folio, unsigned int new_order,
+ enum split_type split_type, bool warns);
int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
struct list_head *list);
@@ -408,7 +406,7 @@ static inline int split_huge_page_to_order(struct page *page, unsigned int new_o
static inline int try_folio_split_to_order(struct folio *folio,
struct page *page, unsigned int new_order)
{
- if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
+ if (!folio_split_supported(folio, new_order, SPLIT_TYPE_NON_UNIFORM, /* warns= */ false))
return split_huge_page_to_order(&folio->page, new_order);
return folio_split(folio, new_order, page, NULL);
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4118f330c55e..d79a4bb363de 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3593,8 +3593,8 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
return 0;
}
-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns)
+bool folio_split_supported(struct folio *folio, unsigned int new_order,
+ enum split_type split_type, bool warns)
{
if (folio_test_anon(folio)) {
/* order-1 is not supported for anonymous THP. */
@@ -3602,48 +3602,41 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
"Cannot split to order-1 folio");
if (new_order == 1)
return false;
- } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
- !mapping_large_folio_support(folio->mapping)) {
- /*
- * No split if the file system does not support large folio.
- * Note that we might still have THPs in such mappings due to
- * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
- * does not actually support large folios properly.
- */
- VM_WARN_ONCE(warns,
- "Cannot split file folio to non-0 order");
- return false;
- }
-
- /* Only swapping a whole PMD-mapped folio is supported */
- if (folio_test_swapcache(folio)) {
- VM_WARN_ONCE(warns,
- "Cannot split swapcache folio to non-0 order");
- return false;
- }
-
- return true;
-}
-
-/* See comments in non_uniform_split_supported() */
-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns)
-{
- if (folio_test_anon(folio)) {
- VM_WARN_ONCE(warns && new_order == 1,
- "Cannot split to order-1 folio");
- if (new_order == 1)
- return false;
- } else if (new_order) {
+ } else if (split_type == SPLIT_TYPE_NON_UNIFORM || new_order) {
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
+ /*
+ * We can always split a folio down to a single page
+ * (new_order == 0) uniformly.
+ *
+ * For any other scenario
+ * a) uniform split targeting a large folio
+ * (new_order > 0)
+ * b) any non-uniform split
+ * we must confirm that the file system supports large
+ * folios.
+ *
+ * Note that we might still have THPs in such
+ * mappings, which is created from khugepaged when
+ * CONFIG_READ_ONLY_THP_FOR_FS is enabled. But in that
+ * case, the mapping does not actually support large
+ * folios properly.
+ */
VM_WARN_ONCE(warns,
"Cannot split file folio to non-0 order");
return false;
}
}
- if (new_order && folio_test_swapcache(folio)) {
+ /*
+ * swapcache folio could only be split to order 0
+ *
+ * non-uniform split creates after-split folios with orders from
+ * folio_order(folio) - 1 to new_order, making it not suitable for any
+ * swapcache folio split. Only uniform split to order-0 can be used
+ * here.
+ */
+ if ((split_type == SPLIT_TYPE_NON_UNIFORM || new_order) && folio_test_swapcache(folio)) {
VM_WARN_ONCE(warns,
"Cannot split swapcache folio to non-0 order");
return false;
@@ -3711,11 +3704,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
if (new_order >= old_order)
return -EINVAL;
- if (split_type == SPLIT_TYPE_UNIFORM && !uniform_split_supported(folio, new_order, true))
- return -EINVAL;
-
- if (split_type == SPLIT_TYPE_NON_UNIFORM &&
- !non_uniform_split_supported(folio, new_order, true))
+ if (!folio_split_supported(folio, new_order, split_type, /* warn = */ true))
return -EINVAL;
is_hzp = is_huge_zero_folio(folio);
Hi Greg, Sasha,
Please consider commit ba1b40ed0e34 ("drm: nova: depend on
CONFIG_64BIT") for 6.18.y. It should cherry-pick cleanly.
Without this commit, one can create a config where `CONFIG_DRM_NOVA`
selects `CONFIG_NOVA_CORE` without satisfying its `CONFIG_64BIT`
dependency.
In turn, this means arm32 builds can fail -- Kconfig warns:
WARNING: unmet direct dependencies detected for NOVA_CORE
Depends on [n]: HAS_IOMEM [=y] && 64BIT && PCI [=y] && RUST [=y]
&& RUST_FW_LOADER_ABSTRACTIONS [=y]
Selected by [y]:
- DRM_NOVA [=y] && HAS_IOMEM [=y] && DRM [=y]=y [=y] && PCI [=y]
&& RUST [=y]
And then the build fails with (among others, see the related commit
5c5a41a75452 ("gpu: nova-core: depend on CONFIG_64BIT") for more):
error[E0308]: mismatched types
--> drivers/gpu/nova-core/fb.rs:50:59
|
50 | hal::fb_hal(chipset).write_sysmem_flush_page(bar,
page.dma_handle())?;
| -----------------------
^^^^^^^^^^^^^^^^^ expected `u64`, found `u32`
| |
| arguments to this method are incorrect
|
Cc'ing Danilo and Alexandre so that they can confirm they agree.
Thanks!
Cheers,
Miguel
Hi -
a Gentoo user recently found that 6.18.2 started to reproducuibly
crash when building their go toolchain [1].
Apparently the addition of "sched/fair: Forfeit vruntime on yield"
(mainline 79104becf42b) can result in the infamous NULL returned from
pick_eevdf(), which is not supposed to happen.
It turned out that the mentioned commit triggered a bug related
to the recently added proxy execution feature, which was already
fixed in mainline by "sched/proxy: Yield the donor task"
(127b90315ca0), though not marked for stable.
Applying this to 6.18.2/.3-rc1 (and probably 6.12 as well)
has reproducibly fixed the problem. A possible reason the crash
was triggered by the Go runtime could be its specific use of yield(),
though that's just speculation on my part.
So please add 127b90315ca0 ("sched/proxy: Yield the donor task")
to 6.18.y/6.12.y. I know we're already in 6.18.3-rc1, but the
crasher seems reproducible.
Fernand, please correct me if I got the explanations wrong.
Thanks!
Holger
[1] https://bugs.gentoo.org/968116 starting at #8
The reference obtained by calling usb_get_dev() is not released in the
gpio_mpsse_probe() error paths. Fix that by using device managed helper
functions. Also remove the usb_put_dev() call in the disconnect function
since now it will be released automatically.
Cc: stable(a)vger.kernel.org
Fixes: c46a74ff05c0 ("gpio: add support for FTDI's MPSSE as GPIO")
Signed-off-by: Abdun Nihaal <nihaal(a)cse.iitm.ac.in>
---
Compile tested only. Not tested on real hardware.
v1->v2:
- Switched to use devm_add_action_or_reset() to avoid unnecessary gotos,
as suggested by Bartosz Golaszewski.
Link to v1: https://lore.kernel.org/all/20251223065306.131008-1-nihaal@cse.iitm.ac.in/
drivers/gpio/gpio-mpsse.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/gpio/gpio-mpsse.c b/drivers/gpio/gpio-mpsse.c
index ace652ba4df1..12191aeb6566 100644
--- a/drivers/gpio/gpio-mpsse.c
+++ b/drivers/gpio/gpio-mpsse.c
@@ -548,6 +548,13 @@ static void gpio_mpsse_ida_remove(void *data)
ida_free(&gpio_mpsse_ida, priv->id);
}
+static void gpio_mpsse_usb_put_dev(void *data)
+{
+ struct mpsse_priv *priv = data;
+
+ usb_put_dev(priv->udev);
+}
+
static int mpsse_init_valid_mask(struct gpio_chip *chip,
unsigned long *valid_mask,
unsigned int ngpios)
@@ -592,6 +599,10 @@ static int gpio_mpsse_probe(struct usb_interface *interface,
INIT_LIST_HEAD(&priv->workers);
priv->udev = usb_get_dev(interface_to_usbdev(interface));
+ err = devm_add_action_or_reset(dev, gpio_mpsse_usb_put_dev, priv);
+ if (err)
+ return err;
+
priv->intf = interface;
priv->intf_id = interface->cur_altsetting->desc.bInterfaceNumber;
@@ -713,7 +724,6 @@ static void gpio_mpsse_disconnect(struct usb_interface *intf)
priv->intf = NULL;
usb_set_intfdata(intf, NULL);
- usb_put_dev(priv->udev);
}
static struct usb_driver gpio_mpsse_driver = {
--
2.43.0
Hi all,
This backport wires up AMD perfmon v2 so BPF and other software clients
can snapshot LBR stacks on demand, similar to the Intel support
upstream. The series keeps the LBR-freeze path branchless, adds the
perf_snapshot_branch_stack callback for AMD, and drops the
sampling-only restriction now that snapshots can be taken from software
contexts.
Leon Hwang (4):
perf/x86/amd: Ensure amd_pmu_core_disable_all() is always inlined
perf/x86/amd: Avoid taking branches before disabling LBR
perf/x86/amd: Support capturing LBR from software events
perf/x86/amd: Don't reject non-sampling events with configured LBR
arch/x86/events/amd/core.c | 37 +++++++++++++++++++++++++++++++++++-
arch/x86/events/amd/lbr.c | 13 +------------
arch/x86/events/perf_event.h | 13 +++++++++++++
3 files changed, 50 insertions(+), 13 deletions(-)
--
2.52.0
Hi and happy new year!
I would like to report a problem that I am encountering with the sdcard
storage.
I have a laptop/convertible where /home is on an sd-card (because the
internal disk is too small). The card is luks encrypted and has a btrfs
filesystem on it.
When the laptop sleeps and then resumes, there is a race. The sdcard
gets accessed for read/write but is not yet ready, so there are I/O
errors. BTRFS is not happy with them and tends to remount RO.
This issue is well known to purism developers (e.g. see
https://source.puri.sm/Librem5/linux/-/issues/484 and
https://forums.puri.sm/t/sdcard-becomes-read-only-after-waking-up-from-susp…).
My kernel logs are identical to those in
https://source.puri.sm/Librem5/linux/-/issues/484 (first comment), apart
from the fact that I get the errors from BTRFS, while the reporter there
gets the errors from EXT4. This indicates that the race is not specific
to BTRFS.
The errors in the kernel logs come right after the PM: suspend exit message.
From what I understand:
1. The error is more frequent with the SD-LUKS-FILESYSTEM
stratification, but not specific to it
2. A phone/tablet set up such as those that purism developers address
will generally use sdcard for storage and require suspend, being a good
trigger for the problem. However, the problem is in no means specific to
phones, ARM devices, etc. I am getting it on an X86-64 laptop.
3. It is unclear to me if there is a real risk of data loss. Possibly
with BTRFS that has a more complex data management this can be the case.
4.Even without data loss, the issue requires a reboot to get the
filesystem back to RW, so it is annoying.
5. Purism developers have a kernel patch for it at
https://source.puri.sm/Librem5/linux/-/merge_requests/788, but I believe
it is not in linux mainline or stable. Would it make sense to consider
that patch?
6. For stable kernels, there is a mitigation consisting in a systemd
sleep-resume hook as in
#!/bin/sh
/usr/bin/systemd-cat -p5 /usr/bin/echo ${1} ${2}
case "${1}" in
post)
sleep 1.5
systemd-cat -p4 /usr/bin/echo "hack, wait for sdcard"
;;
esac
see https://source.puri.sm/Librem5/linux/-/issues/484#note_277648
This appears to reduce the occurrence of the problem, but not to
eliminate it completely.
Thanks for the attention
Sergio
The issue occurs when gfs2_freeze_lock_shared() fails in
gfs2_fill_super(). If !sb_rdonly(sb), threads for the quotad and logd
were started, however, in the error path for gfs2_freeze_lock_shared(),
the threads are not stopped by gfs2_destroy_threads() before jumping to
fail_per_node.
Introduce fail_threads to handle stopping the threads if the threads were
started.
Reported-by: syzbot+4cb0d0336db6bc6930e9(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=4cb0d0336db6bc6930e9
Fixes: a28dc123fa66 ("gfs2: init system threads before freeze lock")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ryota Sakamoto <sakamo.ryota(a)gmail.com>
---
Changes in v2:
- Fix commit message style (imperative mood) as suggested by Markus Elfring.
- Add parentheses to function name in subject as suggested by Markus Elfring.
- Link to v1: https://lore.kernel.org/r/20251230-fix-use-after-free-gfs2-v1-1-ef0e46db6ec…
---
fs/gfs2/ops_fstype.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e7a88b717991ae3647c1da039636daef7005a7f0..4b5ac1a7050f1fd34e10be4100a2bc381f49c83d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1269,21 +1269,23 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
error = gfs2_freeze_lock_shared(sdp);
if (error)
- goto fail_per_node;
+ goto fail_threads;
if (!sb_rdonly(sb))
error = gfs2_make_fs_rw(sdp);
if (error) {
gfs2_freeze_unlock(sdp);
- gfs2_destroy_threads(sdp);
fs_err(sdp, "can't make FS RW: %d\n", error);
- goto fail_per_node;
+ goto fail_threads;
}
gfs2_glock_dq_uninit(&mount_gh);
gfs2_online_uevent(sdp);
return 0;
+fail_threads:
+ if (!sb_rdonly(sb))
+ gfs2_destroy_threads(sdp);
fail_per_node:
init_per_node(sdp, UNDO);
fail_inodes:
---
base-commit: 7839932417dd53bb09eb5a585a7a92781dfd7cb2
change-id: 20251230-fix-use-after-free-gfs2-66cfbe23baa8
Best regards,
--
Ryota Sakamoto <sakamo.ryota(a)gmail.com>
Dear,
Please grant me the permission to share important discussions with you. I
am looking forward to hearing from you at your earliest convenience.
Best Regards.
Mrs. Mariam Kouame
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x f5e1e5ec204da11fa87fdf006d451d80ce06e118
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122951-giggle-reveler-2e6b@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f5e1e5ec204da11fa87fdf006d451d80ce06e118 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj(a)kernel.org>
Date: Thu, 11 Dec 2025 15:45:04 -1000
Subject: [PATCH] sched_ext: Fix missing post-enqueue handling in
move_local_task_to_local_dsq()
move_local_task_to_local_dsq() is used when moving a task from a non-local
DSQ to a local DSQ on the same CPU. It directly manipulates the local DSQ
without going through dispatch_enqueue() and was missing the post-enqueue
handling that triggers preemption when SCX_ENQ_PREEMPT is set or the idle
task is running.
The function is used by move_task_between_dsqs() which backs
scx_bpf_dsq_move() and may be called while the CPU is busy.
Add local_dsq_post_enq() call to move_local_task_to_local_dsq(). As the
dispatch path doesn't need post-enqueue handling, add SCX_RQ_IN_BALANCE
early exit to keep consume_dispatch_q() behavior unchanged and avoid
triggering unnecessary resched when scx_bpf_dsq_move() is used from the
dispatch path.
Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()")
Cc: stable(a)vger.kernel.org # v6.12+
Reviewed-by: Andrea Righi <arighi(a)nvidia.com>
Reviewed-by: Emil Tsalapatis <emil(a)etsalapatis.com>
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index c78efa99406f..695503a2f7d1 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -988,6 +988,14 @@ static void local_dsq_post_enq(struct scx_dispatch_q *dsq, struct task_struct *p
struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);
bool preempt = false;
+ /*
+ * If @rq is in balance, the CPU is already vacant and looking for the
+ * next task to run. No need to preempt or trigger resched after moving
+ * @p into its local DSQ.
+ */
+ if (rq->scx.flags & SCX_RQ_IN_BALANCE)
+ return;
+
if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr &&
rq->curr->sched_class == &ext_sched_class) {
rq->curr->scx.slice = 0;
@@ -1636,6 +1644,8 @@ static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
dsq_mod_nr(dst_dsq, 1);
p->scx.dsq = dst_dsq;
+
+ local_dsq_post_enq(dst_dsq, p, enq_flags);
}
/**
When software issues a Cache Maintenance Operation (CMO) targeting a
dirty cache line, the CPU and DSU cluster may optimize the operation by
combining the CopyBack Write and CMO into a single combined CopyBack
Write plus CMO transaction presented to the interconnect (MCN).
For these combined transactions, the MCN splits the operation into two
separate transactions, one Write and one CMO, and then propagates the
write and optionally the CMO to the downstream memory system or external
Point of Serialization (PoS).
However, the MCN may return an early CompCMO response to the DSU cluster
before the corresponding Write and CMO transactions have completed at
the external PoS or downstream memory. As a result, stale data may be
observed by external observers that are directly connected to the
external PoS or downstream memory.
This erratum affects any system topology in which the following
conditions apply:
- The Point of Serialization (PoS) is located downstream of the
interconnect.
- A downstream observer accesses memory directly, bypassing the
interconnect.
Conditions:
This erratum occurs only when all of the following conditions are met:
1. Software executes a data cache maintenance operation, specifically,
a clean or invalidate by virtual address (DC CVAC, DC CIVAC, or DC
IVAC), that hits on unique dirty data in the CPU or DSU cache. This
results in a combined CopyBack and CMO being issued to the
interconnect.
2. The interconnect splits the combined transaction into separate Write
and CMO transactions and returns an early completion response to the
CPU or DSU before the write has completed at the downstream memory
or PoS.
3. A downstream observer accesses the affected memory address after the
early completion response is issued but before the actual memory
write has completed. This allows the observer to read stale data
that has not yet been updated at the PoS or downstream memory.
The implementation of workaround put a second loop of CMOs at the same
virtual address whose operation meet erratum conditions to wait until
cache data be cleaned to PoC.. This way of implementation mitigates
performance panalty compared to purly duplicate orignial CMO.
Reported-by: kernel test robot <lkp(a)intel.com>
Cc: stable(a)vger.kernel.org # 6.12.x
Signed-off-by: Lucas Wei <lucaswei(a)google.com>
---
Changes in v2:
1. Fixed warning from kernel test robot by changing
arm_si_l1_workaround_4311569 to static
[Reported-by: kernel test robot <lkp(a)intel.com>]
---
Documentation/arch/arm64/silicon-errata.rst | 3 ++
arch/arm64/Kconfig | 19 +++++++++++++
arch/arm64/include/asm/assembler.h | 10 +++++++
arch/arm64/kernel/cpu_errata.c | 31 +++++++++++++++++++++
arch/arm64/mm/cache.S | 13 ++++++++-
arch/arm64/tools/cpucaps | 1 +
6 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index a7ec57060f64..98efdf528719 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -213,6 +213,9 @@ stable kernels.
| ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | SI L1 | #4311569 | ARM64_ERRATUM_4311569 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
+----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 65db12f66b8f..a834d30859cc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1153,6 +1153,25 @@ config ARM64_ERRATUM_3194386
If unsure, say Y.
+config ARM64_ERRATUM_4311569
+ bool "SI L1: 4311569: workaround for premature CMO completion erratum"
+ default y
+ help
+ This option adds the workaround for ARM SI L1 erratum 4311569.
+
+ The erratum of SI L1 can cause an early response to a combined write
+ and cache maintenance operation (WR+CMO) before the operation is fully
+ completed to the Point of Serialization (POS).
+ This can result in a non-I/O coherent agent observing stale data,
+ potentially leading to system instability or incorrect behavior.
+
+ Enabling this option implements a software workaround by inserting a
+ second loop of Cache Maintenance Operation (CMO) immediately following the
+ end of function to do CMOs. This ensures that the data is correctly serialized
+ before the buffer is handed off to a non-coherent agent.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index f0ca7196f6fa..d3d46e5f7188 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -381,6 +381,9 @@ alternative_endif
.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
sub \tmp, \linesz, #1
bic \start, \start, \tmp
+alternative_if ARM64_WORKAROUND_4311569
+ mov \tmp, \start
+alternative_else_nop_endif
.Ldcache_op\@:
.ifc \op, cvau
__dcache_op_workaround_clean_cache \op, \start
@@ -402,6 +405,13 @@ alternative_endif
add \start, \start, \linesz
cmp \start, \end
b.lo .Ldcache_op\@
+alternative_if ARM64_WORKAROUND_4311569
+ .ifnc \op, cvau
+ mov \start, \tmp
+ mov \tmp, xzr
+ cbnz \start, .Ldcache_op\@
+ .endif
+alternative_else_nop_endif
dsb \domain
_cond_uaccess_extable .Ldcache_op\@, \fixup
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 8cb3b575a031..5c0ab6bfd44a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -141,6 +141,30 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
return (ctr_real != sys) && (ctr_raw != sys);
}
+#ifdef CONFIG_ARM64_ERRATUM_4311569
+static DEFINE_STATIC_KEY_FALSE(arm_si_l1_workaround_4311569);
+static int __init early_arm_si_l1_workaround_4311569_cfg(char *arg)
+{
+ static_branch_enable(&arm_si_l1_workaround_4311569);
+ pr_info("Enabling cache maintenance workaround for ARM SI-L1 erratum 4311569\n");
+
+ return 0;
+}
+early_param("arm_si_l1_workaround_4311569", early_arm_si_l1_workaround_4311569_cfg);
+
+/*
+ * We have some earlier use cases to call cache maintenance operation functions, for example,
+ * dcache_inval_poc() and dcache_clean_poc() in head.S, before making decision to turn on this
+ * workaround. Since the scope of this workaround is limited to non-coherent DMA agents, its
+ * safe to have the workaround off by default.
+ */
+static bool
+need_arm_si_l1_workaround_4311569(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ return static_branch_unlikely(&arm_si_l1_workaround_4311569);
+}
+#endif
+
static void
cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
{
@@ -870,6 +894,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_4311569
+ {
+ .capability = ARM64_WORKAROUND_4311569,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = need_arm_si_l1_workaround_4311569,
+ },
+#endif
#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
{
.desc = "ARM errata 2966298, 3117295",
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 503567c864fd..ddf0097624ed 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -143,9 +143,14 @@ SYM_FUNC_END(dcache_clean_pou)
* - end - kernel end address of region
*/
SYM_FUNC_START(__pi_dcache_inval_poc)
+alternative_if ARM64_WORKAROUND_4311569
+ mov x4, x0
+ mov x5, x1
+ mov x6, #1
+alternative_else_nop_endif
dcache_line_size x2, x3
sub x3, x2, #1
- tst x1, x3 // end cache line aligned?
+again: tst x1, x3 // end cache line aligned?
bic x1, x1, x3
b.eq 1f
dc civac, x1 // clean & invalidate D / U line
@@ -158,6 +163,12 @@ SYM_FUNC_START(__pi_dcache_inval_poc)
3: add x0, x0, x2
cmp x0, x1
b.lo 2b
+alternative_if ARM64_WORKAROUND_4311569
+ mov x0, x4
+ mov x1, x5
+ sub x6, x6, #1
+ cbz x6, again
+alternative_else_nop_endif
dsb sy
ret
SYM_FUNC_END(__pi_dcache_inval_poc)
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 1b32c1232d28..3b18734f9744 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -101,6 +101,7 @@ WORKAROUND_2077057
WORKAROUND_2457168
WORKAROUND_2645198
WORKAROUND_2658417
+WORKAROUND_4311569
WORKAROUND_AMPERE_AC03_CPU_38
WORKAROUND_AMPERE_AC04_CPU_23
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
base-commit: edde060637b92607f3522252c03d64ad06369933
--
2.52.0.358.g0dd7633a29-goog
Hi,
This series intends to fix the race between the MHI stack and the MHI client
drivers due to the MHI 'auto_queue' feature. As it turns out often, the best
way to fix an issue in a feature is to drop the feature itself and this series
does exactly that.
There is no real benefit in having the 'auto_queue' feature in the MHI stack,
other than saving a few lines of code in the client drivers. Since the QRTR is
the only client driver which makes use of this feature, this series reworks the
QRTR driver to manage the buffer on its own.
Testing
=======
Tested on Qcom X1E based Lenovo Thinkpad T14s laptop with WLAN device.
Merge Strategy
==============
Since this series modifies many subsystem drivers, I'd like to get acks from
relevant subsystem maintainers and take the series through MHI tree.
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
---
Changes in v2:
- Used mhi_get_free_desc_count() to queue the buffers
- Collected tags
- Link to v1: https://lore.kernel.org/r/20251217-qrtr-fix-v1-0-f6142a3ec9d8@oss.qualcomm.…
---
Manivannan Sadhasivam (2):
net: qrtr: Drop the MHI auto_queue feature for IPCR DL channels
bus: mhi: host: Drop the auto_queue support
drivers/accel/qaic/mhi_controller.c | 44 -------------------
drivers/bus/mhi/host/init.c | 10 -----
drivers/bus/mhi/host/internal.h | 3 --
drivers/bus/mhi/host/main.c | 81 +----------------------------------
drivers/bus/mhi/host/pci_generic.c | 20 +--------
drivers/net/wireless/ath/ath11k/mhi.c | 4 --
drivers/net/wireless/ath/ath12k/mhi.c | 4 --
include/linux/mhi.h | 14 ------
net/qrtr/mhi.c | 69 ++++++++++++++++++++++++-----
9 files changed, 62 insertions(+), 187 deletions(-)
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20251217-qrtr-fix-c058251d8d1a
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
After discussion with the devicetree maintainers we agreed to not extend
lists with the generic compatible "apple,admac" anymore [1]. Use
"apple,t8103-admac" as base compatible as it is the SoC the driver and
bindings were written for.
[1]: https://lore.kernel.org/asahi/12ab93b7-1fc2-4ce0-926e-c8141cfe81bf@kernel.o…
Fixes: b127315d9a78 ("dmaengine: apple-admac: Add Apple ADMAC driver")
Cc: stable(a)vger.kernel.org
Reviewed-by: Neal Gompa <neal(a)gompa.dev>
Signed-off-by: Janne Grunau <j(a)jannau.net>
---
This is split off from the v1 series adding Apple M2 Pro/Max/Ultra
device trees in
https://lore.kernel.org/r/20250828-dt-apple-t6020-v1-0-507ba4c4b98e@jannau.…
Changes compared to the patch in that series:
- rebased onto v6.19-rc1
- added "Fixes:" and "Cc: stable" tags for handling this as fix adding a
device id
- added Neal's Reviewed-by:
---
drivers/dma/apple-admac.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c
index bd49f03742912198804a02a22e38da2c21093761..04bbd774b3b444f928986c266c53becf286daeea 100644
--- a/drivers/dma/apple-admac.c
+++ b/drivers/dma/apple-admac.c
@@ -936,6 +936,7 @@ static void admac_remove(struct platform_device *pdev)
}
static const struct of_device_id admac_of_match[] = {
+ { .compatible = "apple,t8103-admac", },
{ .compatible = "apple,admac", },
{ }
};
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20251231-apple-admac-t8103-base-compat-18288e7d62b3
Best regards,
--
Janne Grunau <j(a)jannau.net>
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x bd5603eaae0aabf527bfb3ce1bb07e979ce5bd50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122915-discover-sediment-3371@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bd5603eaae0aabf527bfb3ce1bb07e979ce5bd50 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong(a)gmail.com>
Date: Fri, 10 Oct 2025 15:07:38 -0700
Subject: [PATCH] fuse: fix readahead reclaim deadlock
Commit e26ee4efbc79 ("fuse: allocate ff->release_args only if release is
needed") skips allocating ff->release_args if the server does not
implement open. However in doing so, fuse_prepare_release() now skips
grabbing the reference on the inode, which makes it possible for an
inode to be evicted from the dcache while there are inflight readahead
requests. This causes a deadlock if the server triggers reclaim while
servicing the readahead request and reclaim attempts to evict the inode
of the file being read ahead. Since the folio is locked during
readahead, when reclaim evicts the fuse inode and fuse_evict_inode()
attempts to remove all folios associated with the inode from the page
cache (truncate_inode_pages_range()), reclaim will block forever waiting
for the lock since readahead cannot relinquish the lock because it is
itself blocked in reclaim:
>>> stack_trace(1504735)
folio_wait_bit_common (mm/filemap.c:1308:4)
folio_lock (./include/linux/pagemap.h:1052:3)
truncate_inode_pages_range (mm/truncate.c:336:10)
fuse_evict_inode (fs/fuse/inode.c:161:2)
evict (fs/inode.c:704:3)
dentry_unlink_inode (fs/dcache.c:412:3)
__dentry_kill (fs/dcache.c:615:3)
shrink_kill (fs/dcache.c:1060:12)
shrink_dentry_list (fs/dcache.c:1087:3)
prune_dcache_sb (fs/dcache.c:1168:2)
super_cache_scan (fs/super.c:221:10)
do_shrink_slab (mm/shrinker.c:435:9)
shrink_slab (mm/shrinker.c:626:10)
shrink_node (mm/vmscan.c:5951:2)
shrink_zones (mm/vmscan.c:6195:3)
do_try_to_free_pages (mm/vmscan.c:6257:3)
do_swap_page (mm/memory.c:4136:11)
handle_pte_fault (mm/memory.c:5562:10)
handle_mm_fault (mm/memory.c:5870:9)
do_user_addr_fault (arch/x86/mm/fault.c:1338:10)
handle_page_fault (arch/x86/mm/fault.c:1481:3)
exc_page_fault (arch/x86/mm/fault.c:1539:2)
asm_exc_page_fault+0x22/0x27
Fix this deadlock by allocating ff->release_args and grabbing the
reference on the inode when preparing the file for release even if the
server does not implement open. The inode reference will be dropped when
the last reference on the fuse file is dropped (see fuse_file_put() ->
fuse_release_end()).
Fixes: e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joanne Koong <joannelkoong(a)gmail.com>
Reported-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f1ef77a0be05..4d96e684d736 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
fuse_file_io_release(ff, ra->inode);
if (!args) {
- /* Do nothing when server does not implement 'open' */
+ /* Do nothing when server does not implement 'opendir' */
+ } else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) {
+ fuse_release_end(ff->fm, args, 0);
} else if (sync) {
fuse_simple_request(ff->fm, args);
fuse_release_end(ff->fm, args, 0);
@@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
bool open = isdir ? !fc->no_opendir : !fc->no_open;
+ bool release = !isdir || open;
- ff = fuse_file_alloc(fm, open);
+ /*
+ * ff->args->release_args still needs to be allocated (so we can hold an
+ * inode reference while there are pending inflight file operations when
+ * ->release() is called, see fuse_prepare_release()) even if
+ * fc->no_open is set else it becomes possible for reclaim to deadlock
+ * if while servicing the readahead request the server triggers reclaim
+ * and reclaim evicts the inode of the file being read ahead.
+ */
+ ff = fuse_file_alloc(fm, release);
if (!ff)
return ERR_PTR(-ENOMEM);
@@ -152,13 +163,14 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
fuse_file_free(ff);
return ERR_PTR(err);
} else {
- /* No release needed */
- kfree(ff->args);
- ff->args = NULL;
- if (isdir)
+ if (isdir) {
+ /* No release needed */
+ kfree(ff->args);
+ ff->args = NULL;
fc->no_opendir = 1;
- else
+ } else {
fc->no_open = 1;
+ }
}
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x bd5603eaae0aabf527bfb3ce1bb07e979ce5bd50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122914-hundredth-munchkin-531c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bd5603eaae0aabf527bfb3ce1bb07e979ce5bd50 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong(a)gmail.com>
Date: Fri, 10 Oct 2025 15:07:38 -0700
Subject: [PATCH] fuse: fix readahead reclaim deadlock
Commit e26ee4efbc79 ("fuse: allocate ff->release_args only if release is
needed") skips allocating ff->release_args if the server does not
implement open. However in doing so, fuse_prepare_release() now skips
grabbing the reference on the inode, which makes it possible for an
inode to be evicted from the dcache while there are inflight readahead
requests. This causes a deadlock if the server triggers reclaim while
servicing the readahead request and reclaim attempts to evict the inode
of the file being read ahead. Since the folio is locked during
readahead, when reclaim evicts the fuse inode and fuse_evict_inode()
attempts to remove all folios associated with the inode from the page
cache (truncate_inode_pages_range()), reclaim will block forever waiting
for the lock since readahead cannot relinquish the lock because it is
itself blocked in reclaim:
>>> stack_trace(1504735)
folio_wait_bit_common (mm/filemap.c:1308:4)
folio_lock (./include/linux/pagemap.h:1052:3)
truncate_inode_pages_range (mm/truncate.c:336:10)
fuse_evict_inode (fs/fuse/inode.c:161:2)
evict (fs/inode.c:704:3)
dentry_unlink_inode (fs/dcache.c:412:3)
__dentry_kill (fs/dcache.c:615:3)
shrink_kill (fs/dcache.c:1060:12)
shrink_dentry_list (fs/dcache.c:1087:3)
prune_dcache_sb (fs/dcache.c:1168:2)
super_cache_scan (fs/super.c:221:10)
do_shrink_slab (mm/shrinker.c:435:9)
shrink_slab (mm/shrinker.c:626:10)
shrink_node (mm/vmscan.c:5951:2)
shrink_zones (mm/vmscan.c:6195:3)
do_try_to_free_pages (mm/vmscan.c:6257:3)
do_swap_page (mm/memory.c:4136:11)
handle_pte_fault (mm/memory.c:5562:10)
handle_mm_fault (mm/memory.c:5870:9)
do_user_addr_fault (arch/x86/mm/fault.c:1338:10)
handle_page_fault (arch/x86/mm/fault.c:1481:3)
exc_page_fault (arch/x86/mm/fault.c:1539:2)
asm_exc_page_fault+0x22/0x27
Fix this deadlock by allocating ff->release_args and grabbing the
reference on the inode when preparing the file for release even if the
server does not implement open. The inode reference will be dropped when
the last reference on the fuse file is dropped (see fuse_file_put() ->
fuse_release_end()).
Fixes: e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joanne Koong <joannelkoong(a)gmail.com>
Reported-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f1ef77a0be05..4d96e684d736 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
fuse_file_io_release(ff, ra->inode);
if (!args) {
- /* Do nothing when server does not implement 'open' */
+ /* Do nothing when server does not implement 'opendir' */
+ } else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) {
+ fuse_release_end(ff->fm, args, 0);
} else if (sync) {
fuse_simple_request(ff->fm, args);
fuse_release_end(ff->fm, args, 0);
@@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
bool open = isdir ? !fc->no_opendir : !fc->no_open;
+ bool release = !isdir || open;
- ff = fuse_file_alloc(fm, open);
+ /*
+ * ff->args->release_args still needs to be allocated (so we can hold an
+ * inode reference while there are pending inflight file operations when
+ * ->release() is called, see fuse_prepare_release()) even if
+ * fc->no_open is set else it becomes possible for reclaim to deadlock
+ * if while servicing the readahead request the server triggers reclaim
+ * and reclaim evicts the inode of the file being read ahead.
+ */
+ ff = fuse_file_alloc(fm, release);
if (!ff)
return ERR_PTR(-ENOMEM);
@@ -152,13 +163,14 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
fuse_file_free(ff);
return ERR_PTR(err);
} else {
- /* No release needed */
- kfree(ff->args);
- ff->args = NULL;
- if (isdir)
+ if (isdir) {
+ /* No release needed */
+ kfree(ff->args);
+ ff->args = NULL;
fc->no_opendir = 1;
- else
+ } else {
fc->no_open = 1;
+ }
}
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x bd5603eaae0aabf527bfb3ce1bb07e979ce5bd50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122913-nucleus-deny-f4d4@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bd5603eaae0aabf527bfb3ce1bb07e979ce5bd50 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong(a)gmail.com>
Date: Fri, 10 Oct 2025 15:07:38 -0700
Subject: [PATCH] fuse: fix readahead reclaim deadlock
Commit e26ee4efbc79 ("fuse: allocate ff->release_args only if release is
needed") skips allocating ff->release_args if the server does not
implement open. However in doing so, fuse_prepare_release() now skips
grabbing the reference on the inode, which makes it possible for an
inode to be evicted from the dcache while there are inflight readahead
requests. This causes a deadlock if the server triggers reclaim while
servicing the readahead request and reclaim attempts to evict the inode
of the file being read ahead. Since the folio is locked during
readahead, when reclaim evicts the fuse inode and fuse_evict_inode()
attempts to remove all folios associated with the inode from the page
cache (truncate_inode_pages_range()), reclaim will block forever waiting
for the lock since readahead cannot relinquish the lock because it is
itself blocked in reclaim:
>>> stack_trace(1504735)
folio_wait_bit_common (mm/filemap.c:1308:4)
folio_lock (./include/linux/pagemap.h:1052:3)
truncate_inode_pages_range (mm/truncate.c:336:10)
fuse_evict_inode (fs/fuse/inode.c:161:2)
evict (fs/inode.c:704:3)
dentry_unlink_inode (fs/dcache.c:412:3)
__dentry_kill (fs/dcache.c:615:3)
shrink_kill (fs/dcache.c:1060:12)
shrink_dentry_list (fs/dcache.c:1087:3)
prune_dcache_sb (fs/dcache.c:1168:2)
super_cache_scan (fs/super.c:221:10)
do_shrink_slab (mm/shrinker.c:435:9)
shrink_slab (mm/shrinker.c:626:10)
shrink_node (mm/vmscan.c:5951:2)
shrink_zones (mm/vmscan.c:6195:3)
do_try_to_free_pages (mm/vmscan.c:6257:3)
do_swap_page (mm/memory.c:4136:11)
handle_pte_fault (mm/memory.c:5562:10)
handle_mm_fault (mm/memory.c:5870:9)
do_user_addr_fault (arch/x86/mm/fault.c:1338:10)
handle_page_fault (arch/x86/mm/fault.c:1481:3)
exc_page_fault (arch/x86/mm/fault.c:1539:2)
asm_exc_page_fault+0x22/0x27
Fix this deadlock by allocating ff->release_args and grabbing the
reference on the inode when preparing the file for release even if the
server does not implement open. The inode reference will be dropped when
the last reference on the fuse file is dropped (see fuse_file_put() ->
fuse_release_end()).
Fixes: e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joanne Koong <joannelkoong(a)gmail.com>
Reported-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f1ef77a0be05..4d96e684d736 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
fuse_file_io_release(ff, ra->inode);
if (!args) {
- /* Do nothing when server does not implement 'open' */
+ /* Do nothing when server does not implement 'opendir' */
+ } else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) {
+ fuse_release_end(ff->fm, args, 0);
} else if (sync) {
fuse_simple_request(ff->fm, args);
fuse_release_end(ff->fm, args, 0);
@@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
bool open = isdir ? !fc->no_opendir : !fc->no_open;
+ bool release = !isdir || open;
- ff = fuse_file_alloc(fm, open);
+ /*
+ * ff->args->release_args still needs to be allocated (so we can hold an
+ * inode reference while there are pending inflight file operations when
+ * ->release() is called, see fuse_prepare_release()) even if
+ * fc->no_open is set else it becomes possible for reclaim to deadlock
+ * if while servicing the readahead request the server triggers reclaim
+ * and reclaim evicts the inode of the file being read ahead.
+ */
+ ff = fuse_file_alloc(fm, release);
if (!ff)
return ERR_PTR(-ENOMEM);
@@ -152,13 +163,14 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
fuse_file_free(ff);
return ERR_PTR(err);
} else {
- /* No release needed */
- kfree(ff->args);
- ff->args = NULL;
- if (isdir)
+ if (isdir) {
+ /* No release needed */
+ kfree(ff->args);
+ ff->args = NULL;
fc->no_opendir = 1;
- else
+ } else {
fc->no_open = 1;
+ }
}
}
According to documentation, the DP PHY on x1e80100 has another clock
called ref.
The current X Elite devices supported upstream work fine without this
clock, because the boot firmware leaves this clock enabled. But we should
not rely on that. Also, when it comes to power management, this clock
needs to be also disabled on suspend. So even though this change breaks
the ABI, it is needed in order to make we disable this clock on runtime
PM, when that is going to be enabled in the driver.
So rework the driver to allow different number of clocks, fix the
dt-bindings schema and add the clock to the DT node as well.
Signed-off-by: Abel Vesa <abel.vesa(a)linaro.org>
---
Changes in v5:
- Picked-up Bjorn's R-b tags.
- Replaced "parse" with "get" on clocks acquiring failure.
- Link to v4: https://lore.kernel.org/r/20251029-phy-qcom-edp-add-missing-refclk-v4-0-adb…
Changes in v4:
- Picked Dmitry's R-b tag for the driver patch
- Added x1e80100 substring to subject of dts patch
- Link to v3 (resend): https://lore.kernel.org/r/20251014-phy-qcom-edp-add-missing-refclk-v3-0-078…
Changes in v3 (resend)
- picked-up Krzysztof's R-b tag for bindings patch
- Link to v3: https://lore.kernel.org/r/20250909-phy-qcom-edp-add-missing-refclk-v3-0-4ec…
Changes in v3:
- Use dev_err_probe() on clocks parsing failure.
- Explain why the ABI break is necessary.
- Drop the extra 'clk' suffix from the clock name. So ref instead of
refclk.
- Link to v2: https://lore.kernel.org/r/20250903-phy-qcom-edp-add-missing-refclk-v2-0-d88…
Changes in v2:
- Fix schema by adding the minItems, as suggested by Krzysztof.
- Use devm_clk_bulk_get_all, as suggested by Konrad.
- Rephrase the commit messages to reflect the flexible number of clocks.
- Link to v1: https://lore.kernel.org/r/20250730-phy-qcom-edp-add-missing-refclk-v1-0-6f7…
---
Abel Vesa (3):
dt-bindings: phy: qcom-edp: Add missing clock for X Elite
phy: qcom: edp: Make the number of clocks flexible
arm64: dts: qcom: x1e80100: Add missing TCSR ref clock to the DP PHYs
.../devicetree/bindings/phy/qcom,edp-phy.yaml | 28 +++++++++++++++++++++-
arch/arm64/boot/dts/qcom/hamoa.dtsi | 12 ++++++----
drivers/phy/qualcomm/phy-qcom-edp.c | 16 ++++++-------
3 files changed, 43 insertions(+), 13 deletions(-)
---
base-commit: 131f3d9446a6075192cdd91f197989d98302faa6
change-id: 20250730-phy-qcom-edp-add-missing-refclk-5ab82828f8e7
Best regards,
--
Abel Vesa <abel.vesa(a)oss.qualcomm.com>
From: Sean Christopherson <seanjc(a)google.com>
When loading guest XSAVE state via KVM_SET_XSAVE, and when updating XFD in
response to a guest WRMSR, clear XFD-disabled features in the saved (or to
be restored) XSTATE_BV to ensure KVM doesn't attempt to load state for
features that are disabled via the guest's XFD. Because the kernel
executes XRSTOR with the guest's XFD, saving XSTATE_BV[i]=1 with XFD[i]=1
will cause XRSTOR to #NM and panic the kernel.
E.g. if fpu_update_guest_xfd() sets XFD without clearing XSTATE_BV:
------------[ cut here ]------------
WARNING: arch/x86/kernel/traps.c:1524 at exc_device_not_available+0x101/0x110, CPU#29: amx_test/848
Modules linked in: kvm_intel kvm irqbypass
CPU: 29 UID: 1000 PID: 848 Comm: amx_test Not tainted 6.19.0-rc2-ffa07f7fd437-x86_amx_nm_xfd_non_init-vm #171 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:exc_device_not_available+0x101/0x110
Call Trace:
<TASK>
asm_exc_device_not_available+0x1a/0x20
RIP: 0010:restore_fpregs_from_fpstate+0x36/0x90
switch_fpu_return+0x4a/0xb0
kvm_arch_vcpu_ioctl_run+0x1245/0x1e40 [kvm]
kvm_vcpu_ioctl+0x2c3/0x8f0 [kvm]
__x64_sys_ioctl+0x8f/0xd0
do_syscall_64+0x62/0x940
entry_SYSCALL_64_after_hwframe+0x4b/0x53
</TASK>
---[ end trace 0000000000000000 ]---
This can happen if the guest executes WRMSR(MSR_IA32_XFD) to set XFD[18] = 1,
and a host IRQ triggers kernel_fpu_begin() prior to the vmexit handler's
call to fpu_update_guest_xfd().
and if userspace stuffs XSTATE_BV[i]=1 via KVM_SET_XSAVE:
------------[ cut here ]------------
WARNING: arch/x86/kernel/traps.c:1524 at exc_device_not_available+0x101/0x110, CPU#14: amx_test/867
Modules linked in: kvm_intel kvm irqbypass
CPU: 14 UID: 1000 PID: 867 Comm: amx_test Not tainted 6.19.0-rc2-2dace9faccd6-x86_amx_nm_xfd_non_init-vm #168 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:exc_device_not_available+0x101/0x110
Call Trace:
<TASK>
asm_exc_device_not_available+0x1a/0x20
RIP: 0010:restore_fpregs_from_fpstate+0x36/0x90
fpu_swap_kvm_fpstate+0x6b/0x120
kvm_load_guest_fpu+0x30/0x80 [kvm]
kvm_arch_vcpu_ioctl_run+0x85/0x1e40 [kvm]
kvm_vcpu_ioctl+0x2c3/0x8f0 [kvm]
__x64_sys_ioctl+0x8f/0xd0
do_syscall_64+0x62/0x940
entry_SYSCALL_64_after_hwframe+0x4b/0x53
</TASK>
---[ end trace 0000000000000000 ]---
The new behavior is consistent with the AMX architecture. Per Intel's SDM,
XSAVE saves XSTATE_BV as '0' for components that are disabled via XFD
(and non-compacted XSAVE saves the initial configuration of the state
component):
If XSAVE, XSAVEC, XSAVEOPT, or XSAVES is saving the state component i,
the instruction does not generate #NM when XCR0[i] = IA32_XFD[i] = 1;
instead, it operates as if XINUSE[i] = 0 (and the state component was
in its initial state): it saves bit i of XSTATE_BV field of the XSAVE
header as 0; in addition, XSAVE saves the initial configuration of the
state component (the other instructions do not save state component i).
Alternatively, KVM could always do XRSTOR with XFD=0, e.g. by using
a constant XFD based on the set of enabled features when XSAVEing for
a struct fpu_guest. However, having XSTATE_BV[i]=1 for XFD-disabled
features can only happen in the above interrupt case, or in similar
scenarios involving preemption on preemptible kernels, because
fpu_swap_kvm_fpstate()'s call to save_fpregs_to_fpstate() saves the
outgoing FPU state with the current XFD; and that is (on all but the
first WRMSR to XFD) the guest XFD.
Therefore, XFD can only go out of sync with XSTATE_BV in the above
interrupt case, or in similar scenarios involving preemption on
preemptible kernels, and it we can consider it (de facto) part of KVM
ABI that KVM_GET_XSAVE returns XSTATE_BV[i]=0 for XFD-disabled features.
Reported-by: Paolo Bonzini <pbonzini(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: 820a6ee944e7 ("kvm: x86: Add emulation for IA32_XFD", 2022-01-14)
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
[Move clearing of XSTATE_BV from fpu_copy_uabi_to_guest_fpstate
to kvm_vcpu_ioctl_x86_set_xsave. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kernel/fpu/core.c | 32 +++++++++++++++++++++++++++++---
arch/x86/kvm/x86.c | 9 +++++++++
2 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index da233f20ae6f..166c380b0161 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -319,10 +319,29 @@ EXPORT_SYMBOL_FOR_KVM(fpu_enable_guest_xfd_features);
#ifdef CONFIG_X86_64
void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
{
+ struct fpstate *fpstate = guest_fpu->fpstate;
+
fpregs_lock();
- guest_fpu->fpstate->xfd = xfd;
- if (guest_fpu->fpstate->in_use)
- xfd_update_state(guest_fpu->fpstate);
+
+ /*
+ * KVM's guest ABI is that setting XFD[i]=1 *can* immediately revert
+ * the save state to initialized. Likewise, KVM_GET_XSAVE does the
+ * same as XSAVE and returns XSTATE_BV[i]=0 whenever XFD[i]=1.
+ *
+ * If the guest's FPU state is in hardware, just update XFD: the XSAVE
+ * in fpu_swap_kvm_fpstate will clear XSTATE_BV[i] whenever XFD[i]=1.
+ *
+ * If however the guest's FPU state is NOT resident in hardware, clear
+ * disabled components in XSTATE_BV now, or a subsequent XRSTOR will
+ * attempt to load disabled components and generate #NM _in the host_.
+ */
+ if (xfd && test_thread_flag(TIF_NEED_FPU_LOAD))
+ fpstate->regs.xsave.header.xfeatures &= ~xfd;
+
+ fpstate->xfd = xfd;
+ if (fpstate->in_use)
+ xfd_update_state(fpstate);
+
fpregs_unlock();
}
EXPORT_SYMBOL_FOR_KVM(fpu_update_guest_xfd);
@@ -430,6 +449,13 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
if (ustate->xsave.header.xfeatures & ~xcr0)
return -EINVAL;
+ /*
+ * Disabled features must be in their initial state, otherwise XRSTOR
+ * causes an exception.
+ */
+ if (WARN_ON_ONCE(ustate->xsave.header.xfeatures & kstate->xfd))
+ return -EINVAL;
+
/*
* Nullify @vpkru to preserve its current value if PKRU's bit isn't set
* in the header. KVM's odd ABI is to leave PKRU untouched in this
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ff8812f3a129..c0416f53b5f5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5807,9 +5807,18 @@ static int kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
+ union fpregs_state *xstate = (union fpregs_state *)guest_xsave->region;
+
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
+ /*
+ * Do not reject non-initialized disabled features for backwards
+ * compatibility, but clear XSTATE_BV[i] whenever XFD[i]=1.
+ * Otherwise, XRSTOR would cause a #NM.
+ */
+ xstate->xsave.header.xfeatures &= ~vcpu->arch.guest_fpu.fpstate->xfd;
+
return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
guest_xsave->region,
kvm_caps.supported_xcr0,
--
2.52.0
If damon_call() is executed against a DAMON context that is not running,
the function returns error while keeping the damon_call_control object
linked to the context's call_controls list. Let's suppose the object is
deallocated after the damon_call(), and yet another damon_call() is
executed against the same context. The function tries to add the new
damon_call_control object to the call_controls list, which still has the
pointer to the previous damon_call_control object, which is deallocated.
As a result, use-after-free happens.
This can actually be triggered using the DAMON sysfs interface. It is
not easily exploitable since it requires the sysfs write permission and
making a definitely weird file writes, though. Please refer to the
report for more details about the issue reproduction steps.
Fix the issue by making damon_call() to cleanup the damon_call_control
object before returning the error.
Reported-by: JaeJoon Jung <rgbi3307(a)gmail.com>
Closes: https://lore.kernel.org/20251224094401.20384-1-rgbi3307@gmail.com
Fixes: 42b7491af14c ("mm/damon/core: introduce damon_call()")
Cc: <stable(a)vger.kernel.org> # 6.14.x
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
mm/damon/core.c | 31 ++++++++++++++++++++++++++++++-
1 file changed, 30 insertions(+), 1 deletion(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 2d3e8006db50..65482a0ce20b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1442,6 +1442,35 @@ bool damon_is_running(struct damon_ctx *ctx)
return running;
}
+/*
+ * damon_call_handle_inactive_ctx() - handle DAMON call request that added to
+ * an inactive context.
+ * @ctx: The inactive DAMON context.
+ * @control: Control variable of the call request.
+ *
+ * This function is called in a case that @control is added to @ctx but @ctx is
+ * not running (inactive). See if @ctx handled @control or not, and cleanup
+ * @control if it was not handled.
+ *
+ * Returns 0 if @control was handled by @ctx, negative error code otherwise.
+ */
+static int damon_call_handle_inactive_ctx(
+ struct damon_ctx *ctx, struct damon_call_control *control)
+{
+ struct damon_call_control *c;
+
+ mutex_lock(&ctx->call_controls_lock);
+ list_for_each_entry(c, &ctx->call_controls, list) {
+ if (c == control) {
+ list_del(&control->list);
+ mutex_unlock(&ctx->call_controls_lock);
+ return -EINVAL;
+ }
+ }
+ mutex_unlock(&ctx->call_controls_lock);
+ return 0;
+}
+
/**
* damon_call() - Invoke a given function on DAMON worker thread (kdamond).
* @ctx: DAMON context to call the function for.
@@ -1472,7 +1501,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
list_add_tail(&control->list, &ctx->call_controls);
mutex_unlock(&ctx->call_controls_lock);
if (!damon_is_running(ctx))
- return -EINVAL;
+ return damon_call_handle_inactive_ctx(ctx, control);
if (control->repeat)
return 0;
wait_for_completion(&control->completion);
base-commit: a100272b3541cb00a5e29fdc16e428234ebfddd1
--
2.47.3
The patch titled
Subject: mm/damon/core: remove call_control in inactive contexts
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-core-remove-call_control-in-inactive-contexts.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/core: remove call_control in inactive contexts
Date: Tue, 30 Dec 2025 17:23:13 -0800
If damon_call() is executed against a DAMON context that is not running,
the function returns error while keeping the damon_call_control object
linked to the context's call_controls list. Let's suppose the object is
deallocated after the damon_call(), and yet another damon_call() is
executed against the same context. The function tries to add the new
damon_call_control object to the call_controls list, which still has the
pointer to the previous damon_call_control object, which is deallocated.
As a result, use-after-free happens.
This can actually be triggered using the DAMON sysfs interface. It is not
easily exploitable since it requires the sysfs write permission and making
a definitely weird file writes, though. Please refer to the report for
more details about the issue reproduction steps.
Fix the issue by making two changes. Firstly, move the final
kdamond_call() for cancelling all existing damon_call() requests from
terminating DAMON context to be done before the ctx->kdamond reset. This
makes any code that sees NULL ctx->kdamond can safely assume the context
may not access damon_call() requests anymore. Secondly, let damon_call()
to cleanup the damon_call_control objects that were added to the
already-terminated DAMON context, before returning the error.
Link: https://lkml.kernel.org/r/20251231012315.75835-1-sj@kernel.org
Fixes: 004ded6bee11 ("mm/damon: accept parallel damon_call() requests")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Reported-by: JaeJoon Jung <rgbi3307(a)gmail.com>
Closes: https://lore.kernel.org/20251224094401.20384-1-rgbi3307@gmail.com
Cc: <stable(a)vger.kernel.org> # 6.17.x
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
--- a/mm/damon/core.c~mm-damon-core-remove-call_control-in-inactive-contexts
+++ a/mm/damon/core.c
@@ -1431,6 +1431,35 @@ bool damon_is_running(struct damon_ctx *
return running;
}
+/*
+ * damon_call_handle_inactive_ctx() - handle DAMON call request that added to
+ * an inactive context.
+ * @ctx: The inactive DAMON context.
+ * @control: Control variable of the call request.
+ *
+ * This function is called in a case that @control is added to @ctx but @ctx is
+ * not running (inactive). See if @ctx handled @control or not, and cleanup
+ * @control if it was not handled.
+ *
+ * Returns 0 if @control was handled by @ctx, negative error code otherwise.
+ */
+static int damon_call_handle_inactive_ctx(
+ struct damon_ctx *ctx, struct damon_call_control *control)
+{
+ struct damon_call_control *c;
+
+ mutex_lock(&ctx->call_controls_lock);
+ list_for_each_entry(c, &ctx->call_controls, list) {
+ if (c == control) {
+ list_del(&control->list);
+ mutex_unlock(&ctx->call_controls_lock);
+ return -EINVAL;
+ }
+ }
+ mutex_unlock(&ctx->call_controls_lock);
+ return 0;
+}
+
/**
* damon_call() - Invoke a given function on DAMON worker thread (kdamond).
* @ctx: DAMON context to call the function for.
@@ -1461,7 +1490,7 @@ int damon_call(struct damon_ctx *ctx, st
list_add_tail(&control->list, &ctx->call_controls);
mutex_unlock(&ctx->call_controls_lock);
if (!damon_is_running(ctx))
- return -EINVAL;
+ return damon_call_handle_inactive_ctx(ctx, control);
if (control->repeat)
return 0;
wait_for_completion(&control->completion);
@@ -2755,13 +2784,13 @@ done:
if (ctx->ops.cleanup)
ctx->ops.cleanup(ctx);
kfree(ctx->regions_score_histogram);
+ kdamond_call(ctx, true);
pr_debug("kdamond (%d) finishes\n", current->pid);
mutex_lock(&ctx->kdamond_lock);
ctx->kdamond = NULL;
mutex_unlock(&ctx->kdamond_lock);
- kdamond_call(ctx, true);
damos_walk_cancel(ctx);
mutex_lock(&damon_lock);
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-core-remove-call_control-in-inactive-contexts.patch
mm-damon-core-introduce-nr_snapshots-damos-stat.patch
mm-damon-sysfs-schemes-introduce-nr_snapshots-damos-stat-file.patch
docs-mm-damon-design-update-for-nr_snapshots-damos-stat.patch
docs-admin-guide-mm-damon-usage-update-for-nr_snapshots-damos-stat.patch
docs-abi-damon-update-for-nr_snapshots-damos-stat.patch
mm-damon-update-damos-kerneldoc-for-stat-field.patch
mm-damon-core-implement-max_nr_snapshots.patch
mm-damon-sysfs-schemes-implement-max_nr_snapshots-file.patch
docs-mm-damon-design-update-for-max_nr_snapshots.patch
docs-admin-guide-mm-damon-usage-update-for-max_nr_snapshots.patch
docs-abi-damon-update-for-max_nr_snapshots.patch
mm-damon-core-add-trace-point-for-damos-stat-per-apply-interval.patch
mm-damon-sysfs-cleanup-intervals-subdirs-on-attrs-dir-setup-failure.patch
mm-damon-sysfs-cleanup-attrs-subdirs-on-context-dir-setup-failure.patch
mm-damon-sysfs-scheme-cleanup-quotas-subdirs-on-scheme-dir-setup-failure.patch
mm-damon-sysfs-scheme-cleanup-access_pattern-subdirs-on-scheme-dir-setup-failure.patch
mm-damon-tests-core-kunit-add-test-cases-for-multiple-regions-in-damon_test_split_regions_of-fix.patch
The patch titled
Subject: x86/kexec: add a sanity check on previous kernel's ima kexec buffer
has been added to the -mm mm-nonmm-unstable branch. Its filename is
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Subject: x86/kexec: add a sanity check on previous kernel's ima kexec buffer
Date: Tue, 30 Dec 2025 22:16:09 -0800
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>", the physical range that contains the carried
over IMA measurement list may fall outside the truncated RAM leading to a
kernel panic.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) ��� not-present page
Other architectures already validate the range with page_is_ram(), as done
in commit cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer
against memory bounds") do a similar check on x86.
Without carrying the measurement list across kexec, the attestation
would fail.
Link: https://lkml.kernel.org/r/20251231061609.907170-4-harshit.m.mogalapalli@ora…
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Fixes: b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec")
Reported-by: Paul Webb <paul.x.webb(a)oracle.com>
Reviewed-by: Mimi Zohar <zohar(a)linux.ibm.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: guoweikang <guoweikang.kernel(a)gmail.com>
Cc: Henry Willard <henry.willard(a)oracle.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Joel Granados <joel.granados(a)kernel.org>
Cc: Jonathan McDowell <noodles(a)fb.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Sohil Mehta <sohil.mehta(a)intel.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yifei Liu <yifei.l.liu(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/x86/kernel/setup.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/arch/x86/kernel/setup.c~x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer
+++ a/arch/x86/kernel/setup.c
@@ -439,9 +439,15 @@ int __init ima_free_kexec_buffer(void)
int __init ima_get_kexec_buffer(void **addr, size_t *size)
{
+ int ret;
+
if (!ima_kexec_buffer_size)
return -ENOENT;
+ ret = ima_validate_range(ima_kexec_buffer_phys, ima_kexec_buffer_size);
+ if (ret)
+ return ret;
+
*addr = __va(ima_kexec_buffer_phys);
*size = ima_kexec_buffer_size;
_
Patches currently in -mm which might be from harshit.m.mogalapalli(a)oracle.com are
ima-verify-the-previous-kernels-ima-buffer-lies-in-addressable-ram.patch
of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range.patch
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
The patch titled
Subject: of/kexec: refactor ima_get_kexec_buffer() to use ima_validate_range()
has been added to the -mm mm-nonmm-unstable branch. Its filename is
of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Subject: of/kexec: refactor ima_get_kexec_buffer() to use ima_validate_range()
Date: Tue, 30 Dec 2025 22:16:08 -0800
Refactor the OF/DT ima_get_kexec_buffer() to use a generic helper to
validate the address range. No functional change intended.
Link: https://lkml.kernel.org/r/20251231061609.907170-3-harshit.m.mogalapalli@ora…
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Reviewed-by: Mimi Zohar <zohar(a)linux.ibm.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: guoweikang <guoweikang.kernel(a)gmail.com>
Cc: Henry Willard <henry.willard(a)oracle.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Joel Granados <joel.granados(a)kernel.org>
Cc: Jonathan McDowell <noodles(a)fb.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Paul Webb <paul.x.webb(a)oracle.com>
Cc: Sohil Mehta <sohil.mehta(a)intel.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yifei Liu <yifei.l.liu(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/of/kexec.c | 15 +++------------
1 file changed, 3 insertions(+), 12 deletions(-)
--- a/drivers/of/kexec.c~of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range
+++ a/drivers/of/kexec.c
@@ -128,7 +128,6 @@ int __init ima_get_kexec_buffer(void **a
{
int ret, len;
unsigned long tmp_addr;
- unsigned long start_pfn, end_pfn;
size_t tmp_size;
const void *prop;
@@ -144,17 +143,9 @@ int __init ima_get_kexec_buffer(void **a
if (!tmp_size)
return -ENOENT;
- /*
- * Calculate the PFNs for the buffer and ensure
- * they are with in addressable memory.
- */
- start_pfn = PHYS_PFN(tmp_addr);
- end_pfn = PHYS_PFN(tmp_addr + tmp_size - 1);
- if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) {
- pr_warn("IMA buffer at 0x%lx, size = 0x%zx beyond memory\n",
- tmp_addr, tmp_size);
- return -EINVAL;
- }
+ ret = ima_validate_range(tmp_addr, tmp_size);
+ if (ret)
+ return ret;
*addr = __va(tmp_addr);
*size = tmp_size;
_
Patches currently in -mm which might be from harshit.m.mogalapalli(a)oracle.com are
ima-verify-the-previous-kernels-ima-buffer-lies-in-addressable-ram.patch
of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range.patch
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
The patch titled
Subject: ima: verify the previous kernel's IMA buffer lies in addressable RAM
has been added to the -mm mm-nonmm-unstable branch. Its filename is
ima-verify-the-previous-kernels-ima-buffer-lies-in-addressable-ram.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Subject: ima: verify the previous kernel's IMA buffer lies in addressable RAM
Date: Tue, 30 Dec 2025 22:16:07 -0800
Patch series "Address page fault in ima_restore_measurement_list()", v3.
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>" we observe a pafe fault that happens.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) not-present page
This happens on x86_64 only, as this is already fixed in aarch64 in
commit: cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer
against memory bounds")
This patch (of 3):
When the second-stage kernel is booted with a limiting command line (e.g.
"mem=<size>"), the IMA measurement buffer handed over from the previous
kernel may fall outside the addressable RAM of the new kernel. Accessing
such a buffer can fault during early restore.
Introduce a small generic helper, ima_validate_range(), which verifies
that a physical [start, end] range for the previous-kernel IMA buffer lies
within addressable memory:
- On x86, use pfn_range_is_mapped().
- On OF based architectures, use page_is_ram().
Link: https://lkml.kernel.org/r/20251231061609.907170-1-harshit.m.mogalapalli@ora…
Link: https://lkml.kernel.org/r/20251231061609.907170-2-harshit.m.mogalapalli@ora…
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: guoweikang <guoweikang.kernel(a)gmail.com>
Cc: Henry Willard <henry.willard(a)oracle.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Joel Granados <joel.granados(a)kernel.org>
Cc: Jonathan McDowell <noodles(a)fb.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Mimi Zohar <zohar(a)linux.ibm.com>
Cc: Paul Webb <paul.x.webb(a)oracle.com>
Cc: Sohil Mehta <sohil.mehta(a)intel.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yifei Liu <yifei.l.liu(a)oracle.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/ima.h | 1
security/integrity/ima/ima_kexec.c | 35 +++++++++++++++++++++++++++
2 files changed, 36 insertions(+)
--- a/include/linux/ima.h~ima-verify-the-previous-kernels-ima-buffer-lies-in-addressable-ram
+++ a/include/linux/ima.h
@@ -69,6 +69,7 @@ static inline int ima_measure_critical_d
#ifdef CONFIG_HAVE_IMA_KEXEC
int __init ima_free_kexec_buffer(void);
int __init ima_get_kexec_buffer(void **addr, size_t *size);
+int ima_validate_range(phys_addr_t phys, size_t size);
#endif
#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
--- a/security/integrity/ima/ima_kexec.c~ima-verify-the-previous-kernels-ima-buffer-lies-in-addressable-ram
+++ a/security/integrity/ima/ima_kexec.c
@@ -12,6 +12,8 @@
#include <linux/kexec.h>
#include <linux/of.h>
#include <linux/ima.h>
+#include <linux/mm.h>
+#include <linux/overflow.h>
#include <linux/reboot.h>
#include <asm/page.h>
#include "ima.h"
@@ -294,3 +296,36 @@ void __init ima_load_kexec_buffer(void)
pr_debug("Error restoring the measurement list: %d\n", rc);
}
}
+
+/*
+ * ima_validate_range - verify a physical buffer lies in addressable RAM
+ * @phys: physical start address of the buffer from previous kernel
+ * @size: size of the buffer
+ *
+ * On success return 0. On failure returns -EINVAL so callers can skip
+ * restoring.
+ */
+int ima_validate_range(phys_addr_t phys, size_t size)
+{
+ unsigned long start_pfn, end_pfn;
+ phys_addr_t end_phys;
+
+ if (check_add_overflow(phys, (phys_addr_t)size - 1, &end_phys))
+ return -EINVAL;
+
+ start_pfn = PHYS_PFN(phys);
+ end_pfn = PHYS_PFN(end_phys);
+
+#ifdef CONFIG_X86
+ if (!pfn_range_is_mapped(start_pfn, end_pfn))
+#else
+ if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn))
+#endif
+ {
+ pr_warn("IMA: previous kernel measurement buffer %pa (size 0x%zx) lies outside available memory\n",
+ &phys, size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
_
Patches currently in -mm which might be from harshit.m.mogalapalli(a)oracle.com are
ima-verify-the-previous-kernels-ima-buffer-lies-in-addressable-ram.patch
of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range.patch
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x bba4322e3f303b2d656e748be758320b567f046f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122927-untapped-stimulate-e26d@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bba4322e3f303b2d656e748be758320b567f046f Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal(a)kernel.org>
Date: Wed, 5 Nov 2025 06:22:36 +0900
Subject: [PATCH] block: freeze queue when updating zone resources
Modify disk_update_zone_resources() to freeze the device queue before
updating the number of zones, zone capacity and other zone related
resources. The locking order resulting from the call to
queue_limits_commit_update_frozen() is preserved, that is, the queue
limits lock is first taken by calling queue_limits_start_update() before
freezing the queue, and the queue is unfrozen after executing
queue_limits_commit_update(), which replaces the call to
queue_limits_commit_update_frozen().
This change ensures that there are no in-flights I/Os when the zone
resources are updated due to a zone revalidation. In case of error when
the limits are applied, directly call disk_free_zone_resources() from
disk_update_zone_resources() while the disk queue is still frozen to
avoid needing to freeze & unfreeze the queue again in
blk_revalidate_disk_zones(), thus simplifying that function code a
little.
Fixes: 0b83c86b444a ("block: Prevent potential deadlock in blk_revalidate_disk_zones()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Reviewed-by: Chaitanya Kulkarni <kch(a)nvidia.com>
Reviewed-by: Hannes Reinecke <hare(a)suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 1621e8f78338..39381f2b2e94 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -1557,8 +1557,13 @@ static int disk_update_zone_resources(struct gendisk *disk,
{
struct request_queue *q = disk->queue;
unsigned int nr_seq_zones, nr_conv_zones;
- unsigned int pool_size;
+ unsigned int pool_size, memflags;
struct queue_limits lim;
+ int ret = 0;
+
+ lim = queue_limits_start_update(q);
+
+ memflags = blk_mq_freeze_queue(q);
disk->nr_zones = args->nr_zones;
disk->zone_capacity = args->zone_capacity;
@@ -1568,11 +1573,10 @@ static int disk_update_zone_resources(struct gendisk *disk,
if (nr_conv_zones >= disk->nr_zones) {
pr_warn("%s: Invalid number of conventional zones %u / %u\n",
disk->disk_name, nr_conv_zones, disk->nr_zones);
- return -ENODEV;
+ ret = -ENODEV;
+ goto unfreeze;
}
- lim = queue_limits_start_update(q);
-
/*
* Some devices can advertize zone resource limits that are larger than
* the number of sequential zones of the zoned block device, e.g. a
@@ -1609,7 +1613,15 @@ static int disk_update_zone_resources(struct gendisk *disk,
}
commit:
- return queue_limits_commit_update_frozen(q, &lim);
+ ret = queue_limits_commit_update(q, &lim);
+
+unfreeze:
+ if (ret)
+ disk_free_zone_resources(disk);
+
+ blk_mq_unfreeze_queue(q, memflags);
+
+ return ret;
}
static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
@@ -1774,7 +1786,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
sector_t zone_sectors = q->limits.chunk_sectors;
sector_t capacity = get_capacity(disk);
struct blk_revalidate_zone_args args = { };
- unsigned int noio_flag;
+ unsigned int memflags, noio_flag;
int ret = -ENOMEM;
if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
@@ -1824,20 +1836,14 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
ret = -ENODEV;
}
- /*
- * Set the new disk zone parameters only once the queue is frozen and
- * all I/Os are completed.
- */
if (ret > 0)
- ret = disk_update_zone_resources(disk, &args);
- else
- pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
- if (ret) {
- unsigned int memflags = blk_mq_freeze_queue(q);
+ return disk_update_zone_resources(disk, &args);
- disk_free_zone_resources(disk);
- blk_mq_unfreeze_queue(q, memflags);
- }
+ pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
+
+ memflags = blk_mq_freeze_queue(q);
+ disk_free_zone_resources(disk);
+ blk_mq_unfreeze_queue(q, memflags);
return ret;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x de83d4617f9fe059623e97acf7e1e10d209625b5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122941-reluctant-exhale-a49f@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From de83d4617f9fe059623e97acf7e1e10d209625b5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Mon, 20 Oct 2025 06:53:10 +0200
Subject: [PATCH] iommu/mediatek: fix use-after-free on probe deferral
The driver is dropping the references taken to the larb devices during
probe after successful lookup as well as on errors. This can
potentially lead to a use-after-free in case a larb device has not yet
been bound to its driver so that the iommu driver probe defers.
Fix this by keeping the references as expected while the iommu driver is
bound.
Fixes: 26593928564c ("iommu/mediatek: Add error path for loop of mm_dts_parse")
Cc: stable(a)vger.kernel.org
Cc: Yong Wu <yong.wu(a)mediatek.com>
Acked-by: Robin Murphy <robin.murphy(a)arm.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Reviewed-by: Yong Wu <yong.wu(a)mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>
Signed-off-by: Joerg Roedel <joerg.roedel(a)amd.com>
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 82a55fe19a62..54d8936d9d11 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1213,16 +1213,19 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
}
component_match_add(dev, match, component_compare_dev, &plarbdev->dev);
- platform_device_put(plarbdev);
}
- if (!frst_avail_smicomm_node)
- return -EINVAL;
+ if (!frst_avail_smicomm_node) {
+ ret = -EINVAL;
+ goto err_larbdev_put;
+ }
pcommdev = of_find_device_by_node(frst_avail_smicomm_node);
of_node_put(frst_avail_smicomm_node);
- if (!pcommdev)
- return -ENODEV;
+ if (!pcommdev) {
+ ret = -ENODEV;
+ goto err_larbdev_put;
+ }
data->smicomm_dev = &pcommdev->dev;
link = device_link_add(data->smicomm_dev, dev,
@@ -1230,7 +1233,8 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
platform_device_put(pcommdev);
if (!link) {
dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev));
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_larbdev_put;
}
return 0;
@@ -1402,8 +1406,12 @@ static int mtk_iommu_probe(struct platform_device *pdev)
iommu_device_sysfs_remove(&data->iommu);
out_list_del:
list_del(&data->list);
- if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
+ if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
device_link_remove(data->smicomm_dev, dev);
+
+ for (i = 0; i < MTK_LARB_NR_MAX; i++)
+ put_device(data->larb_imu[i].dev);
+ }
out_runtime_disable:
pm_runtime_disable(dev);
return ret;
@@ -1423,6 +1431,9 @@ static void mtk_iommu_remove(struct platform_device *pdev)
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
device_link_remove(data->smicomm_dev, &pdev->dev);
component_master_del(&pdev->dev, &mtk_iommu_com_ops);
+
+ for (i = 0; i < MTK_LARB_NR_MAX; i++)
+ put_device(data->larb_imu[i].dev);
}
pm_runtime_disable(&pdev->dev);
for (i = 0; i < data->plat_data->banks_num; i++) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5654889a94b0de5ad6ceae3793e7f5e0b61b50b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122900-ripple-expert-4378@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5654889a94b0de5ad6ceae3793e7f5e0b61b50b6 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Date: Fri, 14 Nov 2025 11:33:13 +0100
Subject: [PATCH] ARM: dts: microchip: sama7g5: fix uart fifo size to 32
On some flexcom nodes related to uart, the fifo sizes were wrong: fix
them to 32 data.
Fixes: 7540629e2fc7 ("ARM: dts: at91: add sama7g5 SoC DT and sama7g5-ek")
Cc: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Link: https://lore.kernel.org/r/20251114103313.20220-2-nicolas.ferre@microchip.com
Signed-off-by: Claudiu Beznea <claudiu.beznea(a)tuxon.dev>
diff --git a/arch/arm/boot/dts/microchip/sama7g5.dtsi b/arch/arm/boot/dts/microchip/sama7g5.dtsi
index 381cbcfcb34a..03ef3d9aaeec 100644
--- a/arch/arm/boot/dts/microchip/sama7g5.dtsi
+++ b/arch/arm/boot/dts/microchip/sama7g5.dtsi
@@ -824,7 +824,7 @@ uart4: serial@200 {
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
- atmel,fifo-size = <16>;
+ atmel,fifo-size = <32>;
status = "disabled";
};
};
@@ -850,7 +850,7 @@ uart7: serial@200 {
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
- atmel,fifo-size = <16>;
+ atmel,fifo-size = <32>;
status = "disabled";
};
};
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d1bea0ce35b6095544ee82bb54156fc62c067e58
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122934-smartness-abrasive-06be@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1bea0ce35b6095544ee82bb54156fc62c067e58 Mon Sep 17 00:00:00 2001
From: Joshua Rogers <linux(a)joshua.hu>
Date: Fri, 7 Nov 2025 10:09:49 -0500
Subject: [PATCH] svcrdma: bound check rq_pages index in inline path
svc_rdma_copy_inline_range indexed rqstp->rq_pages[rc_curpage] without
verifying rc_curpage stays within the allocated page array. Add guards
before the first use and after advancing to a new page.
Fixes: d7cc73972661 ("svcrdma: support multiple Read chunks per RPC")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joshua Rogers <linux(a)joshua.hu>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index e813e5463352..310de7a80be5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -841,6 +841,9 @@ static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
for (page_no = 0; page_no < numpages; page_no++) {
unsigned int page_len;
+ if (head->rc_curpage >= rqstp->rq_maxpages)
+ return -EINVAL;
+
page_len = min_t(unsigned int, remaining,
PAGE_SIZE - head->rc_pageoff);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x d1bea0ce35b6095544ee82bb54156fc62c067e58
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122918-oversight-jolliness-46c2@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1bea0ce35b6095544ee82bb54156fc62c067e58 Mon Sep 17 00:00:00 2001
From: Joshua Rogers <linux(a)joshua.hu>
Date: Fri, 7 Nov 2025 10:09:49 -0500
Subject: [PATCH] svcrdma: bound check rq_pages index in inline path
svc_rdma_copy_inline_range indexed rqstp->rq_pages[rc_curpage] without
verifying rc_curpage stays within the allocated page array. Add guards
before the first use and after advancing to a new page.
Fixes: d7cc73972661 ("svcrdma: support multiple Read chunks per RPC")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joshua Rogers <linux(a)joshua.hu>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index e813e5463352..310de7a80be5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -841,6 +841,9 @@ static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
for (page_no = 0; page_no < numpages; page_no++) {
unsigned int page_len;
+ if (head->rc_curpage >= rqstp->rq_maxpages)
+ return -EINVAL;
+
page_len = min_t(unsigned int, remaining,
PAGE_SIZE - head->rc_pageoff);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5654889a94b0de5ad6ceae3793e7f5e0b61b50b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122959-siding-astound-c864@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5654889a94b0de5ad6ceae3793e7f5e0b61b50b6 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Date: Fri, 14 Nov 2025 11:33:13 +0100
Subject: [PATCH] ARM: dts: microchip: sama7g5: fix uart fifo size to 32
On some flexcom nodes related to uart, the fifo sizes were wrong: fix
them to 32 data.
Fixes: 7540629e2fc7 ("ARM: dts: at91: add sama7g5 SoC DT and sama7g5-ek")
Cc: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Link: https://lore.kernel.org/r/20251114103313.20220-2-nicolas.ferre@microchip.com
Signed-off-by: Claudiu Beznea <claudiu.beznea(a)tuxon.dev>
diff --git a/arch/arm/boot/dts/microchip/sama7g5.dtsi b/arch/arm/boot/dts/microchip/sama7g5.dtsi
index 381cbcfcb34a..03ef3d9aaeec 100644
--- a/arch/arm/boot/dts/microchip/sama7g5.dtsi
+++ b/arch/arm/boot/dts/microchip/sama7g5.dtsi
@@ -824,7 +824,7 @@ uart4: serial@200 {
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
- atmel,fifo-size = <16>;
+ atmel,fifo-size = <32>;
status = "disabled";
};
};
@@ -850,7 +850,7 @@ uart7: serial@200 {
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
- atmel,fifo-size = <16>;
+ atmel,fifo-size = <32>;
status = "disabled";
};
};
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 5654889a94b0de5ad6ceae3793e7f5e0b61b50b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122959-rumor-work-1d89@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5654889a94b0de5ad6ceae3793e7f5e0b61b50b6 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Date: Fri, 14 Nov 2025 11:33:13 +0100
Subject: [PATCH] ARM: dts: microchip: sama7g5: fix uart fifo size to 32
On some flexcom nodes related to uart, the fifo sizes were wrong: fix
them to 32 data.
Fixes: 7540629e2fc7 ("ARM: dts: at91: add sama7g5 SoC DT and sama7g5-ek")
Cc: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Link: https://lore.kernel.org/r/20251114103313.20220-2-nicolas.ferre@microchip.com
Signed-off-by: Claudiu Beznea <claudiu.beznea(a)tuxon.dev>
diff --git a/arch/arm/boot/dts/microchip/sama7g5.dtsi b/arch/arm/boot/dts/microchip/sama7g5.dtsi
index 381cbcfcb34a..03ef3d9aaeec 100644
--- a/arch/arm/boot/dts/microchip/sama7g5.dtsi
+++ b/arch/arm/boot/dts/microchip/sama7g5.dtsi
@@ -824,7 +824,7 @@ uart4: serial@200 {
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
- atmel,fifo-size = <16>;
+ atmel,fifo-size = <32>;
status = "disabled";
};
};
@@ -850,7 +850,7 @@ uart7: serial@200 {
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
- atmel,fifo-size = <16>;
+ atmel,fifo-size = <32>;
status = "disabled";
};
};
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 5654889a94b0de5ad6ceae3793e7f5e0b61b50b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122958-dingbat-canary-d44c@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5654889a94b0de5ad6ceae3793e7f5e0b61b50b6 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Date: Fri, 14 Nov 2025 11:33:13 +0100
Subject: [PATCH] ARM: dts: microchip: sama7g5: fix uart fifo size to 32
On some flexcom nodes related to uart, the fifo sizes were wrong: fix
them to 32 data.
Fixes: 7540629e2fc7 ("ARM: dts: at91: add sama7g5 SoC DT and sama7g5-ek")
Cc: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Link: https://lore.kernel.org/r/20251114103313.20220-2-nicolas.ferre@microchip.com
Signed-off-by: Claudiu Beznea <claudiu.beznea(a)tuxon.dev>
diff --git a/arch/arm/boot/dts/microchip/sama7g5.dtsi b/arch/arm/boot/dts/microchip/sama7g5.dtsi
index 381cbcfcb34a..03ef3d9aaeec 100644
--- a/arch/arm/boot/dts/microchip/sama7g5.dtsi
+++ b/arch/arm/boot/dts/microchip/sama7g5.dtsi
@@ -824,7 +824,7 @@ uart4: serial@200 {
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
- atmel,fifo-size = <16>;
+ atmel,fifo-size = <32>;
status = "disabled";
};
};
@@ -850,7 +850,7 @@ uart7: serial@200 {
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
- atmel,fifo-size = <16>;
+ atmel,fifo-size = <32>;
status = "disabled";
};
};
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 6f6e309328d53a10c0fe1f77dec2db73373179b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122930-sinner-squad-c6cd@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6f6e309328d53a10c0fe1f77dec2db73373179b6 Mon Sep 17 00:00:00 2001
From: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
Date: Tue, 23 Sep 2025 23:01:48 -0700
Subject: [PATCH] crypto: af_alg - zero initialize memory allocated via
sock_kmalloc
Several crypto user API contexts and requests allocated with
sock_kmalloc() were left uninitialized, relying on callers to
set fields explicitly. This resulted in the use of uninitialized
data in certain error paths or when new fields are added in the
future.
The ACVP patches also contain two user-space interface files:
algif_kpp.c and algif_akcipher.c. These too rely on proper
initialization of their context structures.
A particular issue has been observed with the newly added
'inflight' variable introduced in af_alg_ctx by commit:
67b164a871af ("crypto: af_alg - Disallow multiple in-flight AIO requests")
Because the context is not memset to zero after allocation,
the inflight variable has contained garbage values. As a result,
af_alg_alloc_areq() has incorrectly returned -EBUSY randomly when
the garbage value was interpreted as true:
https://github.com/gregkh/linux/blame/master/crypto/af_alg.c#L1209
The check directly tests ctx->inflight without explicitly
comparing against true/false. Since inflight is only ever set to
true or false later, an uninitialized value has triggered
-EBUSY failures. Zero-initializing memory allocated with
sock_kmalloc() ensures inflight and other fields start in a known
state, removing random issues caused by uninitialized data.
Fixes: fe869cdb89c9 ("crypto: algif_hash - User-space interface for hash operations")
Fixes: 5afdfd22e6ba ("crypto: algif_rng - add random number generator support")
Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
Fixes: 67b164a871af ("crypto: af_alg - Disallow multiple in-flight AIO requests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index ca6fdcc6c54a..6c271e55f44d 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1212,15 +1212,14 @@ struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
if (unlikely(!areq))
return ERR_PTR(-ENOMEM);
+ memset(areq, 0, areqlen);
+
ctx->inflight = true;
areq->areqlen = areqlen;
areq->sk = sk;
areq->first_rsgl.sgl.sgt.sgl = areq->first_rsgl.sgl.sgl;
- areq->last_rsgl = NULL;
INIT_LIST_HEAD(&areq->rsgl_list);
- areq->tsgl = NULL;
- areq->tsgl_entries = 0;
return areq;
}
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index e3f1a4852737..4d3dfc60a16a 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -416,9 +416,8 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
if (!ctx)
return -ENOMEM;
- ctx->result = NULL;
+ memset(ctx, 0, len);
ctx->len = len;
- ctx->more = false;
crypto_init_wait(&ctx->wait);
ask->private = ctx;
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 10c41adac3b1..1a86e40c8372 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -248,9 +248,8 @@ static int rng_accept_parent(void *private, struct sock *sk)
if (!ctx)
return -ENOMEM;
+ memset(ctx, 0, len);
ctx->len = len;
- ctx->addtl = NULL;
- ctx->addtl_len = 0;
/*
* No seeding done at that point -- if multiple accepts are
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b8d5acdcf525f44e521ca4ef51dce4dac403dab4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122905-unstable-smuggling-c1a3@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8d5acdcf525f44e521ca4ef51dce4dac403dab4 Mon Sep 17 00:00:00 2001
From: Gui-Dong Han <hanguidong02(a)gmail.com>
Date: Fri, 28 Nov 2025 20:47:09 +0800
Subject: [PATCH] hwmon: (max16065) Use local variable to avoid TOCTOU
In max16065_current_show, data->curr_sense is read twice: once for the
error check and again for the calculation. Since
i2c_smbus_read_byte_data returns negative error codes on failure, if the
data changes to an error code between the check and the use, ADC_TO_CURR
results in an incorrect calculation.
Read data->curr_sense into a local variable to ensure consistency. Note
that data->curr_gain is constant and safe to access directly.
This aligns max16065_current_show with max16065_input_show, which
already uses a local variable for the same reason.
Link: https://lore.kernel.org/all/CALbr=LYJ_ehtp53HXEVkSpYoub+XYSTU8Rg=o1xxMJ8=5z…
Fixes: f5bae2642e3d ("hwmon: Driver for MAX16065 System Manager and compatibles")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
Link: https://lore.kernel.org/r/20251128124709.3876-1-hanguidong02@gmail.com
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c
index 0ccb5eb596fc..4c9e7892a73c 100644
--- a/drivers/hwmon/max16065.c
+++ b/drivers/hwmon/max16065.c
@@ -216,12 +216,13 @@ static ssize_t max16065_current_show(struct device *dev,
struct device_attribute *da, char *buf)
{
struct max16065_data *data = max16065_update_device(dev);
+ int curr_sense = data->curr_sense;
- if (unlikely(data->curr_sense < 0))
- return data->curr_sense;
+ if (unlikely(curr_sense < 0))
+ return curr_sense;
return sysfs_emit(buf, "%d\n",
- ADC_TO_CURR(data->curr_sense, data->curr_gain));
+ ADC_TO_CURR(curr_sense, data->curr_gain));
}
static ssize_t max16065_limit_store(struct device *dev,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6f6e309328d53a10c0fe1f77dec2db73373179b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122929-grid-certify-c610@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6f6e309328d53a10c0fe1f77dec2db73373179b6 Mon Sep 17 00:00:00 2001
From: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
Date: Tue, 23 Sep 2025 23:01:48 -0700
Subject: [PATCH] crypto: af_alg - zero initialize memory allocated via
sock_kmalloc
Several crypto user API contexts and requests allocated with
sock_kmalloc() were left uninitialized, relying on callers to
set fields explicitly. This resulted in the use of uninitialized
data in certain error paths or when new fields are added in the
future.
The ACVP patches also contain two user-space interface files:
algif_kpp.c and algif_akcipher.c. These too rely on proper
initialization of their context structures.
A particular issue has been observed with the newly added
'inflight' variable introduced in af_alg_ctx by commit:
67b164a871af ("crypto: af_alg - Disallow multiple in-flight AIO requests")
Because the context is not memset to zero after allocation,
the inflight variable has contained garbage values. As a result,
af_alg_alloc_areq() has incorrectly returned -EBUSY randomly when
the garbage value was interpreted as true:
https://github.com/gregkh/linux/blame/master/crypto/af_alg.c#L1209
The check directly tests ctx->inflight without explicitly
comparing against true/false. Since inflight is only ever set to
true or false later, an uninitialized value has triggered
-EBUSY failures. Zero-initializing memory allocated with
sock_kmalloc() ensures inflight and other fields start in a known
state, removing random issues caused by uninitialized data.
Fixes: fe869cdb89c9 ("crypto: algif_hash - User-space interface for hash operations")
Fixes: 5afdfd22e6ba ("crypto: algif_rng - add random number generator support")
Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
Fixes: 67b164a871af ("crypto: af_alg - Disallow multiple in-flight AIO requests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index ca6fdcc6c54a..6c271e55f44d 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1212,15 +1212,14 @@ struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
if (unlikely(!areq))
return ERR_PTR(-ENOMEM);
+ memset(areq, 0, areqlen);
+
ctx->inflight = true;
areq->areqlen = areqlen;
areq->sk = sk;
areq->first_rsgl.sgl.sgt.sgl = areq->first_rsgl.sgl.sgl;
- areq->last_rsgl = NULL;
INIT_LIST_HEAD(&areq->rsgl_list);
- areq->tsgl = NULL;
- areq->tsgl_entries = 0;
return areq;
}
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index e3f1a4852737..4d3dfc60a16a 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -416,9 +416,8 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
if (!ctx)
return -ENOMEM;
- ctx->result = NULL;
+ memset(ctx, 0, len);
ctx->len = len;
- ctx->more = false;
crypto_init_wait(&ctx->wait);
ask->private = ctx;
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 10c41adac3b1..1a86e40c8372 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -248,9 +248,8 @@ static int rng_accept_parent(void *private, struct sock *sk)
if (!ctx)
return -ENOMEM;
+ memset(ctx, 0, len);
ctx->len = len;
- ctx->addtl = NULL;
- ctx->addtl_len = 0;
/*
* No seeding done at that point -- if multiple accepts are
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6f6e309328d53a10c0fe1f77dec2db73373179b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122928-monotone-prodigy-ba4d@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6f6e309328d53a10c0fe1f77dec2db73373179b6 Mon Sep 17 00:00:00 2001
From: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
Date: Tue, 23 Sep 2025 23:01:48 -0700
Subject: [PATCH] crypto: af_alg - zero initialize memory allocated via
sock_kmalloc
Several crypto user API contexts and requests allocated with
sock_kmalloc() were left uninitialized, relying on callers to
set fields explicitly. This resulted in the use of uninitialized
data in certain error paths or when new fields are added in the
future.
The ACVP patches also contain two user-space interface files:
algif_kpp.c and algif_akcipher.c. These too rely on proper
initialization of their context structures.
A particular issue has been observed with the newly added
'inflight' variable introduced in af_alg_ctx by commit:
67b164a871af ("crypto: af_alg - Disallow multiple in-flight AIO requests")
Because the context is not memset to zero after allocation,
the inflight variable has contained garbage values. As a result,
af_alg_alloc_areq() has incorrectly returned -EBUSY randomly when
the garbage value was interpreted as true:
https://github.com/gregkh/linux/blame/master/crypto/af_alg.c#L1209
The check directly tests ctx->inflight without explicitly
comparing against true/false. Since inflight is only ever set to
true or false later, an uninitialized value has triggered
-EBUSY failures. Zero-initializing memory allocated with
sock_kmalloc() ensures inflight and other fields start in a known
state, removing random issues caused by uninitialized data.
Fixes: fe869cdb89c9 ("crypto: algif_hash - User-space interface for hash operations")
Fixes: 5afdfd22e6ba ("crypto: algif_rng - add random number generator support")
Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
Fixes: 67b164a871af ("crypto: af_alg - Disallow multiple in-flight AIO requests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index ca6fdcc6c54a..6c271e55f44d 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1212,15 +1212,14 @@ struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
if (unlikely(!areq))
return ERR_PTR(-ENOMEM);
+ memset(areq, 0, areqlen);
+
ctx->inflight = true;
areq->areqlen = areqlen;
areq->sk = sk;
areq->first_rsgl.sgl.sgt.sgl = areq->first_rsgl.sgl.sgl;
- areq->last_rsgl = NULL;
INIT_LIST_HEAD(&areq->rsgl_list);
- areq->tsgl = NULL;
- areq->tsgl_entries = 0;
return areq;
}
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index e3f1a4852737..4d3dfc60a16a 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -416,9 +416,8 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
if (!ctx)
return -ENOMEM;
- ctx->result = NULL;
+ memset(ctx, 0, len);
ctx->len = len;
- ctx->more = false;
crypto_init_wait(&ctx->wait);
ask->private = ctx;
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 10c41adac3b1..1a86e40c8372 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -248,9 +248,8 @@ static int rng_accept_parent(void *private, struct sock *sk)
if (!ctx)
return -ENOMEM;
+ memset(ctx, 0, len);
ctx->len = len;
- ctx->addtl = NULL;
- ctx->addtl_len = 0;
/*
* No seeding done at that point -- if multiple accepts are
Commit ef0ff68351be ("driver core: Probe devices asynchronously instead of
the driver") speeds up the loading of large numbers of device drivers by
submitting asynchronous probe workers to an unbounded workqueue and binding
each worker to the CPU near the device’s NUMA node. These workers are not
scheduled on isolated CPUs because their cpumask is restricted to
housekeeping_cpumask(HK_TYPE_WQ) and housekeeping_cpumask(HK_TYPE_DOMAIN).
However, when PCI devices reside on the same NUMA node, all their
drivers’ probe workers are bound to the same CPU within that node, yet
the probes still run in parallel because pci_call_probe() invokes
work_on_cpu(). Introduced by commit 873392ca514f ("PCI: work_on_cpu: use
in drivers/pci/pci-driver.c"), work_on_cpu() queues a worker on
system_percpu_wq to bind the probe thread to the first CPU in the
device’s NUMA node (chosen via cpumask_any_and() in pci_call_probe()).
1. The function __driver_attach() submits an asynchronous worker with
callback __driver_attach_async_helper().
__driver_attach()
async_schedule_dev(__driver_attach_async_helper, dev)
async_schedule_node(func, dev, dev_to_node(dev))
async_schedule_node_domain(func, data, node, &async_dfl_domain)
__async_schedule_node_domain(func, data, node, domain, entry)
queue_work_node(node, async_wq, &entry->work)
2. The asynchronous probe worker ultimately calls work_on_cpu() in
pci_call_probe(), binding the worker to the same CPU within the
device’s NUMA node.
__driver_attach_async_helper()
driver_probe_device(drv, dev)
__driver_probe_device(drv, dev)
really_probe(dev, drv)
call_driver_probe(dev, drv)
dev->bus->probe(dev)
pci_device_probe(dev)
__pci_device_probe(drv, pci_dev)
pci_call_probe(drv, pci_dev, id)
cpu = cpumask_any_and(cpumask_of_node(node), wq_domain_mask)
error = work_on_cpu(cpu, local_pci_probe, &ddi)
schedule_work_on(cpu, &wfc.work);
queue_work_on(cpu, system_percpu_wq, work)
To fix the issue, pci_call_probe() must not call work_on_cpu() when it is
already running inside an unbounded asynchronous worker. Because a driver
can be probed asynchronously either by probe_type or by the kernel command
line, we cannot rely on PROBE_PREFER_ASYNCHRONOUS alone. Instead, we test
the PF_WQ_WORKER flag in current->flags; if it is set, pci_call_probe() is
executing within an unbounded workqueue worker and should skip the extra
work_on_cpu() call.
Testing three NVMe devices on the same NUMA node of an AMD EPYC 9A64
2.4 GHz processor shows a 35 % probe-time improvement with the patch:
Before (all on CPU 0):
nvme 0000:01:00.0: CPU: 0, COMM: kworker/0:1, probe cost: 53372612 ns
nvme 0000:02:00.0: CPU: 0, COMM: kworker/0:2, probe cost: 49532941 ns
nvme 0000:03:00.0: CPU: 0, COMM: kworker/0:3, probe cost: 47315175 ns
After (spread across CPUs 1, 2, 5):
nvme 0000:01:00.0: CPU: 5, COMM: kworker/u1025:5, probe cost: 34765890 ns
nvme 0000:02:00.0: CPU: 1, COMM: kworker/u1025:2, probe cost: 34696433 ns
nvme 0000:03:00.0: CPU: 2, COMM: kworker/u1025:3, probe cost: 33233323 ns
The improvement grows with more PCI devices because fewer probes contend
for the same CPU.
Fixes: ef0ff68351be ("driver core: Probe devices asynchronously instead of the driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jinhui Guo <guojinhui.liam(a)bytedance.com>
---
drivers/pci/pci-driver.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7c2d9d596258..4bc47a84d330 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -366,9 +366,11 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
/*
* Prevent nesting work_on_cpu() for the case where a Virtual Function
* device is probed from work_on_cpu() of the Physical device.
+ * Check PF_WQ_WORKER to prevent invoking work_on_cpu() in an asynchronous
+ * probe worker when the driver allows asynchronous probing.
*/
if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
- pci_physfn_is_probed(dev)) {
+ pci_physfn_is_probed(dev) || (current->flags & PF_WQ_WORKER)) {
cpu = nr_cpu_ids;
} else {
cpumask_var_t wq_domain_mask;
--
2.20.1
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 83cbb4d33dc22b0ca1a4e85c6e892c9b729e28d4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122954-stony-herring-2347@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 83cbb4d33dc22b0ca1a4e85c6e892c9b729e28d4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Tue, 28 Oct 2025 22:07:27 +0200
Subject: [PATCH] drm/displayid: add quirk to ignore DisplayID checksum errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add a mechanism for DisplayID specific quirks, and add the first quirk
to ignore DisplayID section checksum errors.
It would be quite inconvenient to pass existing EDID quirks from
drm_edid.c for DisplayID parsing. Not all places doing DisplayID
iteration have the quirks readily available, and would have to pass it
in all places. Simply add a separate array of DisplayID specific EDID
quirks. We do end up checking it every time we iterate DisplayID blocks,
but hopefully the number of quirks remains small.
There are a few laptop models with DisplayID checksum failures, leading
to higher refresh rates only present in the DisplayID blocks being
ignored. Add a quirk for the panel in the machines.
Reported-by: Tiago Martins Araújo <tiago.martins.araujo(a)gmail.com>
Closes: https://lore.kernel.org/r/CACRbrPGvLP5LANXuFi6z0S7XMbAG4X5y2YOLBDxfOVtfGGqi…
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14703
Acked-by: Alex Deucher <alexander.deucher(a)amd.com>
Tested-by: Tiago Martins Araújo <tiago.martins.araujo(a)gmail.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/c04d81ae648c5f21b3f5b7953f924718051f2798.176168196…
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
diff --git a/drivers/gpu/drm/drm_displayid.c b/drivers/gpu/drm/drm_displayid.c
index 20b453d2b854..58d0bb6d2676 100644
--- a/drivers/gpu/drm/drm_displayid.c
+++ b/drivers/gpu/drm/drm_displayid.c
@@ -9,6 +9,34 @@
#include "drm_crtc_internal.h"
#include "drm_displayid_internal.h"
+enum {
+ QUIRK_IGNORE_CHECKSUM,
+};
+
+struct displayid_quirk {
+ const struct drm_edid_ident ident;
+ u8 quirks;
+};
+
+static const struct displayid_quirk quirks[] = {
+ {
+ .ident = DRM_EDID_IDENT_INIT('C', 'S', 'O', 5142, "MNE007ZA1-5"),
+ .quirks = BIT(QUIRK_IGNORE_CHECKSUM),
+ },
+};
+
+static u8 get_quirks(const struct drm_edid *drm_edid)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(quirks); i++) {
+ if (drm_edid_match(drm_edid, &quirks[i].ident))
+ return quirks[i].quirks;
+ }
+
+ return 0;
+}
+
static const struct displayid_header *
displayid_get_header(const u8 *displayid, int length, int index)
{
@@ -23,7 +51,7 @@ displayid_get_header(const u8 *displayid, int length, int index)
}
static const struct displayid_header *
-validate_displayid(const u8 *displayid, int length, int idx)
+validate_displayid(const u8 *displayid, int length, int idx, bool ignore_checksum)
{
int i, dispid_length;
u8 csum = 0;
@@ -41,8 +69,11 @@ validate_displayid(const u8 *displayid, int length, int idx)
for (i = 0; i < dispid_length; i++)
csum += displayid[idx + i];
if (csum) {
- DRM_NOTE("DisplayID checksum invalid, remainder is %d\n", csum);
- return ERR_PTR(-EINVAL);
+ DRM_NOTE("DisplayID checksum invalid, remainder is %d%s\n", csum,
+ ignore_checksum ? " (ignoring)" : "");
+
+ if (!ignore_checksum)
+ return ERR_PTR(-EINVAL);
}
return base;
@@ -52,6 +83,7 @@ static const u8 *find_next_displayid_extension(struct displayid_iter *iter)
{
const struct displayid_header *base;
const u8 *displayid;
+ bool ignore_checksum = iter->quirks & BIT(QUIRK_IGNORE_CHECKSUM);
displayid = drm_edid_find_extension(iter->drm_edid, DISPLAYID_EXT, &iter->ext_index);
if (!displayid)
@@ -61,7 +93,7 @@ static const u8 *find_next_displayid_extension(struct displayid_iter *iter)
iter->length = EDID_LENGTH - 1;
iter->idx = 1;
- base = validate_displayid(displayid, iter->length, iter->idx);
+ base = validate_displayid(displayid, iter->length, iter->idx, ignore_checksum);
if (IS_ERR(base))
return NULL;
@@ -76,6 +108,7 @@ void displayid_iter_edid_begin(const struct drm_edid *drm_edid,
memset(iter, 0, sizeof(*iter));
iter->drm_edid = drm_edid;
+ iter->quirks = get_quirks(drm_edid);
}
static const struct displayid_block *
diff --git a/drivers/gpu/drm/drm_displayid_internal.h b/drivers/gpu/drm/drm_displayid_internal.h
index 957dd0619f5c..5b1b32f73516 100644
--- a/drivers/gpu/drm/drm_displayid_internal.h
+++ b/drivers/gpu/drm/drm_displayid_internal.h
@@ -167,6 +167,8 @@ struct displayid_iter {
u8 version;
u8 primary_use;
+
+ u8 quirks;
};
void displayid_iter_edid_begin(const struct drm_edid *drm_edid,
The patch below does not apply to the 6.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.18.y
git checkout FETCH_HEAD
git cherry-pick -x 83cbb4d33dc22b0ca1a4e85c6e892c9b729e28d4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122944-trimester-congrats-2c82@gregkh' --subject-prefix 'PATCH 6.18.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 83cbb4d33dc22b0ca1a4e85c6e892c9b729e28d4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Tue, 28 Oct 2025 22:07:27 +0200
Subject: [PATCH] drm/displayid: add quirk to ignore DisplayID checksum errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add a mechanism for DisplayID specific quirks, and add the first quirk
to ignore DisplayID section checksum errors.
It would be quite inconvenient to pass existing EDID quirks from
drm_edid.c for DisplayID parsing. Not all places doing DisplayID
iteration have the quirks readily available, and would have to pass it
in all places. Simply add a separate array of DisplayID specific EDID
quirks. We do end up checking it every time we iterate DisplayID blocks,
but hopefully the number of quirks remains small.
There are a few laptop models with DisplayID checksum failures, leading
to higher refresh rates only present in the DisplayID blocks being
ignored. Add a quirk for the panel in the machines.
Reported-by: Tiago Martins Araújo <tiago.martins.araujo(a)gmail.com>
Closes: https://lore.kernel.org/r/CACRbrPGvLP5LANXuFi6z0S7XMbAG4X5y2YOLBDxfOVtfGGqi…
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14703
Acked-by: Alex Deucher <alexander.deucher(a)amd.com>
Tested-by: Tiago Martins Araújo <tiago.martins.araujo(a)gmail.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/c04d81ae648c5f21b3f5b7953f924718051f2798.176168196…
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
diff --git a/drivers/gpu/drm/drm_displayid.c b/drivers/gpu/drm/drm_displayid.c
index 20b453d2b854..58d0bb6d2676 100644
--- a/drivers/gpu/drm/drm_displayid.c
+++ b/drivers/gpu/drm/drm_displayid.c
@@ -9,6 +9,34 @@
#include "drm_crtc_internal.h"
#include "drm_displayid_internal.h"
+enum {
+ QUIRK_IGNORE_CHECKSUM,
+};
+
+struct displayid_quirk {
+ const struct drm_edid_ident ident;
+ u8 quirks;
+};
+
+static const struct displayid_quirk quirks[] = {
+ {
+ .ident = DRM_EDID_IDENT_INIT('C', 'S', 'O', 5142, "MNE007ZA1-5"),
+ .quirks = BIT(QUIRK_IGNORE_CHECKSUM),
+ },
+};
+
+static u8 get_quirks(const struct drm_edid *drm_edid)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(quirks); i++) {
+ if (drm_edid_match(drm_edid, &quirks[i].ident))
+ return quirks[i].quirks;
+ }
+
+ return 0;
+}
+
static const struct displayid_header *
displayid_get_header(const u8 *displayid, int length, int index)
{
@@ -23,7 +51,7 @@ displayid_get_header(const u8 *displayid, int length, int index)
}
static const struct displayid_header *
-validate_displayid(const u8 *displayid, int length, int idx)
+validate_displayid(const u8 *displayid, int length, int idx, bool ignore_checksum)
{
int i, dispid_length;
u8 csum = 0;
@@ -41,8 +69,11 @@ validate_displayid(const u8 *displayid, int length, int idx)
for (i = 0; i < dispid_length; i++)
csum += displayid[idx + i];
if (csum) {
- DRM_NOTE("DisplayID checksum invalid, remainder is %d\n", csum);
- return ERR_PTR(-EINVAL);
+ DRM_NOTE("DisplayID checksum invalid, remainder is %d%s\n", csum,
+ ignore_checksum ? " (ignoring)" : "");
+
+ if (!ignore_checksum)
+ return ERR_PTR(-EINVAL);
}
return base;
@@ -52,6 +83,7 @@ static const u8 *find_next_displayid_extension(struct displayid_iter *iter)
{
const struct displayid_header *base;
const u8 *displayid;
+ bool ignore_checksum = iter->quirks & BIT(QUIRK_IGNORE_CHECKSUM);
displayid = drm_edid_find_extension(iter->drm_edid, DISPLAYID_EXT, &iter->ext_index);
if (!displayid)
@@ -61,7 +93,7 @@ static const u8 *find_next_displayid_extension(struct displayid_iter *iter)
iter->length = EDID_LENGTH - 1;
iter->idx = 1;
- base = validate_displayid(displayid, iter->length, iter->idx);
+ base = validate_displayid(displayid, iter->length, iter->idx, ignore_checksum);
if (IS_ERR(base))
return NULL;
@@ -76,6 +108,7 @@ void displayid_iter_edid_begin(const struct drm_edid *drm_edid,
memset(iter, 0, sizeof(*iter));
iter->drm_edid = drm_edid;
+ iter->quirks = get_quirks(drm_edid);
}
static const struct displayid_block *
diff --git a/drivers/gpu/drm/drm_displayid_internal.h b/drivers/gpu/drm/drm_displayid_internal.h
index 957dd0619f5c..5b1b32f73516 100644
--- a/drivers/gpu/drm/drm_displayid_internal.h
+++ b/drivers/gpu/drm/drm_displayid_internal.h
@@ -167,6 +167,8 @@ struct displayid_iter {
u8 version;
u8 primary_use;
+
+ u8 quirks;
};
void displayid_iter_edid_begin(const struct drm_edid *drm_edid,
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 266273eaf4d99475f1ae57f687b3e42bc71ec6f0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122955-unguided-tinsmith-5d03@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 266273eaf4d99475f1ae57f687b3e42bc71ec6f0 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Thu, 27 Nov 2025 16:35:59 +0000
Subject: [PATCH] btrfs: don't log conflicting inode if it's a dir moved in the
current transaction
We can't log a conflicting inode if it's a directory and it was moved
from one parent directory to another parent directory in the current
transaction, as this can result an attempt to have a directory with
two hard links during log replay, one for the old parent directory and
another for the new parent directory.
The following scenario triggers that issue:
1) We have directories "dir1" and "dir2" created in a past transaction.
Directory "dir1" has inode A as its parent directory;
2) We move "dir1" to some other directory;
3) We create a file with the name "dir1" in directory inode A;
4) We fsync the new file. This results in logging the inode of the new file
and the inode for the directory "dir1" that was previously moved in the
current transaction. So the log tree has the INODE_REF item for the
new location of "dir1";
5) We move the new file to some other directory. This results in updating
the log tree to included the new INODE_REF for the new location of the
file and removes the INODE_REF for the old location. This happens
during the rename when we call btrfs_log_new_name();
6) We fsync the file, and that persists the log tree changes done in the
previous step (btrfs_log_new_name() only updates the log tree in
memory);
7) We have a power failure;
8) Next time the fs is mounted, log replay happens and when processing
the inode for directory "dir1" we find a new INODE_REF and add that
link, but we don't remove the old link of the inode since we have
not logged the old parent directory of the directory inode "dir1".
As a result after log replay finishes when we trigger writeback of the
subvolume tree's extent buffers, the tree check will detect that we have
a directory a hard link count of 2 and we get a mount failure.
The errors and stack traces reported in dmesg/syslog are like this:
[ 3845.729764] BTRFS info (device dm-0): start tree-log replay
[ 3845.730304] page: refcount:3 mapcount:0 mapping:000000005c8a3027 index:0x1d00 pfn:0x11510c
[ 3845.731236] memcg:ffff9264c02f4e00
[ 3845.731751] aops:btree_aops [btrfs] ino:1
[ 3845.732300] flags: 0x17fffc00000400a(uptodate|private|writeback|node=0|zone=2|lastcpupid=0x1ffff)
[ 3845.733346] raw: 017fffc00000400a 0000000000000000 dead000000000122 ffff9264d978aea8
[ 3845.734265] raw: 0000000000001d00 ffff92650e6d4738 00000003ffffffff ffff9264c02f4e00
[ 3845.735305] page dumped because: eb page dump
[ 3845.735981] BTRFS critical (device dm-0): corrupt leaf: root=5 block=30408704 slot=6 ino=257, invalid nlink: has 2 expect no more than 1 for dir
[ 3845.737786] BTRFS info (device dm-0): leaf 30408704 gen 10 total ptrs 17 free space 14881 owner 5
[ 3845.737789] BTRFS info (device dm-0): refs 4 lock_owner 0 current 30701
[ 3845.737792] item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160
[ 3845.737794] inode generation 3 transid 9 size 16 nbytes 16384
[ 3845.737795] block group 0 mode 40755 links 1 uid 0 gid 0
[ 3845.737797] rdev 0 sequence 2 flags 0x0
[ 3845.737798] atime 1764259517.0
[ 3845.737800] ctime 1764259517.572889464
[ 3845.737801] mtime 1764259517.572889464
[ 3845.737802] otime 1764259517.0
[ 3845.737803] item 1 key (256 INODE_REF 256) itemoff 16111 itemsize 12
[ 3845.737805] index 0 name_len 2
[ 3845.737807] item 2 key (256 DIR_ITEM 2363071922) itemoff 16077 itemsize 34
[ 3845.737808] location key (257 1 0) type 2
[ 3845.737810] transid 9 data_len 0 name_len 4
[ 3845.737811] item 3 key (256 DIR_ITEM 2676584006) itemoff 16043 itemsize 34
[ 3845.737813] location key (258 1 0) type 2
[ 3845.737814] transid 9 data_len 0 name_len 4
[ 3845.737815] item 4 key (256 DIR_INDEX 2) itemoff 16009 itemsize 34
[ 3845.737816] location key (257 1 0) type 2
[ 3845.737818] transid 9 data_len 0 name_len 4
[ 3845.737819] item 5 key (256 DIR_INDEX 3) itemoff 15975 itemsize 34
[ 3845.737820] location key (258 1 0) type 2
[ 3845.737821] transid 9 data_len 0 name_len 4
[ 3845.737822] item 6 key (257 INODE_ITEM 0) itemoff 15815 itemsize 160
[ 3845.737824] inode generation 9 transid 10 size 6 nbytes 0
[ 3845.737825] block group 0 mode 40755 links 2 uid 0 gid 0
[ 3845.737826] rdev 0 sequence 1 flags 0x0
[ 3845.737827] atime 1764259517.572889464
[ 3845.737828] ctime 1764259517.572889464
[ 3845.737830] mtime 1764259517.572889464
[ 3845.737831] otime 1764259517.572889464
[ 3845.737832] item 7 key (257 INODE_REF 256) itemoff 15801 itemsize 14
[ 3845.737833] index 2 name_len 4
[ 3845.737834] item 8 key (257 INODE_REF 258) itemoff 15787 itemsize 14
[ 3845.737836] index 2 name_len 4
[ 3845.737837] item 9 key (257 DIR_ITEM 2507850652) itemoff 15754 itemsize 33
[ 3845.737838] location key (259 1 0) type 1
[ 3845.737839] transid 10 data_len 0 name_len 3
[ 3845.737840] item 10 key (257 DIR_INDEX 2) itemoff 15721 itemsize 33
[ 3845.737842] location key (259 1 0) type 1
[ 3845.737843] transid 10 data_len 0 name_len 3
[ 3845.737844] item 11 key (258 INODE_ITEM 0) itemoff 15561 itemsize 160
[ 3845.737846] inode generation 9 transid 10 size 8 nbytes 0
[ 3845.737847] block group 0 mode 40755 links 1 uid 0 gid 0
[ 3845.737848] rdev 0 sequence 1 flags 0x0
[ 3845.737849] atime 1764259517.572889464
[ 3845.737850] ctime 1764259517.572889464
[ 3845.737851] mtime 1764259517.572889464
[ 3845.737852] otime 1764259517.572889464
[ 3845.737853] item 12 key (258 INODE_REF 256) itemoff 15547 itemsize 14
[ 3845.737855] index 3 name_len 4
[ 3845.737856] item 13 key (258 DIR_ITEM 1843588421) itemoff 15513 itemsize 34
[ 3845.737857] location key (257 1 0) type 2
[ 3845.737858] transid 10 data_len 0 name_len 4
[ 3845.737860] item 14 key (258 DIR_INDEX 2) itemoff 15479 itemsize 34
[ 3845.737861] location key (257 1 0) type 2
[ 3845.737862] transid 10 data_len 0 name_len 4
[ 3845.737863] item 15 key (259 INODE_ITEM 0) itemoff 15319 itemsize 160
[ 3845.737865] inode generation 10 transid 10 size 0 nbytes 0
[ 3845.737866] block group 0 mode 100600 links 1 uid 0 gid 0
[ 3845.737867] rdev 0 sequence 2 flags 0x0
[ 3845.737868] atime 1764259517.580874966
[ 3845.737869] ctime 1764259517.586121869
[ 3845.737870] mtime 1764259517.580874966
[ 3845.737872] otime 1764259517.580874966
[ 3845.737873] item 16 key (259 INODE_REF 257) itemoff 15306 itemsize 13
[ 3845.737874] index 2 name_len 3
[ 3845.737875] BTRFS error (device dm-0): block=30408704 write time tree block corruption detected
[ 3845.739448] ------------[ cut here ]------------
[ 3845.740092] WARNING: CPU: 5 PID: 30701 at fs/btrfs/disk-io.c:335 btree_csum_one_bio+0x25a/0x270 [btrfs]
[ 3845.741439] Modules linked in: btrfs dm_flakey crc32c_cryptoapi (...)
[ 3845.750626] CPU: 5 UID: 0 PID: 30701 Comm: mount Tainted: G W 6.18.0-rc6-btrfs-next-218+ #1 PREEMPT(full)
[ 3845.752414] Tainted: [W]=WARN
[ 3845.752828] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
[ 3845.754499] RIP: 0010:btree_csum_one_bio+0x25a/0x270 [btrfs]
[ 3845.755460] Code: 31 f6 48 89 (...)
[ 3845.758685] RSP: 0018:ffffa8d9c5677678 EFLAGS: 00010246
[ 3845.759450] RAX: 0000000000000000 RBX: ffff92650e6d4738 RCX: 0000000000000000
[ 3845.760309] RDX: 0000000000000000 RSI: ffffffff9aab45b9 RDI: ffff9264c4748000
[ 3845.761239] RBP: ffff9264d4324000 R08: 0000000000000000 R09: ffffa8d9c5677468
[ 3845.762607] R10: ffff926bdc1fffa8 R11: 0000000000000003 R12: ffffa8d9c5677680
[ 3845.764099] R13: 0000000000004000 R14: ffff9264dd624000 R15: ffff9264d978aba8
[ 3845.765094] FS: 00007f751fa5a840(0000) GS:ffff926c42a82000(0000) knlGS:0000000000000000
[ 3845.766226] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3845.766970] CR2: 0000558df1815380 CR3: 000000010ed88003 CR4: 0000000000370ef0
[ 3845.768009] Call Trace:
[ 3845.768392] <TASK>
[ 3845.768714] btrfs_submit_bbio+0x6ee/0x7f0 [btrfs]
[ 3845.769640] ? write_one_eb+0x28e/0x340 [btrfs]
[ 3845.770588] btree_write_cache_pages+0x2f0/0x550 [btrfs]
[ 3845.771286] ? alloc_extent_state+0x19/0x100 [btrfs]
[ 3845.771967] ? merge_next_state+0x1a/0x90 [btrfs]
[ 3845.772586] ? set_extent_bit+0x233/0x8b0 [btrfs]
[ 3845.773198] ? xas_load+0x9/0xc0
[ 3845.773589] ? xas_find+0x14d/0x1a0
[ 3845.773969] do_writepages+0xc6/0x160
[ 3845.774367] filemap_fdatawrite_wbc+0x48/0x60
[ 3845.775003] __filemap_fdatawrite_range+0x5b/0x80
[ 3845.775902] btrfs_write_marked_extents+0x61/0x170 [btrfs]
[ 3845.776707] btrfs_write_and_wait_transaction+0x4e/0xc0 [btrfs]
[ 3845.777379] ? _raw_spin_unlock_irqrestore+0x23/0x40
[ 3845.777923] btrfs_commit_transaction+0x5ea/0xd20 [btrfs]
[ 3845.778551] ? _raw_spin_unlock+0x15/0x30
[ 3845.778986] ? release_extent_buffer+0x34/0x160 [btrfs]
[ 3845.779659] btrfs_recover_log_trees+0x7a3/0x7c0 [btrfs]
[ 3845.780416] ? __pfx_replay_one_buffer+0x10/0x10 [btrfs]
[ 3845.781499] open_ctree+0x10bb/0x15f0 [btrfs]
[ 3845.782194] btrfs_get_tree.cold+0xb/0x16c [btrfs]
[ 3845.782764] ? fscontext_read+0x15c/0x180
[ 3845.783202] ? rw_verify_area+0x50/0x180
[ 3845.783667] vfs_get_tree+0x25/0xd0
[ 3845.784047] vfs_cmd_create+0x59/0xe0
[ 3845.784458] __do_sys_fsconfig+0x4f6/0x6b0
[ 3845.784914] do_syscall_64+0x50/0x1220
[ 3845.785340] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 3845.785980] RIP: 0033:0x7f751fc7f4aa
[ 3845.786759] Code: 73 01 c3 48 (...)
[ 3845.789951] RSP: 002b:00007ffcdba45dc8 EFLAGS: 00000246 ORIG_RAX: 00000000000001af
[ 3845.791402] RAX: ffffffffffffffda RBX: 000055ccc8291c20 RCX: 00007f751fc7f4aa
[ 3845.792688] RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003
[ 3845.794308] RBP: 000055ccc8292120 R08: 0000000000000000 R09: 0000000000000000
[ 3845.795829] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[ 3845.797183] R13: 00007f751fe11580 R14: 00007f751fe1326c R15: 00007f751fdf8a23
[ 3845.798633] </TASK>
[ 3845.799067] ---[ end trace 0000000000000000 ]---
[ 3845.800215] BTRFS: error (device dm-0) in btrfs_commit_transaction:2553: errno=-5 IO failure (Error while writing out transaction)
[ 3845.801860] BTRFS warning (device dm-0 state E): Skipping commit of aborted transaction.
[ 3845.802815] BTRFS error (device dm-0 state EA): Transaction aborted (error -5)
[ 3845.803728] BTRFS: error (device dm-0 state EA) in cleanup_transaction:2036: errno=-5 IO failure
[ 3845.805374] BTRFS: error (device dm-0 state EA) in btrfs_replay_log:2083: errno=-5 IO failure (Failed to recover log tree)
[ 3845.807919] BTRFS error (device dm-0 state EA): open_ctree failed: -5
Fix this by never logging a conflicting inode that is a directory and was
moved in the current transaction (its last_unlink_trans equals the current
transaction) and instead fallback to a transaction commit.
A test case for fstests will follow soon.
Reported-by: Vyacheslav Kovalevsky <slva.kovalevskiy.2014(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/7bbc9419-5c56-450a-b5a0-efeae7457113@gm…
CC: stable(a)vger.kernel.org # 6.1+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fff37c8d96a4..64c1155160a2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6051,6 +6051,33 @@ static int conflicting_inode_is_dir(struct btrfs_root *root, u64 ino,
return ret;
}
+static bool can_log_conflicting_inode(const struct btrfs_trans_handle *trans,
+ const struct btrfs_inode *inode)
+{
+ if (!S_ISDIR(inode->vfs_inode.i_mode))
+ return true;
+
+ if (inode->last_unlink_trans < trans->transid)
+ return true;
+
+ /*
+ * If this is a directory and its unlink_trans is not from a past
+ * transaction then we must fallback to a transaction commit in order
+ * to avoid getting a directory with 2 hard links after log replay.
+ *
+ * This happens if a directory A is renamed, moved from one parent
+ * directory to another one, a new file is created in the old parent
+ * directory with the old name of our directory A, the new file is
+ * fsynced, then we moved the new file to some other parent directory
+ * and fsync again the new file. This results in a log tree where we
+ * logged that directory A existed, with the INODE_REF item for the
+ * new location but without having logged its old parent inode, so
+ * that on log replay we add a new link for the new location but the
+ * old link remains, resulting in a link count of 2.
+ */
+ return false;
+}
+
static int add_conflicting_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -6154,6 +6181,11 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans,
return 0;
}
+ if (!can_log_conflicting_inode(trans, inode)) {
+ btrfs_add_delayed_iput(inode);
+ return BTRFS_LOG_FORCE_COMMIT;
+ }
+
btrfs_add_delayed_iput(inode);
ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS);
@@ -6218,6 +6250,12 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
break;
}
+ if (!can_log_conflicting_inode(trans, inode)) {
+ btrfs_add_delayed_iput(inode);
+ ret = BTRFS_LOG_FORCE_COMMIT;
+ break;
+ }
+
/*
* Always log the directory, we cannot make this
* conditional on need_log_inode() because the directory
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x d1bea0ce35b6095544ee82bb54156fc62c067e58
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122943-program-skipper-de04@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1bea0ce35b6095544ee82bb54156fc62c067e58 Mon Sep 17 00:00:00 2001
From: Joshua Rogers <linux(a)joshua.hu>
Date: Fri, 7 Nov 2025 10:09:49 -0500
Subject: [PATCH] svcrdma: bound check rq_pages index in inline path
svc_rdma_copy_inline_range indexed rqstp->rq_pages[rc_curpage] without
verifying rc_curpage stays within the allocated page array. Add guards
before the first use and after advancing to a new page.
Fixes: d7cc73972661 ("svcrdma: support multiple Read chunks per RPC")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joshua Rogers <linux(a)joshua.hu>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index e813e5463352..310de7a80be5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -841,6 +841,9 @@ static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
for (page_no = 0; page_no < numpages; page_no++) {
unsigned int page_len;
+ if (head->rc_curpage >= rqstp->rq_maxpages)
+ return -EINVAL;
+
page_len = min_t(unsigned int, remaining,
PAGE_SIZE - head->rc_pageoff);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 29763138830916f46daaa50e83e7f4f907a3236b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122926-gigabyte-rectangle-0c68@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 29763138830916f46daaa50e83e7f4f907a3236b Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang(a)oracle.com>
Date: Fri, 5 Dec 2025 15:19:05 -0800
Subject: [PATCH] KVM: nVMX: Immediately refresh APICv controls as needed on
nested VM-Exit
If an APICv status updated was pended while L2 was active, immediately
refresh vmcs01's controls instead of pending KVM_REQ_APICV_UPDATE as
kvm_vcpu_update_apicv() only calls into vendor code if a change is
necessary.
E.g. if APICv is inhibited, and then activated while L2 is running:
kvm_vcpu_update_apicv()
|
-> __kvm_vcpu_update_apicv()
|
-> apic->apicv_active = true
|
-> vmx_refresh_apicv_exec_ctrl()
|
-> vmx->nested.update_vmcs01_apicv_status = true
|
-> return
Then L2 exits to L1:
__nested_vmx_vmexit()
|
-> kvm_make_request(KVM_REQ_APICV_UPDATE)
vcpu_enter_guest(): KVM_REQ_APICV_UPDATE
-> kvm_vcpu_update_apicv()
|
-> __kvm_vcpu_update_apicv()
|
-> return // because if (apic->apicv_active == activate)
Reported-by: Chao Gao <chao.gao(a)intel.com>
Closes: https://lore.kernel.org/all/aQ2jmnN8wUYVEawF@intel.com
Fixes: 7c69661e225c ("KVM: nVMX: Defer APICv updates while L2 is active until L1 is active")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dongli Zhang <dongli.zhang(a)oracle.com>
[sean: write changelog]
Link: https://patch.msgid.link/20251205231913.441872-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bcea087b642f..1725c6a94f99 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
#include "trace.h"
#include "vmx.h"
#include "smm.h"
+#include "x86_ops.h"
static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5216,7 +5217,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (vmx->nested.update_vmcs01_apicv_status) {
vmx->nested.update_vmcs01_apicv_status = false;
- kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ vmx_refresh_apicv_exec_ctrl(vcpu);
}
if (vmx->nested.update_vmcs01_hwapic_isr) {
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 29763138830916f46daaa50e83e7f4f907a3236b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122917-cadet-worrier-23c0@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 29763138830916f46daaa50e83e7f4f907a3236b Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang(a)oracle.com>
Date: Fri, 5 Dec 2025 15:19:05 -0800
Subject: [PATCH] KVM: nVMX: Immediately refresh APICv controls as needed on
nested VM-Exit
If an APICv status updated was pended while L2 was active, immediately
refresh vmcs01's controls instead of pending KVM_REQ_APICV_UPDATE as
kvm_vcpu_update_apicv() only calls into vendor code if a change is
necessary.
E.g. if APICv is inhibited, and then activated while L2 is running:
kvm_vcpu_update_apicv()
|
-> __kvm_vcpu_update_apicv()
|
-> apic->apicv_active = true
|
-> vmx_refresh_apicv_exec_ctrl()
|
-> vmx->nested.update_vmcs01_apicv_status = true
|
-> return
Then L2 exits to L1:
__nested_vmx_vmexit()
|
-> kvm_make_request(KVM_REQ_APICV_UPDATE)
vcpu_enter_guest(): KVM_REQ_APICV_UPDATE
-> kvm_vcpu_update_apicv()
|
-> __kvm_vcpu_update_apicv()
|
-> return // because if (apic->apicv_active == activate)
Reported-by: Chao Gao <chao.gao(a)intel.com>
Closes: https://lore.kernel.org/all/aQ2jmnN8wUYVEawF@intel.com
Fixes: 7c69661e225c ("KVM: nVMX: Defer APICv updates while L2 is active until L1 is active")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dongli Zhang <dongli.zhang(a)oracle.com>
[sean: write changelog]
Link: https://patch.msgid.link/20251205231913.441872-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bcea087b642f..1725c6a94f99 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
#include "trace.h"
#include "vmx.h"
#include "smm.h"
+#include "x86_ops.h"
static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5216,7 +5217,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (vmx->nested.update_vmcs01_apicv_status) {
vmx->nested.update_vmcs01_apicv_status = false;
- kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ vmx_refresh_apicv_exec_ctrl(vcpu);
}
if (vmx->nested.update_vmcs01_hwapic_isr) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d4b69a6186b215d2dc1ebcab965ed88e8d41768d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122941-pupil-strongly-abe3@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4b69a6186b215d2dc1ebcab965ed88e8d41768d Mon Sep 17 00:00:00 2001
From: Joshua Rogers <linux(a)joshua.hu>
Date: Fri, 7 Nov 2025 10:05:33 -0500
Subject: [PATCH] SUNRPC: svcauth_gss: avoid NULL deref on zero length
gss_token in gss_read_proxy_verf
A zero length gss_token results in pages == 0 and in_token->pages[0]
is NULL. The code unconditionally evaluates
page_address(in_token->pages[0]) for the initial memcpy, which can
dereference NULL even when the copy length is 0. Guard the first
memcpy so it only runs when length > 0.
Fixes: 5866efa8cbfb ("SUNRPC: Fix svcauth_gss_proxy_init()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joshua Rogers <linux(a)joshua.hu>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a8ec30759a18..e2f0df8cdaa6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1083,7 +1083,8 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
}
length = min_t(unsigned int, inlen, (char *)xdr->end - (char *)xdr->p);
- memcpy(page_address(in_token->pages[0]), xdr->p, length);
+ if (length)
+ memcpy(page_address(in_token->pages[0]), xdr->p, length);
inlen -= length;
to_offs = length;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 7c8b465a1c91f674655ea9cec5083744ec5f796a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122929-riveter-outreach-a5e9@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7c8b465a1c91f674655ea9cec5083744ec5f796a Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson(a)google.com>
Date: Mon, 22 Sep 2025 09:29:23 -0700
Subject: [PATCH] KVM: SVM: Mark VMCB_NPT as dirty on nested VMRUN
Mark the VMCB_NPT bit as dirty in nested_vmcb02_prepare_save()
on every nested VMRUN.
If L1 changes the PAT MSR between two VMRUN instructions on the same
L1 vCPU, the g_pat field in the associated vmcb02 will change, and the
VMCB_NPT clean bit should be cleared.
Fixes: 4bb170a5430b ("KVM: nSVM: do not mark all VMCB02 fields dirty on nested vmexit")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jim Mattson <jmattson(a)google.com>
Link: https://lore.kernel.org/r/20250922162935.621409-3-jmattson@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 35cea27862c6..83de3456df70 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -613,6 +613,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
struct kvm_vcpu *vcpu = &svm->vcpu;
nested_vmcb02_compute_g_pat(svm);
+ vmcb_mark_dirty(vmcb02, VMCB_NPT);
/* Load the nested guest state */
if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d4b69a6186b215d2dc1ebcab965ed88e8d41768d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122940-illusion-nervy-c582@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4b69a6186b215d2dc1ebcab965ed88e8d41768d Mon Sep 17 00:00:00 2001
From: Joshua Rogers <linux(a)joshua.hu>
Date: Fri, 7 Nov 2025 10:05:33 -0500
Subject: [PATCH] SUNRPC: svcauth_gss: avoid NULL deref on zero length
gss_token in gss_read_proxy_verf
A zero length gss_token results in pages == 0 and in_token->pages[0]
is NULL. The code unconditionally evaluates
page_address(in_token->pages[0]) for the initial memcpy, which can
dereference NULL even when the copy length is 0. Guard the first
memcpy so it only runs when length > 0.
Fixes: 5866efa8cbfb ("SUNRPC: Fix svcauth_gss_proxy_init()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joshua Rogers <linux(a)joshua.hu>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a8ec30759a18..e2f0df8cdaa6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1083,7 +1083,8 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
}
length = min_t(unsigned int, inlen, (char *)xdr->end - (char *)xdr->p);
- memcpy(page_address(in_token->pages[0]), xdr->p, length);
+ if (length)
+ memcpy(page_address(in_token->pages[0]), xdr->p, length);
inlen -= length;
to_offs = length;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d4b69a6186b215d2dc1ebcab965ed88e8d41768d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122940-ember-smilingly-3df0@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4b69a6186b215d2dc1ebcab965ed88e8d41768d Mon Sep 17 00:00:00 2001
From: Joshua Rogers <linux(a)joshua.hu>
Date: Fri, 7 Nov 2025 10:05:33 -0500
Subject: [PATCH] SUNRPC: svcauth_gss: avoid NULL deref on zero length
gss_token in gss_read_proxy_verf
A zero length gss_token results in pages == 0 and in_token->pages[0]
is NULL. The code unconditionally evaluates
page_address(in_token->pages[0]) for the initial memcpy, which can
dereference NULL even when the copy length is 0. Guard the first
memcpy so it only runs when length > 0.
Fixes: 5866efa8cbfb ("SUNRPC: Fix svcauth_gss_proxy_init()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joshua Rogers <linux(a)joshua.hu>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a8ec30759a18..e2f0df8cdaa6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1083,7 +1083,8 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
}
length = min_t(unsigned int, inlen, (char *)xdr->end - (char *)xdr->p);
- memcpy(page_address(in_token->pages[0]), xdr->p, length);
+ if (length)
+ memcpy(page_address(in_token->pages[0]), xdr->p, length);
inlen -= length;
to_offs = length;
After discussion with the devicetree maintainers we agreed to not extend
lists with the generic compatible "apple,wdt" anymore [1]. Use
"apple,t8103-wdt" as base compatible as it is the SoC the driver and
bindings were written for.
[1]: https://lore.kernel.org/asahi/12ab93b7-1fc2-4ce0-926e-c8141cfe81bf@kernel.o…
Fixes: 4ed224aeaf66 ("watchdog: Add Apple SoC watchdog driver")
Cc: stable(a)vger.kernel.org
Reviewed-by: Neal Gompa <neal(a)gompa.dev>
Signed-off-by: Janne Grunau <j(a)jannau.net>
---
This is split off from the v1 series adding Apple M2 Pro/Max/Ultra
device trees in [2].
2: https://lore.kernel.org/r/20250828-dt-apple-t6020-v1-0-507ba4c4b98e@jannau.…
---
drivers/watchdog/apple_wdt.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/watchdog/apple_wdt.c b/drivers/watchdog/apple_wdt.c
index 66a158f67a712bbed394d660071e02140e66c2e5..6b9b0f9b05cedfd7fc5d0d79ba19ab356dc2a080 100644
--- a/drivers/watchdog/apple_wdt.c
+++ b/drivers/watchdog/apple_wdt.c
@@ -218,6 +218,7 @@ static int apple_wdt_suspend(struct device *dev)
static DEFINE_SIMPLE_DEV_PM_OPS(apple_wdt_pm_ops, apple_wdt_suspend, apple_wdt_resume);
static const struct of_device_id apple_wdt_of_match[] = {
+ { .compatible = "apple,t8103-wdt" },
{ .compatible = "apple,wdt" },
{},
};
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20251231-watchdog-apple-t8103-base-compat-8a623e9831b6
Best regards,
--
Janne Grunau <j(a)jannau.net>
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 27d17641cacfedd816789b75d342430f6b912bd2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122921-wanting-sixfold-db66@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 27d17641cacfedd816789b75d342430f6b912bd2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 17 Nov 2025 11:00:49 -0500
Subject: [PATCH] NFSD: Clear SECLABEL in the suppattr_exclcreat bitmap
>From RFC 8881:
5.8.1.14. Attribute 75: suppattr_exclcreat
> The bit vector that would set all REQUIRED and RECOMMENDED
> attributes that are supported by the EXCLUSIVE4_1 method of file
> creation via the OPEN operation. The scope of this attribute
> applies to all objects with a matching fsid.
There's nothing in RFC 8881 that states that suppattr_exclcreat is
or is not allowed to contain bits for attributes that are clear in
the reported supported_attrs bitmask. But it doesn't make sense for
an NFS server to indicate that it /doesn't/ implement an attribute,
but then also indicate that clients /are/ allowed to set that
attribute using OPEN(create) with EXCLUSIVE4_1.
Ensure that the SECURITY_LABEL and ACL bits are not set in the
suppattr_exclcreat bitmask when they are also not set in the
supported_attrs bitmask.
Fixes: 8c18f2052e75 ("nfsd41: SUPPATTR_EXCLCREAT attribute")
Cc: stable(a)vger.kernel.org
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 30ce5851fe4c..51ef97c25456 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3375,6 +3375,11 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr,
u32 supp[3];
memcpy(supp, nfsd_suppattrs[resp->cstate.minorversion], sizeof(supp));
+ if (!IS_POSIXACL(d_inode(args->dentry)))
+ supp[0] &= ~FATTR4_WORD0_ACL;
+ if (!args->contextsupport)
+ supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+
supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x dd75c723ef566f7f009c047f47e0eee95fe348ab
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122945-mold-ducktail-63db@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dd75c723ef566f7f009c047f47e0eee95fe348ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Rebe?= <rene(a)exactco.de>
Date: Tue, 2 Dec 2025 19:41:37 +0100
Subject: [PATCH] r8169: fix RTL8117 Wake-on-Lan in DASH mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Wake-on-Lan does currently not work for r8169 in DASH mode, e.g. the
ASUS Pro WS X570-ACE with RTL8168fp/RTL8117.
Fix by not returning early in rtl_prepare_power_down when dash_enabled.
While this fixes WoL, it still kills the OOB RTL8117 remote management
BMC connection. Fix by not calling rtl8168_driver_stop if WoL is enabled.
Fixes: 065c27c184d6 ("r8169: phy power ops")
Signed-off-by: René Rebe <rene(a)exactco.de>
Cc: stable(a)vger.kernel.org
Reviewed-by: Heiner Kallweit <hkallweit1(a)gmail.com>
Link: https://patch.msgid.link/20251202.194137.1647877804487085954.rene@exactco.de
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 405e91eb3141..755083852eef 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2655,9 +2655,6 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp)
static void rtl_prepare_power_down(struct rtl8169_private *tp)
{
- if (tp->dash_enabled)
- return;
-
if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
tp->mac_version == RTL_GIGA_MAC_VER_33)
rtl_ephy_write(tp, 0x19, 0xff64);
@@ -4812,7 +4809,7 @@ static void rtl8169_down(struct rtl8169_private *tp)
rtl_disable_exit_l1(tp);
rtl_prepare_power_down(tp);
- if (tp->dash_type != RTL_DASH_NONE)
+ if (tp->dash_type != RTL_DASH_NONE && !tp->saved_wolopts)
rtl8168_driver_stop(tp);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 27d17641cacfedd816789b75d342430f6b912bd2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122921-startle-proximity-8c22@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 27d17641cacfedd816789b75d342430f6b912bd2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 17 Nov 2025 11:00:49 -0500
Subject: [PATCH] NFSD: Clear SECLABEL in the suppattr_exclcreat bitmap
>From RFC 8881:
5.8.1.14. Attribute 75: suppattr_exclcreat
> The bit vector that would set all REQUIRED and RECOMMENDED
> attributes that are supported by the EXCLUSIVE4_1 method of file
> creation via the OPEN operation. The scope of this attribute
> applies to all objects with a matching fsid.
There's nothing in RFC 8881 that states that suppattr_exclcreat is
or is not allowed to contain bits for attributes that are clear in
the reported supported_attrs bitmask. But it doesn't make sense for
an NFS server to indicate that it /doesn't/ implement an attribute,
but then also indicate that clients /are/ allowed to set that
attribute using OPEN(create) with EXCLUSIVE4_1.
Ensure that the SECURITY_LABEL and ACL bits are not set in the
suppattr_exclcreat bitmask when they are also not set in the
supported_attrs bitmask.
Fixes: 8c18f2052e75 ("nfsd41: SUPPATTR_EXCLCREAT attribute")
Cc: stable(a)vger.kernel.org
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 30ce5851fe4c..51ef97c25456 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3375,6 +3375,11 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr,
u32 supp[3];
memcpy(supp, nfsd_suppattrs[resp->cstate.minorversion], sizeof(supp));
+ if (!IS_POSIXACL(d_inode(args->dentry)))
+ supp[0] &= ~FATTR4_WORD0_ACL;
+ if (!args->contextsupport)
+ supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+
supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
The arm64 kernel doesn't boot with annotated branches
(PROFILE_ANNOTATED_BRANCHES) enabled and CONFIG_DEBUG_VIRTUAL together.
Bisecting it, I found that disabling branch profiling in arch/arm64/mm
solved the problem. Narrowing down a bit further, I found that
physaddr.c is the file that needs to have branch profiling disabled to
get the machine to boot.
I suspect that it might invoke some ftrace helper very early in the boot
process and ftrace is still not enabled(!?).
Disable branch profiling for physaddr.o to allow booting an arm64
machine with CONFIG_PROFILE_ANNOTATED_BRANCHES and
CONFIG_DEBUG_VIRTUAL together.
Cc: stable(a)vger.kernel.org
Fixes: ec6d06efb0bac ("arm64: Add support for CONFIG_DEBUG_VIRTUAL")
Signed-off-by: Breno Leitao <leitao(a)debian.org>
---
Another approach is to disable profiling on all arch/arm64 code, similarly to
x86, where DISABLE_BRANCH_PROFILING is called for all arch/x86 code. See
commit 2cbb20b008dba ("tracing: Disable branch profiling in noinstr
code").
---
arch/arm64/mm/Makefile | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index c26489cf96cd..8bfe2451ea26 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -14,5 +14,10 @@ obj-$(CONFIG_ARM64_MTE) += mteswap.o
obj-$(CONFIG_ARM64_GCS) += gcs.o
KASAN_SANITIZE_physaddr.o += n
+# Branch profiling isn't noinstr-safe
+ifdef CONFIG_TRACE_BRANCH_PROFILING
+CFLAGS_physaddr.o += -DDISABLE_BRANCH_PROFILING
+endif
+
obj-$(CONFIG_KASAN) += kasan_init.o
KASAN_SANITIZE_kasan_init.o := n
---
base-commit: c8ebd433459bcbf068682b09544e830acd7ed222
change-id: 20251231-annotated-75de3f33cd7b
Best regards,
--
Breno Leitao <leitao(a)debian.org>
After discussion with the devicetree maintainers we agreed to not extend
lists with the generic compatible "apple,nco" anymore [1]. Use
"apple,t8103-nco" as base compatible as it is the SoC the driver and
bindings were written for.
[1]: https://lore.kernel.org/asahi/12ab93b7-1fc2-4ce0-926e-c8141cfe81bf@kernel.o…
Fixes: 6641057d5dba ("clk: clk-apple-nco: Add driver for Apple NCO")
Cc: stable(a)vger.kernel.org
Acked-by: Stephen Boyd <sboyd(a)kernel.org>
Reviewed-by: Neal Gompa <neal(a)gompa.dev>
Signed-off-by: Janne Grunau <j(a)jannau.net>
---
This is split off from the v1 series adding Apple M2 Pro/Max/Ultra
device trees in [2].
Changes compared to that series:
- add "Fixes:" and "Cc: stable" for handling this as fix adding a device
id
- add Neal's and Stephen's trailers
---
drivers/clk/clk-apple-nco.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/clk/clk-apple-nco.c b/drivers/clk/clk-apple-nco.c
index d3ced4a0f029ec0440ff42d49d31e314fdf86846..434c067968bbc1afdb5c9216277486bf826666c2 100644
--- a/drivers/clk/clk-apple-nco.c
+++ b/drivers/clk/clk-apple-nco.c
@@ -320,6 +320,7 @@ static int applnco_probe(struct platform_device *pdev)
}
static const struct of_device_id applnco_ids[] = {
+ { .compatible = "apple,t8103-nco" },
{ .compatible = "apple,nco" },
{ }
};
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20251231-clk-apple-nco-t8103-base-compat-dea037ecce88
Best regards,
--
Janne Grunau <j(a)jannau.net>
If nvmem_add_one_cell() failed, the ownership of "child" (or "info.np"),
thus its OF reference, is not passed further and function should clean
up by putting the reference it got via earlier of_node_get(). Note that
this is independent of references obtained via for_each_child_of_node()
loop.
Fixes: 50014d659617 ("nvmem: core: use nvmem_add_one_cell() in nvmem_add_cells_from_of()")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)oss.qualcomm.com>
---
drivers/nvmem/core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 387c88c55259..ff68fd5ad3d6 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -831,6 +831,7 @@ static int nvmem_add_cells_from_dt(struct nvmem_device *nvmem, struct device_nod
kfree(info.name);
if (ret) {
of_node_put(child);
+ of_node_put(info.np);
return ret;
}
}
--
2.51.0
After discussion with the devicetree maintainers we agreed to not extend
lists with the generic compatible "apple,spmi" anymore [1]. Use
"apple,t8103-spmi" as base compatible as it is the SoC the driver and
bindings were written for.
[1]: https://lore.kernel.org/asahi/12ab93b7-1fc2-4ce0-926e-c8141cfe81bf@kernel.o…
Fixes: 77ca75e80c71 ("spmi: add a spmi driver for Apple SoC")
Cc: stable(a)vger.kernel.org
Reviewed-by: Neal Gompa <neal(a)gompa.dev>
Signed-off-by: Janne Grunau <j(a)jannau.net>
---
This is split off from the v1 series adding Apple M2 Pro/Max/Ultra
device trees in [2].
2: https://lore.kernel.org/r/20250828-dt-apple-t6020-v1-0-507ba4c4b98e@jannau.…
---
drivers/spmi/spmi-apple-controller.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/spmi/spmi-apple-controller.c b/drivers/spmi/spmi-apple-controller.c
index 697b3e8bb023566f17911fc222666d84f5e14c91..87e3ee9d4f2aa5517808827f5dd365055c08446a 100644
--- a/drivers/spmi/spmi-apple-controller.c
+++ b/drivers/spmi/spmi-apple-controller.c
@@ -149,6 +149,7 @@ static int apple_spmi_probe(struct platform_device *pdev)
}
static const struct of_device_id apple_spmi_match_table[] = {
+ { .compatible = "apple,t8103-spmi", },
{ .compatible = "apple,spmi", },
{}
};
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20251231-spmi-apple-t8103-base-compat-e7c1b862b9a4
Best regards,
--
Janne Grunau <j(a)jannau.net>
After discussion with the devicetree maintainers we agreed to not extend
lists with the generic compatible "apple,nvme-ans2" anymore [1]. Add
"apple,t8103-nvme-ans2" as fallback compatible as it is the SoC the
driver and bindings were written for.
[1]: https://lore.kernel.org/asahi/12ab93b7-1fc2-4ce0-926e-c8141cfe81bf@kernel.o…
Cc: stable(a)vger.kernel.org # v6.18+
Fixes: 5bd2927aceba ("nvme-apple: Add initial Apple SoC NVMe driver")
Reviewed-by: Neal Gompa <neal(a)gompa.dev>
Signed-off-by: Janne Grunau <j(a)jannau.net>
---
This is split off from the v1 series adding Apple M2 Pro/Max/Ultra
device trees in [2]. Handling this as fix adding a device id only for
v6.18+ for two reasons. apple_nvme_of_match gained hw_data in v6.18 and
device trees using this compatible were only added in v6.18.
2: https://lore.kernel.org/r/20250828-dt-apple-t6020-v1-0-507ba4c4b98e@jannau.…
Changes compared to the patch in that series:
- rebased onto v6.19-rc1 since commit 04d8ecf37b5e ("nvme: apple: Add
Apple A11 support") introduced hw data for the match table
---
drivers/nvme/host/apple.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 15b3d07f8ccdd023cd3be75eedd349b747c1ecad..ed61b97fde59f7e02664798d9c2612ac16307f5c 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1704,6 +1704,7 @@ static const struct apple_nvme_hw apple_nvme_t8103_hw = {
static const struct of_device_id apple_nvme_of_match[] = {
{ .compatible = "apple,t8015-nvme-ans2", .data = &apple_nvme_t8015_hw },
+ { .compatible = "apple,t8103-nvme-ans2", .data = &apple_nvme_t8103_hw },
{ .compatible = "apple,nvme-ans2", .data = &apple_nvme_t8103_hw },
{},
};
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20251026-nvme-apple-t8103-base-compat-358ba0564f76
Best regards,
--
Janne Grunau <j(a)jannau.net>
The driver does not set hw->sta_data_size, which causes mac80211 to
allocate insufficient space for driver private station data in
__sta_info_alloc(). When rtl8xxxu_sta_add() accesses members of
struct rtl8xxxu_sta_info through sta->drv_priv, this results in a
slab-out-of-bounds write.
KASAN report on RISC-V (VisionFive 2) with RTL8192EU adapter:
BUG: KASAN: slab-out-of-bounds in rtl8xxxu_sta_add+0x31c/0x346
Write of size 8 at addr ffffffd6d3e9ae88 by task kworker/u16:0/12
Set hw->sta_data_size to sizeof(struct rtl8xxxu_sta_info) during
probe, similar to how hw->vif_data_size is configured. This ensures
mac80211 allocates sufficient space for the driver's per-station
private data.
Tested on StarFive VisionFive 2 v1.2A board.
Fixes: eef55f1545c9 ("wifi: rtl8xxxu: support multiple interfaces in {add,remove}_interface()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ali Tariq <alitariq45892(a)gmail.com>
---
drivers/net/wireless/realtek/rtl8xxxu/core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c
index c06ad064f37c..f9a527f6a175 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c
@@ -7826,6 +7826,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface,
goto err_set_intfdata;
hw->vif_data_size = sizeof(struct rtl8xxxu_vif);
+ hw->sta_data_size = sizeof(struct rtl8xxxu_sta_info);
hw->wiphy->max_scan_ssids = 1;
hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
--
2.43.0
On x86_64:
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>" we observe a pafe fault that happens.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) – not-present page
This happens on x86_64 only, as this is already fixed in aarch64 in
commit: cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer
against memory bounds")
V1: https://lore.kernel.org/all/20251112193005.3772542-1-harshit.m.mogalapalli@…
V1 attempted to do a similar sanity check in x86_64. Borislav suggested
to add a generic helper ima_validate_range() which could then be used
for both OF based and x86_64.
Testing information:
--------------------
On x86_64: With latest 6.19-rc2 based, we could reproduce the issue, and
patched kernel works fine. (with mem=8G on a 16G memory machine)
Thanks for the reviews on V1.
V1 -> V2:
- Patch 1: Add a generic helper "ima_validate_range()"
- Patch 2: Use this new helper in drivers/of/kexec.c -> No functional
change.
- Patch 3: Fix the page fault by doing sanity check with
"ima_validate_range()"
Thanks,
Harshit
Harshit Mogalapalli (3):
ima: Add ima_validate_range() for previous kernel IMA buffer
of/kexec: refactor ima_get_kexec_buffer() to use ima_validate_range()
x86/kexec: Add a sanity check on previous kernel's ima kexec buffer
arch/x86/kernel/setup.c | 6 +++++
drivers/of/kexec.c | 15 +++----------
include/linux/ima.h | 1 +
security/integrity/ima/ima_kexec.c | 35 ++++++++++++++++++++++++++++++
4 files changed, 45 insertions(+), 12 deletions(-)
--
2.50.1
When a fault occurs, the connection is abandoned, reestablished, and any
pending operations are retried. The OSD client tracks the progress of a
sparse-read reply using a separate state machine, largely independent of
the messenger's state.
If a connection is lost mid-payload or the sparse-read state machine
returns an error, the sparse-read state is not reset. The OSD client
will then interpret the beginning of a new reply as the continuation of
the old one. If this makes the sparse-read machinery enter a failure
state, it may never recover, producing loops like:
libceph: [0] got 0 extents
libceph: data len 142248331 != extent len 0
libceph: osd0 (1)...:6801 socket error on read
libceph: data len 142248331 != extent len 0
libceph: osd0 (1)...:6801 socket error on read
Therefore, reset the sparse-read state in osd_fault(), ensuring retries
start from a clean state.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sam Edwards <CFSworks(a)gmail.com>
---
net/ceph/osd_client.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3667319b949d..1a7be2f615dc 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4281,6 +4281,9 @@ static void osd_fault(struct ceph_connection *con)
goto out_unlock;
}
+ osd->o_sparse_op_idx = -1;
+ ceph_init_sparse_read(&osd->o_sparse_read);
+
if (!reopen_osd(osd))
kick_osd_requests(osd);
maybe_request_map(osdc);
--
2.51.2
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 27d17641cacfedd816789b75d342430f6b912bd2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122920-sequence-vixen-bb32@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 27d17641cacfedd816789b75d342430f6b912bd2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 17 Nov 2025 11:00:49 -0500
Subject: [PATCH] NFSD: Clear SECLABEL in the suppattr_exclcreat bitmap
>From RFC 8881:
5.8.1.14. Attribute 75: suppattr_exclcreat
> The bit vector that would set all REQUIRED and RECOMMENDED
> attributes that are supported by the EXCLUSIVE4_1 method of file
> creation via the OPEN operation. The scope of this attribute
> applies to all objects with a matching fsid.
There's nothing in RFC 8881 that states that suppattr_exclcreat is
or is not allowed to contain bits for attributes that are clear in
the reported supported_attrs bitmask. But it doesn't make sense for
an NFS server to indicate that it /doesn't/ implement an attribute,
but then also indicate that clients /are/ allowed to set that
attribute using OPEN(create) with EXCLUSIVE4_1.
Ensure that the SECURITY_LABEL and ACL bits are not set in the
suppattr_exclcreat bitmask when they are also not set in the
supported_attrs bitmask.
Fixes: 8c18f2052e75 ("nfsd41: SUPPATTR_EXCLCREAT attribute")
Cc: stable(a)vger.kernel.org
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 30ce5851fe4c..51ef97c25456 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3375,6 +3375,11 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr,
u32 supp[3];
memcpy(supp, nfsd_suppattrs[resp->cstate.minorversion], sizeof(supp));
+ if (!IS_POSIXACL(d_inode(args->dentry)))
+ supp[0] &= ~FATTR4_WORD0_ACL;
+ if (!args->contextsupport)
+ supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+
supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 27d17641cacfedd816789b75d342430f6b912bd2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122919-carried-livestock-9465@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 27d17641cacfedd816789b75d342430f6b912bd2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 17 Nov 2025 11:00:49 -0500
Subject: [PATCH] NFSD: Clear SECLABEL in the suppattr_exclcreat bitmap
>From RFC 8881:
5.8.1.14. Attribute 75: suppattr_exclcreat
> The bit vector that would set all REQUIRED and RECOMMENDED
> attributes that are supported by the EXCLUSIVE4_1 method of file
> creation via the OPEN operation. The scope of this attribute
> applies to all objects with a matching fsid.
There's nothing in RFC 8881 that states that suppattr_exclcreat is
or is not allowed to contain bits for attributes that are clear in
the reported supported_attrs bitmask. But it doesn't make sense for
an NFS server to indicate that it /doesn't/ implement an attribute,
but then also indicate that clients /are/ allowed to set that
attribute using OPEN(create) with EXCLUSIVE4_1.
Ensure that the SECURITY_LABEL and ACL bits are not set in the
suppattr_exclcreat bitmask when they are also not set in the
supported_attrs bitmask.
Fixes: 8c18f2052e75 ("nfsd41: SUPPATTR_EXCLCREAT attribute")
Cc: stable(a)vger.kernel.org
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 30ce5851fe4c..51ef97c25456 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3375,6 +3375,11 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr,
u32 supp[3];
memcpy(supp, nfsd_suppattrs[resp->cstate.minorversion], sizeof(supp));
+ if (!IS_POSIXACL(d_inode(args->dentry)))
+ supp[0] &= ~FATTR4_WORD0_ACL;
+ if (!args->contextsupport)
+ supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+
supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x fc40459de82543b565ebc839dca8f7987f16f62e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122919-catty-unrated-e504@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fc40459de82543b565ebc839dca8f7987f16f62e Mon Sep 17 00:00:00 2001
From: Haoxiang Li <lihaoxiang(a)isrc.iscas.ac.cn>
Date: Wed, 10 Dec 2025 17:06:01 +0800
Subject: [PATCH] xfs: fix a memory leak in xfs_buf_item_init()
xfs_buf_item_get_format() may allocate memory for bip->bli_formats,
free the memory in the error path.
Fixes: c3d5f0c2fb85 ("xfs: complain if anyone tries to create a too-large buffer log item")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haoxiang Li <lihaoxiang(a)isrc.iscas.ac.cn>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino(a)redhat.com>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 8d85b5eee444..f4c5be67826e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -896,6 +896,7 @@ xfs_buf_item_init(
map_size = DIV_ROUND_UP(chunks, NBWORD);
if (map_size > XFS_BLF_DATAMAP_SIZE) {
+ xfs_buf_item_free_format(bip);
kmem_cache_free(xfs_buf_item_cache, bip);
xfs_err(mp,
"buffer item dirty bitmap (%u uints) too small to reflect %u bytes!",
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 29763138830916f46daaa50e83e7f4f907a3236b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122927-delegate-composed-5765@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 29763138830916f46daaa50e83e7f4f907a3236b Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang(a)oracle.com>
Date: Fri, 5 Dec 2025 15:19:05 -0800
Subject: [PATCH] KVM: nVMX: Immediately refresh APICv controls as needed on
nested VM-Exit
If an APICv status updated was pended while L2 was active, immediately
refresh vmcs01's controls instead of pending KVM_REQ_APICV_UPDATE as
kvm_vcpu_update_apicv() only calls into vendor code if a change is
necessary.
E.g. if APICv is inhibited, and then activated while L2 is running:
kvm_vcpu_update_apicv()
|
-> __kvm_vcpu_update_apicv()
|
-> apic->apicv_active = true
|
-> vmx_refresh_apicv_exec_ctrl()
|
-> vmx->nested.update_vmcs01_apicv_status = true
|
-> return
Then L2 exits to L1:
__nested_vmx_vmexit()
|
-> kvm_make_request(KVM_REQ_APICV_UPDATE)
vcpu_enter_guest(): KVM_REQ_APICV_UPDATE
-> kvm_vcpu_update_apicv()
|
-> __kvm_vcpu_update_apicv()
|
-> return // because if (apic->apicv_active == activate)
Reported-by: Chao Gao <chao.gao(a)intel.com>
Closes: https://lore.kernel.org/all/aQ2jmnN8wUYVEawF@intel.com
Fixes: 7c69661e225c ("KVM: nVMX: Defer APICv updates while L2 is active until L1 is active")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dongli Zhang <dongli.zhang(a)oracle.com>
[sean: write changelog]
Link: https://patch.msgid.link/20251205231913.441872-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bcea087b642f..1725c6a94f99 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
#include "trace.h"
#include "vmx.h"
#include "smm.h"
+#include "x86_ops.h"
static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5216,7 +5217,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (vmx->nested.update_vmcs01_apicv_status) {
vmx->nested.update_vmcs01_apicv_status = false;
- kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ vmx_refresh_apicv_exec_ctrl(vcpu);
}
if (vmx->nested.update_vmcs01_hwapic_isr) {
The patch below does not apply to the 6.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.18.y
git checkout FETCH_HEAD
git cherry-pick -x 2f393c228cc519ddf19b8c6c05bf15723241aa96
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122907-grant-reformist-a323@gregkh' --subject-prefix 'PATCH 6.18.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2f393c228cc519ddf19b8c6c05bf15723241aa96 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Date: Tue, 4 Nov 2025 16:40:48 +0100
Subject: [PATCH] KVM: s390: Fix gmap_helper_zap_one_page() again
A few checks were missing in gmap_helper_zap_one_page(), which can lead
to memory corruption in the guest under specific circumstances.
Add the missing checks.
Fixes: 5deafa27d9ae ("KVM: s390: Fix to clear PTE when discarding a swapped page")
Cc: stable(a)vger.kernel.org
Reported-by: Marc Hartmayer <mhartmay(a)linux.ibm.com>
Tested-by: Marc Hartmayer <mhartmay(a)linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index 549f14ad08af..d41b19925a5a 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -47,6 +47,7 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
struct vm_area_struct *vma;
+ unsigned long pgstev;
spinlock_t *ptl;
pgste_t pgste;
pte_t *ptep;
@@ -65,9 +66,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
if (pte_swap(*ptep)) {
preempt_disable();
pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
- ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
- pte_clear(mm, vmaddr, ptep);
+ if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+ (pgstev & _PGSTE_GPS_ZERO)) {
+ ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
+ pte_clear(mm, vmaddr, ptep);
+ }
pgste_set_unlock(ptep, pgste);
preempt_enable();
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x fc40459de82543b565ebc839dca8f7987f16f62e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122919-balancing-colony-41ee@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fc40459de82543b565ebc839dca8f7987f16f62e Mon Sep 17 00:00:00 2001
From: Haoxiang Li <lihaoxiang(a)isrc.iscas.ac.cn>
Date: Wed, 10 Dec 2025 17:06:01 +0800
Subject: [PATCH] xfs: fix a memory leak in xfs_buf_item_init()
xfs_buf_item_get_format() may allocate memory for bip->bli_formats,
free the memory in the error path.
Fixes: c3d5f0c2fb85 ("xfs: complain if anyone tries to create a too-large buffer log item")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haoxiang Li <lihaoxiang(a)isrc.iscas.ac.cn>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino(a)redhat.com>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 8d85b5eee444..f4c5be67826e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -896,6 +896,7 @@ xfs_buf_item_init(
map_size = DIV_ROUND_UP(chunks, NBWORD);
if (map_size > XFS_BLF_DATAMAP_SIZE) {
+ xfs_buf_item_free_format(bip);
kmem_cache_free(xfs_buf_item_cache, bip);
xfs_err(mp,
"buffer item dirty bitmap (%u uints) too small to reflect %u bytes!",
CephFS stores file data across multiple RADOS objects. An object is the
atomic unit of storage, so the writeback code must clean only folios
that belong to the same object with each OSD request.
CephFS also supports RAID0-style striping of file contents: if enabled,
each object stores multiple unbroken "stripe units" covering different
portions of the file; if disabled, a "stripe unit" is simply the whole
object. The stripe unit is (usually) reported as the inode's block size.
Though the writeback logic could, in principle, lock all dirty folios
belonging to the same object, its current design is to lock only a
single stripe unit at a time. Ever since this code was first written,
it has determined this size by checking the inode's block size.
However, the relatively-new fscrypt support needed to reduce the block
size for encrypted inodes to the crypto block size (see 'fixes' commit),
which causes an unnecessarily high number of write operations (~1024x as
many, with 4MiB objects) and grossly degraded performance.
Fix this (and clarify intent) by using i_layout.stripe_unit directly in
ceph_define_write_size() so that encrypted inodes are written back with
the same number of operations as if they were unencrypted.
Fixes: 94af0470924c ("ceph: add some fscrypt guardrails")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sam Edwards <CFSworks(a)gmail.com>
---
fs/ceph/addr.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b3569d44d510..cb1da8e27c2b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1000,7 +1000,8 @@ unsigned int ceph_define_write_size(struct address_space *mapping)
{
struct inode *inode = mapping->host;
struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
- unsigned int wsize = i_blocksize(inode);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ unsigned int wsize = ci->i_layout.stripe_unit;
if (fsc->mount_options->wsize < wsize)
wsize = fsc->mount_options->wsize;
--
2.51.2
If `locked_pages` is zero, the page array must not be allocated:
ceph_process_folio_batch() uses `locked_pages` to decide when to
allocate `pages`, and redundant allocations trigger
ceph_allocate_page_array()'s BUG_ON(), resulting in a worker oops (and
writeback stall) or even a kernel panic. Consequently, the main loop in
ceph_writepages_start() assumes that the lifetime of `pages` is confined
to a single iteration.
The ceph_submit_write() function claims ownership of the page array on
success. But failures only redirty/unlock the pages and fail to free the
array, making the failure case in ceph_submit_write() fatal.
Free the page array in ceph_submit_write()'s error-handling 'if' block
so that the caller's invariant (that the array does not outlive the
iteration) is maintained unconditionally, allowing failures in
ceph_submit_write() to be recoverable as originally intended.
Fixes: 1551ec61dc55 ("ceph: introduce ceph_submit_write() method")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sam Edwards <CFSworks(a)gmail.com>
---
fs/ceph/addr.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 2b722916fb9b..91cc43950162 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1466,6 +1466,13 @@ int ceph_submit_write(struct address_space *mapping,
unlock_page(page);
}
+ if (ceph_wbc->from_pool) {
+ mempool_free(ceph_wbc->pages, ceph_wb_pagevec_pool);
+ ceph_wbc->from_pool = false;
+ } else
+ kfree(ceph_wbc->pages);
+ ceph_wbc->pages = NULL;
+
ceph_osdc_put_request(req);
return -EIO;
}
--
2.51.2
When fscrypt is enabled, move_dirty_folio_in_page_array() may fail
because it needs to allocate bounce buffers to store the encrypted
versions of each folio. Each folio beyond the first allocates its bounce
buffer with GFP_NOWAIT. Failures are common (and expected) under this
allocation mode; they should flush (not abort) the batch.
However, ceph_process_folio_batch() uses the same `rc` variable for its
own return code and for capturing the return codes of its routine calls;
failing to reset `rc` back to 0 results in the error being propagated
out to the main writeback loop, which cannot actually tolerate any
errors here: once `ceph_wbc.pages` is allocated, it must be passed to
ceph_submit_write() to be freed. If it survives until the next iteration
(e.g. due to the goto being followed), ceph_allocate_page_array()'s
BUG_ON() will oops the worker. (Subsequent patches in this series make
the loop more robust.)
Note that this failure mode is currently masked due to another bug
(addressed later in this series) that prevents multiple encrypted folios
from being selected for the same write.
For now, just reset `rc` when redirtying the folio and prevent the
error from propagating. After this change, ceph_process_folio_batch() no
longer returns errors; its only remaining failure indicator is
`locked_pages == 0`, which the caller already handles correctly. The
next patch in this series addresses this.
Fixes: ce80b76dd327 ("ceph: introduce ceph_process_folio_batch() method")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sam Edwards <CFSworks(a)gmail.com>
---
fs/ceph/addr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 63b75d214210..3462df35d245 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1369,6 +1369,7 @@ int ceph_process_folio_batch(struct address_space *mapping,
rc = move_dirty_folio_in_page_array(mapping, wbc, ceph_wbc,
folio);
if (rc) {
+ rc = 0;
folio_redirty_for_writepage(wbc, folio);
folio_unlock(folio);
break;
--
2.51.2
If damon_call() is executed against a DAMON context that is not running,
the function returns error while keeping the damon_call_control object
linked to the context's call_controls list. Let's suppose the object is
deallocated after the damon_call(), and yet another damon_call() is
executed against the same context. The function tries to add the new
damon_call_control object to the call_controls list, which still has the
pointer to the previous damon_call_control object, which is deallocated.
As a result, use-after-free happens.
This can actually be triggered using the DAMON sysfs interface. It is not
easily exploitable since it requires the sysfs write permission and making
a definitely weird file writes, though. Please refer to the report for
more details about the issue reproduction steps.
Fix the issue by making two changes. Firstly, move the final
kdamond_call() for cancelling all existing damon_call() requests from
terminating DAMON context to be done before the ctx->kdamond reset.
This makes any code that sees NULL ctx->kdamond can safely assume the
context may not access damon_call() requests anymore. Secondly, let
damon_call() to cleanup the damon_call_control objects that were added
to the already-terminated DAMON context, before returning the error.
Fixes: 004ded6bee11 ("mm/damon: accept parallel damon_call() requests")
Reported-by: JaeJoon Jung <rgbi3307(a)gmail.com>
Closes: https://lore.kernel.org/20251224094401.20384-1-rgbi3307@gmail.com
Cc: <stable(a)vger.kernel.org> # 6.17.x
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
Changes from v1
(https://lkml.kernel.org/r/20251228183105.289441-1-sj@kernel.org):
- Do final kdamond_call() before ctx->kdamond reset.
- Fix Fixes: tag.
mm/damon/core.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 2d3e8006db50..199529dd7c66 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1442,6 +1442,35 @@ bool damon_is_running(struct damon_ctx *ctx)
return running;
}
+/*
+ * damon_call_handle_inactive_ctx() - handle DAMON call request that added to
+ * an inactive context.
+ * @ctx: The inactive DAMON context.
+ * @control: Control variable of the call request.
+ *
+ * This function is called in a case that @control is added to @ctx but @ctx is
+ * not running (inactive). See if @ctx handled @control or not, and cleanup
+ * @control if it was not handled.
+ *
+ * Returns 0 if @control was handled by @ctx, negative error code otherwise.
+ */
+static int damon_call_handle_inactive_ctx(
+ struct damon_ctx *ctx, struct damon_call_control *control)
+{
+ struct damon_call_control *c;
+
+ mutex_lock(&ctx->call_controls_lock);
+ list_for_each_entry(c, &ctx->call_controls, list) {
+ if (c == control) {
+ list_del(&control->list);
+ mutex_unlock(&ctx->call_controls_lock);
+ return -EINVAL;
+ }
+ }
+ mutex_unlock(&ctx->call_controls_lock);
+ return 0;
+}
+
/**
* damon_call() - Invoke a given function on DAMON worker thread (kdamond).
* @ctx: DAMON context to call the function for.
@@ -1472,7 +1501,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
list_add_tail(&control->list, &ctx->call_controls);
mutex_unlock(&ctx->call_controls_lock);
if (!damon_is_running(ctx))
- return -EINVAL;
+ return damon_call_handle_inactive_ctx(ctx, control);
if (control->repeat)
return 0;
wait_for_completion(&control->completion);
@@ -2797,13 +2826,13 @@ static int kdamond_fn(void *data)
if (ctx->ops.cleanup)
ctx->ops.cleanup(ctx);
kfree(ctx->regions_score_histogram);
+ kdamond_call(ctx, true);
pr_debug("kdamond (%d) finishes\n", current->pid);
mutex_lock(&ctx->kdamond_lock);
ctx->kdamond = NULL;
mutex_unlock(&ctx->kdamond_lock);
- kdamond_call(ctx, true);
damos_walk_cancel(ctx);
mutex_lock(&damon_lock);
base-commit: 40fd05d807b3a7678e3284b8a9a6cb89a32fa8ce
--
2.47.3
When starting multi-core loongarch virtualization on loongarch physical
machine, loading livepatch on the physical machine will cause an error
similar to the following:
[ 411.686289] livepatch: klp_try_switch_task: CPU 31/KVM:3116 has an
unreliable stack
The specific test steps are as follows:
1.Start a multi-core virtual machine on a physical machine
2.Enter the following command on the physical machine to turn on the debug
switch:
echo "file kernel/livepatch/transition.c +p" > /sys/kernel/debug/\
dynamic_debug/control
3.Load livepatch:
modprobe livepatch-sample
Through the above steps, similar prints can be viewed in dmesg.
The reason for this issue is that the code of the kvm_exc_entry function
was copied in the function kvm_loongarch_env_init. When the cpu needs to
execute kvm_exc_entry, it will switch to the copied address for execution.
The new address of the kvm_exc_entry function cannot be recognized in ORC,
which eventually leads to the arch_stack_walk_reliable function returning
an error and printing an exception message.
To solve the above problems, we directly compile the switch.S file into
the kernel instead of the module. In this way, the function kvm_exc_entry
will no longer need to be copied.
changlog:
V3<-V2:
1.Replace the EXPORT_SYMBOL macro declaration symbol with the
EXPORT_SYMBOL_FOR_KVM macro
2.Add some comments in kvm_enter_guest
3.Place the correct pc address in era
4.Move .p2align after .text
V2<-V1:
1.Rollback the modification of function parameter types such as
kvm_save_fpu. In the asm-prototypes.h header file, only the parameter types
it depends on are included
Cc: Huacai Chen <chenhuacai(a)kernel.org>
Cc: WANG Xuerui <kernel(a)xen0n.name>
Cc: Tianrui Zhao <zhaotianrui(a)loongson.cn>
Cc: Bibo Mao <maobibo(a)loongson.cn>
Cc: Charlie Jenkins <charlie(a)rivosinc.com>
Cc: Xianglai Li <lixianglai(a)loongson.cn>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Cc: Tiezhu Yang <yangtiezhu(a)loongson.cn>
Xianglai Li (2):
LoongArch: KVM: Compile the switch.S file directly into the kernel
LoongArch: KVM: fix "unreliable stack" issue
arch/loongarch/Kbuild | 2 +-
arch/loongarch/include/asm/asm-prototypes.h | 21 +++++++++++++
arch/loongarch/include/asm/kvm_host.h | 3 --
arch/loongarch/kvm/Makefile | 2 +-
arch/loongarch/kvm/main.c | 35 ++-------------------
arch/loongarch/kvm/switch.S | 32 ++++++++++++++++---
6 files changed, 53 insertions(+), 42 deletions(-)
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
--
2.39.1
Since commit 4b47a3aefb29 ("kbuild: Restore pattern to avoid stripping
.rela.dyn from vmlinux") vmlinux has .rel*.dyn preserved. Therefore, use
vmlinux to produce Image, not vmlinux.unstripped.
Doing so fixes booting a RELOCATABLE=y Image with kexec. The problem is
caused by this chain of events:
- Since commit 3e86e4d74c04 ("kbuild: keep .modinfo section in
vmlinux.unstripped"), vmlinux.unstripped gets a .modinfo section.
- The .modinfo section has SHF_ALLOC, so it ends up in Image, at the end
of it.
- The Image header's image_size field does not expect to include
.modinfo and does not account for it, since it should not be in Image.
- If .modinfo is large enough, the file size of Image ends up larger
than image_size, which eventually leads to it failing
sanity_check_segment_list().
Using vmlinux instead of vmlinux.unstripped means that the unexpected
.modinfo section is gone from Image, fixing the file size problem.
Cc: stable(a)vger.kernel.org
Fixes: 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped")
Signed-off-by: Vivian Wang <wangruikang(a)iscas.ac.cn>
---
arch/riscv/boot/Makefile | 4 ----
1 file changed, 4 deletions(-)
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index bfc3d0b75b9b..5301adf5f3f5 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -31,11 +31,7 @@ $(obj)/xipImage: vmlinux FORCE
endif
-ifdef CONFIG_RELOCATABLE
-$(obj)/Image: vmlinux.unstripped FORCE
-else
$(obj)/Image: vmlinux FORCE
-endif
$(call if_changed,objcopy)
$(obj)/Image.gz: $(obj)/Image FORCE
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20251230-riscv-vmlinux-not-unstripped-30ec0c930fd2
Best regards,
--
Vivian "dramforever" Wang
As reported, ever since commit 1013af4f585f ("mm/hugetlb: fix
huge_pmd_unshare() vs GUP-fast race") we can end up in some situations
where we perform so many IPI broadcasts when unsharing hugetlb PMD page
tables that it severely regresses some workloads.
In particular, when we fork()+exit(), or when we munmap() a large
area backed by many shared PMD tables, we perform one IPI broadcast per
unshared PMD table.
There are two optimizations to be had:
(1) When we process (unshare) multiple such PMD tables, such as during
exit(), it is sufficient to send a single IPI broadcast (as long as
we respect locking rules) instead of one per PMD table.
Locking prevents that any of these PMD tables could get reused before
we drop the lock.
(2) When we are not the last sharer (> 2 users including us), there is
no need to send the IPI broadcast. The shared PMD tables cannot
become exclusive (fully unshared) before an IPI will be broadcasted
by the last sharer.
Concurrent GUP-fast could walk into a PMD table just before we
unshared it. It could then succeed in grabbing a page from the
shared page table even after munmap() etc succeeded (and supressed
an IPI). But there is not difference compared to GUP-fast just
sleeping for a while after grabbing the page and re-enabling IRQs.
Most importantly, GUP-fast will never walk into page tables that are
no-longer shared, because the last sharer will issue an IPI
broadcast.
(if ever required, checking whether the PUD changed in GUP-fast
after grabbing the page like we do in the PTE case could handle
this)
So let's rework PMD sharing TLB flushing + IPI sync to use the mmu_gather
infrastructure so we can implement these optimizations and demystify the
code at least a bit. Extend the mmu_gather infrastructure to be able to
deal with our special hugetlb PMD table sharing implementation.
To make initialization of the mmu_gather easier when working on a single
VMA (in particular, when dealing with hugetlb), provide
tlb_gather_mmu_vma().
We'll consolidate the handling for (full) unsharing of PMD tables in
tlb_unshare_pmd_ptdesc() and tlb_flush_unshared_tables(), and track
in "struct mmu_gather" whether we had (full) unsharing of PMD tables.
Because locking is very special (concurrent unsharing+reuse must be
prevented), we disallow deferring flushing to tlb_finish_mmu() and instead
require an explicit earlier call to tlb_flush_unshared_tables().
From hugetlb code, we call huge_pmd_unshare_flush() where we make sure
that the expected lock protecting us from concurrent unsharing+reuse is
still held.
Check with a VM_WARN_ON_ONCE() in tlb_finish_mmu() that
tlb_flush_unshared_tables() was properly called earlier.
Document it all properly.
Notes about tlb_remove_table_sync_one() interaction with unsharing:
There are two fairly tricky things:
(1) tlb_remove_table_sync_one() is a NOP on architectures without
CONFIG_MMU_GATHER_RCU_TABLE_FREE.
Here, the assumption is that the previous TLB flush would send an
IPI to all relevant CPUs. Careful: some architectures like x86 only
send IPIs to all relevant CPUs when tlb->freed_tables is set.
The relevant architectures should be selecting
MMU_GATHER_RCU_TABLE_FREE, but x86 might not do that in stable
kernels and it might have been problematic before this patch.
Also, the arch flushing behavior (independent of IPIs) is different
when tlb->freed_tables is set. Do we have to enlighten them to also
take care of tlb->unshared_tables? So far we didn't care, so
hopefully we are fine. Of course, we could be setting
tlb->freed_tables as well, but that might then unnecessarily flush
too much, because the semantics of tlb->freed_tables are a bit
fuzzy.
This patch changes nothing in this regard.
(2) tlb_remove_table_sync_one() is not a NOP on architectures with
CONFIG_MMU_GATHER_RCU_TABLE_FREE that actually don't need a sync.
Take x86 as an example: in the common case (!pv, !X86_FEATURE_INVLPGB)
we still issue IPIs during TLB flushes and don't actually need the
second tlb_remove_table_sync_one().
This optimized can be implemented on top of this, by checking e.g., in
tlb_remove_table_sync_one() whether we really need IPIs. But as
described in (1), it really must honor tlb->freed_tables then to
send IPIs to all relevant CPUs.
Notes on TLB flushing changes:
(1) Flushing for non-shared PMD tables
We're converting from flush_hugetlb_tlb_range() to
tlb_remove_huge_tlb_entry(). Given that we properly initialize the
MMU gather in tlb_gather_mmu_vma() to be hugetlb aware, similar to
__unmap_hugepage_range(), that should be fine.
(2) Flushing for shared PMD tables
We're converting from various things (flush_hugetlb_tlb_range(),
tlb_flush_pmd_range(), flush_tlb_range()) to tlb_flush_pmd_range().
tlb_flush_pmd_range() achieves the same that
tlb_remove_huge_tlb_entry() would achieve in these scenarios.
Note that tlb_remove_huge_tlb_entry() also calls
__tlb_remove_tlb_entry(), however that is only implemented on
powerpc, which does not support PMD table sharing.
Similar to (1), tlb_gather_mmu_vma() should make sure that TLB
flushing keeps on working as expected.
Further, note that the ptdesc_pmd_pts_dec() in huge_pmd_share() is not a
concern, as we are holding the i_mmap_lock the whole time, preventing
concurrent unsharing. That ptdesc_pmd_pts_dec() usage will be removed
separately as a cleanup later.
There are plenty more cleanups to be had, but they have to wait until
this is fixed.
Fixes: 1013af4f585f ("mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race")
Reported-by: Uschakow, Stanislav" <suschako(a)amazon.de>
Closes: https://lore.kernel.org/all/4d3878531c76479d9f8ca9789dc6485d@amazon.de/
Tested-by: Laurence Oberman <loberman(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: David Hildenbrand (Red Hat) <david(a)kernel.org>
---
include/asm-generic/tlb.h | 77 +++++++++++++++++++++++-
include/linux/hugetlb.h | 15 +++--
include/linux/mm_types.h | 1 +
mm/hugetlb.c | 123 ++++++++++++++++++++++----------------
mm/mmu_gather.c | 33 ++++++++++
mm/rmap.c | 25 +++++---
6 files changed, 208 insertions(+), 66 deletions(-)
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 1fff717cae510..4d679d2a206b4 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -46,7 +46,8 @@
*
* The mmu_gather API consists of:
*
- * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu()
+ * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_gather_mmu_vma() /
+ * tlb_finish_mmu()
*
* start and finish a mmu_gather
*
@@ -364,6 +365,20 @@ struct mmu_gather {
unsigned int vma_huge : 1;
unsigned int vma_pfn : 1;
+ /*
+ * Did we unshare (unmap) any shared page tables? For now only
+ * used for hugetlb PMD table sharing.
+ */
+ unsigned int unshared_tables : 1;
+
+ /*
+ * Did we unshare any page tables such that they are now exclusive
+ * and could get reused+modified by the new owner? When setting this
+ * flag, "unshared_tables" will be set as well. For now only used
+ * for hugetlb PMD table sharing.
+ */
+ unsigned int fully_unshared_tables : 1;
+
unsigned int batch_count;
#ifndef CONFIG_MMU_GATHER_NO_GATHER
@@ -400,6 +415,7 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
tlb->cleared_pmds = 0;
tlb->cleared_puds = 0;
tlb->cleared_p4ds = 0;
+ tlb->unshared_tables = 0;
/*
* Do not reset mmu_gather::vma_* fields here, we do not
* call into tlb_start_vma() again to set them if there is an
@@ -484,7 +500,7 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
* these bits.
*/
if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
- tlb->cleared_puds || tlb->cleared_p4ds))
+ tlb->cleared_puds || tlb->cleared_p4ds || tlb->unshared_tables))
return;
tlb_flush(tlb);
@@ -773,6 +789,63 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
}
#endif
+#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
+static inline void tlb_unshare_pmd_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt,
+ unsigned long addr)
+{
+ /*
+ * The caller must make sure that concurrent unsharing + exclusive
+ * reuse is impossible until tlb_flush_unshared_tables() was called.
+ */
+ VM_WARN_ON_ONCE(!ptdesc_pmd_is_shared(pt));
+ ptdesc_pmd_pts_dec(pt);
+
+ /* Clearing a PUD pointing at a PMD table with PMD leaves. */
+ tlb_flush_pmd_range(tlb, addr & PUD_MASK, PUD_SIZE);
+
+ /*
+ * If the page table is now exclusively owned, we fully unshared
+ * a page table.
+ */
+ if (!ptdesc_pmd_is_shared(pt))
+ tlb->fully_unshared_tables = true;
+ tlb->unshared_tables = true;
+}
+
+static inline void tlb_flush_unshared_tables(struct mmu_gather *tlb)
+{
+ /*
+ * As soon as the caller drops locks to allow for reuse of
+ * previously-shared tables, these tables could get modified and
+ * even reused outside of hugetlb context, so we have to make sure that
+ * any page table walkers (incl. TLB, GUP-fast) are aware of that
+ * change.
+ *
+ * Even if we are not fully unsharing a PMD table, we must
+ * flush the TLB for the unsharer now.
+ */
+ if (tlb->unshared_tables)
+ tlb_flush_mmu_tlbonly(tlb);
+
+ /*
+ * Similarly, we must make sure that concurrent GUP-fast will not
+ * walk previously-shared page tables that are getting modified+reused
+ * elsewhere. So broadcast an IPI to wait for any concurrent GUP-fast.
+ *
+ * We only perform this when we are the last sharer of a page table,
+ * as the IPI will reach all CPUs: any GUP-fast.
+ *
+ * Note that on configs where tlb_remove_table_sync_one() is a NOP,
+ * the expectation is that the tlb_flush_mmu_tlbonly() would have issued
+ * required IPIs already for us.
+ */
+ if (tlb->fully_unshared_tables) {
+ tlb_remove_table_sync_one();
+ tlb->fully_unshared_tables = false;
+ }
+}
+#endif /* CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */
+
#endif /* CONFIG_MMU */
#endif /* _ASM_GENERIC__TLB_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 03c8725efa289..e51b8ef0cebd9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -240,8 +240,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
unsigned long hugetlb_mask_last_page(struct hstate *h);
-int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep);
+int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
@@ -300,13 +301,17 @@ static inline struct address_space *hugetlb_folio_mapping_lock_write(
return NULL;
}
-static inline int huge_pmd_unshare(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+static inline int huge_pmd_unshare(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
return 0;
}
+static inline void huge_pmd_unshare_flush(struct mmu_gather *tlb,
+ struct vm_area_struct *vma)
+{
+}
+
static inline void adjust_range_if_pmd_sharing_possible(
struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 42af2292951d4..d1053b2c1f800 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1522,6 +1522,7 @@ static inline unsigned int mm_cid_size(void)
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
+void tlb_gather_mmu_vma(struct mmu_gather *tlb, struct vm_area_struct *vma);
extern void tlb_finish_mmu(struct mmu_gather *tlb);
struct vm_fault;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c77cdef12a32..2609b6d58f99e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5096,7 +5096,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
unsigned long last_addr_mask;
pte_t *src_pte, *dst_pte;
struct mmu_notifier_range range;
- bool shared_pmd = false;
+ struct mmu_gather tlb;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, old_addr,
old_end);
@@ -5106,6 +5106,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
* range.
*/
flush_cache_range(vma, range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
mmu_notifier_invalidate_range_start(&range);
last_addr_mask = hugetlb_mask_last_page(h);
@@ -5122,8 +5123,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
if (huge_pte_none(huge_ptep_get(mm, old_addr, src_pte)))
continue;
- if (huge_pmd_unshare(mm, vma, old_addr, src_pte)) {
- shared_pmd = true;
+ if (huge_pmd_unshare(&tlb, vma, old_addr, src_pte)) {
old_addr |= last_addr_mask;
new_addr |= last_addr_mask;
continue;
@@ -5134,15 +5134,16 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
break;
move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte, sz);
+ tlb_remove_huge_tlb_entry(h, &tlb, src_pte, old_addr);
}
- if (shared_pmd)
- flush_hugetlb_tlb_range(vma, range.start, range.end);
- else
- flush_hugetlb_tlb_range(vma, old_end - len, old_end);
+ tlb_flush_mmu_tlbonly(&tlb);
+ huge_pmd_unshare_flush(&tlb, vma);
+
mmu_notifier_invalidate_range_end(&range);
i_mmap_unlock_write(mapping);
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
return len + old_addr - old_end;
}
@@ -5161,7 +5162,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long sz = huge_page_size(h);
bool adjust_reservation;
unsigned long last_addr_mask;
- bool force_flush = false;
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
@@ -5184,10 +5184,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, address, ptep)) {
+ if (huge_pmd_unshare(tlb, vma, address, ptep)) {
spin_unlock(ptl);
- tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
- force_flush = true;
address |= last_addr_mask;
continue;
}
@@ -5303,14 +5301,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
tlb_end_vma(tlb, vma);
- /*
- * There is nothing protecting a previously-shared page table that we
- * unshared through huge_pmd_unshare() from getting freed after we
- * release i_mmap_rwsem, so flush the TLB now. If huge_pmd_unshare()
- * succeeded, flush the range corresponding to the pud.
- */
- if (force_flush)
- tlb_flush_mmu_tlbonly(tlb);
+ huge_pmd_unshare_flush(tlb, vma);
}
void __hugetlb_zap_begin(struct vm_area_struct *vma,
@@ -6409,11 +6400,11 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
pte_t pte;
struct hstate *h = hstate_vma(vma);
long pages = 0, psize = huge_page_size(h);
- bool shared_pmd = false;
struct mmu_notifier_range range;
unsigned long last_addr_mask;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ struct mmu_gather tlb;
/*
* In the case of shared PMDs, the area to flush could be beyond
@@ -6426,6 +6417,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
BUG_ON(address >= end);
flush_cache_range(vma, range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
mmu_notifier_invalidate_range_start(&range);
hugetlb_vma_lock_write(vma);
@@ -6452,7 +6444,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
}
}
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, address, ptep)) {
+ if (huge_pmd_unshare(&tlb, vma, address, ptep)) {
/*
* When uffd-wp is enabled on the vma, unshare
* shouldn't happen at all. Warn about it if it
@@ -6461,7 +6453,6 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
WARN_ON_ONCE(uffd_wp || uffd_wp_resolve);
pages++;
spin_unlock(ptl);
- shared_pmd = true;
address |= last_addr_mask;
continue;
}
@@ -6522,22 +6513,16 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
pte = huge_pte_clear_uffd_wp(pte);
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
+ tlb_remove_huge_tlb_entry(h, &tlb, ptep, address);
}
next:
spin_unlock(ptl);
cond_resched();
}
- /*
- * There is nothing protecting a previously-shared page table that we
- * unshared through huge_pmd_unshare() from getting freed after we
- * release i_mmap_rwsem, so flush the TLB now. If huge_pmd_unshare()
- * succeeded, flush the range corresponding to the pud.
- */
- if (shared_pmd)
- flush_hugetlb_tlb_range(vma, range.start, range.end);
- else
- flush_hugetlb_tlb_range(vma, start, end);
+
+ tlb_flush_mmu_tlbonly(&tlb);
+ huge_pmd_unshare_flush(&tlb, vma);
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs() we are
* downgrading page table protection not changing it to point to a new
@@ -6548,6 +6533,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
mmu_notifier_invalidate_range_end(&range);
+ tlb_finish_mmu(&tlb);
return pages > 0 ? (pages << h->order) : pages;
}
@@ -6904,18 +6890,27 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
return pte;
}
-/*
- * unmap huge page backed by shared pte.
+/**
+ * huge_pmd_unshare - Unmap a pmd table if it is shared by multiple users
+ * @tlb: the current mmu_gather.
+ * @vma: the vma covering the pmd table.
+ * @addr: the address we are trying to unshare.
+ * @ptep: pointer into the (pmd) page table.
+ *
+ * Called with the page table lock held, the i_mmap_rwsem held in write mode
+ * and the hugetlb vma lock held in write mode.
*
- * Called with page table lock held.
+ * Note: The caller must call huge_pmd_unshare_flush() before dropping the
+ * i_mmap_rwsem.
*
- * returns: 1 successfully unmapped a shared pte page
- * 0 the underlying pte page is not shared, or it is the last user
+ * Returns: 1 if it was a shared PMD table and it got unmapped, or 0 if it
+ * was not a shared PMD table.
*/
-int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
unsigned long sz = huge_page_size(hstate_vma(vma));
+ struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd = pgd_offset(mm, addr);
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
@@ -6927,18 +6922,36 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
i_mmap_assert_write_locked(vma->vm_file->f_mapping);
hugetlb_vma_assert_locked(vma);
pud_clear(pud);
- /*
- * Once our caller drops the rmap lock, some other process might be
- * using this page table as a normal, non-hugetlb page table.
- * Wait for pending gup_fast() in other threads to finish before letting
- * that happen.
- */
- tlb_remove_table_sync_one();
- ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
+
+ tlb_unshare_pmd_ptdesc(tlb, virt_to_ptdesc(ptep), addr);
+
mm_dec_nr_pmds(mm);
return 1;
}
+/*
+ * huge_pmd_unshare_flush - Complete a sequence of huge_pmd_unshare() calls
+ * @tlb: the current mmu_gather.
+ * @vma: the vma covering the pmd table.
+ *
+ * Perform necessary TLB flushes or IPI broadcasts to synchronize PMD table
+ * unsharing with concurrent page table walkers.
+ *
+ * This function must be called after a sequence of huge_pmd_unshare()
+ * calls while still holding the i_mmap_rwsem.
+ */
+void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ /*
+ * We must synchronize page table unsharing such that nobody will
+ * try reusing a previously-shared page table while it might still
+ * be in use by previous sharers (TLB, GUP_fast).
+ */
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
+ tlb_flush_unshared_tables(tlb);
+}
+
#else /* !CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -6947,12 +6960,16 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
return NULL;
}
-int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
return 0;
}
+void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+}
+
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
@@ -7219,6 +7236,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long sz = huge_page_size(h);
struct mm_struct *mm = vma->vm_mm;
struct mmu_notifier_range range;
+ struct mmu_gather tlb;
unsigned long address;
spinlock_t *ptl;
pte_t *ptep;
@@ -7230,6 +7248,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
return;
flush_cache_range(vma, start, end);
+ tlb_gather_mmu_vma(&tlb, vma);
+
/*
* No need to call adjust_range_if_pmd_sharing_possible(), because
* we have already done the PUD_SIZE alignment.
@@ -7248,10 +7268,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- huge_pmd_unshare(mm, vma, address, ptep);
+ huge_pmd_unshare(&tlb, vma, address, ptep);
spin_unlock(ptl);
}
- flush_hugetlb_tlb_range(vma, start, end);
+ huge_pmd_unshare_flush(&tlb, vma);
if (take_locks) {
i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
@@ -7261,6 +7281,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
* Documentation/mm/mmu_notifier.rst.
*/
mmu_notifier_invalidate_range_end(&range);
+ tlb_finish_mmu(&tlb);
}
/*
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 247e3f9db6c7a..cd32c2dbf501b 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -10,6 +10,7 @@
#include <linux/swap.h>
#include <linux/rmap.h>
#include <linux/pgalloc.h>
+#include <linux/hugetlb.h>
#include <asm/tlb.h>
@@ -426,6 +427,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
#endif
tlb->vma_pfn = 0;
+ tlb->fully_unshared_tables = 0;
__tlb_reset_range(tlb);
inc_tlb_flush_pending(tlb->mm);
}
@@ -459,6 +461,31 @@ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
__tlb_gather_mmu(tlb, mm, true);
}
+/**
+ * tlb_gather_mmu - initialize an mmu_gather structure for operating on a single
+ * VMA
+ * @tlb: the mmu_gather structure to initialize
+ * @vma: the vm_area_struct
+ *
+ * Called to initialize an (on-stack) mmu_gather structure for operating on
+ * a single VMA. In contrast to tlb_gather_mmu(), calling this function will
+ * not require another call to tlb_start_vma(). In contrast to tlb_start_vma(),
+ * this function will *not* call flush_cache_range().
+ *
+ * For hugetlb VMAs, this function will also initialize the mmu_gather
+ * page_size accordingly, not requiring a separate call to
+ * tlb_change_page_size().
+ *
+ */
+void tlb_gather_mmu_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ tlb_gather_mmu(tlb, vma->vm_mm);
+ tlb_update_vma_flags(tlb, vma);
+ if (is_vm_hugetlb_page(vma))
+ /* All entries have the same size. */
+ tlb_change_page_size(tlb, huge_page_size(hstate_vma(vma)));
+}
+
/**
* tlb_finish_mmu - finish an mmu_gather structure
* @tlb: the mmu_gather structure to finish
@@ -468,6 +495,12 @@ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
*/
void tlb_finish_mmu(struct mmu_gather *tlb)
{
+ /*
+ * We expect an earlier huge_pmd_unshare_flush() call to sort this out,
+ * due to complicated locking requirements with page table unsharing.
+ */
+ VM_WARN_ON_ONCE(tlb->fully_unshared_tables);
+
/*
* If there are parallel threads are doing PTE changes on same range
* under non-exclusive lock (e.g., mmap_lock read-side) but defer TLB
diff --git a/mm/rmap.c b/mm/rmap.c
index 748f48727a162..7b9879ef442d9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -76,7 +76,7 @@
#include <linux/mm_inline.h>
#include <linux/oom.h>
-#include <asm/tlbflush.h>
+#include <asm/tlb.h>
#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>
@@ -2008,13 +2008,17 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* if unsuccessful.
*/
if (!anon) {
+ struct mmu_gather tlb;
+
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
if (!hugetlb_vma_trylock_write(vma))
goto walk_abort;
- if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
+
+ tlb_gather_mmu_vma(&tlb, vma);
+ if (huge_pmd_unshare(&tlb, vma, address, pvmw.pte)) {
hugetlb_vma_unlock_write(vma);
- flush_tlb_range(vma,
- range.start, range.end);
+ huge_pmd_unshare_flush(&tlb, vma);
+ tlb_finish_mmu(&tlb);
/*
* The PMD table was unmapped,
* consequently unmapping the folio.
@@ -2022,6 +2026,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
goto walk_done;
}
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
}
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
if (pte_dirty(pteval))
@@ -2398,17 +2403,20 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
* fail if unsuccessful.
*/
if (!anon) {
+ struct mmu_gather tlb;
+
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
if (!hugetlb_vma_trylock_write(vma)) {
page_vma_mapped_walk_done(&pvmw);
ret = false;
break;
}
- if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
- hugetlb_vma_unlock_write(vma);
- flush_tlb_range(vma,
- range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
+ if (huge_pmd_unshare(&tlb, vma, address, pvmw.pte)) {
+ hugetlb_vma_unlock_write(vma);
+ huge_pmd_unshare_flush(&tlb, vma);
+ tlb_finish_mmu(&tlb);
/*
* The PMD table was unmapped,
* consequently unmapping the folio.
@@ -2417,6 +2425,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
break;
}
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
}
/* Nuke the hugetlb page table entry */
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
--
2.52.0
Commit 7f9ab862e05c ("leds: spi-byte: Call of_node_put() on error path")
was merged in 6.11 and then backported to stable trees through 5.10. It
relocates the line that initializes the variable 'child' to a later
point in spi_byte_probe().
Versions < 6.9 do not have commit ccc35ff2fd29 ("leds: spi-byte: Use
devm_led_classdev_register_ext()"), which removes a line that reads a
property from 'child' before its new initialization point. Consequently,
spi_byte_probe() reads from an uninitialized device node in stable
kernels 6.6-5.10.
Initialize 'child' before it is first accessed.
Fixes: 7f9ab862e05c ("leds: spi-byte: Call of_node_put() on error path")
Signed-off-by: Tiffany Yang <ynaffit(a)google.com>
---
As an alternative to moving the initialization of 'child' up,
Fedor Pchelkin proposed [1] backporting the change that removes the
intermediate access.
[1] https://lore.kernel.org/stable/20241029204128.527033-1-pchelkin@ispras.ru/
---
drivers/leds/leds-spi-byte.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/leds/leds-spi-byte.c b/drivers/leds/leds-spi-byte.c
index afe9bff7c7c1..4520df1e2341 100644
--- a/drivers/leds/leds-spi-byte.c
+++ b/drivers/leds/leds-spi-byte.c
@@ -96,6 +96,7 @@ static int spi_byte_probe(struct spi_device *spi)
if (!led)
return -ENOMEM;
+ child = of_get_next_available_child(dev_of_node(dev), NULL);
of_property_read_string(child, "label", &name);
strscpy(led->name, name, sizeof(led->name));
led->spi = spi;
@@ -106,7 +107,6 @@ static int spi_byte_probe(struct spi_device *spi)
led->ldev.max_brightness = led->cdef->max_value - led->cdef->off_value;
led->ldev.brightness_set_blocking = spi_byte_brightness_set_blocking;
- child = of_get_next_available_child(dev_of_node(dev), NULL);
state = of_get_property(child, "default-state", NULL);
if (state) {
if (!strcmp(state, "on")) {
--
2.52.0.351.gbe84eed79e-goog
The patch titled
Subject: x86/kexec: add a sanity check on previous kernel's ima kexec buffer
has been added to the -mm mm-nonmm-unstable branch. Its filename is
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Subject: x86/kexec: add a sanity check on previous kernel's ima kexec buffer
Date: Mon, 29 Dec 2025 00:15:23 -0800
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>", the physical range that contains the carried
over IMA measurement list may fall outside the truncated RAM leading to a
kernel panic.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) ��� not-present page
Other architectures already validate the range with page_is_ram(), as done
in commit cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer
against memory bounds") do a similar check on x86.
Without carrying the measurement list across kexec, the attestation would
fail.
Link: https://lkml.kernel.org/r/20251229081523.622515-4-harshit.m.mogalapalli@ora…
Fixes: b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Reported-by: Paul Webb <paul.x.webb(a)oracle.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: guoweikang <guoweikang.kernel(a)gmail.com>
Cc: Henry Willard <henry.willard(a)oracle.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Joel Granados <joel.granados(a)kernel.org>
Cc: Jonathan McDowell <noodles(a)fb.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Mimi Zohar <zohar(a)linux.ibm.com>
Cc: Sohil Mehta <sohil.mehta(a)intel.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yifei Liu <yifei.l.liu(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/x86/kernel/setup.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/arch/x86/kernel/setup.c~x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer
+++ a/arch/x86/kernel/setup.c
@@ -439,9 +439,15 @@ int __init ima_free_kexec_buffer(void)
int __init ima_get_kexec_buffer(void **addr, size_t *size)
{
+ int ret;
+
if (!ima_kexec_buffer_size)
return -ENOENT;
+ ret = ima_validate_range(ima_kexec_buffer_phys, ima_kexec_buffer_size);
+ if (ret)
+ return ret;
+
*addr = __va(ima_kexec_buffer_phys);
*size = ima_kexec_buffer_size;
_
Patches currently in -mm which might be from harshit.m.mogalapalli(a)oracle.com are
ima-add-ima_validate_range-for-previous-kernel-ima-buffer.patch
of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range.patch
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
The patch titled
Subject: of/kexec: refactor ima_get_kexec_buffer() to use ima_validate_range()
has been added to the -mm mm-nonmm-unstable branch. Its filename is
of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Subject: of/kexec: refactor ima_get_kexec_buffer() to use ima_validate_range()
Date: Mon, 29 Dec 2025 00:15:22 -0800
Refactor the OF/DT ima_get_kexec_buffer() to use a generic helper to
validate the address range. No functional change intended.
Link: https://lkml.kernel.org/r/20251229081523.622515-3-harshit.m.mogalapalli@ora…
Fixes: b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: guoweikang <guoweikang.kernel(a)gmail.com>
Cc: Henry Willard <henry.willard(a)oracle.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Joel Granados <joel.granados(a)kernel.org>
Cc: Jonathan McDowell <noodles(a)fb.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Mimi Zohar <zohar(a)linux.ibm.com>
Cc: Paul Webb <paul.x.webb(a)oracle.com>
Cc: Sohil Mehta <sohil.mehta(a)intel.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yifei Liu <yifei.l.liu(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/of/kexec.c | 15 +++------------
1 file changed, 3 insertions(+), 12 deletions(-)
--- a/drivers/of/kexec.c~of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range
+++ a/drivers/of/kexec.c
@@ -128,7 +128,6 @@ int __init ima_get_kexec_buffer(void **a
{
int ret, len;
unsigned long tmp_addr;
- unsigned long start_pfn, end_pfn;
size_t tmp_size;
const void *prop;
@@ -144,17 +143,9 @@ int __init ima_get_kexec_buffer(void **a
if (!tmp_size)
return -ENOENT;
- /*
- * Calculate the PFNs for the buffer and ensure
- * they are with in addressable memory.
- */
- start_pfn = PHYS_PFN(tmp_addr);
- end_pfn = PHYS_PFN(tmp_addr + tmp_size - 1);
- if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) {
- pr_warn("IMA buffer at 0x%lx, size = 0x%zx beyond memory\n",
- tmp_addr, tmp_size);
- return -EINVAL;
- }
+ ret = ima_validate_range(tmp_addr, tmp_size);
+ if (ret)
+ return ret;
*addr = __va(tmp_addr);
*size = tmp_size;
_
Patches currently in -mm which might be from harshit.m.mogalapalli(a)oracle.com are
ima-add-ima_validate_range-for-previous-kernel-ima-buffer.patch
of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range.patch
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
The patch titled
Subject: ima: add ima_validate_range() for previous kernel IMA buffer
has been added to the -mm mm-nonmm-unstable branch. Its filename is
ima-add-ima_validate_range-for-previous-kernel-ima-buffer.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Subject: ima: add ima_validate_range() for previous kernel IMA buffer
Date: Mon, 29 Dec 2025 00:15:21 -0800
Patch series "Address page fault in ima_restore_measurement_list()", v2.
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>" we observe a pafe fault that happens.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) not-present page
This happens on x86_64 only, as this is already fixed in aarch64 in
commit: cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer
against memory bounds")
This patch (of 3):
When the second-stage kernel is booted with a limiting command line (e.g.
"mem=<size>"), the IMA measurement buffer handed over from the previous
kernel may fall outside the addressable RAM of the new kernel. Accessing
such a buffer can fault during early restore.
Introduce a small generic helper, ima_validate_range(), which verifies
that a physical [start, end] range for the previous-kernel IMA buffer lies
within addressable memory:
- On x86, use pfn_range_is_mapped().
- On OF based architectures, use page_is_ram().
Link: https://lkml.kernel.org/r/20251229081523.622515-1-harshit.m.mogalapalli@ora…
Link: https://lkml.kernel.org/r/20251229081523.622515-2-harshit.m.mogalapalli@ora…
Fixes: b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: guoweikang <guoweikang.kernel(a)gmail.com>
Cc: Henry Willard <henry.willard(a)oracle.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Joel Granados <joel.granados(a)kernel.org>
Cc: Jonathan McDowell <noodles(a)fb.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Mimi Zohar <zohar(a)linux.ibm.com>
Cc: Paul Webb <paul.x.webb(a)oracle.com>
Cc: Sohil Mehta <sohil.mehta(a)intel.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yifei Liu <yifei.l.liu(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/ima.h | 1
security/integrity/ima/ima_kexec.c | 35 +++++++++++++++++++++++++++
2 files changed, 36 insertions(+)
--- a/include/linux/ima.h~ima-add-ima_validate_range-for-previous-kernel-ima-buffer
+++ a/include/linux/ima.h
@@ -69,6 +69,7 @@ static inline int ima_measure_critical_d
#ifdef CONFIG_HAVE_IMA_KEXEC
int __init ima_free_kexec_buffer(void);
int __init ima_get_kexec_buffer(void **addr, size_t *size);
+int ima_validate_range(phys_addr_t phys, size_t size);
#endif
#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
--- a/security/integrity/ima/ima_kexec.c~ima-add-ima_validate_range-for-previous-kernel-ima-buffer
+++ a/security/integrity/ima/ima_kexec.c
@@ -12,6 +12,8 @@
#include <linux/kexec.h>
#include <linux/of.h>
#include <linux/ima.h>
+#include <linux/mm.h>
+#include <linux/overflow.h>
#include <linux/reboot.h>
#include <asm/page.h>
#include "ima.h"
@@ -294,3 +296,36 @@ void __init ima_load_kexec_buffer(void)
pr_debug("Error restoring the measurement list: %d\n", rc);
}
}
+
+/*
+ * ima_validate_range - verify a physical buffer lies in addressable RAM
+ * @phys: physical start address of the buffer from previous kernel
+ * @size: size of the buffer
+ *
+ * On success return 0. On failure returns -EINVAL so callers can skip
+ * restoring.
+ */
+int ima_validate_range(phys_addr_t phys, size_t size)
+{
+ unsigned long start_pfn, end_pfn;
+ phys_addr_t end_phys;
+
+ if (check_add_overflow(phys, (phys_addr_t)size - 1, &end_phys))
+ return -EINVAL;
+
+ start_pfn = PHYS_PFN(phys);
+ end_pfn = PHYS_PFN(end_phys);
+
+#ifdef CONFIG_X86
+ if (!pfn_range_is_mapped(start_pfn, end_pfn))
+#else
+ if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn))
+#endif
+ {
+ pr_warn("IMA: previous kernel measurement buffer %pa (size 0x%zx) lies outside available memory\n",
+ &phys, size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
_
Patches currently in -mm which might be from harshit.m.mogalapalli(a)oracle.com are
ima-add-ima_validate_range-for-previous-kernel-ima-buffer.patch
of-kexec-refactor-ima_get_kexec_buffer-to-use-ima_validate_range.patch
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
The quilt patch titled
Subject: x86/kexec: add a sanity check on previous kernel's ima kexec buffer
has been removed from the -mm tree. Its filename was
x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Subject: x86/kexec: add a sanity check on previous kernel's ima kexec buffer
Date: Wed, 12 Nov 2025 11:30:02 -0800
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>", the physical range that contains the carried
over IMA measurement list may fall outside the truncated RAM leading to a
kernel panic.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) ��� not-present page
Other architectures already validate the range with page_is_ram(), as done
in commit: cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer
against memory bounds") do a similar check on x86.
[akpm(a)linux-foundation.org: fix comment typo]
Link: https://lkml.kernel.org/r/20251112193005.3772542-1-harshit.m.mogalapalli@or…
Fixes: b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Reported-by: Paul Webb <paul.x.webb(a)oracle.com>
Reviewed-by: Jonathan McDowell <noodles(a)meta.com>
Tested-by: Mimi Zohar <zohar(a)linux.ibm.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: guoweikang <guoweikang.kernel(a)gmail.com>
Cc: Henry Willard <henry.willard(a)oracle.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Joel Granados <joel.granados(a)kernel.org>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Sohil Mehta <sohil.mehta(a)intel.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yifei Liu <yifei.l.liu(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/x86/kernel/setup.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
--- a/arch/x86/kernel/setup.c~x86-kexec-add-a-sanity-check-on-previous-kernels-ima-kexec-buffer
+++ a/arch/x86/kernel/setup.c
@@ -439,9 +439,23 @@ int __init ima_free_kexec_buffer(void)
int __init ima_get_kexec_buffer(void **addr, size_t *size)
{
+ unsigned long start_pfn, end_pfn;
+
if (!ima_kexec_buffer_size)
return -ENOENT;
+ /*
+ * Calculate the PFNs for the buffer and ensure
+ * they are within addressable memory.
+ */
+ start_pfn = PFN_DOWN(ima_kexec_buffer_phys);
+ end_pfn = PFN_DOWN(ima_kexec_buffer_phys + ima_kexec_buffer_size - 1);
+ if (!pfn_range_is_mapped(start_pfn, end_pfn)) {
+ pr_warn("IMA buffer at 0x%llx, size = 0x%zx beyond memory\n",
+ ima_kexec_buffer_phys, ima_kexec_buffer_size);
+ return -EINVAL;
+ }
+
*addr = __va(ima_kexec_buffer_phys);
*size = ima_kexec_buffer_size;
_
Patches currently in -mm which might be from harshit.m.mogalapalli(a)oracle.com are
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 68d05693f8c031257a0822464366e1c2a239a512
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122953-federal-handclasp-1d09@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 68d05693f8c031257a0822464366e1c2a239a512 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao(a)kernel.org>
Date: Wed, 5 Nov 2025 14:50:23 +0800
Subject: [PATCH] f2fs: fix to detect recoverable inode during dryrun of
find_fsync_dnodes()
mkfs.f2fs -f /dev/vdd
mount /dev/vdd /mnt/f2fs
touch /mnt/f2fs/foo
sync # avoid CP_UMOUNT_FLAG in last f2fs_checkpoint.ckpt_flags
touch /mnt/f2fs/bar
f2fs_io fsync /mnt/f2fs/bar
f2fs_io shutdown 2 /mnt/f2fs
umount /mnt/f2fs
blockdev --setro /dev/vdd
mount /dev/vdd /mnt/f2fs
mount: /mnt/f2fs: WARNING: source write-protected, mounted read-only.
For the case if we create and fsync a new inode before sudden power-cut,
without norecovery or disable_roll_forward mount option, the following
mount will succeed w/o recovering last fsynced inode.
The problem here is that we only check inode_list list after
find_fsync_dnodes() in f2fs_recover_fsync_data() to find out whether
there is recoverable data in the iamge, but there is a missed case, if
last fsynced inode is not existing in last checkpoint, then, we will
fail to get its inode due to nat of inode node is not existing in last
checkpoint, so the inode won't be linked in inode_list.
Let's detect such case in dyrun mode to fix this issue.
After this change, mount will fail as expected below:
mount: /mnt/f2fs: cannot mount /dev/vdd read-only.
dmesg(1) may have more information after failed mount system call.
demsg:
F2FS-fs (vdd): Need to recover fsync data, but write access unavailable, please try mount w/ disable_roll_forward or norecovery
Cc: stable(a)kernel.org
Fixes: 6781eabba1bd ("f2fs: give -EINVAL for norecovery and rw mount")
Signed-off-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 215e442db72c..d7faebaa3c6b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -399,7 +399,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
- bool check_only)
+ bool check_only, bool *new_inode)
{
struct curseg_info *curseg;
block_t blkaddr, blkaddr_fast;
@@ -447,16 +447,19 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
quota_inode = true;
}
- /*
- * CP | dnode(F) | inode(DF)
- * For this case, we should not give up now.
- */
entry = add_fsync_inode(sbi, head, ino_of_node(folio),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
- if (err == -ENOENT)
+ /*
+ * CP | dnode(F) | inode(DF)
+ * For this case, we should not give up now.
+ */
+ if (err == -ENOENT) {
+ if (check_only)
+ *new_inode = true;
goto next;
+ }
f2fs_folio_put(folio, true);
break;
}
@@ -875,6 +878,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+ bool new_inode = false;
f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
"check_only: %d", check_only);
@@ -890,8 +894,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
f2fs_down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
- err = find_fsync_dnodes(sbi, &inode_list, check_only);
- if (err || list_empty(&inode_list))
+ err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode);
+ if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode)))
goto skip;
if (check_only) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 68d05693f8c031257a0822464366e1c2a239a512
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122953-length-breeding-7553@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 68d05693f8c031257a0822464366e1c2a239a512 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao(a)kernel.org>
Date: Wed, 5 Nov 2025 14:50:23 +0800
Subject: [PATCH] f2fs: fix to detect recoverable inode during dryrun of
find_fsync_dnodes()
mkfs.f2fs -f /dev/vdd
mount /dev/vdd /mnt/f2fs
touch /mnt/f2fs/foo
sync # avoid CP_UMOUNT_FLAG in last f2fs_checkpoint.ckpt_flags
touch /mnt/f2fs/bar
f2fs_io fsync /mnt/f2fs/bar
f2fs_io shutdown 2 /mnt/f2fs
umount /mnt/f2fs
blockdev --setro /dev/vdd
mount /dev/vdd /mnt/f2fs
mount: /mnt/f2fs: WARNING: source write-protected, mounted read-only.
For the case if we create and fsync a new inode before sudden power-cut,
without norecovery or disable_roll_forward mount option, the following
mount will succeed w/o recovering last fsynced inode.
The problem here is that we only check inode_list list after
find_fsync_dnodes() in f2fs_recover_fsync_data() to find out whether
there is recoverable data in the iamge, but there is a missed case, if
last fsynced inode is not existing in last checkpoint, then, we will
fail to get its inode due to nat of inode node is not existing in last
checkpoint, so the inode won't be linked in inode_list.
Let's detect such case in dyrun mode to fix this issue.
After this change, mount will fail as expected below:
mount: /mnt/f2fs: cannot mount /dev/vdd read-only.
dmesg(1) may have more information after failed mount system call.
demsg:
F2FS-fs (vdd): Need to recover fsync data, but write access unavailable, please try mount w/ disable_roll_forward or norecovery
Cc: stable(a)kernel.org
Fixes: 6781eabba1bd ("f2fs: give -EINVAL for norecovery and rw mount")
Signed-off-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 215e442db72c..d7faebaa3c6b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -399,7 +399,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
- bool check_only)
+ bool check_only, bool *new_inode)
{
struct curseg_info *curseg;
block_t blkaddr, blkaddr_fast;
@@ -447,16 +447,19 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
quota_inode = true;
}
- /*
- * CP | dnode(F) | inode(DF)
- * For this case, we should not give up now.
- */
entry = add_fsync_inode(sbi, head, ino_of_node(folio),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
- if (err == -ENOENT)
+ /*
+ * CP | dnode(F) | inode(DF)
+ * For this case, we should not give up now.
+ */
+ if (err == -ENOENT) {
+ if (check_only)
+ *new_inode = true;
goto next;
+ }
f2fs_folio_put(folio, true);
break;
}
@@ -875,6 +878,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+ bool new_inode = false;
f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
"check_only: %d", check_only);
@@ -890,8 +894,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
f2fs_down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
- err = find_fsync_dnodes(sbi, &inode_list, check_only);
- if (err || list_empty(&inode_list))
+ err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode);
+ if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode)))
goto skip;
if (check_only) {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 68d05693f8c031257a0822464366e1c2a239a512
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122952-earlobe-rework-7c0d@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 68d05693f8c031257a0822464366e1c2a239a512 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao(a)kernel.org>
Date: Wed, 5 Nov 2025 14:50:23 +0800
Subject: [PATCH] f2fs: fix to detect recoverable inode during dryrun of
find_fsync_dnodes()
mkfs.f2fs -f /dev/vdd
mount /dev/vdd /mnt/f2fs
touch /mnt/f2fs/foo
sync # avoid CP_UMOUNT_FLAG in last f2fs_checkpoint.ckpt_flags
touch /mnt/f2fs/bar
f2fs_io fsync /mnt/f2fs/bar
f2fs_io shutdown 2 /mnt/f2fs
umount /mnt/f2fs
blockdev --setro /dev/vdd
mount /dev/vdd /mnt/f2fs
mount: /mnt/f2fs: WARNING: source write-protected, mounted read-only.
For the case if we create and fsync a new inode before sudden power-cut,
without norecovery or disable_roll_forward mount option, the following
mount will succeed w/o recovering last fsynced inode.
The problem here is that we only check inode_list list after
find_fsync_dnodes() in f2fs_recover_fsync_data() to find out whether
there is recoverable data in the iamge, but there is a missed case, if
last fsynced inode is not existing in last checkpoint, then, we will
fail to get its inode due to nat of inode node is not existing in last
checkpoint, so the inode won't be linked in inode_list.
Let's detect such case in dyrun mode to fix this issue.
After this change, mount will fail as expected below:
mount: /mnt/f2fs: cannot mount /dev/vdd read-only.
dmesg(1) may have more information after failed mount system call.
demsg:
F2FS-fs (vdd): Need to recover fsync data, but write access unavailable, please try mount w/ disable_roll_forward or norecovery
Cc: stable(a)kernel.org
Fixes: 6781eabba1bd ("f2fs: give -EINVAL for norecovery and rw mount")
Signed-off-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 215e442db72c..d7faebaa3c6b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -399,7 +399,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
- bool check_only)
+ bool check_only, bool *new_inode)
{
struct curseg_info *curseg;
block_t blkaddr, blkaddr_fast;
@@ -447,16 +447,19 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
quota_inode = true;
}
- /*
- * CP | dnode(F) | inode(DF)
- * For this case, we should not give up now.
- */
entry = add_fsync_inode(sbi, head, ino_of_node(folio),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
- if (err == -ENOENT)
+ /*
+ * CP | dnode(F) | inode(DF)
+ * For this case, we should not give up now.
+ */
+ if (err == -ENOENT) {
+ if (check_only)
+ *new_inode = true;
goto next;
+ }
f2fs_folio_put(folio, true);
break;
}
@@ -875,6 +878,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+ bool new_inode = false;
f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
"check_only: %d", check_only);
@@ -890,8 +894,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
f2fs_down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
- err = find_fsync_dnodes(sbi, &inode_list, check_only);
- if (err || list_empty(&inode_list))
+ err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode);
+ if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode)))
goto skip;
if (check_only) {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 74098cc06e753d3ffd8398b040a3a1dfb65260c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122918-sagging-divisible-a4a4@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 74098cc06e753d3ffd8398b040a3a1dfb65260c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Bartosik?= <ukaszb(a)chromium.org>
Date: Thu, 27 Nov 2025 11:16:44 +0000
Subject: [PATCH] xhci: dbgtty: fix device unregister: fixup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This fixup replaces tty_vhangup() call with call to
tty_port_tty_vhangup(). Both calls hangup tty device
synchronously however tty_port_tty_vhangup() increases
reference count during the hangup operation using
scoped_guard(tty_port_tty).
Cc: stable <stable(a)kernel.org>
Fixes: 1f73b8b56cf3 ("xhci: dbgtty: fix device unregister")
Signed-off-by: Łukasz Bartosik <ukaszb(a)chromium.org>
Link: https://patch.msgid.link/20251127111644.3161386-1-ukaszb@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
index 57cdda4e09c8..90282e51e23e 100644
--- a/drivers/usb/host/xhci-dbgtty.c
+++ b/drivers/usb/host/xhci-dbgtty.c
@@ -554,7 +554,7 @@ static void xhci_dbc_tty_unregister_device(struct xhci_dbc *dbc)
* Hang up the TTY. This wakes up any blocked
* writers and causes subsequent writes to fail.
*/
- tty_vhangup(port->port.tty);
+ tty_port_tty_vhangup(&port->port);
tty_unregister_device(dbc_tty_driver, port->minor);
xhci_dbc_tty_exit_port(port);
The error path of xfs_attr_leaf_hasname() can leave a NULL
xfs_buf pointer. xfs_has_attr() checks for the NULL pointer but
the other callers do not.
We tripped over the NULL pointer in xfs_attr_leaf_get() but fix
the other callers too.
Fixes v5.8-rc4-95-g07120f1abdff ("xfs: Add xfs_has_attr and subroutines")
No reproducer.
Cc: stable(a)vger.kernel.org # v5.19+ with another port for v5.9 - v5.18
Signed-off-by: Mark Tinguely <mark.tinguely(a)oracle.com>
---
fs/xfs/libxfs/xfs_attr.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 8c04acd30d48..25e2ecf20d14 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -1266,7 +1266,8 @@ xfs_attr_leaf_removename(
error = xfs_attr_leaf_hasname(args, &bp);
if (error == -ENOATTR) {
- xfs_trans_brelse(args->trans, bp);
+ if (bp)
+ xfs_trans_brelse(args->trans, bp);
if (args->op_flags & XFS_DA_OP_RECOVERY)
return 0;
return error;
@@ -1305,7 +1306,8 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
error = xfs_attr_leaf_hasname(args, &bp);
if (error == -ENOATTR) {
- xfs_trans_brelse(args->trans, bp);
+ if (bp)
+ xfs_trans_brelse(args->trans, bp);
return error;
} else if (error != -EEXIST)
return error;
--
2.50.1 (Apple Git-155)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 68d05693f8c031257a0822464366e1c2a239a512
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122951-marvelous-paramount-6494@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 68d05693f8c031257a0822464366e1c2a239a512 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao(a)kernel.org>
Date: Wed, 5 Nov 2025 14:50:23 +0800
Subject: [PATCH] f2fs: fix to detect recoverable inode during dryrun of
find_fsync_dnodes()
mkfs.f2fs -f /dev/vdd
mount /dev/vdd /mnt/f2fs
touch /mnt/f2fs/foo
sync # avoid CP_UMOUNT_FLAG in last f2fs_checkpoint.ckpt_flags
touch /mnt/f2fs/bar
f2fs_io fsync /mnt/f2fs/bar
f2fs_io shutdown 2 /mnt/f2fs
umount /mnt/f2fs
blockdev --setro /dev/vdd
mount /dev/vdd /mnt/f2fs
mount: /mnt/f2fs: WARNING: source write-protected, mounted read-only.
For the case if we create and fsync a new inode before sudden power-cut,
without norecovery or disable_roll_forward mount option, the following
mount will succeed w/o recovering last fsynced inode.
The problem here is that we only check inode_list list after
find_fsync_dnodes() in f2fs_recover_fsync_data() to find out whether
there is recoverable data in the iamge, but there is a missed case, if
last fsynced inode is not existing in last checkpoint, then, we will
fail to get its inode due to nat of inode node is not existing in last
checkpoint, so the inode won't be linked in inode_list.
Let's detect such case in dyrun mode to fix this issue.
After this change, mount will fail as expected below:
mount: /mnt/f2fs: cannot mount /dev/vdd read-only.
dmesg(1) may have more information after failed mount system call.
demsg:
F2FS-fs (vdd): Need to recover fsync data, but write access unavailable, please try mount w/ disable_roll_forward or norecovery
Cc: stable(a)kernel.org
Fixes: 6781eabba1bd ("f2fs: give -EINVAL for norecovery and rw mount")
Signed-off-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 215e442db72c..d7faebaa3c6b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -399,7 +399,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
- bool check_only)
+ bool check_only, bool *new_inode)
{
struct curseg_info *curseg;
block_t blkaddr, blkaddr_fast;
@@ -447,16 +447,19 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
quota_inode = true;
}
- /*
- * CP | dnode(F) | inode(DF)
- * For this case, we should not give up now.
- */
entry = add_fsync_inode(sbi, head, ino_of_node(folio),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
- if (err == -ENOENT)
+ /*
+ * CP | dnode(F) | inode(DF)
+ * For this case, we should not give up now.
+ */
+ if (err == -ENOENT) {
+ if (check_only)
+ *new_inode = true;
goto next;
+ }
f2fs_folio_put(folio, true);
break;
}
@@ -875,6 +878,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+ bool new_inode = false;
f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
"check_only: %d", check_only);
@@ -890,8 +894,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
f2fs_down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
- err = find_fsync_dnodes(sbi, &inode_list, check_only);
- if (err || list_empty(&inode_list))
+ err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode);
+ if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode)))
goto skip;
if (check_only) {
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 74098cc06e753d3ffd8398b040a3a1dfb65260c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122917-keenly-greyhound-4fa6@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 74098cc06e753d3ffd8398b040a3a1dfb65260c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Bartosik?= <ukaszb(a)chromium.org>
Date: Thu, 27 Nov 2025 11:16:44 +0000
Subject: [PATCH] xhci: dbgtty: fix device unregister: fixup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This fixup replaces tty_vhangup() call with call to
tty_port_tty_vhangup(). Both calls hangup tty device
synchronously however tty_port_tty_vhangup() increases
reference count during the hangup operation using
scoped_guard(tty_port_tty).
Cc: stable <stable(a)kernel.org>
Fixes: 1f73b8b56cf3 ("xhci: dbgtty: fix device unregister")
Signed-off-by: Łukasz Bartosik <ukaszb(a)chromium.org>
Link: https://patch.msgid.link/20251127111644.3161386-1-ukaszb@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
index 57cdda4e09c8..90282e51e23e 100644
--- a/drivers/usb/host/xhci-dbgtty.c
+++ b/drivers/usb/host/xhci-dbgtty.c
@@ -554,7 +554,7 @@ static void xhci_dbc_tty_unregister_device(struct xhci_dbc *dbc)
* Hang up the TTY. This wakes up any blocked
* writers and causes subsequent writes to fail.
*/
- tty_vhangup(port->port.tty);
+ tty_port_tty_vhangup(&port->port);
tty_unregister_device(dbc_tty_driver, port->minor);
xhci_dbc_tty_exit_port(port);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 74098cc06e753d3ffd8398b040a3a1dfb65260c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122917-unsheathe-breeder-0ac2@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 74098cc06e753d3ffd8398b040a3a1dfb65260c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Bartosik?= <ukaszb(a)chromium.org>
Date: Thu, 27 Nov 2025 11:16:44 +0000
Subject: [PATCH] xhci: dbgtty: fix device unregister: fixup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This fixup replaces tty_vhangup() call with call to
tty_port_tty_vhangup(). Both calls hangup tty device
synchronously however tty_port_tty_vhangup() increases
reference count during the hangup operation using
scoped_guard(tty_port_tty).
Cc: stable <stable(a)kernel.org>
Fixes: 1f73b8b56cf3 ("xhci: dbgtty: fix device unregister")
Signed-off-by: Łukasz Bartosik <ukaszb(a)chromium.org>
Link: https://patch.msgid.link/20251127111644.3161386-1-ukaszb@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
index 57cdda4e09c8..90282e51e23e 100644
--- a/drivers/usb/host/xhci-dbgtty.c
+++ b/drivers/usb/host/xhci-dbgtty.c
@@ -554,7 +554,7 @@ static void xhci_dbc_tty_unregister_device(struct xhci_dbc *dbc)
* Hang up the TTY. This wakes up any blocked
* writers and causes subsequent writes to fail.
*/
- tty_vhangup(port->port.tty);
+ tty_port_tty_vhangup(&port->port);
tty_unregister_device(dbc_tty_driver, port->minor);
xhci_dbc_tty_exit_port(port);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 68d05693f8c031257a0822464366e1c2a239a512
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122945-sasquatch-unexposed-d830@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 68d05693f8c031257a0822464366e1c2a239a512 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao(a)kernel.org>
Date: Wed, 5 Nov 2025 14:50:23 +0800
Subject: [PATCH] f2fs: fix to detect recoverable inode during dryrun of
find_fsync_dnodes()
mkfs.f2fs -f /dev/vdd
mount /dev/vdd /mnt/f2fs
touch /mnt/f2fs/foo
sync # avoid CP_UMOUNT_FLAG in last f2fs_checkpoint.ckpt_flags
touch /mnt/f2fs/bar
f2fs_io fsync /mnt/f2fs/bar
f2fs_io shutdown 2 /mnt/f2fs
umount /mnt/f2fs
blockdev --setro /dev/vdd
mount /dev/vdd /mnt/f2fs
mount: /mnt/f2fs: WARNING: source write-protected, mounted read-only.
For the case if we create and fsync a new inode before sudden power-cut,
without norecovery or disable_roll_forward mount option, the following
mount will succeed w/o recovering last fsynced inode.
The problem here is that we only check inode_list list after
find_fsync_dnodes() in f2fs_recover_fsync_data() to find out whether
there is recoverable data in the iamge, but there is a missed case, if
last fsynced inode is not existing in last checkpoint, then, we will
fail to get its inode due to nat of inode node is not existing in last
checkpoint, so the inode won't be linked in inode_list.
Let's detect such case in dyrun mode to fix this issue.
After this change, mount will fail as expected below:
mount: /mnt/f2fs: cannot mount /dev/vdd read-only.
dmesg(1) may have more information after failed mount system call.
demsg:
F2FS-fs (vdd): Need to recover fsync data, but write access unavailable, please try mount w/ disable_roll_forward or norecovery
Cc: stable(a)kernel.org
Fixes: 6781eabba1bd ("f2fs: give -EINVAL for norecovery and rw mount")
Signed-off-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 215e442db72c..d7faebaa3c6b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -399,7 +399,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
- bool check_only)
+ bool check_only, bool *new_inode)
{
struct curseg_info *curseg;
block_t blkaddr, blkaddr_fast;
@@ -447,16 +447,19 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
quota_inode = true;
}
- /*
- * CP | dnode(F) | inode(DF)
- * For this case, we should not give up now.
- */
entry = add_fsync_inode(sbi, head, ino_of_node(folio),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
- if (err == -ENOENT)
+ /*
+ * CP | dnode(F) | inode(DF)
+ * For this case, we should not give up now.
+ */
+ if (err == -ENOENT) {
+ if (check_only)
+ *new_inode = true;
goto next;
+ }
f2fs_folio_put(folio, true);
break;
}
@@ -875,6 +878,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+ bool new_inode = false;
f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
"check_only: %d", check_only);
@@ -890,8 +894,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
f2fs_down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
- err = find_fsync_dnodes(sbi, &inode_list, check_only);
- if (err || list_empty(&inode_list))
+ err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode);
+ if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode)))
goto skip;
if (check_only) {
Hi Stable Maintainers,
I would like to request the following patch be merged to the stable tree:
Upstream commit: 58075aec92a8141fd7f42e1c36d1bc54552c015e
Patch subject: powercap: intel_rapl: Add support for NOVALAKE processors
Author: Kaushlendra Kumar <kaushlendra.kumar(a)intel.com>
Upstream link: https://lore.kernel.org/all/20251028101814.3482508-1-kaushlendra.kumar@inte…
Reason for stable inclusion:
It is required for Android 17 release which is based on v6.18 stable tree. You can find related
discussion in https://android-review.googlesource.com/c/kernel/common/+/3895011. This patch
should be applied to the following kernel versions: v6.18
This patch applies cleanly to v6.18 stable branch with no conflicts.
Please let me know if you need any additional information or if there are any concerns with this stable back-port request.
--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
Hi Stable Maintainers,
I would like to request the following patch be merged to the stable tree:
Upstream commit: 39f421f2e301f995c17c35b783e2863155b3f647
Patch subject: powercap: intel_rapl: Add support for Wildcat Lake platform
Author: Srinivas Pandruvada srinivas.pandruvada(a)linux.intel.com
Upstream link: https://lore.kernel.org/all/20251023174532.1882008-1-srinivas.pandruvada@li…
Reason for stable inclusion:
It is required for Android 17 release which is based on v6.18 stable tree. You can find related
discussion in https://android-review.googlesource.com/c/kernel/common/+/3895010]. This patch
should be applied to the following stable branches: linux-6.18.y
Testing:
This patch works fine in Wildcat lake platform and it applies cleanly to linux-6.18.y branch with no conflicts.
Please let me know if you need any additional information or if there are any concerns with this stable back-port request.
--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
When the CRU is configured to use ICnSVC for virtual channel mapping,
as on the RZ/{G3E, V2H/P} SoC, the ICnMC register must not be
programmed.
Return early after setting up ICnSVC to avoid overriding the ICnMC
register, which is not applicable in this mode.
This prevents unintended register programming when ICnSVC is enabled.
Fixes: 3c5ca0a48bb0 ("media: rzg2l-cru: Drop function pointer to configure CSI")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tommaso Merciai <tommaso.merciai.xr(a)bp.renesas.com>
---
drivers/media/platform/renesas/rzg2l-cru/rzg2l-video.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/media/platform/renesas/rzg2l-cru/rzg2l-video.c b/drivers/media/platform/renesas/rzg2l-cru/rzg2l-video.c
index 162e2ace6931..480e9b5dbcfe 100644
--- a/drivers/media/platform/renesas/rzg2l-cru/rzg2l-video.c
+++ b/drivers/media/platform/renesas/rzg2l-cru/rzg2l-video.c
@@ -268,6 +268,8 @@ static void rzg2l_cru_csi2_setup(struct rzg2l_cru_dev *cru,
rzg2l_cru_write(cru, ICnSVCNUM, csi_vc);
rzg2l_cru_write(cru, ICnSVC, ICnSVC_SVC0(0) | ICnSVC_SVC1(1) |
ICnSVC_SVC2(2) | ICnSVC_SVC3(3));
+
+ return;
}
icnmc |= rzg2l_cru_read(cru, info->image_conv) & ~ICnMC_INF_MASK;
--
2.43.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x c9f36f04a8a2725172cdf2b5e32363e4addcb14c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122946-repeater-wasting-b205@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c9f36f04a8a2725172cdf2b5e32363e4addcb14c Mon Sep 17 00:00:00 2001
From: Seunghwan Baek <sh8267.baek(a)samsung.com>
Date: Wed, 10 Dec 2025 15:38:54 +0900
Subject: [PATCH] scsi: ufs: core: Add ufshcd_update_evt_hist() for UFS suspend
error
If UFS resume fails, the event history is updated in ufshcd_resume(), but
there is no code anywhere to record UFS suspend. Therefore, add code to
record UFS suspend error event history.
Fixes: dd11376b9f1b ("scsi: ufs: Split the drivers/scsi/ufs directory")
Cc: stable(a)vger.kernel.org
Signed-off-by: Seunghwan Baek <sh8267.baek(a)samsung.com>
Reviewed-by: Peter Wang <peter.wang(a)mediatek.com>
Link: https://patch.msgid.link/20251210063854.1483899-2-sh8267.baek@samsung.com
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 80c0b49f30b0..0babb7035200 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -10359,7 +10359,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
ret = ufshcd_setup_clocks(hba, false);
if (ret) {
ufshcd_enable_irq(hba);
- return ret;
+ goto out;
}
if (ufshcd_is_clkgating_allowed(hba)) {
hba->clk_gating.state = CLKS_OFF;
@@ -10371,6 +10371,9 @@ static int ufshcd_suspend(struct ufs_hba *hba)
/* Put the host controller in low power mode if possible */
ufshcd_hba_vreg_set_lpm(hba);
ufshcd_pm_qos_update(hba, false);
+out:
+ if (ret)
+ ufshcd_update_evt_hist(hba, UFS_EVT_SUSPEND_ERR, (u32)ret);
return ret;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e1003aa7ec9eccdde4c926bd64ef42816ad55f25
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122912-stuck-knickers-4454@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e1003aa7ec9eccdde4c926bd64ef42816ad55f25 Mon Sep 17 00:00:00 2001
From: Udipto Goswami <udipto.goswami(a)oss.qualcomm.com>
Date: Wed, 26 Nov 2025 11:12:21 +0530
Subject: [PATCH] usb: dwc3: keep susphy enabled during exit to avoid
controller faults
On some platforms, switching USB roles from host to device can trigger
controller faults due to premature PHY power-down. This occurs when the
PHY is disabled too early during teardown, causing synchronization
issues between the PHY and controller.
Keep susphy enabled during dwc3_host_exit() and dwc3_gadget_exit()
ensures the PHY remains in a low-power state capable of handling
required commands during role switch.
Cc: stable <stable(a)kernel.org>
Fixes: 6d735722063a ("usb: dwc3: core: Prevent phy suspend during init")
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Udipto Goswami <udipto.goswami(a)oss.qualcomm.com>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://patch.msgid.link/20251126054221.120638-1-udipto.goswami@oss.qualcom…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index bc3fe31638b9..8a35a6901db7 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4826,7 +4826,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
if (!dwc->gadget)
return;
- dwc3_enable_susphy(dwc, false);
+ dwc3_enable_susphy(dwc, true);
usb_del_gadget(dwc->gadget);
dwc3_gadget_free_endpoints(dwc);
usb_put_gadget(dwc->gadget);
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index cf6512ed17a6..96b588bd08cd 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -227,7 +227,7 @@ void dwc3_host_exit(struct dwc3 *dwc)
if (dwc->sys_wakeup)
device_init_wakeup(&dwc->xhci->dev, false);
- dwc3_enable_susphy(dwc, false);
+ dwc3_enable_susphy(dwc, true);
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x c9f36f04a8a2725172cdf2b5e32363e4addcb14c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122946-attic-proofread-96f2@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c9f36f04a8a2725172cdf2b5e32363e4addcb14c Mon Sep 17 00:00:00 2001
From: Seunghwan Baek <sh8267.baek(a)samsung.com>
Date: Wed, 10 Dec 2025 15:38:54 +0900
Subject: [PATCH] scsi: ufs: core: Add ufshcd_update_evt_hist() for UFS suspend
error
If UFS resume fails, the event history is updated in ufshcd_resume(), but
there is no code anywhere to record UFS suspend. Therefore, add code to
record UFS suspend error event history.
Fixes: dd11376b9f1b ("scsi: ufs: Split the drivers/scsi/ufs directory")
Cc: stable(a)vger.kernel.org
Signed-off-by: Seunghwan Baek <sh8267.baek(a)samsung.com>
Reviewed-by: Peter Wang <peter.wang(a)mediatek.com>
Link: https://patch.msgid.link/20251210063854.1483899-2-sh8267.baek@samsung.com
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 80c0b49f30b0..0babb7035200 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -10359,7 +10359,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
ret = ufshcd_setup_clocks(hba, false);
if (ret) {
ufshcd_enable_irq(hba);
- return ret;
+ goto out;
}
if (ufshcd_is_clkgating_allowed(hba)) {
hba->clk_gating.state = CLKS_OFF;
@@ -10371,6 +10371,9 @@ static int ufshcd_suspend(struct ufs_hba *hba)
/* Put the host controller in low power mode if possible */
ufshcd_hba_vreg_set_lpm(hba);
ufshcd_pm_qos_update(hba, false);
+out:
+ if (ret)
+ ufshcd_update_evt_hist(hba, UFS_EVT_SUSPEND_ERR, (u32)ret);
return ret;
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 267ee93c417e685d9f8e079e41c70ba6ee4df5a5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122923-amaretto-output-f3dc@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 267ee93c417e685d9f8e079e41c70ba6ee4df5a5 Mon Sep 17 00:00:00 2001
From: "j.turek" <jakub.turek(a)elsta.tech>
Date: Sun, 21 Dec 2025 11:32:21 +0100
Subject: [PATCH] serial: xilinx_uartps: fix rs485 delay_rts_after_send
RTS line control with delay should be triggered when there is no more
bytes in kfifo and hardware buffer is empty. Without this patch RTS
control is scheduled right after feeding hardware buffer and this is too
early.
RTS line may change state before hardware buffer is empty.
With this patch delayed RTS state change is triggered when function
cdns_uart_handle_tx is called from cdns_uart_isr on
CDNS_UART_IXR_TXEMPTY exactly when hardware completed transmission
Fixes: fccc9d9233f9 ("tty: serial: uartps: Add rs485 support to uartps driver")
Cc: stable <stable(a)kernel.org>
Link: https://patch.msgid.link/20251221103221.1971125-1-jakub.turek@elsta.tech
Signed-off-by: Jakub Turek <jakub.turek(a)elsta.tech>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index c793fc74c26b..c593d20a1b5b 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -428,10 +428,17 @@ static void cdns_uart_handle_tx(void *dev_id)
struct tty_port *tport = &port->state->port;
unsigned int numbytes;
unsigned char ch;
+ ktime_t rts_delay;
if (kfifo_is_empty(&tport->xmit_fifo) || uart_tx_stopped(port)) {
/* Disable the TX Empty interrupt */
writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_IDR);
+ /* Set RTS line after delay */
+ if (cdns_uart->port->rs485.flags & SER_RS485_ENABLED) {
+ cdns_uart->tx_timer.function = &cdns_rs485_rx_callback;
+ rts_delay = ns_to_ktime(cdns_calc_after_tx_delay(cdns_uart));
+ hrtimer_start(&cdns_uart->tx_timer, rts_delay, HRTIMER_MODE_REL);
+ }
return;
}
@@ -448,13 +455,6 @@ static void cdns_uart_handle_tx(void *dev_id)
/* Enable the TX Empty interrupt */
writel(CDNS_UART_IXR_TXEMPTY, cdns_uart->port->membase + CDNS_UART_IER);
-
- if (cdns_uart->port->rs485.flags & SER_RS485_ENABLED &&
- (kfifo_is_empty(&tport->xmit_fifo) || uart_tx_stopped(port))) {
- hrtimer_update_function(&cdns_uart->tx_timer, cdns_rs485_rx_callback);
- hrtimer_start(&cdns_uart->tx_timer,
- ns_to_ktime(cdns_calc_after_tx_delay(cdns_uart)), HRTIMER_MODE_REL);
- }
}
/**
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e1003aa7ec9eccdde4c926bd64ef42816ad55f25
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122911-punctual-slapping-497e@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e1003aa7ec9eccdde4c926bd64ef42816ad55f25 Mon Sep 17 00:00:00 2001
From: Udipto Goswami <udipto.goswami(a)oss.qualcomm.com>
Date: Wed, 26 Nov 2025 11:12:21 +0530
Subject: [PATCH] usb: dwc3: keep susphy enabled during exit to avoid
controller faults
On some platforms, switching USB roles from host to device can trigger
controller faults due to premature PHY power-down. This occurs when the
PHY is disabled too early during teardown, causing synchronization
issues between the PHY and controller.
Keep susphy enabled during dwc3_host_exit() and dwc3_gadget_exit()
ensures the PHY remains in a low-power state capable of handling
required commands during role switch.
Cc: stable <stable(a)kernel.org>
Fixes: 6d735722063a ("usb: dwc3: core: Prevent phy suspend during init")
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Udipto Goswami <udipto.goswami(a)oss.qualcomm.com>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://patch.msgid.link/20251126054221.120638-1-udipto.goswami@oss.qualcom…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index bc3fe31638b9..8a35a6901db7 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4826,7 +4826,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
if (!dwc->gadget)
return;
- dwc3_enable_susphy(dwc, false);
+ dwc3_enable_susphy(dwc, true);
usb_del_gadget(dwc->gadget);
dwc3_gadget_free_endpoints(dwc);
usb_put_gadget(dwc->gadget);
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index cf6512ed17a6..96b588bd08cd 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -227,7 +227,7 @@ void dwc3_host_exit(struct dwc3 *dwc)
if (dwc->sys_wakeup)
device_init_wakeup(&dwc->xhci->dev, false);
- dwc3_enable_susphy(dwc, false);
+ dwc3_enable_susphy(dwc, true);
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x e1003aa7ec9eccdde4c926bd64ef42816ad55f25
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122911-secrecy-pedometer-eaa0@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e1003aa7ec9eccdde4c926bd64ef42816ad55f25 Mon Sep 17 00:00:00 2001
From: Udipto Goswami <udipto.goswami(a)oss.qualcomm.com>
Date: Wed, 26 Nov 2025 11:12:21 +0530
Subject: [PATCH] usb: dwc3: keep susphy enabled during exit to avoid
controller faults
On some platforms, switching USB roles from host to device can trigger
controller faults due to premature PHY power-down. This occurs when the
PHY is disabled too early during teardown, causing synchronization
issues between the PHY and controller.
Keep susphy enabled during dwc3_host_exit() and dwc3_gadget_exit()
ensures the PHY remains in a low-power state capable of handling
required commands during role switch.
Cc: stable <stable(a)kernel.org>
Fixes: 6d735722063a ("usb: dwc3: core: Prevent phy suspend during init")
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Udipto Goswami <udipto.goswami(a)oss.qualcomm.com>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://patch.msgid.link/20251126054221.120638-1-udipto.goswami@oss.qualcom…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index bc3fe31638b9..8a35a6901db7 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4826,7 +4826,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
if (!dwc->gadget)
return;
- dwc3_enable_susphy(dwc, false);
+ dwc3_enable_susphy(dwc, true);
usb_del_gadget(dwc->gadget);
dwc3_gadget_free_endpoints(dwc);
usb_put_gadget(dwc->gadget);
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index cf6512ed17a6..96b588bd08cd 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -227,7 +227,7 @@ void dwc3_host_exit(struct dwc3 *dwc)
if (dwc->sys_wakeup)
device_init_wakeup(&dwc->xhci->dev, false);
- dwc3_enable_susphy(dwc, false);
+ dwc3_enable_susphy(dwc, true);
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 0ace3297a7301911e52d8195cb1006414897c859
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122924-retold-train-22bc@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Fri, 5 Dec 2025 19:55:14 +0100
Subject: [PATCH] mptcp: pm: ignore unknown endpoint flags
Before this patch, the kernel was saving any flags set by the userspace,
even unknown ones. This doesn't cause critical issues because the kernel
is only looking at specific ones. But on the other hand, endpoints dumps
could tell the userspace some recent flags seem to be supported on older
kernel versions.
Instead, ignore all unknown flags when parsing them. By doing that, the
userspace can continue to set unsupported flags, but it has a way to
verify what is supported by the kernel.
Note that it sounds better to continue accepting unsupported flags not
to change the behaviour, but also that eases things on the userspace
side by adding "optional" endpoint types only supported by newer kernel
versions without having to deal with the different kernel versions.
A note for the backports: there will be conflicts in mptcp.h on older
versions not having the mentioned flags, the new line should still be
added last, and the '5' needs to be adapted to have the same value as
the last entry.
Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
Cc: stable(a)vger.kernel.org
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 04eea6d1d0a9..72a5d030154e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -40,6 +40,7 @@
#define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3)
#define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4)
#define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5)
+#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0)
struct mptcp_info {
__u8 mptcpi_subflows;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d5b383870f79..7aa42de9c47b 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
}
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
- entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
+ MPTCP_PM_ADDR_FLAGS_MASK;
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0ace3297a7301911e52d8195cb1006414897c859
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122923-crunchy-grapple-39a2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Fri, 5 Dec 2025 19:55:14 +0100
Subject: [PATCH] mptcp: pm: ignore unknown endpoint flags
Before this patch, the kernel was saving any flags set by the userspace,
even unknown ones. This doesn't cause critical issues because the kernel
is only looking at specific ones. But on the other hand, endpoints dumps
could tell the userspace some recent flags seem to be supported on older
kernel versions.
Instead, ignore all unknown flags when parsing them. By doing that, the
userspace can continue to set unsupported flags, but it has a way to
verify what is supported by the kernel.
Note that it sounds better to continue accepting unsupported flags not
to change the behaviour, but also that eases things on the userspace
side by adding "optional" endpoint types only supported by newer kernel
versions without having to deal with the different kernel versions.
A note for the backports: there will be conflicts in mptcp.h on older
versions not having the mentioned flags, the new line should still be
added last, and the '5' needs to be adapted to have the same value as
the last entry.
Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
Cc: stable(a)vger.kernel.org
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 04eea6d1d0a9..72a5d030154e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -40,6 +40,7 @@
#define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3)
#define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4)
#define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5)
+#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0)
struct mptcp_info {
__u8 mptcpi_subflows;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d5b383870f79..7aa42de9c47b 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
}
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
- entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
+ MPTCP_PM_ADDR_FLAGS_MASK;
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 24ec03cc55126b7b3adf102f4b3d9f716532b329
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122929-reoccupy-raking-f984@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24ec03cc55126b7b3adf102f4b3d9f716532b329 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Date: Thu, 27 Nov 2025 17:36:50 +0100
Subject: [PATCH] serial: core: Restore sysfs fwnode information
The change that restores sysfs fwnode information does it only for OF cases.
Update the fix to cover all possible types of fwnodes.
Fixes: d36f0e9a0002 ("serial: core: restore of_node information in sysfs")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://patch.msgid.link/20251127163650.2942075-1-andriy.shevchenko@linux.i…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c
index 22749ab0428a..8e891984cdc0 100644
--- a/drivers/tty/serial/serial_base_bus.c
+++ b/drivers/tty/serial/serial_base_bus.c
@@ -13,7 +13,7 @@
#include <linux/device.h>
#include <linux/idr.h>
#include <linux/module.h>
-#include <linux/of.h>
+#include <linux/property.h>
#include <linux/serial_core.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -60,6 +60,7 @@ void serial_base_driver_unregister(struct device_driver *driver)
driver_unregister(driver);
}
+/* On failure the caller must put device @dev with put_device() */
static int serial_base_device_init(struct uart_port *port,
struct device *dev,
struct device *parent_dev,
@@ -73,7 +74,8 @@ static int serial_base_device_init(struct uart_port *port,
dev->parent = parent_dev;
dev->bus = &serial_base_bus_type;
dev->release = release;
- device_set_of_node_from_dev(dev, parent_dev);
+
+ device_set_node(dev, fwnode_handle_get(dev_fwnode(parent_dev)));
if (!serial_base_initialized) {
dev_dbg(port->dev, "uart_add_one_port() called before arch_initcall()?\n");
@@ -94,7 +96,7 @@ static void serial_base_ctrl_release(struct device *dev)
{
struct serial_ctrl_device *ctrl_dev = to_serial_base_ctrl_device(dev);
- of_node_put(dev->of_node);
+ fwnode_handle_put(dev_fwnode(dev));
kfree(ctrl_dev);
}
@@ -142,7 +144,7 @@ static void serial_base_port_release(struct device *dev)
{
struct serial_port_device *port_dev = to_serial_base_port_device(dev);
- of_node_put(dev->of_node);
+ fwnode_handle_put(dev_fwnode(dev));
kfree(port_dev);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0ace3297a7301911e52d8195cb1006414897c859
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122923-underhand-chalice-083b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Fri, 5 Dec 2025 19:55:14 +0100
Subject: [PATCH] mptcp: pm: ignore unknown endpoint flags
Before this patch, the kernel was saving any flags set by the userspace,
even unknown ones. This doesn't cause critical issues because the kernel
is only looking at specific ones. But on the other hand, endpoints dumps
could tell the userspace some recent flags seem to be supported on older
kernel versions.
Instead, ignore all unknown flags when parsing them. By doing that, the
userspace can continue to set unsupported flags, but it has a way to
verify what is supported by the kernel.
Note that it sounds better to continue accepting unsupported flags not
to change the behaviour, but also that eases things on the userspace
side by adding "optional" endpoint types only supported by newer kernel
versions without having to deal with the different kernel versions.
A note for the backports: there will be conflicts in mptcp.h on older
versions not having the mentioned flags, the new line should still be
added last, and the '5' needs to be adapted to have the same value as
the last entry.
Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
Cc: stable(a)vger.kernel.org
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 04eea6d1d0a9..72a5d030154e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -40,6 +40,7 @@
#define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3)
#define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4)
#define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5)
+#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0)
struct mptcp_info {
__u8 mptcpi_subflows;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d5b383870f79..7aa42de9c47b 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
}
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
- entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
+ MPTCP_PM_ADDR_FLAGS_MASK;
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 0ace3297a7301911e52d8195cb1006414897c859
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122922-cedar-reapply-f9d1@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Fri, 5 Dec 2025 19:55:14 +0100
Subject: [PATCH] mptcp: pm: ignore unknown endpoint flags
Before this patch, the kernel was saving any flags set by the userspace,
even unknown ones. This doesn't cause critical issues because the kernel
is only looking at specific ones. But on the other hand, endpoints dumps
could tell the userspace some recent flags seem to be supported on older
kernel versions.
Instead, ignore all unknown flags when parsing them. By doing that, the
userspace can continue to set unsupported flags, but it has a way to
verify what is supported by the kernel.
Note that it sounds better to continue accepting unsupported flags not
to change the behaviour, but also that eases things on the userspace
side by adding "optional" endpoint types only supported by newer kernel
versions without having to deal with the different kernel versions.
A note for the backports: there will be conflicts in mptcp.h on older
versions not having the mentioned flags, the new line should still be
added last, and the '5' needs to be adapted to have the same value as
the last entry.
Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
Cc: stable(a)vger.kernel.org
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 04eea6d1d0a9..72a5d030154e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -40,6 +40,7 @@
#define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3)
#define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4)
#define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5)
+#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0)
struct mptcp_info {
__u8 mptcpi_subflows;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d5b383870f79..7aa42de9c47b 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
}
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
- entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
+ MPTCP_PM_ADDR_FLAGS_MASK;
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b4c61e542faf8c9131d69ecfc3ad6de96d1b2ab8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122927-preppy-grab-bb17@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b4c61e542faf8c9131d69ecfc3ad6de96d1b2ab8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Thu, 18 Dec 2025 16:35:17 +0100
Subject: [PATCH] usb: ohci-nxp: fix device leak on probe failure
Make sure to drop the reference taken when looking up the PHY I2C device
during probe on probe failure (e.g. probe deferral) and on driver
unbind.
Fixes: 73108aa90cbf ("USB: ohci-nxp: Use isp1301 driver")
Cc: stable(a)vger.kernel.org # 3.5
Reported-by: Ma Ke <make24(a)iscas.ac.cn>
Link: https://lore.kernel.org/lkml/20251117013428.21840-1-make24@iscas.ac.cn/
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Acked-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Vladimir Zapolskiy <vz(a)mleia.com>
Link: https://patch.msgid.link/20251218153519.19453-4-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/ohci-nxp.c b/drivers/usb/host/ohci-nxp.c
index 24d5a1dc5056..509ca7d8d513 100644
--- a/drivers/usb/host/ohci-nxp.c
+++ b/drivers/usb/host/ohci-nxp.c
@@ -223,6 +223,7 @@ static int ohci_hcd_nxp_probe(struct platform_device *pdev)
fail_resource:
usb_put_hcd(hcd);
fail_disable:
+ put_device(&isp1301_i2c_client->dev);
isp1301_i2c_client = NULL;
return ret;
}
@@ -234,6 +235,7 @@ static void ohci_hcd_nxp_remove(struct platform_device *pdev)
usb_remove_hcd(hcd);
ohci_nxp_stop_hc();
usb_put_hcd(hcd);
+ put_device(&isp1301_i2c_client->dev);
isp1301_i2c_client = NULL;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b4c61e542faf8c9131d69ecfc3ad6de96d1b2ab8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122927-swiftly-press-a51f@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b4c61e542faf8c9131d69ecfc3ad6de96d1b2ab8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Thu, 18 Dec 2025 16:35:17 +0100
Subject: [PATCH] usb: ohci-nxp: fix device leak on probe failure
Make sure to drop the reference taken when looking up the PHY I2C device
during probe on probe failure (e.g. probe deferral) and on driver
unbind.
Fixes: 73108aa90cbf ("USB: ohci-nxp: Use isp1301 driver")
Cc: stable(a)vger.kernel.org # 3.5
Reported-by: Ma Ke <make24(a)iscas.ac.cn>
Link: https://lore.kernel.org/lkml/20251117013428.21840-1-make24@iscas.ac.cn/
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Acked-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Vladimir Zapolskiy <vz(a)mleia.com>
Link: https://patch.msgid.link/20251218153519.19453-4-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/ohci-nxp.c b/drivers/usb/host/ohci-nxp.c
index 24d5a1dc5056..509ca7d8d513 100644
--- a/drivers/usb/host/ohci-nxp.c
+++ b/drivers/usb/host/ohci-nxp.c
@@ -223,6 +223,7 @@ static int ohci_hcd_nxp_probe(struct platform_device *pdev)
fail_resource:
usb_put_hcd(hcd);
fail_disable:
+ put_device(&isp1301_i2c_client->dev);
isp1301_i2c_client = NULL;
return ret;
}
@@ -234,6 +235,7 @@ static void ohci_hcd_nxp_remove(struct platform_device *pdev)
usb_remove_hcd(hcd);
ohci_nxp_stop_hc();
usb_put_hcd(hcd);
+ put_device(&isp1301_i2c_client->dev);
isp1301_i2c_client = NULL;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b4c61e542faf8c9131d69ecfc3ad6de96d1b2ab8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122926-trifocals-slept-6573@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b4c61e542faf8c9131d69ecfc3ad6de96d1b2ab8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Thu, 18 Dec 2025 16:35:17 +0100
Subject: [PATCH] usb: ohci-nxp: fix device leak on probe failure
Make sure to drop the reference taken when looking up the PHY I2C device
during probe on probe failure (e.g. probe deferral) and on driver
unbind.
Fixes: 73108aa90cbf ("USB: ohci-nxp: Use isp1301 driver")
Cc: stable(a)vger.kernel.org # 3.5
Reported-by: Ma Ke <make24(a)iscas.ac.cn>
Link: https://lore.kernel.org/lkml/20251117013428.21840-1-make24@iscas.ac.cn/
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Acked-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Vladimir Zapolskiy <vz(a)mleia.com>
Link: https://patch.msgid.link/20251218153519.19453-4-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/ohci-nxp.c b/drivers/usb/host/ohci-nxp.c
index 24d5a1dc5056..509ca7d8d513 100644
--- a/drivers/usb/host/ohci-nxp.c
+++ b/drivers/usb/host/ohci-nxp.c
@@ -223,6 +223,7 @@ static int ohci_hcd_nxp_probe(struct platform_device *pdev)
fail_resource:
usb_put_hcd(hcd);
fail_disable:
+ put_device(&isp1301_i2c_client->dev);
isp1301_i2c_client = NULL;
return ret;
}
@@ -234,6 +235,7 @@ static void ohci_hcd_nxp_remove(struct platform_device *pdev)
usb_remove_hcd(hcd);
ohci_nxp_stop_hc();
usb_put_hcd(hcd);
+ put_device(&isp1301_i2c_client->dev);
isp1301_i2c_client = NULL;
}
From: Mateusz Litwin <mateusz.litwin(a)nokia.com>
[ Upstream commit d67396c9d697041b385d70ff2fd59cb07ae167e8 ]
A race condition exists between the read loop and IRQ `complete()` call.
An interrupt could call the complete() between the inner loop and
reinit_completion(), potentially losing the completion event and causing
an unnecessary timeout. Moving reinit_completion() before the loop
prevents this. A premature signal will only result in a spurious wakeup
and another wait cycle, which is preferable to waiting for a timeout.
Signed-off-by: Mateusz Litwin <mateusz.litwin(a)nokia.com>
Link: https://patch.msgid.link/20251218-cqspi_indirect_read_improve-v2-1-39607997…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
### Summary
**Pros for backporting:**
1. ✅ Fixes a genuine race condition causing timeouts
2. ✅ Small, surgical fix (< 10 lines changed)
3. ✅ Logic is obviously correct and well-explained
4. ✅ Low risk - worst case is benign spurious wakeup
5. ✅ Reviewed by subsystem maintainer (Mark Brown)
6. ✅ No dependencies on other commits
7. ✅ Driver has been in the kernel since 2020, exists in all active
stable trees
8. ✅ From a reputable source (Nokia) dealing with real hardware
**Cons:**
- None significant. The absence of explicit Fixes:/Cc: stable tags is
expected for commits under manual review.
### Risk vs Benefit
- **Risk**: Very low - the change is simple, localized, and
mathematically correct
- **Benefit**: Prevents user-visible timeouts during SPI read operations
on affected hardware
This commit is an excellent candidate for stable backporting. It fixes a
real race condition bug with a small, obvious, and low-risk change. The
fix has been reviewed by the SPI maintainer and addresses real hardware
issues without introducing new features or significant complexity.
**YES**
drivers/spi/spi-cadence-quadspi.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index af6d050da1c8..695dbf7142d1 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -769,6 +769,7 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
readl(reg_base + CQSPI_REG_INDIRECTRD); /* Flush posted write. */
while (remaining > 0) {
+ ret = 0;
if (use_irq &&
!wait_for_completion_timeout(&cqspi->transfer_complete,
msecs_to_jiffies(CQSPI_READ_TIMEOUT_MS)))
@@ -781,6 +782,14 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
if (cqspi->slow_sram)
writel(0x0, reg_base + CQSPI_REG_IRQMASK);
+ /*
+ * Prevent lost interrupt and race condition by reinitializing early.
+ * A spurious wakeup and another wait cycle can occur here,
+ * which is preferable to waiting until timeout if interrupt is lost.
+ */
+ if (use_irq)
+ reinit_completion(&cqspi->transfer_complete);
+
bytes_to_read = cqspi_get_rd_sram_level(cqspi);
if (ret && bytes_to_read == 0) {
@@ -813,7 +822,6 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
}
if (use_irq && remaining > 0) {
- reinit_completion(&cqspi->transfer_complete);
if (cqspi->slow_sram)
writel(CQSPI_REG_IRQ_WATERMARK, reg_base + CQSPI_REG_IRQMASK);
}
--
2.51.0
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 0ace3297a7301911e52d8195cb1006414897c859
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122921-regress-overhand-2900@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Fri, 5 Dec 2025 19:55:14 +0100
Subject: [PATCH] mptcp: pm: ignore unknown endpoint flags
Before this patch, the kernel was saving any flags set by the userspace,
even unknown ones. This doesn't cause critical issues because the kernel
is only looking at specific ones. But on the other hand, endpoints dumps
could tell the userspace some recent flags seem to be supported on older
kernel versions.
Instead, ignore all unknown flags when parsing them. By doing that, the
userspace can continue to set unsupported flags, but it has a way to
verify what is supported by the kernel.
Note that it sounds better to continue accepting unsupported flags not
to change the behaviour, but also that eases things on the userspace
side by adding "optional" endpoint types only supported by newer kernel
versions without having to deal with the different kernel versions.
A note for the backports: there will be conflicts in mptcp.h on older
versions not having the mentioned flags, the new line should still be
added last, and the '5' needs to be adapted to have the same value as
the last entry.
Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
Cc: stable(a)vger.kernel.org
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 04eea6d1d0a9..72a5d030154e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -40,6 +40,7 @@
#define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3)
#define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4)
#define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5)
+#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0)
struct mptcp_info {
__u8 mptcpi_subflows;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d5b383870f79..7aa42de9c47b 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
}
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
- entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
+ MPTCP_PM_ADDR_FLAGS_MASK;
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x ffb8c27b0539dd90262d1021488e7817fae57c42
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122942-stonework-habitant-a877@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ffb8c27b0539dd90262d1021488e7817fae57c42 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Fri, 5 Dec 2025 19:55:17 +0100
Subject: [PATCH] mptcp: avoid deadlock on fallback while reinjecting
Jakub reported an MPTCP deadlock at fallback time:
WARNING: possible recursive locking detected
6.18.0-rc7-virtme #1 Not tainted
--------------------------------------------
mptcp_connect/20858 is trying to acquire lock:
ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_try_fallback+0xd8/0x280
but task is already holding lock:
ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&msk->fallback_lock);
lock(&msk->fallback_lock);
*** DEADLOCK ***
May be due to missing lock nesting notation
3 locks held by mptcp_connect/20858:
#0: ff1100001da18290 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x114/0x1bc0
#1: ff1100001db40fd0 (k-sk_lock-AF_INET#2){+.+.}-{0:0}, at: __mptcp_retrans+0x2cb/0xaa0
#2: ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0
stack backtrace:
CPU: 0 UID: 0 PID: 20858 Comm: mptcp_connect Not tainted 6.18.0-rc7-virtme #1 PREEMPT(full)
Hardware name: Bochs, BIOS Bochs 01/01/2011
Call Trace:
<TASK>
dump_stack_lvl+0x6f/0xa0
print_deadlock_bug.cold+0xc0/0xcd
validate_chain+0x2ff/0x5f0
__lock_acquire+0x34c/0x740
lock_acquire.part.0+0xbc/0x260
_raw_spin_lock_bh+0x38/0x50
__mptcp_try_fallback+0xd8/0x280
mptcp_sendmsg_frag+0x16c2/0x3050
__mptcp_retrans+0x421/0xaa0
mptcp_release_cb+0x5aa/0xa70
release_sock+0xab/0x1d0
mptcp_sendmsg+0xd5b/0x1bc0
sock_write_iter+0x281/0x4d0
new_sync_write+0x3c5/0x6f0
vfs_write+0x65e/0xbb0
ksys_write+0x17e/0x200
do_syscall_64+0xbb/0xfd0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
RIP: 0033:0x7fa5627cbc5e
Code: 4d 89 d8 e8 14 bd 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 <c9> c3 83 e2 39 83 fa 08 75 e7 e8 13 ff ff ff 0f 1f 00 f3 0f 1e fa
RSP: 002b:00007fff1fe14700 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fa5627cbc5e
RDX: 0000000000001f9c RSI: 00007fff1fe16984 RDI: 0000000000000005
RBP: 00007fff1fe14710 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff1fe16920
R13: 0000000000002000 R14: 0000000000001f9c R15: 0000000000001f9c
The packet scheduler could attempt a reinjection after receiving an
MP_FAIL and before the infinite map has been transmitted, causing a
deadlock since MPTCP needs to do the reinjection atomically from WRT
fallback.
Address the issue explicitly avoiding the reinjection in the critical
scenario. Note that this is the only fallback critical section that
could potentially send packets and hit the double-lock.
Reported-by: Jakub Kicinski <kuba(a)kernel.org>
Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/mptcp-dbg/results/412720/1-m…
Fixes: f8a1d9b18c5e ("mptcp: make fallback action and fallback decision atomic")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-4-9e4781…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d8a7f7029164..9b1fafd87cb9 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2769,10 +2769,13 @@ static void __mptcp_retrans(struct sock *sk)
/*
* make the whole retrans decision, xmit, disallow
- * fallback atomic
+ * fallback atomic, note that we can't retrans even
+ * when an infinite fallback is in progress, i.e. new
+ * subflows are disallowed.
*/
spin_lock_bh(&msk->fallback_lock);
- if (__mptcp_check_fallback(msk)) {
+ if (__mptcp_check_fallback(msk) ||
+ !msk->allow_subflows) {
spin_unlock_bh(&msk->fallback_lock);
release_sock(ssk);
goto clear_scheduled;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x b4c61e542faf8c9131d69ecfc3ad6de96d1b2ab8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122926-cushy-unstylish-3577@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b4c61e542faf8c9131d69ecfc3ad6de96d1b2ab8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Thu, 18 Dec 2025 16:35:17 +0100
Subject: [PATCH] usb: ohci-nxp: fix device leak on probe failure
Make sure to drop the reference taken when looking up the PHY I2C device
during probe on probe failure (e.g. probe deferral) and on driver
unbind.
Fixes: 73108aa90cbf ("USB: ohci-nxp: Use isp1301 driver")
Cc: stable(a)vger.kernel.org # 3.5
Reported-by: Ma Ke <make24(a)iscas.ac.cn>
Link: https://lore.kernel.org/lkml/20251117013428.21840-1-make24@iscas.ac.cn/
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Acked-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Vladimir Zapolskiy <vz(a)mleia.com>
Link: https://patch.msgid.link/20251218153519.19453-4-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/ohci-nxp.c b/drivers/usb/host/ohci-nxp.c
index 24d5a1dc5056..509ca7d8d513 100644
--- a/drivers/usb/host/ohci-nxp.c
+++ b/drivers/usb/host/ohci-nxp.c
@@ -223,6 +223,7 @@ static int ohci_hcd_nxp_probe(struct platform_device *pdev)
fail_resource:
usb_put_hcd(hcd);
fail_disable:
+ put_device(&isp1301_i2c_client->dev);
isp1301_i2c_client = NULL;
return ret;
}
@@ -234,6 +235,7 @@ static void ohci_hcd_nxp_remove(struct platform_device *pdev)
usb_remove_hcd(hcd);
ohci_nxp_stop_hc();
usb_put_hcd(hcd);
+ put_device(&isp1301_i2c_client->dev);
isp1301_i2c_client = NULL;
}
This reverts commit ec9fd499b9c60a187ac8d6414c3c343c77d32e42.
While this fake hotplugging was a nice idea, it has shown that this feature
does not handle PCIe switches correctly:
pci_bus 0004:43: busn_res: can not insert [bus 43-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:43: busn_res: [bus 43-41] end is updated to 43
pci_bus 0004:43: busn_res: can not insert [bus 43] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:00.0: devices behind bridge are unusable because [bus 43] cannot be assigned for them
pci_bus 0004:44: busn_res: can not insert [bus 44-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:44: busn_res: [bus 44-41] end is updated to 44
pci_bus 0004:44: busn_res: can not insert [bus 44] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:02.0: devices behind bridge are unusable because [bus 44] cannot be assigned for them
pci_bus 0004:45: busn_res: can not insert [bus 45-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:45: busn_res: [bus 45-41] end is updated to 45
pci_bus 0004:45: busn_res: can not insert [bus 45] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:06.0: devices behind bridge are unusable because [bus 45] cannot be assigned for them
pci_bus 0004:46: busn_res: can not insert [bus 46-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:46: busn_res: [bus 46-41] end is updated to 46
pci_bus 0004:46: busn_res: can not insert [bus 46] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:0e.0: devices behind bridge are unusable because [bus 46] cannot be assigned for them
pci_bus 0004:42: busn_res: [bus 42-41] end is updated to 46
pci_bus 0004:42: busn_res: can not insert [bus 42-46] under [bus 41] (conflicts with (null) [bus 41])
pci 0004:41:00.0: devices behind bridge are unusable because [bus 42-46] cannot be assigned for them
pcieport 0004:40:00.0: bridge has subordinate 41 but max busn 46
During the initial scan, PCI core doesn't see the switch and since the Root
Port is not hot plug capable, the secondary bus number gets assigned as the
subordinate bus number. This means, the PCI core assumes that only one bus
will appear behind the Root Port since the Root Port is not hot plug
capable.
This works perfectly fine for PCIe endpoints connected to the Root Port,
since they don't extend the bus. However, if a PCIe switch is connected,
then there is a problem when the downstream busses starts showing up and
the PCI core doesn't extend the subordinate bus number after initial scan
during boot.
The long term plan is to migrate this driver to the pwrctrl framework,
once it adds proper support for powering up and enumerating PCIe switches.
Cc: stable(a)vger.kernel.org
Suggested-by: Manivannan Sadhasivam <mani(a)kernel.org>
Acked-by: Shawn Lin <shawn.lin(a)rock-chips.com>
Tested-by: Shawn Lin <shawn.lin(a)rock-chips.com>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-dw-rockchip.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
index 8c1c92208802..ca808d8f7975 100644
--- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -601,7 +601,6 @@ static int rockchip_pcie_configure_rc(struct platform_device *pdev,
pp = &rockchip->pci.pp;
pp->ops = &rockchip_pcie_host_ops;
- pp->use_linkup_irq = true;
ret = dw_pcie_host_init(pp);
if (ret) {
--
2.52.0
[ View in browser](https://7m8ue.r.ag.d.sendibm3.com/mk/mr/sh/7nVTPdZCTJDXOxg5wYo2dj3…
مجلة طُوى للعلوم الاجتماعية
===========================
مجلة علمية ، محكمة، فصلية ، مفتوحة الوصول ، تصدر عن مركز فكر للدراسات والتطوير
جاء اختيار اسم المجلة “طُوى” استلهامًا من **الوادي المقدّس طُوى**، تعبيرًا عن قدسية المعرفة وأثرها في التغيير المجتمعي، وإيمانًا بأن الفكر العلمي رسالة عميقة لا تقل قداسة عن أي رسالة إنسانية.
#### السادة أعضاء الهيئات التدريسية، الباحثون الكرام
تحية طيبة وبعد،
يسرّ **هيئة تحرير مجلة طُوى للعلوم الاجتماعية** أن تتقدّم إليكم بأطيب التحيات، متمنّين لكم دوام التوفيق والنجاح في مسيرتكم الأكاديمية.
نودّ إعلامكم بفتح باب استقبال الأبحاث العلمية للنشر في **العدد الخامس** من المجلة، وذلك ضمن إطار أكاديمي محكّم، وبرسوم رمزية قدرها **50 دولارًا أمريكيًا** فقط.
🔹 **عن المجلة**
تُعد مجلة طُوى منصة علمية محكّمة تُعنى بنشر البحوث الأصيلة في مجالات العلوم الاجتماعية ، وقد أسهمت المجلة خلال أعدادها السابقة في نشر عشرات الأبحاث لباحثين من جامعات عربية ودولية، مع الالتزام الصارم بمعايير الجودة والتحكيم العلمي.
✨ **لماذا النشر في مجلة طُوى؟**
• تحكيم علمي سريع خلال **7 أيام**
• نشر بثلاث لغات: **العربية – الإنجليزية – التركية**
• **وصول مفتوح (Open Access)** لجميع القرّاء
• ترقيم دولي **ISSN: 3104-7211**
• تخصيص **DOI** مستقل لكل مقال
• فهرسة في: Google Scholar، ResearchGate، Academia وغيرها في 20 فهرسة
• منصة إلكترونية تفاعلية لتقديم ومتابعة المخطوطات
📚 **مجالات النشر** تشمل – ولا تقتصر على:
علم الاجتماع، العلوم السياسية، الأنثروبولوجيا، علم النفس الاجتماعي، الإعلام والاتصال، القانون والمجتمع، التربية والتعليم، الاقتصاد الاجتماعي، الجندر وتمكين المرأة، العمل الاجتماعي، الهجرة واللجوء، الدراسات التنموية والبيئية، الفلسفة والتاريخ الاجتماعي.
💲 **رسوم النشر**:
50 دولارًا أمريكيًا، تشمل التحكيم العلمي، التدقيق، والإخراج الفني.
📄 **للتقديم على النشر**:
[https://7m8ue.r.ag.d.sendibm3.com/mk/cl/f/sh/7nVU1aA2nfsTSeWOzmWNvJm5IIiZEZ…
🌐 **موقع المجلة**:
[https://7m8ue.r.ag.d.sendibm3.com/mk/cl/f/sh/7nVU1aA2nfuMSB1hyLkpxg5tSfDLyV…
📧 **للتواصل عبر البريد الإلكتروني**:
[info@tuwa-journal.org](mailto:info@tuwa-journal.org)
📲 **للاستفسار الفوري عبر واتساب**:
[https://7m8ue.r.ag.d.sendibm3.com/mk/cl/f/sh/7nVU1aA2nfwFRhX0wuzI02Phd1i8iS…
نعتزّ بانضمامكم إلى شبكة باحثي مجلة طُوى، ونتطلّع لاستقبال إسهاماتكم العلمية القيّمة، ومنح أبحاثكم الانتشار الأكاديمي الذي تستحقه.
وتفضلوا بقبول فائق الاحترام والتقدير،
**هيئة تحرير مجلة طُوى للعلوم الاجتماعية**
[ Read the whole story](https://7m8ue.r.ag.d.sendibm3.com/mk/cl/f/sh/7nVU1aA2ng01QkXcu3SC4l3…
**fiker for research and development**
يمكن متابعة المجلة من خلال الاتصال الموضحة.
الموقع: [https://tuwa-journal.org/](https://tuwa-journal.org/) || البريد الإلكتروني: info(a)tuwa-journal.org
هاتف: 00905398717003
You've received it because you've subscribed to our newsletter.
[Unsubscribe](https://7m8ue.r.ag.d.sendibm3.com/mk/un/v2/sh/7nVTPdbLJ2bPbEmD…
The fbdev sysfs attributes are registered after sending the uevent for
the device creation, leaving a race window where e.g. udev rules may
not be able to access the sysfs attributes because the registration is
not done yet.
Fix this by switching to device_create_with_groups(). This also results in
a nice cleanup. After switching to device_create_with_groups() all that
is left of fb_init_device() is setting the drvdata and that can be passed
to device_create[_with_groups]() too. After which fb_init_device() can
be completely removed.
Dropping fb_init_device() + fb_cleanup_device() in turn allows removing
fb_info.class_flag as they were the only user of this field.
Fixes: 5fc830d6aca1 ("fbdev: Register sysfs groups through device_add_group")
Cc: stable(a)vger.kernel.org
Cc: Shixiong Ou <oushixiong(a)kylinos.cn>
Signed-off-by: Hans de Goede <johannes.goede(a)oss.qualcomm.com>
---
Note the fixes tag is technically wrong. This race has existed forever.
The commit I picked for the fixes tag is a dependency of this change not
the commit introducing the race. I don't believe that backporting this
back any further is useful which is why I went with this commit.
---
drivers/video/fbdev/core/fbsysfs.c | 36 +++---------------------------
include/linux/fb.h | 1 -
2 files changed, 3 insertions(+), 34 deletions(-)
diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c
index b8344c40073b..baa2bae0fb5b 100644
--- a/drivers/video/fbdev/core/fbsysfs.c
+++ b/drivers/video/fbdev/core/fbsysfs.c
@@ -12,8 +12,6 @@
#include "fb_internal.h"
-#define FB_SYSFS_FLAG_ATTR 1
-
static int activate(struct fb_info *fb_info, struct fb_var_screeninfo *var)
{
int err;
@@ -451,33 +449,7 @@ static struct attribute *fb_device_attrs[] = {
NULL,
};
-static const struct attribute_group fb_device_attr_group = {
- .attrs = fb_device_attrs,
-};
-
-static int fb_init_device(struct fb_info *fb_info)
-{
- int ret;
-
- dev_set_drvdata(fb_info->dev, fb_info);
-
- fb_info->class_flag |= FB_SYSFS_FLAG_ATTR;
-
- ret = device_add_group(fb_info->dev, &fb_device_attr_group);
- if (ret)
- fb_info->class_flag &= ~FB_SYSFS_FLAG_ATTR;
-
- return 0;
-}
-
-static void fb_cleanup_device(struct fb_info *fb_info)
-{
- if (fb_info->class_flag & FB_SYSFS_FLAG_ATTR) {
- device_remove_group(fb_info->dev, &fb_device_attr_group);
-
- fb_info->class_flag &= ~FB_SYSFS_FLAG_ATTR;
- }
-}
+ATTRIBUTE_GROUPS(fb_device);
int fb_device_create(struct fb_info *fb_info)
{
@@ -485,14 +457,13 @@ int fb_device_create(struct fb_info *fb_info)
dev_t devt = MKDEV(FB_MAJOR, node);
int ret;
- fb_info->dev = device_create(fb_class, fb_info->device, devt, NULL, "fb%d", node);
+ fb_info->dev = device_create_with_groups(fb_class, fb_info->device, devt, fb_info,
+ fb_device_groups, "fb%d", node);
if (IS_ERR(fb_info->dev)) {
/* Not fatal */
ret = PTR_ERR(fb_info->dev);
pr_warn("Unable to create device for framebuffer %d; error %d\n", node, ret);
fb_info->dev = NULL;
- } else {
- fb_init_device(fb_info);
}
return 0;
@@ -505,7 +476,6 @@ void fb_device_destroy(struct fb_info *fb_info)
if (!fb_info->dev)
return;
- fb_cleanup_device(fb_info);
device_destroy(fb_class, devt);
fb_info->dev = NULL;
}
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 05cc251035da..c3302d513546 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -497,7 +497,6 @@ struct fb_info {
#if defined(CONFIG_FB_DEVICE)
struct device *dev; /* This is this fb device */
#endif
- int class_flag; /* private sysfs flags */
#ifdef CONFIG_FB_TILEBLITTING
struct fb_tile_ops *tileops; /* Tile Blitting */
#endif
--
2.52.0
The for_each_pci_dev() loop takes a reference on each PCI device
during iteration. When returning early on pci_eisa_init() failure,
the reference on 'dev' is not released, causing a resource leak.
Add pci_dev_put(dev) before the error return to properly balance
the reference count taken by for_each_pci_dev().
Fixes: c5fb301ae83b ("EISA/PCI: Init EISA early, before PNP")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kaushlendra Kumar <kaushlendra.kumar(a)intel.com>
---
drivers/eisa/pci_eisa.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c
index 8173e60bb808..8242e9e16ce2 100644
--- a/drivers/eisa/pci_eisa.c
+++ b/drivers/eisa/pci_eisa.c
@@ -80,8 +80,10 @@ static int __init pci_eisa_init_early(void)
for_each_pci_dev(dev)
if ((dev->class >> 8) == PCI_CLASS_BRIDGE_EISA) {
ret = pci_eisa_init(dev);
- if (ret)
+ if (ret) {
+ pci_dev_put(dev);
return ret;
+ }
}
return 0;
--
2.34.1
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 590c03ca6a3fbb114396673314e2aa483839608b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122924-reproach-foster-4189@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 590c03ca6a3fbb114396673314e2aa483839608b Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16(a)zte.com.cn>
Date: Tue, 7 Oct 2025 18:28:21 +0800
Subject: [PATCH] mm/ksm: fix exec/fork inheritance support for prctl
Patch series "ksm: fix exec/fork inheritance", v2.
This series fixes exec/fork inheritance. See the detailed description of
the issue below.
This patch (of 2):
Background
==========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process")
introduced MMF_VM_MERGE_ANY for mm->flags, and allowed user to set it by
prctl() so that the process's VMAs are forcibly scanned by ksmd.
Subsequently, the 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
supported inheriting the MMF_VM_MERGE_ANY flag when a task calls execve().
Finally, commit 3a9e567ca45fb ("mm/ksm: fix ksm exec support for prctl")
fixed the issue that ksmd doesn't scan the mm_struct with MMF_VM_MERGE_ANY
by adding the mm_slot to ksm_mm_head in __bprm_mm_init().
Problem
=======
In some extreme scenarios, however, this inheritance of MMF_VM_MERGE_ANY
during exec/fork can fail. For example, when the scanning frequency of
ksmd is tuned extremely high, a process carrying MMF_VM_MERGE_ANY may
still fail to pass it to the newly exec'd process. This happens because
ksm_execve() is executed too early in the do_execve flow (prematurely
adding the new mm_struct to the ksm_mm_slot list).
As a result, before do_execve completes, ksmd may have already performed a
scan and found that this new mm_struct has no VM_MERGEABLE VMAs, thus
clearing its MMF_VM_MERGE_ANY flag. Consequently, when the new program
executes, the flag MMF_VM_MERGE_ANY inheritance missed.
Root reason
===========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process") clear
the flag MMF_VM_MERGE_ANY when ksmd found no VM_MERGEABLE VMAs.
Solution
========
Firstly, Don't clear MMF_VM_MERGE_ANY when ksmd found no VM_MERGEABLE
VMAs, because perhaps their mm_struct has just been added to ksm_mm_slot
list, and its process has not yet officially started running or has not
yet performed mmap/brk to allocate anonymous VMAS.
Secondly, recheck MMF_VM_MERGEABLE again if a process takes
MMF_VM_MERGE_ANY, and create a mm_slot and join it into ksm_scan_list
again.
Link: https://lkml.kernel.org/r/20251007182504440BJgK8VXRHh8TD7IGSUIY4@zte.com.cn
Link: https://lkml.kernel.org/r/20251007182821572h_SoFqYZXEP1mvWI4n9VL@zte.com.cn
Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process")
Signed-off-by: xu xin <xu.xin16(a)zte.com.cn>
Cc: Stefan Roesch <shr(a)devkernel.io>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jinjiang Tu <tujinjiang(a)huawei.com>
Cc: Wang Yaxin <wang.yaxin(a)zte.com.cn>
Cc: Yang Yang <yang.yang29(a)zte.com.cn>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 067538fc4d58..c982694c987b 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -17,7 +17,7 @@
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, vm_flags_t *vm_flags);
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags);
int ksm_enable_merge_any(struct mm_struct *mm);
int ksm_disable_merge_any(struct mm_struct *mm);
@@ -103,7 +103,7 @@ bool ksm_process_mergeable(struct mm_struct *mm);
#else /* !CONFIG_KSM */
-static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
+static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm,
const struct file *file, vm_flags_t vm_flags)
{
return vm_flags;
diff --git a/mm/ksm.c b/mm/ksm.c
index cdefba633856..4f672f4f2140 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2712,8 +2712,14 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
spin_unlock(&ksm_mmlist_lock);
mm_slot_free(mm_slot_cache, mm_slot);
+ /*
+ * Only clear MMF_VM_MERGEABLE. We must not clear
+ * MMF_VM_MERGE_ANY, because for those MMF_VM_MERGE_ANY process,
+ * perhaps their mm_struct has just been added to ksm_mm_slot
+ * list, and its process has not yet officially started running
+ * or has not yet performed mmap/brk to allocate anonymous VMAS.
+ */
mm_flags_clear(MMF_VM_MERGEABLE, mm);
- mm_flags_clear(MMF_VM_MERGE_ANY, mm);
mmap_read_unlock(mm);
mmdrop(mm);
} else {
@@ -2831,12 +2837,20 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
*
* Returns: @vm_flags possibly updated to mark mergeable.
*/
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags)
{
if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
- __ksm_should_add_vma(file, vm_flags))
+ __ksm_should_add_vma(file, vm_flags)) {
vm_flags |= VM_MERGEABLE;
+ /*
+ * Generally, the flags here always include MMF_VM_MERGEABLE.
+ * However, in rare cases, this flag may be cleared by ksmd who
+ * scans a cycle without finding any mergeable vma.
+ */
+ if (unlikely(!mm_flags_test(MMF_VM_MERGEABLE, mm)))
+ __ksm_enter(mm);
+ }
return vm_flags;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 590c03ca6a3fbb114396673314e2aa483839608b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122925-designed-overture-2e7d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 590c03ca6a3fbb114396673314e2aa483839608b Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16(a)zte.com.cn>
Date: Tue, 7 Oct 2025 18:28:21 +0800
Subject: [PATCH] mm/ksm: fix exec/fork inheritance support for prctl
Patch series "ksm: fix exec/fork inheritance", v2.
This series fixes exec/fork inheritance. See the detailed description of
the issue below.
This patch (of 2):
Background
==========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process")
introduced MMF_VM_MERGE_ANY for mm->flags, and allowed user to set it by
prctl() so that the process's VMAs are forcibly scanned by ksmd.
Subsequently, the 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
supported inheriting the MMF_VM_MERGE_ANY flag when a task calls execve().
Finally, commit 3a9e567ca45fb ("mm/ksm: fix ksm exec support for prctl")
fixed the issue that ksmd doesn't scan the mm_struct with MMF_VM_MERGE_ANY
by adding the mm_slot to ksm_mm_head in __bprm_mm_init().
Problem
=======
In some extreme scenarios, however, this inheritance of MMF_VM_MERGE_ANY
during exec/fork can fail. For example, when the scanning frequency of
ksmd is tuned extremely high, a process carrying MMF_VM_MERGE_ANY may
still fail to pass it to the newly exec'd process. This happens because
ksm_execve() is executed too early in the do_execve flow (prematurely
adding the new mm_struct to the ksm_mm_slot list).
As a result, before do_execve completes, ksmd may have already performed a
scan and found that this new mm_struct has no VM_MERGEABLE VMAs, thus
clearing its MMF_VM_MERGE_ANY flag. Consequently, when the new program
executes, the flag MMF_VM_MERGE_ANY inheritance missed.
Root reason
===========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process") clear
the flag MMF_VM_MERGE_ANY when ksmd found no VM_MERGEABLE VMAs.
Solution
========
Firstly, Don't clear MMF_VM_MERGE_ANY when ksmd found no VM_MERGEABLE
VMAs, because perhaps their mm_struct has just been added to ksm_mm_slot
list, and its process has not yet officially started running or has not
yet performed mmap/brk to allocate anonymous VMAS.
Secondly, recheck MMF_VM_MERGEABLE again if a process takes
MMF_VM_MERGE_ANY, and create a mm_slot and join it into ksm_scan_list
again.
Link: https://lkml.kernel.org/r/20251007182504440BJgK8VXRHh8TD7IGSUIY4@zte.com.cn
Link: https://lkml.kernel.org/r/20251007182821572h_SoFqYZXEP1mvWI4n9VL@zte.com.cn
Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process")
Signed-off-by: xu xin <xu.xin16(a)zte.com.cn>
Cc: Stefan Roesch <shr(a)devkernel.io>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jinjiang Tu <tujinjiang(a)huawei.com>
Cc: Wang Yaxin <wang.yaxin(a)zte.com.cn>
Cc: Yang Yang <yang.yang29(a)zte.com.cn>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 067538fc4d58..c982694c987b 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -17,7 +17,7 @@
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, vm_flags_t *vm_flags);
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags);
int ksm_enable_merge_any(struct mm_struct *mm);
int ksm_disable_merge_any(struct mm_struct *mm);
@@ -103,7 +103,7 @@ bool ksm_process_mergeable(struct mm_struct *mm);
#else /* !CONFIG_KSM */
-static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
+static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm,
const struct file *file, vm_flags_t vm_flags)
{
return vm_flags;
diff --git a/mm/ksm.c b/mm/ksm.c
index cdefba633856..4f672f4f2140 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2712,8 +2712,14 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
spin_unlock(&ksm_mmlist_lock);
mm_slot_free(mm_slot_cache, mm_slot);
+ /*
+ * Only clear MMF_VM_MERGEABLE. We must not clear
+ * MMF_VM_MERGE_ANY, because for those MMF_VM_MERGE_ANY process,
+ * perhaps their mm_struct has just been added to ksm_mm_slot
+ * list, and its process has not yet officially started running
+ * or has not yet performed mmap/brk to allocate anonymous VMAS.
+ */
mm_flags_clear(MMF_VM_MERGEABLE, mm);
- mm_flags_clear(MMF_VM_MERGE_ANY, mm);
mmap_read_unlock(mm);
mmdrop(mm);
} else {
@@ -2831,12 +2837,20 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
*
* Returns: @vm_flags possibly updated to mark mergeable.
*/
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags)
{
if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
- __ksm_should_add_vma(file, vm_flags))
+ __ksm_should_add_vma(file, vm_flags)) {
vm_flags |= VM_MERGEABLE;
+ /*
+ * Generally, the flags here always include MMF_VM_MERGEABLE.
+ * However, in rare cases, this flag may be cleared by ksmd who
+ * scans a cycle without finding any mergeable vma.
+ */
+ if (unlikely(!mm_flags_test(MMF_VM_MERGEABLE, mm)))
+ __ksm_enter(mm);
+ }
return vm_flags;
}
The patch below does not apply to the 6.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.18.y
git checkout FETCH_HEAD
git cherry-pick -x 4012d78562193ef5eb613bad4b0c0fa187637cfe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122915-kitchen-june-49ec@gregkh' --subject-prefix 'PATCH 6.18.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4012d78562193ef5eb613bad4b0c0fa187637cfe Mon Sep 17 00:00:00 2001
From: Junbeom Yeom <junbeom.yeom(a)samsung.com>
Date: Fri, 19 Dec 2025 21:40:31 +0900
Subject: [PATCH] erofs: fix unexpected EIO under memory pressure
erofs readahead could fail with ENOMEM under the memory pressure because
it tries to alloc_page with GFP_NOWAIT | GFP_NORETRY, while GFP_KERNEL
for a regular read. And if readahead fails (with non-uptodate folios),
the original request will then fall back to synchronous read, and
`.read_folio()` should return appropriate errnos.
However, in scenarios where readahead and read operations compete,
read operation could return an unintended EIO because of an incorrect
error propagation.
To resolve this, this patch modifies the behavior so that, when the
PCL is for read(which means pcl.besteffort is true), it attempts actual
decompression instead of propagating the privios error except initial EIO.
- Page size: 4K
- The original size of FileA: 16K
- Compress-ratio per PCL: 50% (Uncompressed 8K -> Compressed 4K)
[page0, page1] [page2, page3]
[PCL0]---------[PCL1]
- functions declaration:
. pread(fd, buf, count, offset)
. readahead(fd, offset, count)
- Thread A tries to read the last 4K
- Thread B tries to do readahead 8K from 4K
- RA, besteffort == false
- R, besteffort == true
<process A> <process B>
pread(FileA, buf, 4K, 12K)
do readahead(page3) // failed with ENOMEM
wait_lock(page3)
if (!uptodate(page3))
goto do_read
readahead(FileA, 4K, 8K)
// Here create PCL-chain like below:
// [null, page1] [page2, null]
// [PCL0:RA]-----[PCL1:RA]
...
do read(page3) // found [PCL1:RA] and add page3 into it,
// and then, change PCL1 from RA to R
...
// Now, PCL-chain is as below:
// [null, page1] [page2, page3]
// [PCL0:RA]-----[PCL1:R]
// try to decompress PCL-chain...
z_erofs_decompress_queue
err = 0;
// failed with ENOMEM, so page 1
// only for RA will not be uptodated.
// it's okay.
err = decompress([PCL0:RA], err)
// However, ENOMEM propagated to next
// PCL, even though PCL is not only
// for RA but also for R. As a result,
// it just failed with ENOMEM without
// trying any decompression, so page2
// and page3 will not be uptodated.
** BUG HERE ** --> err = decompress([PCL1:R], err)
return err as ENOMEM
...
wait_lock(page3)
if (!uptodate(page3))
return EIO <-- Return an unexpected EIO!
...
Fixes: 2349d2fa02db ("erofs: sunset unneeded NOFAILs")
Cc: stable(a)vger.kernel.org
Reviewed-by: Jaewook Kim <jw5454.kim(a)samsung.com>
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Signed-off-by: Junbeom Yeom <junbeom.yeom(a)samsung.com>
Reviewed-by: Gao Xiang <hsiangkao(a)linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao(a)linux.alibaba.com>
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 65da21504632..3d31f7840ca0 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1262,7 +1262,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
return err;
}
-static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
+static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, bool eio)
{
struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
struct z_erofs_pcluster *pcl = be->pcl;
@@ -1270,7 +1270,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
const struct z_erofs_decompressor *alg =
z_erofs_decomp[pcl->algorithmformat];
bool try_free = true;
- int i, j, jtop, err2;
+ int i, j, jtop, err2, err = eio ? -EIO : 0;
struct page *page;
bool overlapped;
const char *reason;
@@ -1413,12 +1413,12 @@ static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
.pcl = io->head,
};
struct z_erofs_pcluster *next;
- int err = io->eio ? -EIO : 0;
+ int err = 0;
for (; be.pcl != Z_EROFS_PCLUSTER_TAIL; be.pcl = next) {
DBG_BUGON(!be.pcl);
next = READ_ONCE(be.pcl->next);
- err = z_erofs_decompress_pcluster(&be, err) ?: err;
+ err = z_erofs_decompress_pcluster(&be, io->eio) ?: err;
}
return err;
}
A new warning in Clang 22 [1] complains that @clidr passed to
get_clidr_el1() is an uninitialized const pointer. get_clidr_el1()
doesn't really care since it casts away the const-ness anyways -- it is
a false positive.
| ../arch/arm64/kvm/sys_regs.c:2838:23: warning: variable 'clidr' is uninitialized when passed as a const pointer argument here [-Wuninitialized-const-pointer]
| 2838 | get_clidr_el1(NULL, &clidr); /* Ugly... */
| | ^~~~~
This patch isn't needed for anything past 6.1 as this code section was
reworked in Commit 7af0c2534f4c ("KVM: arm64: Normalize cache
configuration"). Since there is no upstream equivalent, this patch just
needs to be applied to 5.15.
Disable this warning for sys_regs.o with an iron fist as it doesn't make
sense to waste maintainer's time or potentially break builds by
backporting large changelists from 6.2+.
Cc: stable(a)vger.kernel.org
Fixes: 7c8c5e6a9101e ("arm64: KVM: system register handling")
Link: https://github.com/llvm/llvm-project/commit/00dacf8c22f065cb52efb14cd091d44… [1]
Reviewed-by: Nathan Chancellor <nathan(a)kernel.org>
Signed-off-by: Justin Stitt <justinstitt(a)google.com>
---
Resending this with Nathan's RB tag, an updated commit log and better
recipients from checkpatch.pl.
I'm also sending a similar patch resend for 6.1.
---
arch/arm64/kvm/Makefile | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 989bb5dad2c8..109cca425d3e 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -25,3 +25,6 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
vgic/vgic-its.o vgic/vgic-debug.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o
+
+# Work around a false positive Clang 22 -Wuninitialized-const-pointer warning
+CFLAGS_sys_regs.o := $(call cc-disable-warning, uninitialized-const-pointer)
---
base-commit: 8bb7eca972ad531c9b149c0a51ab43a417385813
change-id: 20250728-b4-stable-disable-uninit-ptr-warn-5-15-c0c9db3df206
Best regards,
--
Justin Stitt <justinstitt(a)google.com>
A new warning in Clang 22 [1] complains that @clidr passed to
get_clidr_el1() is an uninitialized const pointer. get_clidr_el1()
doesn't really care since it casts away the const-ness anyways -- it is
a false positive.
This patch isn't needed for anything past 6.1 as this code section was
reworked in Commit 7af0c2534f4c ("KVM: arm64: Normalize cache
configuration") which incidentally removed the aforementioned warning.
Since there is no upstream equivalent, this patch just needs to be
applied to 6.1.
Disable this warning for sys_regs.o instead of backporting the patches
from 6.2+ that modified this code area.
Cc: stable(a)vger.kernel.org
Fixes: 7c8c5e6a9101e ("arm64: KVM: system register handling")
Link: https://github.com/llvm/llvm-project/commit/00dacf8c22f065cb52efb14cd091d44… [1]
Reviewed-by: Nathan Chancellor <nathan(a)kernel.org>
Signed-off-by: Justin Stitt <justinstitt(a)google.com>
---
Changes in v2:
- disable warning for TU instead of initialising the struct
- update commit message
- Link to v1: https://lore.kernel.org/all/20250724-b4-clidr-unint-const-ptr-v1-1-67c4d620…
- Link to v1 resend (sent wrong diff, thanks Nathan): https://lore.kernel.org/all/20251204-b4-clidr-unint-const-ptr-v1-1-95161315…
---
arch/arm64/kvm/Makefile | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 5e33c2d4645a..5fdb5331bfad 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -24,6 +24,9 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
+# Work around a false positive Clang 22 -Wuninitialized-const-pointer warning
+CFLAGS_sys_regs.o := $(call cc-disable-warning, uninitialized-const-pointer)
+
always-y := hyp_constants.h hyp-constants.s
define rule_gen_hyp_constants
---
base-commit: 830b3c68c1fb1e9176028d02ef86f3cf76aa2476
change-id: 20250728-stable-disable-unit-ptr-warn-281fee82539c
Best regards,
--
Justin Stitt <justinstitt(a)google.com>
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x dae9750105cf93ac1e156ef91f4beeb53bd64777
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122958-statue-subtotal-2f1c@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dae9750105cf93ac1e156ef91f4beeb53bd64777 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111(a)xry111.site>
Date: Fri, 28 Nov 2025 15:50:32 +0800
Subject: [PATCH] gpio: loongson: Switch 2K2000/3000 GPIO to BYTE_CTRL_MODE
The manuals of 2K2000 says both BIT_CTRL_MODE and BYTE_CTRL_MODE are
supported but the latter is recommended. Also on 2K3000, per the ACPI
DSDT the GPIO controller is compatible with 2K2000, but it fails to
operate GPIOs 62 and 63 (and maybe others) using BIT_CTRL_MODE.
Using BYTE_CTRL_MODE also makes those 2K3000 GPIOs work.
Fixes: 3feb70a61740 ("gpio: loongson: add more gpio chip support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Xi Ruoyao <xry111(a)xry111.site>
Reviewed-by: Huacai Chen <chenhuacai(a)loongson.cn>
Link: https://lore.kernel.org/r/20251128075033.255821-1-xry111@xry111.site
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c
index d4e291b275f0..77d07e31366f 100644
--- a/drivers/gpio/gpio-loongson-64bit.c
+++ b/drivers/gpio/gpio-loongson-64bit.c
@@ -408,11 +408,11 @@ static const struct loongson_gpio_chip_data loongson_gpio_ls2k2000_data0 = {
static const struct loongson_gpio_chip_data loongson_gpio_ls2k2000_data1 = {
.label = "ls2k2000_gpio",
- .mode = BIT_CTRL_MODE,
- .conf_offset = 0x0,
- .in_offset = 0x20,
- .out_offset = 0x10,
- .inten_offset = 0x30,
+ .mode = BYTE_CTRL_MODE,
+ .conf_offset = 0x800,
+ .in_offset = 0xa00,
+ .out_offset = 0x900,
+ .inten_offset = 0xb00,
};
static const struct loongson_gpio_chip_data loongson_gpio_ls2k2000_data2 = {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x ee5a977b4e771cc181f39d504426dbd31ed701cc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122959-unawake-devious-8898@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
Date: Sat, 1 Nov 2025 19:04:28 +0300
Subject: [PATCH] ext4: fix string copying in parse_apply_sb_mount_options()
strscpy_pad() can't be used to copy a non-NUL-term string into a NUL-term
string of possibly bigger size. Commit 0efc5990bca5 ("string.h: Introduce
memtostr() and memtostr_pad()") provides additional information in that
regard. So if this happens, the following warning is observed:
strnlen: detected buffer overflow: 65 byte read of buffer size 64
WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Modules linked in:
CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Call Trace:
<TASK>
__fortify_panic+0x1f/0x30 lib/string_helpers.c:1039
strnlen include/linux/fortify-string.h:235 [inline]
sized_strscpy include/linux/fortify-string.h:309 [inline]
parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline]
__ext4_fill_super fs/ext4/super.c:5261 [inline]
ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706
get_tree_bdev_flags+0x387/0x620 fs/super.c:1636
vfs_get_tree+0x93/0x380 fs/super.c:1814
do_new_mount fs/namespace.c:3553 [inline]
path_mount+0x6ae/0x1f70 fs/namespace.c:3880
do_mount fs/namespace.c:3893 [inline]
__do_sys_mount fs/namespace.c:4103 [inline]
__se_sys_mount fs/namespace.c:4080 [inline]
__x64_sys_mount+0x280/0x300 fs/namespace.c:4080
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Since userspace is expected to provide s_mount_opts field to be at most 63
characters long with the ending byte being NUL-term, use a 64-byte buffer
which matches the size of s_mount_opts, so that strscpy_pad() does its job
properly. Return with error if the user still managed to provide a
non-NUL-term string here.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251101160430.222297-1-pchelkin(a)ispras.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7de15249e826..d1ba894c0e0a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2476,7 +2476,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char s_mount_opts[65];
+ char s_mount_opts[64];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2484,7 +2484,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 6abfe107894af7e8ce3a2e120c619d81ee764ad5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122936-jaunt-sliding-0a13@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6abfe107894af7e8ce3a2e120c619d81ee764ad5 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10(a)huawei.com>
Date: Mon, 3 Nov 2025 09:01:23 +0800
Subject: [PATCH] jbd2: fix the inconsistency between checksum and data in
memory for journal sb
Copying the file system while it is mounted as read-only results in
a mount failure:
[~]# mkfs.ext4 -F /dev/sdc
[~]# mount /dev/sdc -o ro /mnt/test
[~]# dd if=/dev/sdc of=/dev/sda bs=1M
[~]# mount /dev/sda /mnt/test1
[ 1094.849826] JBD2: journal checksum error
[ 1094.850927] EXT4-fs (sda): Could not load journal inode
mount: mount /dev/sda on /mnt/test1 failed: Bad message
The process described above is just an abstracted way I came up with to
reproduce the issue. In the actual scenario, the file system was mounted
read-only and then copied while it was still mounted. It was found that
the mount operation failed. The user intended to verify the data or use
it as a backup, and this action was performed during a version upgrade.
Above issue may happen as follows:
ext4_fill_super
set_journal_csum_feature_set(sb)
if (ext4_has_metadata_csum(sb))
incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
if (test_opt(sb, JOURNAL_CHECKSUM)
jbd2_journal_set_features(sbi->s_journal, compat, 0, incompat);
lock_buffer(journal->j_sb_buffer);
sb->s_feature_incompat |= cpu_to_be32(incompat);
//The data in the journal sb was modified, but the checksum was not
updated, so the data remaining in memory has a mismatch between the
data and the checksum.
unlock_buffer(journal->j_sb_buffer);
In this case, the journal sb copied over is in a state where the checksum
and data are inconsistent, so mounting fails.
To solve the above issue, update the checksum in memory after modifying
the journal sb.
Fixes: 4fd5ea43bc11 ("jbd2: checksum journal superblock")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251103010123.3753631-1-yebin(a)huaweicloud.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 2fe1786a8f1b..c973162d5b31 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2354,6 +2354,12 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
+ /*
+ * Update the checksum now so that it is valid even for read-only
+ * filesystems where jbd2_write_superblock() doesn't get called.
+ */
+ if (jbd2_journal_has_csum_v2or3(journal))
+ sb->s_checksum = jbd2_superblock_csum(sb);
unlock_buffer(journal->j_sb_buffer);
jbd2_journal_init_transaction_limits(journal);
@@ -2383,9 +2389,17 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
sb = journal->j_superblock;
+ lock_buffer(journal->j_sb_buffer);
sb->s_feature_compat &= ~cpu_to_be32(compat);
sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
sb->s_feature_incompat &= ~cpu_to_be32(incompat);
+ /*
+ * Update the checksum now so that it is valid even for read-only
+ * filesystems where jbd2_write_superblock() doesn't get called.
+ */
+ if (jbd2_journal_has_csum_v2or3(journal))
+ sb->s_checksum = jbd2_superblock_csum(sb);
+ unlock_buffer(journal->j_sb_buffer);
jbd2_journal_init_transaction_limits(journal);
}
EXPORT_SYMBOL(jbd2_journal_clear_features);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6abfe107894af7e8ce3a2e120c619d81ee764ad5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122935-turban-sweep-c818@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6abfe107894af7e8ce3a2e120c619d81ee764ad5 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10(a)huawei.com>
Date: Mon, 3 Nov 2025 09:01:23 +0800
Subject: [PATCH] jbd2: fix the inconsistency between checksum and data in
memory for journal sb
Copying the file system while it is mounted as read-only results in
a mount failure:
[~]# mkfs.ext4 -F /dev/sdc
[~]# mount /dev/sdc -o ro /mnt/test
[~]# dd if=/dev/sdc of=/dev/sda bs=1M
[~]# mount /dev/sda /mnt/test1
[ 1094.849826] JBD2: journal checksum error
[ 1094.850927] EXT4-fs (sda): Could not load journal inode
mount: mount /dev/sda on /mnt/test1 failed: Bad message
The process described above is just an abstracted way I came up with to
reproduce the issue. In the actual scenario, the file system was mounted
read-only and then copied while it was still mounted. It was found that
the mount operation failed. The user intended to verify the data or use
it as a backup, and this action was performed during a version upgrade.
Above issue may happen as follows:
ext4_fill_super
set_journal_csum_feature_set(sb)
if (ext4_has_metadata_csum(sb))
incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
if (test_opt(sb, JOURNAL_CHECKSUM)
jbd2_journal_set_features(sbi->s_journal, compat, 0, incompat);
lock_buffer(journal->j_sb_buffer);
sb->s_feature_incompat |= cpu_to_be32(incompat);
//The data in the journal sb was modified, but the checksum was not
updated, so the data remaining in memory has a mismatch between the
data and the checksum.
unlock_buffer(journal->j_sb_buffer);
In this case, the journal sb copied over is in a state where the checksum
and data are inconsistent, so mounting fails.
To solve the above issue, update the checksum in memory after modifying
the journal sb.
Fixes: 4fd5ea43bc11 ("jbd2: checksum journal superblock")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251103010123.3753631-1-yebin(a)huaweicloud.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 2fe1786a8f1b..c973162d5b31 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2354,6 +2354,12 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
+ /*
+ * Update the checksum now so that it is valid even for read-only
+ * filesystems where jbd2_write_superblock() doesn't get called.
+ */
+ if (jbd2_journal_has_csum_v2or3(journal))
+ sb->s_checksum = jbd2_superblock_csum(sb);
unlock_buffer(journal->j_sb_buffer);
jbd2_journal_init_transaction_limits(journal);
@@ -2383,9 +2389,17 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
sb = journal->j_superblock;
+ lock_buffer(journal->j_sb_buffer);
sb->s_feature_compat &= ~cpu_to_be32(compat);
sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
sb->s_feature_incompat &= ~cpu_to_be32(incompat);
+ /*
+ * Update the checksum now so that it is valid even for read-only
+ * filesystems where jbd2_write_superblock() doesn't get called.
+ */
+ if (jbd2_journal_has_csum_v2or3(journal))
+ sb->s_checksum = jbd2_superblock_csum(sb);
+ unlock_buffer(journal->j_sb_buffer);
jbd2_journal_init_transaction_limits(journal);
}
EXPORT_SYMBOL(jbd2_journal_clear_features);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x ee5a977b4e771cc181f39d504426dbd31ed701cc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122957-crevice-busily-7e0e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
Date: Sat, 1 Nov 2025 19:04:28 +0300
Subject: [PATCH] ext4: fix string copying in parse_apply_sb_mount_options()
strscpy_pad() can't be used to copy a non-NUL-term string into a NUL-term
string of possibly bigger size. Commit 0efc5990bca5 ("string.h: Introduce
memtostr() and memtostr_pad()") provides additional information in that
regard. So if this happens, the following warning is observed:
strnlen: detected buffer overflow: 65 byte read of buffer size 64
WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Modules linked in:
CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Call Trace:
<TASK>
__fortify_panic+0x1f/0x30 lib/string_helpers.c:1039
strnlen include/linux/fortify-string.h:235 [inline]
sized_strscpy include/linux/fortify-string.h:309 [inline]
parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline]
__ext4_fill_super fs/ext4/super.c:5261 [inline]
ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706
get_tree_bdev_flags+0x387/0x620 fs/super.c:1636
vfs_get_tree+0x93/0x380 fs/super.c:1814
do_new_mount fs/namespace.c:3553 [inline]
path_mount+0x6ae/0x1f70 fs/namespace.c:3880
do_mount fs/namespace.c:3893 [inline]
__do_sys_mount fs/namespace.c:4103 [inline]
__se_sys_mount fs/namespace.c:4080 [inline]
__x64_sys_mount+0x280/0x300 fs/namespace.c:4080
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Since userspace is expected to provide s_mount_opts field to be at most 63
characters long with the ending byte being NUL-term, use a 64-byte buffer
which matches the size of s_mount_opts, so that strscpy_pad() does its job
properly. Return with error if the user still managed to provide a
non-NUL-term string here.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251101160430.222297-1-pchelkin(a)ispras.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7de15249e826..d1ba894c0e0a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2476,7 +2476,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char s_mount_opts[65];
+ char s_mount_opts[64];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2484,7 +2484,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x ee5a977b4e771cc181f39d504426dbd31ed701cc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122956-dropkick-outlast-5a49@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
Date: Sat, 1 Nov 2025 19:04:28 +0300
Subject: [PATCH] ext4: fix string copying in parse_apply_sb_mount_options()
strscpy_pad() can't be used to copy a non-NUL-term string into a NUL-term
string of possibly bigger size. Commit 0efc5990bca5 ("string.h: Introduce
memtostr() and memtostr_pad()") provides additional information in that
regard. So if this happens, the following warning is observed:
strnlen: detected buffer overflow: 65 byte read of buffer size 64
WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Modules linked in:
CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Call Trace:
<TASK>
__fortify_panic+0x1f/0x30 lib/string_helpers.c:1039
strnlen include/linux/fortify-string.h:235 [inline]
sized_strscpy include/linux/fortify-string.h:309 [inline]
parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline]
__ext4_fill_super fs/ext4/super.c:5261 [inline]
ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706
get_tree_bdev_flags+0x387/0x620 fs/super.c:1636
vfs_get_tree+0x93/0x380 fs/super.c:1814
do_new_mount fs/namespace.c:3553 [inline]
path_mount+0x6ae/0x1f70 fs/namespace.c:3880
do_mount fs/namespace.c:3893 [inline]
__do_sys_mount fs/namespace.c:4103 [inline]
__se_sys_mount fs/namespace.c:4080 [inline]
__x64_sys_mount+0x280/0x300 fs/namespace.c:4080
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Since userspace is expected to provide s_mount_opts field to be at most 63
characters long with the ending byte being NUL-term, use a 64-byte buffer
which matches the size of s_mount_opts, so that strscpy_pad() does its job
properly. Return with error if the user still managed to provide a
non-NUL-term string here.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251101160430.222297-1-pchelkin(a)ispras.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7de15249e826..d1ba894c0e0a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2476,7 +2476,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char s_mount_opts[65];
+ char s_mount_opts[64];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2484,7 +2484,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6abfe107894af7e8ce3a2e120c619d81ee764ad5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122934-exclude-sevenfold-d418@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6abfe107894af7e8ce3a2e120c619d81ee764ad5 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10(a)huawei.com>
Date: Mon, 3 Nov 2025 09:01:23 +0800
Subject: [PATCH] jbd2: fix the inconsistency between checksum and data in
memory for journal sb
Copying the file system while it is mounted as read-only results in
a mount failure:
[~]# mkfs.ext4 -F /dev/sdc
[~]# mount /dev/sdc -o ro /mnt/test
[~]# dd if=/dev/sdc of=/dev/sda bs=1M
[~]# mount /dev/sda /mnt/test1
[ 1094.849826] JBD2: journal checksum error
[ 1094.850927] EXT4-fs (sda): Could not load journal inode
mount: mount /dev/sda on /mnt/test1 failed: Bad message
The process described above is just an abstracted way I came up with to
reproduce the issue. In the actual scenario, the file system was mounted
read-only and then copied while it was still mounted. It was found that
the mount operation failed. The user intended to verify the data or use
it as a backup, and this action was performed during a version upgrade.
Above issue may happen as follows:
ext4_fill_super
set_journal_csum_feature_set(sb)
if (ext4_has_metadata_csum(sb))
incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
if (test_opt(sb, JOURNAL_CHECKSUM)
jbd2_journal_set_features(sbi->s_journal, compat, 0, incompat);
lock_buffer(journal->j_sb_buffer);
sb->s_feature_incompat |= cpu_to_be32(incompat);
//The data in the journal sb was modified, but the checksum was not
updated, so the data remaining in memory has a mismatch between the
data and the checksum.
unlock_buffer(journal->j_sb_buffer);
In this case, the journal sb copied over is in a state where the checksum
and data are inconsistent, so mounting fails.
To solve the above issue, update the checksum in memory after modifying
the journal sb.
Fixes: 4fd5ea43bc11 ("jbd2: checksum journal superblock")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251103010123.3753631-1-yebin(a)huaweicloud.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 2fe1786a8f1b..c973162d5b31 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2354,6 +2354,12 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
+ /*
+ * Update the checksum now so that it is valid even for read-only
+ * filesystems where jbd2_write_superblock() doesn't get called.
+ */
+ if (jbd2_journal_has_csum_v2or3(journal))
+ sb->s_checksum = jbd2_superblock_csum(sb);
unlock_buffer(journal->j_sb_buffer);
jbd2_journal_init_transaction_limits(journal);
@@ -2383,9 +2389,17 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
sb = journal->j_superblock;
+ lock_buffer(journal->j_sb_buffer);
sb->s_feature_compat &= ~cpu_to_be32(compat);
sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
sb->s_feature_incompat &= ~cpu_to_be32(incompat);
+ /*
+ * Update the checksum now so that it is valid even for read-only
+ * filesystems where jbd2_write_superblock() doesn't get called.
+ */
+ if (jbd2_journal_has_csum_v2or3(journal))
+ sb->s_checksum = jbd2_superblock_csum(sb);
+ unlock_buffer(journal->j_sb_buffer);
jbd2_journal_init_transaction_limits(journal);
}
EXPORT_SYMBOL(jbd2_journal_clear_features);