When dwc3_gadget_soft_disconnect() fails, dwc3_suspend_common() keeps
going with the suspend, resulting in a period where the power domain is
off, but the gadget driver remains connected. Within this time frame,
invoking vbus_event_work() will cause an error as it attempts to access
DWC3 registers for endpoint disabling after the power domain has been
completely shut down.
Abort the suspend sequence when dwc3_gadget_suspend() cannot halt the
controller and proceeds with a soft connect.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
CC: stable(a)vger.kernel.org
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
---
Kernel panic - not syncing: Asynchronous SError Interrupt
Workqueue: events vbus_event_work
Call trace:
dump_backtrace+0xf4/0x118
show_stack+0x18/0x24
dump_stack_lvl+0x60/0x7c
dump_stack+0x18/0x3c
panic+0x16c/0x390
nmi_panic+0xa4/0xa8
arm64_serror_panic+0x6c/0x94
do_serror+0xc4/0xd0
el1h_64_error_handler+0x34/0x48
el1h_64_error+0x68/0x6c
readl+0x4c/0x8c
__dwc3_gadget_ep_disable+0x48/0x230
dwc3_gadget_ep_disable+0x50/0xc0
usb_ep_disable+0x44/0xe4
ffs_func_eps_disable+0x64/0xc8
ffs_func_set_alt+0x74/0x368
ffs_func_disable+0x18/0x28
composite_disconnect+0x90/0xec
configfs_composite_disconnect+0x64/0x88
usb_gadget_disconnect_locked+0xc0/0x168
vbus_event_work+0x3c/0x58
process_one_work+0x1e4/0x43c
worker_thread+0x25c/0x430
kthread+0x104/0x1d4
ret_from_fork+0x10/0x20
---
Changelog:
v5:
- add the Acked-by tag
v4:
- correct the mistake where semicolon was forgotten
- return -EAGAIN upon dwc3_gadget_suspend() failure
v3:
- change the Fixes tag
v2:
- move declarations in separate lines
- add the Fixes tag
---
drivers/usb/dwc3/core.c | 9 +++++++--
drivers/usb/dwc3/gadget.c | 22 +++++++++-------------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 66a08b527165..f36bc933c55b 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -2388,6 +2388,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
u32 reg;
int i;
+ int ret;
if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) {
dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) &
@@ -2406,7 +2407,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
case DWC3_GCTL_PRTCAP_DEVICE:
if (pm_runtime_suspended(dwc->dev))
break;
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
dwc3_core_exit(dwc);
break;
@@ -2441,7 +2444,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
break;
if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 321361288935..b6b63b530148 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4821,26 +4821,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
int ret;
ret = dwc3_gadget_soft_disconnect(dwc);
- if (ret)
- goto err;
-
- spin_lock_irqsave(&dwc->lock, flags);
- if (dwc->gadget_driver)
- dwc3_disconnect_gadget(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
-
- return 0;
-
-err:
/*
* Attempt to reset the controller's state. Likely no
* communication can be established until the host
* performs a port reset.
*/
- if (dwc->softconnect)
+ if (ret && dwc->softconnect) {
dwc3_gadget_soft_connect(dwc);
+ return -EAGAIN;
+ }
- return ret;
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
--
2.49.0.1164.gab81da1b16-goog
It's possible for the folio to either get marked for writeback or
redirtied. Add a helper, filemap_end_dropbehind(), which guards the
folio_unmap_invalidate() call behind check for the folio being both
non-dirty and not under writeback AFTER the folio lock has been
acquired. Use this helper folio_end_dropbehind_write().
Cc: stable(a)vger.kernel.org
Reported-by: Al Viro <viro(a)zeniv.linux.org.uk>
Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes")
Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
---
mm/filemap.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 7b90cbeb4a1a..008a55290f34 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct folio *folio)
}
EXPORT_SYMBOL(folio_wait_private_2_killable);
+static void filemap_end_dropbehind(struct folio *folio)
+{
+ struct address_space *mapping = folio->mapping;
+
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+
+ if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
+ folio_unmap_invalidate(mapping, folio, 0);
+}
+
/*
* If folio was marked as dropbehind, then pages should be dropped when writeback
* completes. Do that now. If we fail, it's likely because of a big folio -
@@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(struct folio *folio)
* invalidation in that case.
*/
if (in_task() && folio_trylock(folio)) {
- if (folio->mapping)
- folio_unmap_invalidate(folio->mapping, folio, 0);
+ filemap_end_dropbehind(folio);
folio_unlock(folio);
}
}
--
2.49.0
In all the MUX value for LED1 GPIO46 there is a Copy-Paste error where
the MUX value is set to LED0_MODE_MASK instead of LED1_MODE_MASK.
This wasn't notice as there were no board that made use of the
secondary PHY LED but looking at the internal Documentation the actual
value should be LED1_MODE_MASK similar to the other GPIO entry.
Fix the wrong value to apply the correct MUX configuration.
Cc: stable(a)vger.kernel.org
Fixes: 1c8ace2d0725 ("pinctrl: airoha: Add support for EN7581 SoC")
Signed-off-by: Christian Marangi <ansuelsmth(a)gmail.com>
---
drivers/pinctrl/mediatek/pinctrl-airoha.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c
index b97b28ebb37a..8ef7f88477aa 100644
--- a/drivers/pinctrl/mediatek/pinctrl-airoha.c
+++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c
@@ -1752,8 +1752,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = {
.regmap[0] = {
AIROHA_FUNC_MUX,
REG_GPIO_2ND_I2C_MODE,
- GPIO_LAN3_LED0_MODE_MASK,
- GPIO_LAN3_LED0_MODE_MASK
+ GPIO_LAN3_LED1_MODE_MASK,
+ GPIO_LAN3_LED1_MODE_MASK
},
.regmap[1] = {
AIROHA_FUNC_MUX,
@@ -1816,8 +1816,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = {
.regmap[0] = {
AIROHA_FUNC_MUX,
REG_GPIO_2ND_I2C_MODE,
- GPIO_LAN3_LED0_MODE_MASK,
- GPIO_LAN3_LED0_MODE_MASK
+ GPIO_LAN3_LED1_MODE_MASK,
+ GPIO_LAN3_LED1_MODE_MASK
},
.regmap[1] = {
AIROHA_FUNC_MUX,
@@ -1880,8 +1880,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = {
.regmap[0] = {
AIROHA_FUNC_MUX,
REG_GPIO_2ND_I2C_MODE,
- GPIO_LAN3_LED0_MODE_MASK,
- GPIO_LAN3_LED0_MODE_MASK
+ GPIO_LAN3_LED1_MODE_MASK,
+ GPIO_LAN3_LED1_MODE_MASK
},
.regmap[1] = {
AIROHA_FUNC_MUX,
@@ -1944,8 +1944,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = {
.regmap[0] = {
AIROHA_FUNC_MUX,
REG_GPIO_2ND_I2C_MODE,
- GPIO_LAN3_LED0_MODE_MASK,
- GPIO_LAN3_LED0_MODE_MASK
+ GPIO_LAN3_LED1_MODE_MASK,
+ GPIO_LAN3_LED1_MODE_MASK
},
.regmap[1] = {
AIROHA_FUNC_MUX,
--
2.48.1
Commit e77aff5528a18 ("binderfs: fix use-after-free in binder_devices")
addressed a use-after-free where devices could be released without first
being removed from the binder_devices list. However, there is a similar
path in binder_free_proc() that was missed:
==================================================================
BUG: KASAN: slab-use-after-free in binder_remove_device+0xd4/0x100
Write of size 8 at addr ffff0000c773b900 by task umount/467
CPU: 12 UID: 0 PID: 467 Comm: umount Not tainted 6.15.0-rc7-00138-g57483a362741 #9 PREEMPT
Hardware name: linux,dummy-virt (DT)
Call trace:
binder_remove_device+0xd4/0x100
binderfs_evict_inode+0x230/0x2f0
evict+0x25c/0x5dc
iput+0x304/0x480
dentry_unlink_inode+0x208/0x46c
__dentry_kill+0x154/0x530
[...]
Allocated by task 463:
__kmalloc_cache_noprof+0x13c/0x324
binderfs_binder_device_create.isra.0+0x138/0xa60
binder_ctl_ioctl+0x1ac/0x230
[...]
Freed by task 215:
kfree+0x184/0x31c
binder_proc_dec_tmpref+0x33c/0x4ac
binder_deferred_func+0xc10/0x1108
process_one_work+0x520/0xba4
[...]
==================================================================
Call binder_remove_device() within binder_free_proc() to ensure the
device is removed from the binder_devices list before being kfreed.
Cc: stable(a)vger.kernel.org
Fixes: 12d909cac1e1 ("binderfs: add new binder devices to binder_devices")
Reported-by: syzbot+4af454407ec393de51d6(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=4af454407ec393de51d6
Tested-by: syzbot+4af454407ec393de51d6(a)syzkaller.appspotmail.com
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
---
drivers/android/binder.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 682bbe4ad550..c463ca4a8fff 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5241,6 +5241,7 @@ static void binder_free_proc(struct binder_proc *proc)
__func__, proc->outstanding_txns);
device = container_of(proc->context, struct binder_device, context);
if (refcount_dec_and_test(&device->ref)) {
+ binder_remove_device(device);
kfree(proc->context->name);
kfree(device);
}
--
2.49.0.1151.ga128411c76-goog
The patch titled
Subject: fs/dax: fix "don't skip locked entries when scanning entries"
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fs-dax-fix-dont-skip-locked-entries-when-scanning-entries.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Alistair Popple <apopple(a)nvidia.com>
Subject: fs/dax: fix "don't skip locked entries when scanning entries"
Date: Fri, 23 May 2025 14:37:49 +1000
Commit 6be3e21d25ca ("fs/dax: don't skip locked entries when scanning
entries") introduced a new function, wait_entry_unlocked_exclusive(),
which waits for the current entry to become unlocked without advancing the
XArray iterator state.
Waiting for the entry to become unlocked requires dropping the XArray
lock. This requires calling xas_pause() prior to dropping the lock which
leaves the xas in a suitable state for the next iteration. However this
has the side-effect of advancing the xas state to the next index.
Normally this isn't an issue because xas_for_each() contains code to
detect this state and thus avoid advancing the index a second time on the
next loop iteration.
However both callers of and wait_entry_unlocked_exclusive() itself
subsequently use the xas state to reload the entry. As xas_pause()
updated the state to the next index this will cause the current entry
which is being waited on to be skipped. This caused the following warning
to fire intermittently when running xftest generic/068 on an XFS
filesystem with FS DAX enabled:
[ 35.067397] ------------[ cut here ]------------
[ 35.068229] WARNING: CPU: 21 PID: 1640 at mm/truncate.c:89 truncate_folio_batch_exceptionals+0xd8/0x1e0
[ 35.069717] Modules linked in: nd_pmem dax_pmem nd_btt nd_e820 libnvdimm
[ 35.071006] CPU: 21 UID: 0 PID: 1640 Comm: fstest Not tainted 6.15.0-rc7+ #77 PREEMPT(voluntary)
[ 35.072613] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/204
[ 35.074845] RIP: 0010:truncate_folio_batch_exceptionals+0xd8/0x1e0
[ 35.075962] Code: a1 00 00 00 f6 47 0d 20 0f 84 97 00 00 00 4c 63 e8 41 39 c4 7f 0b eb 61 49 83 c5 01 45 39 ec 7e 58 42 f68
[ 35.079522] RSP: 0018:ffffb04e426c7850 EFLAGS: 00010202
[ 35.080359] RAX: 0000000000000000 RBX: ffff9d21e3481908 RCX: ffffb04e426c77f4
[ 35.081477] RDX: ffffb04e426c79e8 RSI: ffffb04e426c79e0 RDI: ffff9d21e34816e8
[ 35.082590] RBP: ffffb04e426c79e0 R08: 0000000000000001 R09: 0000000000000003
[ 35.083733] R10: 0000000000000000 R11: 822b53c0f7a49868 R12: 000000000000001f
[ 35.084850] R13: 0000000000000000 R14: ffffb04e426c78e8 R15: fffffffffffffffe
[ 35.085953] FS: 00007f9134c87740(0000) GS:ffff9d22abba0000(0000) knlGS:0000000000000000
[ 35.087346] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 35.088244] CR2: 00007f9134c86000 CR3: 000000040afff000 CR4: 00000000000006f0
[ 35.089354] Call Trace:
[ 35.089749] <TASK>
[ 35.090168] truncate_inode_pages_range+0xfc/0x4d0
[ 35.091078] truncate_pagecache+0x47/0x60
[ 35.091735] xfs_setattr_size+0xc7/0x3e0
[ 35.092648] xfs_vn_setattr+0x1ea/0x270
[ 35.093437] notify_change+0x1f4/0x510
[ 35.094219] ? do_truncate+0x97/0xe0
[ 35.094879] do_truncate+0x97/0xe0
[ 35.095640] path_openat+0xabd/0xca0
[ 35.096278] do_filp_open+0xd7/0x190
[ 35.096860] do_sys_openat2+0x8a/0xe0
[ 35.097459] __x64_sys_openat+0x6d/0xa0
[ 35.098076] do_syscall_64+0xbb/0x1d0
[ 35.098647] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 35.099444] RIP: 0033:0x7f9134d81fc1
[ 35.100033] Code: 75 57 89 f0 25 00 00 41 00 3d 00 00 41 00 74 49 80 3d 2a 26 0e 00 00 74 6d 89 da 48 89 ee bf 9c ff ff ff5
[ 35.102993] RSP: 002b:00007ffcd41e0d10 EFLAGS: 00000202 ORIG_RAX: 0000000000000101
[ 35.104263] RAX: ffffffffffffffda RBX: 0000000000000242 RCX: 00007f9134d81fc1
[ 35.105452] RDX: 0000000000000242 RSI: 00007ffcd41e1200 RDI: 00000000ffffff9c
[ 35.106663] RBP: 00007ffcd41e1200 R08: 0000000000000000 R09: 0000000000000064
[ 35.107923] R10: 00000000000001a4 R11: 0000000000000202 R12: 0000000000000066
[ 35.109112] R13: 0000000000100000 R14: 0000000000100000 R15: 0000000000000400
[ 35.110357] </TASK>
[ 35.110769] irq event stamp: 8415587
[ 35.111486] hardirqs last enabled at (8415599): [<ffffffff8d74b562>] __up_console_sem+0x52/0x60
[ 35.113067] hardirqs last disabled at (8415610): [<ffffffff8d74b547>] __up_console_sem+0x37/0x60
[ 35.114575] softirqs last enabled at (8415300): [<ffffffff8d6ac625>] handle_softirqs+0x315/0x3f0
[ 35.115933] softirqs last disabled at (8415291): [<ffffffff8d6ac811>] __irq_exit_rcu+0xa1/0xc0
[ 35.117316] ---[ end trace 0000000000000000 ]---
Fix this by using xas_reset() instead, which is equivalent in
implementation to xas_pause() but does not advance the XArray state.
Link: https://lkml.kernel.org/r/20250523043749.1460780-1-apopple@nvidia.com
Fixes: 6be3e21d25ca ("fs/dax: don't skip locked entries when scanning entries")
Signed-off-by: Alistair Popple <apopple(a)nvidia.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: Alison Schofield <alison.schofield(a)intel.com>
Cc: Matthew Wilcow (Oracle) <willy(a)infradead.org>
Cc: Balbir Singh <balbirs(a)nvidia.com>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Ted Ts'o <tytso(a)mit.edu>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/dax.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/dax.c~fs-dax-fix-dont-skip-locked-entries-when-scanning-entries
+++ a/fs/dax.c
@@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusi
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
- xas_pause(xas);
+ xas_reset(xas);
xas_unlock_irq(xas);
schedule();
finish_wait(wq, &ewait.wait);
_
Patches currently in -mm which might be from apopple(a)nvidia.com are
fs-dax-fix-dont-skip-locked-entries-when-scanning-entries.patch
The patch titled
Subject: mm/khugepaged: fix race with folio split/free using temporary reference
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-khugepaged-fix-race-with-folio-split-free-using-temporary-reference.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Shivank Garg <shivankg(a)amd.com>
Subject: mm/khugepaged: fix race with folio split/free using temporary reference
Date: Mon, 26 May 2025 18:28:18 +0000
hpage_collapse_scan_file() calls is_refcount_suitable(), which in turn
calls folio_mapcount(). folio_mapcount() checks folio_test_large() before
proceeding to folio_large_mapcount(), but there is a race window where the
folio may get split/freed between these checks, triggering:
VM_WARN_ON_FOLIO(!folio_test_large(folio), folio)
Take a temporary reference to the folio in hpage_collapse_scan_file().
This stabilizes the folio during refcount check and prevents incorrect
large folio detection due to concurrent split/free. Use helper
folio_expected_ref_count() + 1 to compare with folio_ref_count() instead
of using is_refcount_suitable().
Link: https://lkml.kernel.org/r/20250526182818.37978-1-shivankg@amd.com
Fixes: 05c5323b2a34 ("mm: track mapcount of large folios in single value")
Signed-off-by: Shivank Garg <shivankg(a)amd.com>
Reported-by: syzbot+2b99589e33edbe9475ca(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6828470d.a70a0220.38f255.000c.GAE@google.com
Suggested-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Dev Jain <dev.jain(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Bharata B Rao <bharata(a)amd.com>
Cc: Fengwei Yin <fengwei.yin(a)intel.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mariano Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
--- a/mm/khugepaged.c~mm-khugepaged-fix-race-with-folio-split-free-using-temporary-reference
+++ a/mm/khugepaged.c
@@ -2295,6 +2295,17 @@ static int hpage_collapse_scan_file(stru
continue;
}
+ if (!folio_try_get(folio)) {
+ xas_reset(&xas);
+ continue;
+ }
+
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
+ xas_reset(&xas);
+ continue;
+ }
+
if (folio_order(folio) == HPAGE_PMD_ORDER &&
folio->index == start) {
/* Maybe PMD-mapped */
@@ -2305,23 +2316,27 @@ static int hpage_collapse_scan_file(stru
* it's safe to skip LRU and refcount checks before
* returning.
*/
+ folio_put(folio);
break;
}
node = folio_nid(folio);
if (hpage_collapse_scan_abort(node, cc)) {
result = SCAN_SCAN_ABORT;
+ folio_put(folio);
break;
}
cc->node_load[node]++;
if (!folio_test_lru(folio)) {
result = SCAN_PAGE_LRU;
+ folio_put(folio);
break;
}
- if (!is_refcount_suitable(folio)) {
+ if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) {
result = SCAN_PAGE_COUNT;
+ folio_put(folio);
break;
}
@@ -2333,6 +2348,7 @@ static int hpage_collapse_scan_file(stru
*/
present += folio_nr_pages(folio);
+ folio_put(folio);
if (need_resched()) {
xas_pause(&xas);
_
Patches currently in -mm which might be from shivankg(a)amd.com are
mm-khugepaged-fix-race-with-folio-split-free-using-temporary-reference.patch
mm-khugepaged-clean-up-refcount-check-using-folio_expected_ref_count.patch
blk_mq_freeze_queue() never terminates if one or more bios are on the plug
list and if the block device driver defines a .submit_bio() method.
This is the case for device mapper drivers. The deadlock happens because
blk_mq_freeze_queue() waits for q_usage_counter to drop to zero, because
a queue reference is held by bios on the plug list and because the
__bio_queue_enter() call in __submit_bio() waits for the queue to be
unfrozen.
This patch fixes the following deadlock:
Workqueue: dm-51_zwplugs blk_zone_wplug_bio_work
Call trace:
__schedule+0xb08/0x1160
schedule+0x48/0xc8
__bio_queue_enter+0xcc/0x1d0
__submit_bio+0x100/0x1b0
submit_bio_noacct_nocheck+0x230/0x49c
blk_zone_wplug_bio_work+0x168/0x250
process_one_work+0x26c/0x65c
worker_thread+0x33c/0x498
kthread+0x110/0x134
ret_from_fork+0x10/0x20
Call trace:
__switch_to+0x230/0x410
__schedule+0xb08/0x1160
schedule+0x48/0xc8
blk_mq_freeze_queue_wait+0x78/0xb8
blk_mq_freeze_queue+0x90/0xa4
queue_attr_store+0x7c/0xf0
sysfs_kf_write+0x98/0xc8
kernfs_fop_write_iter+0x12c/0x1d4
vfs_write+0x340/0x3ac
ksys_write+0x78/0xe8
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Damien Le Moal <dlemoal(a)kernel.org>
Cc: Yu Kuai <yukuai1(a)huaweicloud.com>
Cc: Ming Lei <ming.lei(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: dd291d77cc90 ("block: Introduce zone write plugging")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
Changes compared to v1: fixed a race condition. Call bio_zone_write_plugging()
only before submitting the bio and not after it has been submitted.
block/blk-core.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index b862c66018f2..713fb3865260 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -621,6 +621,13 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_OK;
}
+/*
+ * Do not call bio_queue_enter() if the BIO_ZONE_WRITE_PLUGGING flag has been
+ * set because this causes blk_mq_freeze_queue() to deadlock if
+ * blk_zone_wplug_bio_work() submits a bio. Calling bio_queue_enter() for bios
+ * on the plug list is not necessary since a q_usage_counter reference is held
+ * while a bio is on the plug list.
+ */
static void __submit_bio(struct bio *bio)
{
/* If plug is not used, add new plug here to cache nsecs time. */
@@ -633,8 +640,12 @@ static void __submit_bio(struct bio *bio)
if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) {
blk_mq_submit_bio(bio);
- } else if (likely(bio_queue_enter(bio) == 0)) {
+ } else {
struct gendisk *disk = bio->bi_bdev->bd_disk;
+ bool zwp = bio_zone_write_plugging(bio);
+
+ if (unlikely(!zwp && bio_queue_enter(bio) != 0))
+ goto finish_plug;
if ((bio->bi_opf & REQ_POLLED) &&
!(disk->queue->limits.features & BLK_FEAT_POLL)) {
@@ -643,9 +654,12 @@ static void __submit_bio(struct bio *bio)
} else {
disk->fops->submit_bio(bio);
}
- blk_queue_exit(disk->queue);
+
+ if (!zwp)
+ blk_queue_exit(disk->queue);
}
+finish_plug:
blk_finish_plug(&plug);
}
Customer is reporting a really subtle issue where we get random DMAR
faults, hangs and other nasties for kernel migration jobs when stressing
stuff like s2idle/s3/s4. The explosions seems to happen somewhere
after resuming the system with splats looking something like:
PM: suspend exit
rfkill: input handler disabled
xe 0000:00:02.0: [drm] GT0: Engine reset: engine_class=bcs, logical_mask: 0x2, guc_id=0
xe 0000:00:02.0: [drm] GT0: Timedout job: seqno=24496, lrc_seqno=24496, guc_id=0, flags=0x13 in no process [-1]
xe 0000:00:02.0: [drm] GT0: Kernel-submitted job timed out
The likely cause appears to be a race between suspend cancelling the
worker that processes the free_job()'s, such that we still have pending
jobs to be freed after the cancel. Following from this, on resume the
pending_list will now contain at least one already complete job, but it
looks like we call drm_sched_resubmit_jobs(), which will then call
run_job() on everything still on the pending_list. But if the job was
already complete, then all the resources tied to the job, like the bb
itself, any memory that is being accessed, the iommu mappings etc. might
be long gone since those are usually tied to the fence signalling.
This scenario can be seen in ftrace when running a slightly modified
xe_pm (kernel was only modified to inject artificial latency into
free_job to make the race easier to hit):
xe_sched_job_run: dev=0000:00:02.0, fence=0xffff888276cc8540, seqno=0, lrc_seqno=0, gt=0, guc_id=0, batch_addr=0x000000146910 ...
xe_exec_queue_stop: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x0, flags=0x13
xe_exec_queue_stop: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=1, guc_state=0x0, flags=0x4
xe_exec_queue_stop: dev=0000:00:02.0, 4:0x1, gt=1, width=1, guc_id=0, guc_state=0x0, flags=0x3
xe_exec_queue_stop: dev=0000:00:02.0, 1:0x1, gt=1, width=1, guc_id=1, guc_state=0x0, flags=0x3
xe_exec_queue_stop: dev=0000:00:02.0, 4:0x1, gt=1, width=1, guc_id=2, guc_state=0x0, flags=0x3
xe_exec_queue_resubmit: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x0, flags=0x13
xe_sched_job_run: dev=0000:00:02.0, fence=0xffff888276cc8540, seqno=0, lrc_seqno=0, gt=0, guc_id=0, batch_addr=0x000000146910 ...
.....
xe_exec_queue_memory_cat_error: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x3, flags=0x13
So the job_run() is clearly triggered twice for the same job, even
though the first must have already signalled to completion during
suspend. We can also see a CAT error after the re-submit.
To prevent this try to call xe_sched_stop() to forcefully remove
anything on the pending_list that has already signalled, before we
re-submit.
v2:
- Make sure to re-arm the fence callbacks with sched_start().
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4856
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: William Tseng <william.tseng(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index c250ea773491..0c8fe0461df9 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -51,7 +51,9 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
{
+ drm_sched_stop(&sched->base, NULL); /* remove completed jobs */
drm_sched_resubmit_jobs(&sched->base);
+ drm_sched_start(&sched->base, 0); /* re-add fence callback for pending jobs */
}
static inline bool
--
2.49.0
Hi Greg and Sasha,
Please find attatched backports for 6.14 and 6.12 (which have -Wextra
enabled by default) to turn off a new warning from -Wextra in GCC 15 and
Clang 21, -Wunterminated-string-initialization, which is fatal when
CONFIG_WERROR is enabled. Please let me know if there are any issues or
questions.
Cheers,
Nathan
From: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
BugLink: https://bugs.launchpad.net/bugs/2111592
commit 8a7d12d674ac ("net: usb: usbnet: fix name regression") assumed
that local addresses always came from the kernel, but some devices hand
out local mac addresses so we ended up with point-to-point devices with
a mac set by the driver, renaming to eth%d when they used to be named
usb%d.
Userspace should not rely on device name, but for the sake of stability
restore the local mac address check portion of the naming exception:
point to point devices which either have no mac set by the driver or
have a local mac handed out by the driver will keep the usb%d name.
(some USB LTE modems are known to hand out a stable mac from the locally
administered range; that mac appears to be random (different for
mulitple devices) and can be reset with device-specific commands, so
while such devices would benefit from getting a OUI reserved, we have
to deal with these and might as well preserve the existing behavior
to avoid breaking fragile openwrt configurations and such on upgrade.)
Link: https://lkml.kernel.org/r/20241203130457.904325-1-asmadeus@codewreck.org
Fixes: 8a7d12d674ac ("net: usb: usbnet: fix name regression")
Cc: stable(a)vger.kernel.org
Tested-by: Ahmed Naseef <naseefkm(a)gmail.com>
Signed-off-by: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
Acked-by: Oliver Neukum <oneukum(a)suse.com>
Link: https://patch.msgid.link/20250326-usbnet_rename-v2-1-57eb21fcff26@atmark-te…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
(cherry picked from commit 2ea396448f26d0d7d66224cb56500a6789c7ed07)
Signed-off-by: Jianlin Lv <iecedge(a)gmail.com>
---
drivers/net/usb/usbnet.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 9f66c47dc58b..08cbc8e4b361 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -178,6 +178,17 @@ int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress)
}
EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr);
+static bool usbnet_needs_usb_name_format(struct usbnet *dev, struct net_device *net)
+{
+ /* Point to point devices which don't have a real MAC address
+ * (or report a fake local one) have historically used the usb%d
+ * naming. Preserve this..
+ */
+ return (dev->driver_info->flags & FLAG_POINTTOPOINT) != 0 &&
+ (is_zero_ether_addr(net->dev_addr) ||
+ is_local_ether_addr(net->dev_addr));
+}
+
static void intr_complete (struct urb *urb)
{
struct usbnet *dev = urb->context;
@@ -1766,13 +1777,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
if (status < 0)
goto out1;
- // heuristic: "usb%d" for links we know are two-host,
- // else "eth%d" when there's reasonable doubt. userspace
- // can rename the link if it knows better.
+ /* heuristic: rename to "eth%d" if we are not sure this link
+ * is two-host (these links keep "usb%d")
+ */
if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
- ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
- /* somebody touched it*/
- !is_zero_ether_addr(net->dev_addr)))
+ !usbnet_needs_usb_name_format(dev, net))
strscpy(net->name, "eth%d", sizeof(net->name));
/* WLAN devices should always be named "wlan%d" */
if ((dev->driver_info->flags & FLAG_WLAN) != 0)
--
2.34.1
在 2025/5/23 07:31, Sasha Levin 写道:
> This is a note to let you know that I've just added the patch titled
>
> btrfs: properly limit inline data extent according to block size
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> btrfs-properly-limit-inline-data-extent-according-to.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this patch from all stable trees.
This is only for a debug feature, 2K block size, and it will never be
exposed to end users (only to allow people without a 64K page sized
system to test subpage routine on x86_64).
Thanks,
Qu
>
>
>
> commit a5afc96d757771c992eb3af4629a562ec52ba1dc
> Author: Qu Wenruo <wqu(a)suse.com>
> Date: Tue Feb 25 14:30:44 2025 +1030
>
> btrfs: properly limit inline data extent according to block size
>
> [ Upstream commit 23019d3e6617a8ec99a8d2f5947aa3dd8a74a1b8 ]
>
> Btrfs utilizes inline data extent for the following cases:
>
> - Regular small files
> - Symlinks
>
> And "btrfs check" detects any file extents that are too large as an
> error.
>
> It's not a problem for 4K block size, but for the incoming smaller
> block sizes (2K), it can cause problems due to bad limits:
>
> - Non-compressed inline data extents
> We do not allow a non-compressed inline data extent to be as large as
> block size.
>
> - Symlinks
> Currently the only real limit on symlinks are 4K, which can be larger
> than 2K block size.
>
> These will result btrfs-check to report too large file extents.
>
> Fix it by adding proper size checks for the above cases.
>
> Signed-off-by: Qu Wenruo <wqu(a)suse.com>
> Reviewed-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 9ce1270addb04..0da2611fb9c85 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -623,6 +623,10 @@ static bool can_cow_file_range_inline(struct btrfs_inode *inode,
> if (size > fs_info->sectorsize)
> return false;
>
> + /* We do not allow a non-compressed extent to be as large as block size. */
> + if (data_len >= fs_info->sectorsize)
> + return false;
> +
> /* We cannot exceed the maximum inline data size. */
> if (data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
> return false;
> @@ -8691,7 +8695,12 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
> struct extent_buffer *leaf;
>
> name_len = strlen(symname);
> - if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
> + /*
> + * Symlinks utilize uncompressed inline extent data, which should not
> + * reach block size.
> + */
> + if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
> + name_len >= fs_info->sectorsize)
> return -ENAMETOOLONG;
>
> inode = new_inode(dir->i_sb);