Hi Stable,
Please provide a quote for your products:
Include:
1.Pricing (per unit)
2.Delivery cost & timeline
3.Quote expiry date
Deadline: October
Thanks!
Danny Peddinti
PathnSitu Trading
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6df8e84aa6b5b1812cc2cacd6b3f5ccbb18cda2b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102009-dominion-underfeed-6f4b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6df8e84aa6b5b1812cc2cacd6b3f5ccbb18cda2b Mon Sep 17 00:00:00 2001
From: Gui-Dong Han <hanguidong02(a)gmail.com>
Date: Wed, 8 Oct 2025 03:43:27 +0000
Subject: [PATCH] drm/amdgpu: use atomic functions with memory barriers for vm
fault info
The atomic variable vm_fault_info_updated is used to synchronize access to
adev->gmc.vm_fault_info between the interrupt handler and
get_vm_fault_info().
The default atomic functions like atomic_set() and atomic_read() do not
provide memory barriers. This allows for CPU instruction reordering,
meaning the memory accesses to vm_fault_info and the vm_fault_info_updated
flag are not guaranteed to occur in the intended order. This creates a
race condition that can lead to inconsistent or stale data being used.
The previous implementation, which used an explicit mb(), was incomplete
and inefficient. It failed to account for all potential CPU reorderings,
such as the access of vm_fault_info being reordered before the atomic_read
of the flag. This approach is also more verbose and less performant than
using the proper atomic functions with acquire/release semantics.
Fix this by switching to atomic_set_release() and atomic_read_acquire().
These functions provide the necessary acquire and release semantics,
which act as memory barriers to ensure the correct order of operations.
It is also more efficient and idiomatic than using explicit full memory
barriers.
Fixes: b97dfa27ef3a ("drm/amdgpu: save vm fault information for amdkfd")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
Signed-off-by: Felix Kuehling <felix.kuehling(a)amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 83020963dfde..a2ca9acf8c4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2329,10 +2329,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
struct kfd_vm_fault_info *mem)
{
- if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+ if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
*mem = *adev->gmc.vm_fault_info;
- mb(); /* make sure read happened */
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 93d7ccb7d013..0e5e54d0a9a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1068,7 +1068,7 @@ static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
return 0;
}
@@ -1290,7 +1290,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
- && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1306,8 +1306,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index c5e2a2c41e06..e1509480dfc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1183,7 +1183,7 @@ static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
return 0;
}
@@ -1478,7 +1478,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
- && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1494,8 +1494,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
}
return 0;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5801e65206b065b0b2af032f7f1eef222aa2fd83
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102034-voltage-truck-aeff@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5801e65206b065b0b2af032f7f1eef222aa2fd83 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
Date: Wed, 15 Oct 2025 09:40:15 +0100
Subject: [PATCH] drm/sched: Fix potential double free in
drm_sched_job_add_resv_dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When adding dependencies with drm_sched_job_add_dependency(), that
function consumes the fence reference both on success and failure, so in
the latter case the dma_fence_put() on the error path (xarray failed to
expand) is a double free.
Interestingly this bug appears to have been present ever since
commit ebd5f74255b9 ("drm/sched: Add dependency tracking"), since the code
back then looked like this:
drm_sched_job_add_implicit_dependencies():
...
for (i = 0; i < fence_count; i++) {
ret = drm_sched_job_add_dependency(job, fences[i]);
if (ret)
break;
}
for (; i < fence_count; i++)
dma_fence_put(fences[i]);
Which means for the failing 'i' the dma_fence_put was already a double
free. Possibly there were no users at that time, or the test cases were
insufficient to hit it.
The bug was then only noticed and fixed after
commit 9c2ba265352a ("drm/scheduler: use new iterator in drm_sched_job_add_implicit_dependencies v2")
landed, with its fixup of
commit 4eaf02d6076c ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies").
At that point it was a slightly different flavour of a double free, which
commit 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder")
noticed and attempted to fix.
But it only moved the double free from happening inside the
drm_sched_job_add_dependency(), when releasing the reference not yet
obtained, to the caller, when releasing the reference already released by
the former in the failure case.
As such it is not easy to identify the right target for the fixes tag so
lets keep it simple and just continue the chain.
While fixing we also improve the comment and explain the reason for taking
the reference and not dropping it.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
Fixes: 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder")
Reported-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Closes: https://lore.kernel.org/dri-devel/aNFbXq8OeYl3QSdm@stanley.mountain/
Cc: Christian König <christian.koenig(a)amd.com>
Cc: Rob Clark <robdclark(a)chromium.org>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Danilo Krummrich <dakr(a)kernel.org>
Cc: Philipp Stanner <phasta(a)kernel.org>
Cc: Christian König <ckoenig.leichtzumerken(a)gmail.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: stable(a)vger.kernel.org # v5.16+
Signed-off-by: Philipp Stanner <phasta(a)kernel.org>
Link: https://lore.kernel.org/r/20251015084015.6273-1-tvrtko.ursulin@igalia.com
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 46119aacb809..c39f0245e3a9 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -965,13 +965,14 @@ int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
dma_resv_assert_held(resv);
dma_resv_for_each_fence(&cursor, resv, usage, fence) {
- /* Make sure to grab an additional ref on the added fence */
- dma_fence_get(fence);
- ret = drm_sched_job_add_dependency(job, fence);
- if (ret) {
- dma_fence_put(fence);
+ /*
+ * As drm_sched_job_add_dependency always consumes the fence
+ * reference (even when it fails), and dma_resv_for_each_fence
+ * is not obtaining one, we need to grab one before calling.
+ */
+ ret = drm_sched_job_add_dependency(job, dma_fence_get(fence));
+ if (ret)
return ret;
- }
}
return 0;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1d3ad183943b38eec2acf72a0ae98e635dc8456b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025102009-dares-negligent-77e3@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d3ad183943b38eec2acf72a0ae98e635dc8456b Mon Sep 17 00:00:00 2001
From: Deepanshu Kartikey <kartikey406(a)gmail.com>
Date: Tue, 30 Sep 2025 16:58:10 +0530
Subject: [PATCH] ext4: detect invalid INLINE_DATA + EXTENTS flag combination
syzbot reported a BUG_ON in ext4_es_cache_extent() when opening a verity
file on a corrupted ext4 filesystem mounted without a journal.
The issue is that the filesystem has an inode with both the INLINE_DATA
and EXTENTS flags set:
EXT4-fs error (device loop0): ext4_cache_extents:545: inode #15:
comm syz.0.17: corrupted extent tree: lblk 0 < prev 66
Investigation revealed that the inode has both flags set:
DEBUG: inode 15 - flag=1, i_inline_off=164, has_inline=1, extents_flag=1
This is an invalid combination since an inode should have either:
- INLINE_DATA: data stored directly in the inode
- EXTENTS: data stored in extent-mapped blocks
Having both flags causes ext4_has_inline_data() to return true, skipping
extent tree validation in __ext4_iget(). The unvalidated out-of-order
extents then trigger a BUG_ON in ext4_es_cache_extent() due to integer
underflow when calculating hole sizes.
Fix this by detecting this invalid flag combination early in ext4_iget()
and rejecting the corrupted inode.
Cc: stable(a)kernel.org
Reported-and-tested-by: syzbot+038b7bf43423e132b308(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=038b7bf43423e132b308
Suggested-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Deepanshu Kartikey <kartikey406(a)gmail.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Message-ID: <20250930112810.315095-1-kartikey406(a)gmail.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f9e4ac87211e..e99306a8f47c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5319,6 +5319,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
}
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
ext4_set_inode_flags(inode, true);
+ /* Detect invalid flag combination - can't have both inline data and extents */
+ if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ ext4_error_inode(inode, function, line, 0,
+ "inode has both inline data and extents flags");
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (ext4_has_feature_64bit(sb))
Do not block PCI config accesses through pci_cfg_access_lock() when
executing the s390 variant of PCI error recovery: Acquire just
device_lock() instead of pci_dev_lock() as powerpc's EEH and
generig PCI AER processing do.
During error recovery testing a pair of tasks was reported to be hung:
mlx5_core 0000:00:00.1: mlx5_health_try_recover:338:(pid 5553): health recovery flow aborted, PCI reads still not working
INFO: task kmcheck:72 blocked for more than 122 seconds.
Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:kmcheck state:D stack:0 pid:72 tgid:72 ppid:2 flags:0x00000000
Call Trace:
[<000000065256f030>] __schedule+0x2a0/0x590
[<000000065256f356>] schedule+0x36/0xe0
[<000000065256f572>] schedule_preempt_disabled+0x22/0x30
[<0000000652570a94>] __mutex_lock.constprop.0+0x484/0x8a8
[<000003ff800673a4>] mlx5_unload_one+0x34/0x58 [mlx5_core]
[<000003ff8006745c>] mlx5_pci_err_detected+0x94/0x140 [mlx5_core]
[<0000000652556c5a>] zpci_event_attempt_error_recovery+0xf2/0x398
[<0000000651b9184a>] __zpci_event_error+0x23a/0x2c0
INFO: task kworker/u1664:6:1514 blocked for more than 122 seconds.
Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:kworker/u1664:6 state:D stack:0 pid:1514 tgid:1514 ppid:2 flags:0x00000000
Workqueue: mlx5_health0000:00:00.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
Call Trace:
[<000000065256f030>] __schedule+0x2a0/0x590
[<000000065256f356>] schedule+0x36/0xe0
[<0000000652172e28>] pci_wait_cfg+0x80/0xe8
[<0000000652172f94>] pci_cfg_access_lock+0x74/0x88
[<000003ff800916b6>] mlx5_vsc_gw_lock+0x36/0x178 [mlx5_core]
[<000003ff80098824>] mlx5_crdump_collect+0x34/0x1c8 [mlx5_core]
[<000003ff80074b62>] mlx5_fw_fatal_reporter_dump+0x6a/0xe8 [mlx5_core]
[<0000000652512242>] devlink_health_do_dump.part.0+0x82/0x168
[<0000000652513212>] devlink_health_report+0x19a/0x230
[<000003ff80075a12>] mlx5_fw_fatal_reporter_err_work+0xba/0x1b0 [mlx5_core]
No kernel log of the exact same error with an upstream kernel is
available - but the very same deadlock situation can be constructed there,
too:
- task: kmcheck
mlx5_unload_one() tries to acquire devlink lock while the PCI error
recovery code has set pdev->block_cfg_access by way of
pci_cfg_access_lock()
- task: kworker
mlx5_crdump_collect() tries to set block_cfg_access through
pci_cfg_access_lock() while devlink_health_report() had acquired
the devlink lock.
A similar deadlock situation can be reproduced by requesting a
crdump with
> devlink health dump show pci/<BDF> reporter fw_fatal
while PCI error recovery is executed on the same <BDF> physical function
by mlx5_core's pci_error_handlers. On s390 this can be injected with
> zpcictl --reset-fw <BDF>
Tests with this patch failed to reproduce that second deadlock situation,
the devlink command is rejected with "kernel answers: Permission denied" -
and we get a kernel log message of:
mlx5_core 1ed0:00:00.1: mlx5_crdump_collect:50:(pid 254382): crdump: failed to lock vsc gw err -5
because the config read of VSC_SEMAPHORE is rejected by the underlying
hardware.
Two prior attempts to address this issue have been discussed and
ultimately rejected [see link], with the primary argument that s390's
implementation of PCI error recovery is imposing restrictions that
neither powerpc's EEH nor PCI AER handling need. Tests show that PCI
error recovery on s390 is running to completion even without blocking
access to PCI config space.
Link: https://lore.kernel.org/all/20251007144826.2825134-1-gbayer@linux.ibm.com/
Cc: stable(a)vger.kernel.org
Fixes: 4cdf2f4e24ff ("s390/pci: implement minimal PCI error recovery")
Reviewed-by: Niklas Schnelle <schnelle(a)linux.ibm.com>
Signed-off-by: Gerd Bayer <gbayer(a)linux.ibm.com>
---
Hi Niklas, Shay, Jason,
by now I believe fixing this in s390/pci is the right way to go, since
the other PCI error recovery implementations apparently don't require
this strict blocking of accesses to the PCI config space.
Hi Alexander, Vasily, Heiko,
while I sent this to netdev since prior versions were discussed there,
I assume this patch will go through the s390 tree, right?
Thanks,
Gerd
---
Changes in v3:
- Incorporate changes to commit message as suggested by Niklas.
- Link to v2: https://lore.kernel.org/r/20251015-fix_pcirecov_master-v2-1-e07962fe9558@li…
Changes in v2:
- Rebase to upstream master
---
arch/s390/pci/pci_event.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b95376041501f479eee20705d45fb8c68553da71..27db1e72c623f8a289cae457e87f0a9896ed241d 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -188,7 +188,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
* is unbound or probed and that userspace can't access its
* configuration space while we perform recovery.
*/
- pci_dev_lock(pdev);
+ device_lock(&pdev->dev);
if (pdev->error_state == pci_channel_io_perm_failure) {
ers_res = PCI_ERS_RESULT_DISCONNECT;
goto out_unlock;
@@ -257,7 +257,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
driver->err_handler->resume(pdev);
pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
out_unlock:
- pci_dev_unlock(pdev);
+ device_unlock(&pdev->dev);
zpci_report_status(zdev, "recovery", status_str);
return ers_res;
---
base-commit: 9b332cece987ee1790b2ed4c989e28162fa47860
change-id: 20251015-fix_pcirecov_master-55fe3705c6c6
Best regards,
--
Gerd Bayer <gbayer(a)linux.ibm.com>
A few fixes for drm panic, that I found when writing unit tests with kunit.
Jocelyn Falempe (6):
drm/panic: Fix drawing the logo on a small narrow screen
drm/panic: Fix overlap between qr code and logo
drm/panic: Fix qr_code, ensure vmargin is positive
drm/panic: Fix kmsg text drawing rectangle
drm/panic: Fix divide by 0 if the screen width < font width
drm/panic: Fix 24bit pixel crossing page boundaries
drivers/gpu/drm/drm_panic.c | 60 +++++++++++++++++++++++++++++++++----
1 file changed, 54 insertions(+), 6 deletions(-)
base-commit: e4bea919584ff292c9156cf7d641a2ab3cbe27b0
--
2.51.0
In statmount_string(), most flags assign an output offset pointer (offp)
which is later updated with the string offset. However, the
STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP cases directly set the
struct fields instead of using offp. This leaves offp uninitialized,
leading to a possible uninitialized dereference when *offp is updated.
Fix it by assigning offp for UIDMAP and GIDMAP as well, keeping the code
path consistent.
Fixes: 37c4a9590e1e ("statmount: allow to retrieve idmappings")
Cc: stable(a)vger.kernel.org
Signed-off-by: Zhen Ni <zhen.ni(a)easystack.cn>
---
fs/namespace.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..5b5ab2ae238b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5454,11 +5454,11 @@ static int statmount_string(struct kstatmount *s, u64 flag)
ret = statmount_sb_source(s, seq);
break;
case STATMOUNT_MNT_UIDMAP:
- sm->mnt_uidmap = start;
+ offp = &sm->mnt_uidmap;
ret = statmount_mnt_uidmap(s, seq);
break;
case STATMOUNT_MNT_GIDMAP:
- sm->mnt_gidmap = start;
+ offp = &sm->mnt_gidmap;
ret = statmount_mnt_gidmap(s, seq);
break;
default:
--
2.20.1
From: Stefano Garzarella <sgarzare(a)redhat.com>
Syzbot reported a potential lock inversion deadlock between
vsock_register_mutex and sk_lock-AF_VSOCK when vsock_linger() is called.
The issue was introduced by commit 687aa0c5581b ("vsock: Fix
transport_* TOCTOU") which added vsock_register_mutex locking in
vsock_assign_transport() around the transport->release() call, that can
call vsock_linger(). vsock_assign_transport() can be called with sk_lock
held. vsock_linger() calls sk_wait_event() that temporarily releases and
re-acquires sk_lock. During this window, if another thread hold
vsock_register_mutex while trying to acquire sk_lock, a circular
dependency is created.
Fix this by releasing vsock_register_mutex before calling
transport->release() and vsock_deassign_transport(). This is safe
because we don't need to hold vsock_register_mutex while releasing the
old transport, and we ensure the new transport won't disappear by
obtaining a module reference first via try_module_get().
Reported-by: syzbot+10e35716f8e4929681fa(a)syzkaller.appspotmail.com
Tested-by: syzbot+10e35716f8e4929681fa(a)syzkaller.appspotmail.com
Fixes: 687aa0c5581b ("vsock: Fix transport_* TOCTOU")
Cc: mhal(a)rbox.co
Cc: stable(a)vger.kernel.org
Signed-off-by: Stefano Garzarella <sgarzare(a)redhat.com>
---
net/vmw_vsock/af_vsock.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 4c2db6cca557..76763247a377 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -487,12 +487,26 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
goto err;
}
- if (vsk->transport) {
- if (vsk->transport == new_transport) {
- ret = 0;
- goto err;
- }
+ if (vsk->transport && vsk->transport == new_transport) {
+ ret = 0;
+ goto err;
+ }
+ /* We increase the module refcnt to prevent the transport unloading
+ * while there are open sockets assigned to it.
+ */
+ if (!new_transport || !try_module_get(new_transport->module)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ /* It's safe to release the mutex after a successful try_module_get().
+ * Whichever transport `new_transport` points at, it won't go away until
+ * the last module_put() below or in vsock_deassign_transport().
+ */
+ mutex_unlock(&vsock_register_mutex);
+
+ if (vsk->transport) {
/* transport->release() must be called with sock lock acquired.
* This path can only be taken during vsock_connect(), where we
* have already held the sock lock. In the other cases, this
@@ -512,20 +526,6 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->peer_shutdown = 0;
}
- /* We increase the module refcnt to prevent the transport unloading
- * while there are open sockets assigned to it.
- */
- if (!new_transport || !try_module_get(new_transport->module)) {
- ret = -ENODEV;
- goto err;
- }
-
- /* It's safe to release the mutex after a successful try_module_get().
- * Whichever transport `new_transport` points at, it won't go away until
- * the last module_put() below or in vsock_deassign_transport().
- */
- mutex_unlock(&vsock_register_mutex);
-
if (sk->sk_type == SOCK_SEQPACKET) {
if (!new_transport->seqpacket_allow ||
!new_transport->seqpacket_allow(remote_cid)) {
--
2.51.0
[ Upstream commit f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 ]
syzkaller discovered the following crash: (kernel BUG)
[ 44.607039] ------------[ cut here ]------------
[ 44.607422] kernel BUG at mm/userfaultfd.c:2067!
[ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI
[ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none)
[ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460
<snip other registers, drop unreliable trace>
[ 44.617726] Call Trace:
[ 44.617926] <TASK>
[ 44.619284] userfaultfd_release+0xef/0x1b0
[ 44.620976] __fput+0x3f9/0xb60
[ 44.621240] fput_close_sync+0x110/0x210
[ 44.622222] __x64_sys_close+0x8f/0x120
[ 44.622530] do_syscall_64+0x5b/0x2f0
[ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 44.623244] RIP: 0033:0x7f365bb3f227
Kernel panics because it detects UFFD inconsistency during
userfaultfd_release_all(). Specifically, a VMA which has a valid pointer
to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags.
The inconsistency is caused in ksm_madvise(): when user calls madvise()
with MADV_UNMEARGEABLE on a VMA that is registered for UFFD in MINOR mode,
it accidentally clears all flags stored in the upper 32 bits of
vma->vm_flags.
Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int and
int are 32-bit wide. This setup causes the following mishap during the &=
~VM_MERGEABLE assignment.
VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000.
After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then
promoted to unsigned long before the & operation. This promotion fills
upper 32 bits with leading 0s, as we're doing unsigned conversion (and
even for a signed conversion, this wouldn't help as the leading bit is 0).
& operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff
instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears
the upper 32-bits of its value.
Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the
BIT() macro.
Note: other VM_* flags are not affected: This only happens to the
VM_MERGEABLE flag, as the other VM_* flags are all constants of type int
and after ~ operation, they end up with leading 1 and are thus converted
to unsigned long with leading 1s.
Note 2:
After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is
no longer a kernel BUG, but a WARNING at the same place:
[ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067
but the root-cause (flag-drop) remains the same.
[akpm(a)linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel]
Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/
Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de
Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode")
Signed-off-by: Jakub Acs <acsjakub(a)amazon.de>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis(a)gmail.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: SeongJae Park <sj(a)kernel.org>
Tested-by: Alice Ryhl <aliceryhl(a)google.com>
Tested-by: Miguel Ojeda <miguel.ojeda.sandonis(a)gmail.com>
Cc: Xu Xin <xu.xin16(a)zte.com.cn>
Cc: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
[acsjakub(a)amazon.de: adjust context in bindgings_helper.h]
Signed-off-by: Jakub Acs <acsjakub(a)amazon.de>
---
Tested that CONFIG_RUST=y builds with LLVM=1 with toolchain from
https://mirrors.edge.kernel.org/pub/tools/llvm/rust/.
include/linux/mm.h | 2 +-
rust/bindings/bindings_helper.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0fa8404957d..13b4bd7355c1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -320,7 +320,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */
#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */
-#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
+#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */
#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
#define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index a80783fcbe04..8b97919a86e2 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -33,3 +33,4 @@ const gfp_t RUST_CONST_HELPER___GFP_ZERO = __GFP_ZERO;
const gfp_t RUST_CONST_HELPER___GFP_HIGHMEM = ___GFP_HIGHMEM;
const gfp_t RUST_CONST_HELPER___GFP_NOWARN = ___GFP_NOWARN;
const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL;
+const vm_flags_t RUST_CONST_HELPER_VM_MERGEABLE = VM_MERGEABLE;
--
2.47.3
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
[ Upstream commit f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 ]
syzkaller discovered the following crash: (kernel BUG)
[ 44.607039] ------------[ cut here ]------------
[ 44.607422] kernel BUG at mm/userfaultfd.c:2067!
[ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI
[ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none)
[ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460
<snip other registers, drop unreliable trace>
[ 44.617726] Call Trace:
[ 44.617926] <TASK>
[ 44.619284] userfaultfd_release+0xef/0x1b0
[ 44.620976] __fput+0x3f9/0xb60
[ 44.621240] fput_close_sync+0x110/0x210
[ 44.622222] __x64_sys_close+0x8f/0x120
[ 44.622530] do_syscall_64+0x5b/0x2f0
[ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 44.623244] RIP: 0033:0x7f365bb3f227
Kernel panics because it detects UFFD inconsistency during
userfaultfd_release_all(). Specifically, a VMA which has a valid pointer
to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags.
The inconsistency is caused in ksm_madvise(): when user calls madvise()
with MADV_UNMEARGEABLE on a VMA that is registered for UFFD in MINOR mode,
it accidentally clears all flags stored in the upper 32 bits of
vma->vm_flags.
Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int and
int are 32-bit wide. This setup causes the following mishap during the &=
~VM_MERGEABLE assignment.
VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000.
After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then
promoted to unsigned long before the & operation. This promotion fills
upper 32 bits with leading 0s, as we're doing unsigned conversion (and
even for a signed conversion, this wouldn't help as the leading bit is 0).
& operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff
instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears
the upper 32-bits of its value.
Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the
BIT() macro.
Note: other VM_* flags are not affected: This only happens to the
VM_MERGEABLE flag, as the other VM_* flags are all constants of type int
and after ~ operation, they end up with leading 1 and are thus converted
to unsigned long with leading 1s.
Note 2:
After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is
no longer a kernel BUG, but a WARNING at the same place:
[ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067
but the root-cause (flag-drop) remains the same.
[akpm(a)linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel]
Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/
Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de
Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode")
Signed-off-by: Jakub Acs <acsjakub(a)amazon.de>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis(a)gmail.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: SeongJae Park <sj(a)kernel.org>
Tested-by: Alice Ryhl <aliceryhl(a)google.com>
Tested-by: Miguel Ojeda <miguel.ojeda.sandonis(a)gmail.com>
Cc: Xu Xin <xu.xin16(a)zte.com.cn>
Cc: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
[acsjakub(a)amazon.de: adapt rust bindgen const to older versions]
Signed-off-by: Jakub Acs <acsjakub(a)amazon.de>
---
I inferred the rust adaptation from the neighboring definitions. Could
rust folks please also check that it makes sense?
Tested that CONFIG_RUST=y builds with LLVM=1 with toolchain from
https://mirrors.edge.kernel.org/pub/tools/llvm/rust/.
include/linux/mm.h | 2 +-
rust/bindings/bindings_helper.h | 2 ++
rust/bindings/lib.rs | 1 +
3 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ba77f08900ca..fa5b11452ae6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -315,7 +315,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */
#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */
-#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
+#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */
#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
#define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index c91a3c24f607..5416f21918e0 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -12,8 +12,10 @@
#include <linux/refcount.h>
#include <linux/wait.h>
#include <linux/sched.h>
+#include <linux/mm.h>
/* `bindgen` gets confused at certain things. */
const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO;
+const vm_flags_t BINDINGS_VM_MERGEABLE = VM_MERGEABLE;
diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs
index 9bcbea04dac3..7d9078b94a8f 100644
--- a/rust/bindings/lib.rs
+++ b/rust/bindings/lib.rs
@@ -51,3 +51,4 @@ mod bindings_helper {
pub const GFP_KERNEL: gfp_t = BINDINGS_GFP_KERNEL;
pub const __GFP_ZERO: gfp_t = BINDINGS___GFP_ZERO;
+pub const VM_MERGEABLE: vm_flags_t = BINDINGS_VM_MERGEABLE;
--
2.47.3
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597