All related to the pages code, and the latter are reproducible with a simple test.
Jason Gunthorpe (4): iommufd: Check for uptr overflow iommufd: Fix unpinning of pages when an access is present iommufd: Do not corrupt the pfn list when doing batch carry iommufd/selftest: Cover domain unmap with huge pages and access
drivers/iommu/iommufd/pages.c | 16 ++++++++++-- tools/testing/selftests/iommu/iommufd.c | 34 +++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-)
base-commit: 9c7d518b9b71f4d5ca3d12952cda3417ac6126c4
syzkaller found that setting up a map with a user VA that wraps past zero can trigger WARN_ONs, particularly from pin_user_pages weirdly returning 0 due to invalid arguments.
Prevent creating a pages with a uptr and size that would math overflow.
WARNING: CPU: 0 PID: 518 at drivers/iommu/iommufd/pages.c:793 pfn_reader_user_pin+0x2e6/0x390 Modules linked in: CPU: 0 PID: 518 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:pfn_reader_user_pin+0x2e6/0x390 Code: b1 11 e9 25 fe ff ff e8 28 e4 0f ff 31 ff 48 89 de e8 2e e6 0f ff 48 85 db 74 0a e8 14 e4 0f ff e9 4d ff ff ff e8 0a e4 0f ff <0f> 0b bb f2 ff ff ff e9 3c ff ff ff e8 f9 e3 0f ff ba 01 00 00 00 RSP: 0018:ffffc90000f9fa30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff821e2b72 RDX: 0000000000000000 RSI: ffff888014184680 RDI: 0000000000000002 RBP: ffffc90000f9fa78 R08: 00000000000000ff R09: 0000000079de6f4e R10: ffffc90000f9f790 R11: ffff888014185418 R12: ffffc90000f9fc60 R13: 0000000000000002 R14: ffff888007879800 R15: 0000000000000000 FS: 00007f4227555740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000043 CR3: 000000000e748005 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: <TASK> pfn_reader_next+0x14a/0x7b0 ? interval_tree_double_span_iter_update+0x11a/0x140 pfn_reader_first+0x140/0x1b0 iopt_pages_rw_slow+0x71/0x280 ? __this_cpu_preempt_check+0x20/0x30 iopt_pages_rw_access+0x2b2/0x5b0 iommufd_access_rw+0x19f/0x2f0 iommufd_test+0xd11/0x16f0 ? write_comp_data+0x2f/0x90 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 ? __pfx_iommufd_fops_ioctl+0x10/0x10 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Cc: stable@vger.kernel.org Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZA/O6vGUfni158oK@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com --- drivers/iommu/iommufd/pages.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index f8d92c9bb65b60..400ec7c91ed7e7 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1142,6 +1142,7 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, bool writable) { struct iopt_pages *pages; + unsigned long end;
/* * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP @@ -1150,6 +1151,9 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, if (length > SIZE_MAX - PAGE_SIZE || length == 0) return ERR_PTR(-EINVAL);
+ if (check_add_overflow((unsigned long)uptr, length, &end)) + return ERR_PTR(-EOVERFLOW); + pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT); if (!pages) return ERR_PTR(-ENOMEM);
From: Jason Gunthorpe jgg@nvidia.com Sent: Friday, March 31, 2023 11:32 PM
syzkaller found that setting up a map with a user VA that wraps past zero can trigger WARN_ONs, particularly from pin_user_pages weirdly returning 0 due to invalid arguments.
Prevent creating a pages with a uptr and size that would math overflow.
WARNING: CPU: 0 PID: 518 at drivers/iommu/iommufd/pages.c:793 pfn_reader_user_pin+0x2e6/0x390 Modules linked in: CPU: 0 PID: 518 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0- gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:pfn_reader_user_pin+0x2e6/0x390 Code: b1 11 e9 25 fe ff ff e8 28 e4 0f ff 31 ff 48 89 de e8 2e e6 0f ff 48 85 db 74 0a e8 14 e4 0f ff e9 4d ff ff ff e8 0a e4 0f ff <0f> 0b bb f2 ff ff ff e9 3c ff ff ff e8 f9 e3 0f ff ba 01 00 00 00 RSP: 0018:ffffc90000f9fa30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff821e2b72 RDX: 0000000000000000 RSI: ffff888014184680 RDI: 0000000000000002 RBP: ffffc90000f9fa78 R08: 00000000000000ff R09: 0000000079de6f4e R10: ffffc90000f9f790 R11: ffff888014185418 R12: ffffc90000f9fc60 R13: 0000000000000002 R14: ffff888007879800 R15: 0000000000000000 FS: 00007f4227555740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000043 CR3: 000000000e748005 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace:
<TASK> pfn_reader_next+0x14a/0x7b0 ? interval_tree_double_span_iter_update+0x11a/0x140 pfn_reader_first+0x140/0x1b0 iopt_pages_rw_slow+0x71/0x280 ? __this_cpu_preempt_check+0x20/0x30 iopt_pages_rw_access+0x2b2/0x5b0 iommufd_access_rw+0x19f/0x2f0 iommufd_test+0xd11/0x16f0 ? write_comp_data+0x2f/0x90 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 ? __pfx_iommufd_fops_ioctl+0x10/0x10 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Cc: stable@vger.kernel.org Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZA/O6vGUfni158oK@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com
Reviewed-by: Kevin Tian kevin.tian@intel.com
syzkaller found that the calculation of batch_last_index should use 'start_index' since at input to this function the batch is either empty or it has already been adjusted to cross any accesses so it will start at the point we are unmapping from.
Getting this wrong causes the unmap to run over the end of the pages which corrupts pages that were never mapped. In most cases this triggers the num pinned debugging:
WARNING: CPU: 0 PID: 557 at drivers/iommu/iommufd/pages.c:294 __iopt_area_unfill_domain+0x152/0x560 Modules linked in: CPU: 0 PID: 557 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__iopt_area_unfill_domain+0x152/0x560 Code: d2 0f ff 44 8b 64 24 54 48 8b 44 24 48 31 ff 44 89 e6 48 89 44 24 38 e8 fc d3 0f ff 45 85 e4 0f 85 eb 01 00 00 e8 0e d2 0f ff <0f> 0b e8 07 d2 0f ff 48 8b 44 24 38 89 5c 24 58 89 18 8b 44 24 54 RSP: 0018:ffffc9000108baf0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000ffffffff RCX: ffffffff821e3f85 RDX: 0000000000000000 RSI: ffff88800faf0000 RDI: 0000000000000002 RBP: ffffc9000108bd18 R08: 000000000003ca25 R09: 0000000000000014 R10: 000000000003ca00 R11: 0000000000000024 R12: 0000000000000004 R13: 0000000000000801 R14: 00000000000007ff R15: 0000000000000800 FS: 00007f3499ce1740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000243 CR3: 00000000179c2001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: <TASK> iopt_area_unfill_domain+0x32/0x40 iopt_table_remove_domain+0x23f/0x4c0 iommufd_device_selftest_detach+0x3a/0x90 iommufd_selftest_destroy+0x55/0x70 iommufd_object_destroy_user+0xce/0x130 iommufd_destroy+0xa2/0xc0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Also add some useful WARN_ON sanity checks.
Cc: stable@vger.kernel.org Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZBE1k040xAhIuTmq@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com --- drivers/iommu/iommufd/pages.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 400ec7c91ed7e7..b11aace836542d 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1207,13 +1207,21 @@ iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area, unsigned long start = max(start_index, *unmapped_end_index);
+ if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + batch->total_pfns) + WARN_ON(*unmapped_end_index - + batch->total_pfns != + start_index); batch_from_domain(batch, domain, area, start, last_index); - batch_last_index = start + batch->total_pfns - 1; + batch_last_index = start_index + batch->total_pfns - 1; } else { batch_last_index = last_index; }
+ if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(batch_last_index > real_last_index); + /* * unmaps must always 'cut' at a place where the pfns are not * contiguous to pair with the maps that always install
Hi Jason,
On 2023-03-31 at 12:32:25 -0300, Jason Gunthorpe wrote:
syzkaller found that the calculation of batch_last_index should use 'start_index' since at input to this function the batch is either empty or it has already been adjusted to cross any accesses so it will start at the point we are unmapping from.
Getting this wrong causes the unmap to run over the end of the pages which corrupts pages that were never mapped. In most cases this triggers the num pinned debugging:
WARNING: CPU: 0 PID: 557 at drivers/iommu/iommufd/pages.c:294 __iopt_area_unfill_domain+0x152/0x560 Modules linked in: CPU: 0 PID: 557 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__iopt_area_unfill_domain+0x152/0x560 Code: d2 0f ff 44 8b 64 24 54 48 8b 44 24 48 31 ff 44 89 e6 48 89 44 24 38 e8 fc d3 0f ff 45 85 e4 0f 85 eb 01 00 00 e8 0e d2 0f ff <0f> 0b e8 07 d2 0f ff 48 8b 44 24 38 89 5c 24 58 89 18 8b 44 24 54 RSP: 0018:ffffc9000108baf0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000ffffffff RCX: ffffffff821e3f85 RDX: 0000000000000000 RSI: ffff88800faf0000 RDI: 0000000000000002 RBP: ffffc9000108bd18 R08: 000000000003ca25 R09: 0000000000000014 R10: 000000000003ca00 R11: 0000000000000024 R12: 0000000000000004 R13: 0000000000000801 R14: 00000000000007ff R15: 0000000000000800 FS: 00007f3499ce1740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000243 CR3: 00000000179c2001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace:
<TASK> iopt_area_unfill_domain+0x32/0x40 iopt_table_remove_domain+0x23f/0x4c0 iommufd_device_selftest_detach+0x3a/0x90 iommufd_selftest_destroy+0x55/0x70 iommufd_object_destroy_user+0xce/0x130 iommufd_destroy+0xa2/0xc0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Also add some useful WARN_ON sanity checks.
Cc: stable@vger.kernel.org Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZBE1k040xAhIuTmq@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com
drivers/iommu/iommufd/pages.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 400ec7c91ed7e7..b11aace836542d 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1207,13 +1207,21 @@ iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area, unsigned long start = max(start_index, *unmapped_end_index);
if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
batch->total_pfns)
WARN_ON(*unmapped_end_index -
batch->total_pfns !=
start_index); batch_from_domain(batch, domain, area, start, last_index);
batch_last_index = start + batch->total_pfns - 1;
} else { batch_last_index = last_index; }batch_last_index = start_index + batch->total_pfns - 1;
if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
WARN_ON(batch_last_index > real_last_index);
- /*
- unmaps must always 'cut' at a place where the pfns are not
- contiguous to pair with the maps that always install
--
I tested the reproduced code in the kernel with all 3 fixed patches. Syzkaller reproduced code: https://github.com/xupengfe/syzkaller_logs/raw/main/230314_094459___iopt_are... This issue was gone and the issue was fixed.
Thanks! BR.
2.40.0
From: Jason Gunthorpe jgg@nvidia.com Sent: Friday, March 31, 2023 11:32 PM
syzkaller found that the calculation of batch_last_index should use 'start_index' since at input to this function the batch is either empty or it has already been adjusted to cross any accesses so it will start at the point we are unmapping from.
Getting this wrong causes the unmap to run over the end of the pages which corrupts pages that were never mapped. In most cases this triggers the num pinned debugging:
WARNING: CPU: 0 PID: 557 at drivers/iommu/iommufd/pages.c:294 __iopt_area_unfill_domain+0x152/0x560 Modules linked in: CPU: 0 PID: 557 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0- gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__iopt_area_unfill_domain+0x152/0x560 Code: d2 0f ff 44 8b 64 24 54 48 8b 44 24 48 31 ff 44 89 e6 48 89 44 24 38 e8 fc d3 0f ff 45 85 e4 0f 85 eb 01 00 00 e8 0e d2 0f ff <0f> 0b e8 07 d2 0f ff 48 8b 44 24 38 89 5c 24 58 89 18 8b 44 24 54 RSP: 0018:ffffc9000108baf0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000ffffffff RCX: ffffffff821e3f85 RDX: 0000000000000000 RSI: ffff88800faf0000 RDI: 0000000000000002 RBP: ffffc9000108bd18 R08: 000000000003ca25 R09: 0000000000000014 R10: 000000000003ca00 R11: 0000000000000024 R12: 0000000000000004 R13: 0000000000000801 R14: 00000000000007ff R15: 0000000000000800 FS: 00007f3499ce1740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000243 CR3: 00000000179c2001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace:
<TASK> iopt_area_unfill_domain+0x32/0x40 iopt_table_remove_domain+0x23f/0x4c0 iommufd_device_selftest_detach+0x3a/0x90 iommufd_selftest_destroy+0x55/0x70 iommufd_object_destroy_user+0xce/0x130 iommufd_destroy+0xa2/0xc0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Also add some useful WARN_ON sanity checks.
Cc: stable@vger.kernel.org Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZBE1k040xAhIuTmq@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com
Reviewed-by: Kevin Tian kevin.tian@intel.com
If batch->end is 0 then setting npfns[0] before computing the new value of pfns will fail to adjust the pfn and result in various page accounting corruptions. It should be ordered after.
This seems to result in various kinds of page meta-data corruption related failures:
WARNING: CPU: 1 PID: 527 at mm/gup.c:75 try_grab_folio+0x503/0x740 Modules linked in: CPU: 1 PID: 527 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:try_grab_folio+0x503/0x740 Code: e3 01 48 89 de e8 6d c1 dd ff 48 85 db 0f 84 7c fe ff ff e8 4f bf dd ff 49 8d 47 ff 48 89 45 d0 e9 73 fe ff ff e8 3d bf dd ff <0f> 0b 31 db e9 d0 fc ff ff e8 2f bf dd ff 48 8b 5d c8 31 ff 48 89 RSP: 0018:ffffc90000f37908 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 00000000fffffc02 RCX: ffffffff81504c26 RDX: 0000000000000000 RSI: ffff88800d030000 RDI: 0000000000000002 RBP: ffffc90000f37948 R08: 000000000003ca24 R09: 0000000000000008 R10: 000000000003ca00 R11: 0000000000000023 R12: ffffea000035d540 R13: 0000000000000001 R14: 0000000000000000 R15: ffffea000035d540 FS: 00007fecbf659740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200011c3 CR3: 000000000ef66006 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace: <TASK> internal_get_user_pages_fast+0xd32/0x2200 pin_user_pages_fast+0x65/0x90 pfn_reader_user_pin+0x376/0x390 pfn_reader_next+0x14a/0x7b0 pfn_reader_first+0x140/0x1b0 iopt_area_fill_domain+0x74/0x210 iopt_table_add_domain+0x30e/0x6e0 iommufd_device_selftest_attach+0x7f/0x140 iommufd_test+0x10ff/0x16f0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Cc: stable@vger.kernel.org Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZBExkEW/On0ue68q@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com --- drivers/iommu/iommufd/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index b11aace836542d..3c47846cc5efe8 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -294,9 +294,9 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) batch->npfns[batch->end - 1] < keep_pfns);
batch->total_pfns = keep_pfns; - batch->npfns[0] = keep_pfns; batch->pfns[0] = batch->pfns[batch->end - 1] + (batch->npfns[batch->end - 1] - keep_pfns); + batch->npfns[0] = keep_pfns; batch->end = 0; }
Hi Jason,
On 2023-03-31 at 12:32:26 -0300, Jason Gunthorpe wrote:
If batch->end is 0 then setting npfns[0] before computing the new value of pfns will fail to adjust the pfn and result in various page accounting corruptions. It should be ordered after.
This seems to result in various kinds of page meta-data corruption related failures:
WARNING: CPU: 1 PID: 527 at mm/gup.c:75 try_grab_folio+0x503/0x740 Modules linked in: CPU: 1 PID: 527 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:try_grab_folio+0x503/0x740 Code: e3 01 48 89 de e8 6d c1 dd ff 48 85 db 0f 84 7c fe ff ff e8 4f bf dd ff 49 8d 47 ff 48 89 45 d0 e9 73 fe ff ff e8 3d bf dd ff <0f> 0b 31 db e9 d0 fc ff ff e8 2f bf dd ff 48 8b 5d c8 31 ff 48 89 RSP: 0018:ffffc90000f37908 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 00000000fffffc02 RCX: ffffffff81504c26 RDX: 0000000000000000 RSI: ffff88800d030000 RDI: 0000000000000002 RBP: ffffc90000f37948 R08: 000000000003ca24 R09: 0000000000000008 R10: 000000000003ca00 R11: 0000000000000023 R12: ffffea000035d540 R13: 0000000000000001 R14: 0000000000000000 R15: ffffea000035d540 FS: 00007fecbf659740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200011c3 CR3: 000000000ef66006 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace:
<TASK> internal_get_user_pages_fast+0xd32/0x2200 pin_user_pages_fast+0x65/0x90 pfn_reader_user_pin+0x376/0x390 pfn_reader_next+0x14a/0x7b0 pfn_reader_first+0x140/0x1b0 iopt_area_fill_domain+0x74/0x210 iopt_table_add_domain+0x30e/0x6e0 iommufd_device_selftest_attach+0x7f/0x140 iommufd_test+0x10ff/0x16f0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Cc: stable@vger.kernel.org Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZBExkEW/On0ue68q@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com
drivers/iommu/iommufd/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index b11aace836542d..3c47846cc5efe8 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -294,9 +294,9 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) batch->npfns[batch->end - 1] < keep_pfns); batch->total_pfns = keep_pfns;
- batch->npfns[0] = keep_pfns; batch->pfns[0] = batch->pfns[batch->end - 1] + (batch->npfns[batch->end - 1] - keep_pfns);
- batch->npfns[0] = keep_pfns; batch->end = 0;
} --
I tested the reproduced code in the kernel with all 3 fixed patches. Syzkaller reproduced code: https://github.com/xupengfe/syzkaller_logs/blob/main/230313_234302_try_grab_... This issue was gone and the issue was fixed.
Thanks! BR.
2.40.0
Hi Jason,
Could you add "Tested-by" tag from me?
Thanks! BR.
On 2023-04-01 at 14:29:33 +0800, Pengfei Xu wrote:
Hi Jason,
On 2023-03-31 at 12:32:26 -0300, Jason Gunthorpe wrote:
If batch->end is 0 then setting npfns[0] before computing the new value of pfns will fail to adjust the pfn and result in various page accounting corruptions. It should be ordered after.
This seems to result in various kinds of page meta-data corruption related failures:
WARNING: CPU: 1 PID: 527 at mm/gup.c:75 try_grab_folio+0x503/0x740 Modules linked in: CPU: 1 PID: 527 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:try_grab_folio+0x503/0x740 Code: e3 01 48 89 de e8 6d c1 dd ff 48 85 db 0f 84 7c fe ff ff e8 4f bf dd ff 49 8d 47 ff 48 89 45 d0 e9 73 fe ff ff e8 3d bf dd ff <0f> 0b 31 db e9 d0 fc ff ff e8 2f bf dd ff 48 8b 5d c8 31 ff 48 89 RSP: 0018:ffffc90000f37908 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 00000000fffffc02 RCX: ffffffff81504c26 RDX: 0000000000000000 RSI: ffff88800d030000 RDI: 0000000000000002 RBP: ffffc90000f37948 R08: 000000000003ca24 R09: 0000000000000008 R10: 000000000003ca00 R11: 0000000000000023 R12: ffffea000035d540 R13: 0000000000000001 R14: 0000000000000000 R15: ffffea000035d540 FS: 00007fecbf659740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200011c3 CR3: 000000000ef66006 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace:
<TASK> internal_get_user_pages_fast+0xd32/0x2200 pin_user_pages_fast+0x65/0x90 pfn_reader_user_pin+0x376/0x390 pfn_reader_next+0x14a/0x7b0 pfn_reader_first+0x140/0x1b0 iopt_area_fill_domain+0x74/0x210 iopt_table_add_domain+0x30e/0x6e0 iommufd_device_selftest_attach+0x7f/0x140 iommufd_test+0x10ff/0x16f0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Cc: stable@vger.kernel.org Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZBExkEW/On0ue68q@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com
drivers/iommu/iommufd/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index b11aace836542d..3c47846cc5efe8 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -294,9 +294,9 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) batch->npfns[batch->end - 1] < keep_pfns); batch->total_pfns = keep_pfns;
- batch->npfns[0] = keep_pfns; batch->pfns[0] = batch->pfns[batch->end - 1] + (batch->npfns[batch->end - 1] - keep_pfns);
- batch->npfns[0] = keep_pfns; batch->end = 0;
} --
I tested the reproduced code in the kernel with all 3 fixed patches. Syzkaller reproduced code: https://github.com/xupengfe/syzkaller_logs/blob/main/230313_234302_try_grab_... This issue was gone and the issue was fixed.
Thanks! BR.
2.40.0
On Mon, Apr 03, 2023 at 03:02:56PM +0800, Pengfei Xu wrote:
Hi Jason,
Could you add "Tested-by" tag from me?
Yes, I did, in future you can respond to the cover letter with that tag and the tools will pick it up
Jason
Hi Jason,
On 2023-04-04 at 10:01:55 -0300, Jason Gunthorpe wrote:
On Mon, Apr 03, 2023 at 03:02:56PM +0800, Pengfei Xu wrote:
Hi Jason,
Could you add "Tested-by" tag from me?
Yes, I did, in future you can respond to the cover letter with that tag and the tools will pick it up
Ah, I see, I will do that next time, thanks for suggestion!
BR. Thanks!
Jason
From: Jason Gunthorpe jgg@nvidia.com Sent: Friday, March 31, 2023 11:32 PM
If batch->end is 0 then setting npfns[0] before computing the new value of pfns will fail to adjust the pfn and result in various page accounting corruptions. It should be ordered after.
This seems to result in various kinds of page meta-data corruption related failures:
WARNING: CPU: 1 PID: 527 at mm/gup.c:75 try_grab_folio+0x503/0x740 Modules linked in: CPU: 1 PID: 527 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0- gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:try_grab_folio+0x503/0x740 Code: e3 01 48 89 de e8 6d c1 dd ff 48 85 db 0f 84 7c fe ff ff e8 4f bf dd ff 49 8d 47 ff 48 89 45 d0 e9 73 fe ff ff e8 3d bf dd ff <0f> 0b 31 db e9 d0 fc ff ff e8 2f bf dd ff 48 8b 5d c8 31 ff 48 89 RSP: 0018:ffffc90000f37908 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 00000000fffffc02 RCX: ffffffff81504c26 RDX: 0000000000000000 RSI: ffff88800d030000 RDI: 0000000000000002 RBP: ffffc90000f37948 R08: 000000000003ca24 R09: 0000000000000008 R10: 000000000003ca00 R11: 0000000000000023 R12: ffffea000035d540 R13: 0000000000000001 R14: 0000000000000000 R15: ffffea000035d540 FS: 00007fecbf659740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200011c3 CR3: 000000000ef66006 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace:
<TASK> internal_get_user_pages_fast+0xd32/0x2200 pin_user_pages_fast+0x65/0x90 pfn_reader_user_pin+0x376/0x390 pfn_reader_next+0x14a/0x7b0 pfn_reader_first+0x140/0x1b0 iopt_area_fill_domain+0x74/0x210 iopt_table_add_domain+0x30e/0x6e0 iommufd_device_selftest_attach+0x7f/0x140 iommufd_test+0x10ff/0x16f0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc
Cc: stable@vger.kernel.org Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages") Reported-by: Pengfei Xu pengfei.xu@intel.com Link: https://lore.kernel.org/r/ZBExkEW/On0ue68q@xpf.sh.intel.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com
Reviewed-by: Kevin Tian kevin.tian@intel.com
Inspired by the syzkaller reproducer check the batch carry path with a simple test.
Signed-off-by: Jason Gunthorpe jgg@nvidia.com --- tools/testing/selftests/iommu/iommufd.c | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+)
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index c07252dbf62d72..a2ce8f3c5040f9 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -594,6 +594,40 @@ TEST_F(iommufd_ioas, iova_ranges) EXPECT_EQ(0, ranges[1].last); }
+TEST_F(iommufd_ioas, access_domain_destory) +{ + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .iova = self->base_iova + PAGE_SIZE, + .length = PAGE_SIZE}, + }; + size_t buf_size = 2 * HUGEPAGE_SIZE; + uint8_t *buf; + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + test_ioctl_ioas_map_fixed(buf, buf_size, self->base_iova); + + test_cmd_create_access(self->ioas_id, &access_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + access_cmd.access_pages.uptr = (uintptr_t)buf + PAGE_SIZE; + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + + /* Causes a complicated unpin across a huge page boundary */ + if (self->stdev_id) + test_ioctl_destroy(self->stdev_id); + + test_cmd_destroy_access_pages( + access_cmd.id, access_cmd.access_pages.out_access_pages_id); + test_cmd_destroy_access(access_cmd.id); + ASSERT_EQ(0, munmap(buf, buf_size)); +} + TEST_F(iommufd_ioas, access_pin) { struct iommu_test_cmd access_cmd = {
From: Jason Gunthorpe jgg@nvidia.com Sent: Friday, March 31, 2023 11:32 PM
Inspired by the syzkaller reproducer check the batch carry path with a simple test.
Signed-off-by: Jason Gunthorpe jgg@nvidia.com
Reviewed-by: Kevin Tian kevin.tian@intel.com
On Fri, Mar 31, 2023 at 12:32:23PM -0300, Jason Gunthorpe wrote:
All related to the pages code, and the latter are reproducible with a simple test.
Jason Gunthorpe (4): iommufd: Check for uptr overflow iommufd: Fix unpinning of pages when an access is present iommufd: Do not corrupt the pfn list when doing batch carry iommufd/selftest: Cover domain unmap with huge pages and access
drivers/iommu/iommufd/pages.c | 16 ++++++++++-- tools/testing/selftests/iommu/iommufd.c | 34 +++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-)
I picked these up to go to -rc
Jason
linux-kselftest-mirror@lists.linaro.org