The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062041-uplifted-cahoots-6c42@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:53 +0200
Subject: [PATCH] mm/hugetlb: unshare page tables during VMA split, not before
Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split(). This happens before the VMA lock and rmap locks are
taken - which is too early; it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.
Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens. At that
point, both the VMA and the rmap(s) are write-locked.
An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:
1. from hugetlb_split(), holding:
- mmap lock (exclusively)
- VMA lock
- file rmap lock (exclusively)
2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
call us with only the mmap lock held (in shared mode), but currently
only runs while holding mmap lock (exclusively) and VMA lock
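For orientation, a rough sketch of the two call paths after this change
(illustrative only, not verbatim kernel code):

  1. __split_vma()
       vma_prepare()                      // VMA and file rmap write-locked
       hugetlb_split(vma, addr)
         hugetlb_unshare_pmds(vma, floor, ceil, false /* take_locks */);

  2. hugetlb_unshare_all_pmds(vma)
       hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
                            ALIGN_DOWN(vma->vm_end, PUD_SIZE),
                            true /* take_locks */);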
Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
commit claimed to fix an issue introduced in 5.13, but it should actually
also go all the way back.
[jannh(a)google.com: v2]
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1…
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0598f36931de..42f374e828a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0b1d53079f9..7ba020d489d4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -121,7 +121,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, bool take_locks);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
+ return 0;
+}
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
/*
* PMD sharing is only possible for PUD_SIZE-aligned address ranges
* in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
* split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+ * This function is called in the middle of a VMA split operation, with
+ * MM, VMA and rmap all write-locked to prevent concurrent page table
+ * walks (except hardware and gup_fast()).
*/
+ vma_assert_write_locked(vma);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
if (addr & ~PUD_MASK) {
- /*
- * hugetlb_vm_op_split is called right before we attempt to
- * split the VMA. We will need to unshare PMDs in the old and
- * new VMAs, so let's unshare before we split.
- */
unsigned long floor = addr & PUD_MASK;
unsigned long ceil = floor + PUD_SIZE;
- if (floor >= vma->vm_start && ceil <= vma->vm_end)
- hugetlb_unshare_pmds(vma, floor, ceil);
+ if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+ /*
+ * Locking:
+ * Use take_locks=false here.
+ * The file rmap lock is already held.
+ * The hugetlb VMA lock can't be taken when we already
+ * hold the file rmap lock, and we don't need it because
+ * its purpose is to synchronize against concurrent page
+ * table walks, which are not possible thanks to the
+ * locks held by our caller.
+ */
+ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+ }
}
-
- return 0;
}
static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7885,9 +7899,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
spin_unlock_irq(&hugetlb_lock);
}
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool take_locks)
{
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
@@ -7911,8 +7932,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
start, end);
mmu_notifier_invalidate_range_start(&range);
- hugetlb_vma_lock_write(vma);
- i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (take_locks) {
+ hugetlb_vma_lock_write(vma);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ }
for (address = start; address < end; address += PUD_SIZE) {
ptep = hugetlb_walk(vma, address, sz);
if (!ptep)
@@ -7922,8 +7947,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
spin_unlock(ptl);
}
flush_hugetlb_tlb_range(vma, start, end);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
- hugetlb_vma_unlock_write(vma);
+ if (take_locks) {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst.
@@ -7938,7 +7965,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
- ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+ ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+ /* take_locks = */ true);
}
/*
diff --git a/mm/vma.c b/mm/vma.c
index 1c6595f282e5..7ebc9eb608f4 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -539,7 +539,14 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
init_vma_prep(&vp, vma);
vp.insert = new;
vma_prepare(&vp);
+
+ /*
+ * Get rid of huge pages and shared page tables straddling the split
+ * boundary.
+ */
vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_split(vma, addr);
if (new_below) {
vma->vm_start = addr;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 441feb21aa5a..4505b1c31be1 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -932,6 +932,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
(void)next;
}
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
static inline void vma_iter_free(struct vma_iterator *vmi)
{
mas_destroy(&vmi->mas);
The i2c_dw_xfer_init() function requires msgs and msg_write_idx from the
dev context to be initialized.
amd_i2c_dw_xfer_quirk() inits msgs and msgs_num, but not msg_write_idx.
This could allow an out-of-bounds access of the msgs array.
Initialize msg_write_idx before calling i2c_dw_xfer_init().
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Fixes: 17631e8ca2d3 ("i2c: designware: Add driver support for AMD NAVI GPU")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
---
drivers/i2c/busses/i2c-designware-master.c | 1 +
1 file changed, 1 insertion(+)
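For reference, a simplified sketch of where the stale index bites (not the
exact driver code; i2c_dw_xfer_init() programs the target address from the
message array):

  /* dev->msgs points at the quirk's msgs[] of length dev->msgs_num */
  ic_tar = dev->msgs[dev->msg_write_idx].addr;  /* out of bounds if
                                                   msg_write_idx is stale */

Resetting dev->msg_write_idx to 0 before i2c_dw_xfer_init() keeps the index
inside the array the quirk just installed.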
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index c5394229b77f..40aa5114bf8c 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -363,6 +363,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs,
dev->msgs = msgs;
dev->msgs_num = num_msgs;
+ dev->msg_write_idx = 0;
i2c_dw_xfer_init(dev);
/* Initiate messages read/write transaction */
--
2.49.0
Hi all,
The first patch is a prerequisite, needed to avoid NULL pointer
dereferences in error paths. Two fixes then follow.
Signed-off-by: Kurt Borja <kuurtb(a)gmail.com>
---
Changes in v3:
- Add Cc stable tags to all patches
- Rebase on top of the 'fixes' branch
- Link to v2: https://lore.kernel.org/r/20250628-lmi-fix-v2-0-c530e1c959d7@gmail.com
Changes in v2:
[PATCH 02]
- Remove kobject_del() and commit message remark. It turns out it's
optional to call this (my bad)
- Leave only one fixes tag. The other two are not necessary.
- Link to v1: https://lore.kernel.org/r/20250628-lmi-fix-v1-0-c6eec9aa3ca7@gmail.com
---
Kurt Borja (3):
platform/x86: think-lmi: Create ksets consecutively
platform/x86: think-lmi: Fix kobject cleanup
platform/x86: think-lmi: Fix sysfs group cleanup
drivers/platform/x86/think-lmi.c | 90 ++++++++++++++--------------------------
1 file changed, 30 insertions(+), 60 deletions(-)
---
base-commit: 173bbec6693f3f3f00dac144f3aa0cd62fb60d33
change-id: 20250628-lmi-fix-98143b10d9fd
--
~ Kurt
Commit 88c02b3f79a6 ("s390/sha3: Support sha3 performance enhancements")
added the field s390_sha_ctx::first_message_part and made it be used by
s390_sha_update_blocks(). At the time, s390_sha_update_blocks() was
used by all the s390 SHA-1, SHA-2, and SHA-3 algorithms. However, only
the initialization functions for SHA-3 were updated, leaving SHA-1 and
SHA-2 using first_message_part uninitialized.
This could cause e.g. CPACF_KIMD_SHA_512 | CPACF_KIMD_NIP to be used
instead of just CPACF_KIMD_SHA_512. It's unclear why this didn't cause a
problem earlier; this bug was found only when UBSAN detected the
uninitialized boolean. Perhaps the CPU ignores CPACF_KIMD_NIP for SHA-1
and SHA-2. Regardless, let's fix this. For now just initialize to
false, i.e. don't try to "optimize" the SHA state initialization.
Note: in 6.16, we need to patch SHA-1, SHA-384, and SHA-512. In 6.15
and earlier, we'll also need to patch SHA-224 and SHA-256, as they
hadn't yet been librarified (which incidentally fixed this bug).
Fixes: 88c02b3f79a6 ("s390/sha3: Support sha3 performance enhancements")
Cc: stable(a)vger.kernel.org
Reported-by: Ingo Franzki <ifranzki(a)linux.ibm.com>
Closes: https://lore.kernel.org/r/12740696-595c-4604-873e-aefe8b405fbf@linux.ibm.com
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
This is targeting 6.16. I'd prefer to take this through
libcrypto-fixes, since the librarification work is also touching this
area. But let me know if there's a preference for the crypto tree or
the s390 tree instead.
arch/s390/crypto/sha1_s390.c | 1 +
arch/s390/crypto/sha512_s390.c | 2 ++
2 files changed, 3 insertions(+)
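For context, a simplified sketch of how the flag is consumed in the update
path (not the exact s390 code):

  unsigned int fc = sctx->func;          /* e.g. CPACF_KIMD_SHA_512 */

  if (sctx->first_message_part)          /* uninitialized before this fix */
          fc |= CPACF_KIMD_NIP;          /* "new message part" indication */

  cpacf_kimd(fc, &sctx->state, data, len);

With first_message_part left uninitialized, a garbage true value adds
CPACF_KIMD_NIP for SHA-1/SHA-2; the uninitialized boolean is what UBSAN
flagged.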
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index d229cbd2ba229..73672e76a88f9 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -36,10 +36,11 @@ static int s390_sha1_init(struct shash_desc *desc)
sctx->state[2] = SHA1_H2;
sctx->state[3] = SHA1_H3;
sctx->state[4] = SHA1_H4;
sctx->count = 0;
sctx->func = CPACF_KIMD_SHA_1;
+ sctx->first_message_part = false;
return 0;
}
static int s390_sha1_export(struct shash_desc *desc, void *out)
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 33711a29618c3..e9e112025ff22 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -30,10 +30,11 @@ static int sha512_init(struct shash_desc *desc)
ctx->sha512.state[6] = SHA512_H6;
ctx->sha512.state[7] = SHA512_H7;
ctx->count = 0;
ctx->sha512.count_hi = 0;
ctx->func = CPACF_KIMD_SHA_512;
+ ctx->first_message_part = false;
return 0;
}
static int sha512_export(struct shash_desc *desc, void *out)
@@ -95,10 +96,11 @@ static int sha384_init(struct shash_desc *desc)
ctx->sha512.state[6] = SHA384_H6;
ctx->sha512.state[7] = SHA384_H7;
ctx->count = 0;
ctx->sha512.count_hi = 0;
ctx->func = CPACF_KIMD_SHA_512;
+ ctx->first_message_part = false;
return 0;
}
static struct shash_alg sha384_alg = {
base-commit: e540341508ce2f6e27810106253d5de194b66750
--
2.50.0
From: Philipp Stanner <pstanner(a)redhat.com>
commit 313ebe47d75558511aa1237b6e35c663b5c0ec6f upstream.
Currently, user array duplications are sometimes done without an
overflow check. Sometimes the checks are done manually; sometimes the
array size is calculated with array_size() and sometimes by calculating
n * size directly in code.
Introduce wrappers for arrays for memdup_user() and vmemdup_user() to
provide a standardized and safe way for duplicating user arrays.
This is intended both for new code and for replacing usage of (v)memdup_user()
in existing code that uses, e.g., n * size to calculate array sizes.
Suggested-by: David Airlie <airlied(a)redhat.com>
Signed-off-by: Philipp Stanner <pstanner(a)redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Reviewed-by: Kees Cook <keescook(a)chromium.org>
Reviewed-by: Zack Rusin <zackr(a)vmware.com>
Signed-off-by: Dave Airlie <airlied(a)redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230920123612.16914-3-pstann…
[ fp: fix small conflict in header includes; take linux/errno.h as well ]
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
s390 and x86_64 allmodconfig builds passed.
include/linux/string.h | 41 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
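As a usage illustration (hypothetical caller, not part of this patch), the
conversion in existing code would look roughly like:

  /* before: the multiplication can overflow size_t unnoticed */
  ptr = memdup_user(uptr, num_items * sizeof(struct foo));

  /* after: overflow is rejected with ERR_PTR(-EOVERFLOW) */
  ptr = memdup_array_user(uptr, num_items, sizeof(struct foo));
  if (IS_ERR(ptr))
          return PTR_ERR(ptr);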
diff --git a/include/linux/string.h b/include/linux/string.h
index 0cef345a6e87..cd8c007ad3be 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -5,6 +5,9 @@
#include <linux/compiler.h> /* for inline */
#include <linux/types.h> /* for size_t */
#include <linux/stddef.h> /* for NULL */
+#include <linux/err.h> /* for ERR_PTR() */
+#include <linux/errno.h> /* for E2BIG */
+#include <linux/overflow.h> /* for check_mul_overflow() */
#include <stdarg.h>
#include <uapi/linux/string.h>
@@ -13,6 +16,44 @@ extern void *memdup_user(const void __user *, size_t);
extern void *vmemdup_user(const void __user *, size_t);
extern void *memdup_user_nul(const void __user *, size_t);
+/**
+ * memdup_array_user - duplicate array from user space
+ * @src: source address in user space
+ * @n: number of array members to copy
+ * @size: size of one array member
+ *
+ * Return: an ERR_PTR() on failure. Result is physically
+ * contiguous, to be freed by kfree().
+ */
+static inline void *memdup_array_user(const void __user *src, size_t n, size_t size)
+{
+ size_t nbytes;
+
+ if (check_mul_overflow(n, size, &nbytes))
+ return ERR_PTR(-EOVERFLOW);
+
+ return memdup_user(src, nbytes);
+}
+
+/**
+ * vmemdup_array_user - duplicate array from user space
+ * @src: source address in user space
+ * @n: number of array members to copy
+ * @size: size of one array member
+ *
+ * Return: an ERR_PTR() on failure. Result may be not
+ * physically contiguous. Use kvfree() to free.
+ */
+static inline void *vmemdup_array_user(const void __user *src, size_t n, size_t size)
+{
+ size_t nbytes;
+
+ if (check_mul_overflow(n, size, &nbytes))
+ return ERR_PTR(-EOVERFLOW);
+
+ return vmemdup_user(src, nbytes);
+}
+
/*
* Include machine specific inline routines
*/
--
2.50.0
When calling buf_to_xdp, the len argument is the length of the frame data,
excluding the virtio header's length (vi->hdr_len). We check that len against
xsk_pool_get_rx_frame_size() + vi->hdr_len
to ensure the provided len is not larger than the allocated chunk size. The
additional vi->hdr_len is there because in virtnet_add_recvbuf_xsk we use part
of XDP_PACKET_HEADROOM for the virtio header and ask the vhost to start
placing data at
hard_start + XDP_PACKET_HEADROOM - vi->hdr_len
not
hard_start + XDP_PACKET_HEADROOM
But the first buffer contains the virtio header, so the maximum frame length
in the first buffer can only be
xsk_pool_get_rx_frame_size()
not
xsk_pool_get_rx_frame_size() + vi->hdr_len
as the current check assumes.
This commit adds an argument to buf_to_xdp to differentiate between the first
buffer and the following ones, so the maximum frame length is calculated
correctly.
Cc: stable(a)vger.kernel.org
Reviewed-by: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Fixes: a4e7ba702701 ("virtio_net: xsk: rx: support recv small mode")
Signed-off-by: Bui Quang Minh <minhquangbui99(a)gmail.com>
---
drivers/net/virtio_net.c | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
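A rough layout sketch of the two cases (illustrative only, offsets not to
scale):

  first buffer:
    hard_start
    |<- XDP_PACKET_HEADROOM - hdr_len ->|<- virtio hdr ->|<- frame data ->|
                                            max frame data =
                                            xsk_pool_get_rx_frame_size()

  following buffers:
    hard_start
    |<- XDP_PACKET_HEADROOM - hdr_len ->|<- frame data ------------------>|
                                            max frame data =
                                            xsk_pool_get_rx_frame_size() + vi->hdr_len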
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e53ba600605a..1eb237cd5d0b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1127,15 +1127,29 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
}
}
+/* Note that @len is the length of received data without virtio header */
static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
- struct receive_queue *rq, void *buf, u32 len)
+ struct receive_queue *rq, void *buf,
+ u32 len, bool first_buf)
{
struct xdp_buff *xdp;
u32 bufsize;
xdp = (struct xdp_buff *)buf;
- bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
+ /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for
+ * virtio header and ask the vhost to fill data from
+ * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len
+ * The first buffer has virtio header so the remaining region for frame
+ * data is
+ * xsk_pool_get_rx_frame_size()
+ * While other buffers than the first one do not have virtio header, so
+ * the maximum frame data's length can be
+ * xsk_pool_get_rx_frame_size() + vi->hdr_len
+ */
+ bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool);
+ if (!first_buf)
+ bufsize += vi->hdr_len;
if (unlikely(len > bufsize)) {
pr_debug("%s: rx error: len %u exceeds truesize %u\n",
@@ -1260,7 +1274,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi,
u64_stats_add(&stats->bytes, len);
- xdp = buf_to_xdp(vi, rq, buf, len);
+ xdp = buf_to_xdp(vi, rq, buf, len, false);
if (!xdp)
goto err;
@@ -1358,7 +1372,7 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queu
u64_stats_add(&stats->bytes, len);
- xdp = buf_to_xdp(vi, rq, buf, len);
+ xdp = buf_to_xdp(vi, rq, buf, len, true);
if (!xdp)
return;
--
2.43.0
When large folios are enabled, the blocks-per-folio calculation in
ext4_da_writepages_trans_blocks() can overflow the journal transaction
limits, causing the writeback path to fail with errors like:
JBD2: kworker/u8:0 wants too many credits credits:416 rsv_credits:21 max:334
This occurs with small block sizes (1KB) and large folios (32MB), where
the calculation results in 32768 blocks per folio. The transaction credit
calculation then requests more credits than the journal can handle, leading
to the following warning and writeback failure:
WARNING: CPU: 1 PID: 43 at fs/jbd2/transaction.c:334 start_this_handle+0x4c0/0x4e0
EXT4-fs (loop0): ext4_do_writepages: jbd2_start: 9223372036854775807 pages, ino 14; err -28
Call trace leading to the issue:
ext4_do_writepages()
ext4_da_writepages_trans_blocks()
bpp = ext4_journal_blocks_per_folio() // Returns 32768 for 32MB folio with 1KB blocks
ext4_meta_trans_blocks(inode, MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp)
// With bpp=32768, lblocks=34815, pextents=32768
// Returns credits=415, but with overhead becomes 416 > max 334
ext4_journal_start_with_reserve()
jbd2_journal_start_reserved()
start_this_handle()
// Fails with warning when credits:416 > max:334
The issue was introduced by commit d6bf294773a47 ("ext4/jbd2: convert
jbd2_journal_blocks_per_page() to support large folio"), which added
support for large folios but didn't account for the journal credit limits.
Fix this by capping the blocks-per-folio value at 8192 in the writeback
path. This is the value we'd get with 32MB folios and 4KB blocks, or 8MB
folios with 1KB blocks, which is reasonable and safe for typical journal
configurations.
Fixes: d6bf294773a4 ("ext4/jbd2: convert jbd2_journal_blocks_per_page() to support large folio")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/ext4/inode.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
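Illustrative arithmetic only, using the numbers from the commit message:

  folio_size = 32 MB, block_size = 1 KB
  bpp        = 32 MB / 1 KB = 32768 blocks per folio
  credits    = ext4_meta_trans_blocks(inode, 2048 + 32768 - 1, 32768)
            ~= 415, which becomes 416 with overhead  >  journal max of 334

  with the cap:
  bpp        = min(32768, MAX_BLOCKS_PER_FOLIO_FOR_WRITEBACK) = 8192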
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be9a4cba35fd5..860e59a176c97 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2070,6 +2070,14 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
*/
#define MAX_WRITEPAGES_EXTENT_LEN 2048
+/*
+ * Maximum blocks per folio to avoid JBD2 credit overflow.
+ * This is the value we'd get with 32MB folios and 4KB blocks,
+ * or 8MB folios with 1KB blocks, which is reasonable and safe
+ * for typical journal configurations.
+ */
+#define MAX_BLOCKS_PER_FOLIO_FOR_WRITEBACK 8192
+
/*
* mpage_add_bh_to_extent - try to add bh to extent of blocks to map
*
@@ -2481,6 +2489,18 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
{
int bpp = ext4_journal_blocks_per_folio(inode);
+ /*
+ * With large folios, blocks per folio can get excessively large,
+ * especially with small block sizes. For example, with 32MB folios
+ * (order 11) and 1KB blocks, we get 32768 blocks per folio. This
+ * leads to credit requests that overflow the journal's transaction
+ * limit.
+ *
+ * Limit the value to avoid excessive credit requests.
+ */
+ if (bpp > MAX_BLOCKS_PER_FOLIO_FOR_WRITEBACK)
+ bpp = MAX_BLOCKS_PER_FOLIO_FOR_WRITEBACK;
+
return ext4_meta_trans_blocks(inode,
MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
}
@@ -2559,6 +2579,13 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
handle_t *handle = NULL;
int bpp = ext4_journal_blocks_per_folio(mpd->inode);
+ /*
+ * With large folios, blocks per folio can get excessively large,
+ * especially with small block sizes. Cap it to avoid credit overflow.
+ */
+ if (bpp > MAX_BLOCKS_PER_FOLIO_FOR_WRITEBACK)
+ bpp = MAX_BLOCKS_PER_FOLIO_FOR_WRITEBACK;
+
if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
else
@@ -6179,6 +6206,13 @@ int ext4_writepage_trans_blocks(struct inode *inode)
int bpp = ext4_journal_blocks_per_folio(inode);
int ret;
+ /*
+ * With large folios, blocks per folio can get excessively large,
+ * especially with small block sizes. Cap it to avoid credit overflow.
+ */
+ if (bpp > MAX_BLOCKS_PER_FOLIO_FOR_WRITEBACK)
+ bpp = MAX_BLOCKS_PER_FOLIO_FOR_WRITEBACK;
+
ret = ext4_meta_trans_blocks(inode, bpp, bpp);
/* Account for data blocks for journalled mode */
--
2.39.5
Since the commit 96336ec70264 ("PCI: Perform reset_resource() and build
fail list in sync") the failed list is always built and returned to let
the caller decide what to do with the failures. The caller may want to
retry resource fitting and assignment and before that can happen, the
resources should be restored to their original state (a reset
effectively clears the struct resource), which requires returning them
on the failed list so that the original state remains stored in the
associated struct pci_dev_resource.
Resource resizing is different from the ordinary resource fitting and
assignment in that it only considers part of the resources. This means
failures for other resource types are not relevant at all and should be
ignored. As resize doesn't unassign such unrelated resources, a resource of
another type ending up on the failed list implies its assignment must have
failed before the resize too. The check in pci_reassign_bridge_resources()
that decides whether the whole assignment succeeded, however, is based on
list emptiness, which causes false negatives when the failed list contains
resources of an unrelated type.
If the failed list is not empty, call pci_required_resource_failed()
and extend it to be able to filter on specific resource types too (if
provided).
Calling pci_required_resource_failed() at this point is slightly
problematic because the resource itself is reset when the failed list
is constructed in __assign_resources_sorted(). As a result,
pci_resource_is_optional() does not have access to the original
resource flags. This could be worked around by restoring and
re-resetting the resource around the call to pci_resource_is_optional();
however, this shouldn't cause issues, as resource resizing is meant for
64-bit prefetchable resources according to Christian König (see the Link,
which unfortunately doesn't point directly to Christian's reply because lore
didn't store that email at all).
Fixes: 96336ec70264 ("PCI: Perform reset_resource() and build fail list in sync")
Link: https://lore.kernel.org/all/c5d1b5d8-8669-5572-75a7-0b480f581ac1@linux.inte…
Reported-by: D Scott Phillips <scott(a)os.amperecomputing.com>
Tested-by: D Scott Phillips <scott(a)os.amperecomputing.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Reviewed-by: D Scott Phillips <scott(a)os.amperecomputing.com>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: <stable(a)vger.kernel.org>
---
drivers/pci/setup-bus.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
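For illustration (example values, not taken from the patch): resizing a
64-bit prefetchable window passes

  type = IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_MEM_64;

so a failed-list entry left over from an unassigned I/O BAR, with

  (fail_res->flags & PCI_RES_TYPE_MASK) == IORESOURCE_IO,

fails the type comparison in pci_required_resource_failed() and is skipped,
and the resize no longer returns -ENOSPC because of it.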
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 24863d8d0053..dbbd80d78d3d 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -28,6 +28,10 @@
#include <linux/acpi.h>
#include "pci.h"
+#define PCI_RES_TYPE_MASK \
+ (IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH |\
+ IORESOURCE_MEM_64)
+
unsigned int pci_flags;
EXPORT_SYMBOL_GPL(pci_flags);
@@ -384,13 +388,19 @@ static bool pci_need_to_release(unsigned long mask, struct resource *res)
}
/* Return: @true if assignment of a required resource failed. */
-static bool pci_required_resource_failed(struct list_head *fail_head)
+static bool pci_required_resource_failed(struct list_head *fail_head,
+ unsigned long type)
{
struct pci_dev_resource *fail_res;
+ type &= PCI_RES_TYPE_MASK;
+
list_for_each_entry(fail_res, fail_head, list) {
int idx = pci_resource_num(fail_res->dev, fail_res->res);
+ if (type && (fail_res->flags & PCI_RES_TYPE_MASK) != type)
+ continue;
+
if (!pci_resource_is_optional(fail_res->dev, idx))
return true;
}
@@ -504,7 +514,7 @@ static void __assign_resources_sorted(struct list_head *head,
}
/* Without realloc_head and only optional fails, nothing more to do. */
- if (!pci_required_resource_failed(&local_fail_head) &&
+ if (!pci_required_resource_failed(&local_fail_head, 0) &&
list_empty(realloc_head)) {
list_for_each_entry(save_res, &save_head, list) {
struct resource *res = save_res->res;
@@ -1708,10 +1718,6 @@ static void __pci_bridge_assign_resources(const struct pci_dev *bridge,
}
}
-#define PCI_RES_TYPE_MASK \
- (IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH |\
- IORESOURCE_MEM_64)
-
static void pci_bridge_release_resources(struct pci_bus *bus,
unsigned long type)
{
@@ -2449,8 +2455,12 @@ int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type)
free_list(&added);
if (!list_empty(&failed)) {
- ret = -ENOSPC;
- goto cleanup;
+ if (pci_required_resource_failed(&failed, type)) {
+ ret = -ENOSPC;
+ goto cleanup;
+ }
+ /* Only resources with unrelated types failed (again) */
+ free_list(&failed);
}
list_for_each_entry(dev_res, &saved, list) {
--
2.39.5