Linux-stable-mirror August 2024

linux-stable-mirror@lists.linaro.org

504 participants
1329 discussions

[PATCH] libbpf: check the btf_type kind to prevent error

by Ma Ke

To prevent potential error return values, it is necessary to check the return value of btf__type_by_id. We can add a kind checking to fix the issue. Cc: stable(a)vger.kernel.org Fixes: 430025e5dca5 ("libbpf: Add subskeleton scaffolding") Signed-off-by: Ma Ke <make24(a)iscas.ac.cn> --- tools/lib/bpf/libbpf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a3be6f8fac09..d1eb45d16054 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -13850,6 +13850,9 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) var = btf_var_secinfos(map_type); for (i = 0; i < len; i++, var++) { var_type = btf__type_by_id(btf, var->type); + if (!var_type) + return libbpf_err(-ENOENT); + var_name = btf__name_by_offset(btf, var_type->name_off); if (strcmp(var_name, var_skel->name) == 0) { *var_skel->addr = map->mmaped + var->offset; -- 2.25.1

1 year, 5 months

+ mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu has been added to the -mm mm-hotfixes-unstable branch. Its filename is mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Waiman Long <longman(a)redhat.com> Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu Date: Tue, 6 Aug 2024 12:41:07 -0400 The memory_failure_cpu structure is a per-cpu structure. Access to its content requires the use of get_cpu_var() to lock in the current CPU and disable preemption. The use of a regular spinlock_t for locking purpose is fine for a non-RT kernel. Since the integration of RT spinlock support into the v5.15 kernel, a spinlock_t in a RT kernel becomes a sleeping lock and taking a sleeping lock in a preemption disabled context is illegal resulting in the following kind of warning. [12135.732244] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [12135.732248] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 270076, name: kworker/0:0 [12135.732252] preempt_count: 1, expected: 0 [12135.732255] RCU nest depth: 2, expected: 2 : [12135.732420] Hardware name: Dell Inc. 
PowerEdge R640/0HG0J8, BIOS 2.10.2 02/24/2021 [12135.732423] Workqueue: kacpi_notify acpi_os_execute_deferred [12135.732433] Call Trace: [12135.732436] <TASK> [12135.732450] dump_stack_lvl+0x57/0x81 [12135.732461] __might_resched.cold+0xf4/0x12f [12135.732479] rt_spin_lock+0x4c/0x100 [12135.732491] memory_failure_queue+0x40/0xe0 [12135.732503] ghes_do_memory_failure+0x53/0x390 [12135.732516] ghes_do_proc.constprop.0+0x229/0x3e0 [12135.732575] ghes_proc+0xf9/0x1a0 [12135.732591] ghes_notify_hed+0x6a/0x150 [12135.732602] notifier_call_chain+0x43/0xb0 [12135.732626] blocking_notifier_call_chain+0x43/0x60 [12135.732637] acpi_ev_notify_dispatch+0x47/0x70 [12135.732648] acpi_os_execute_deferred+0x13/0x20 [12135.732654] process_one_work+0x41f/0x500 [12135.732695] worker_thread+0x192/0x360 [12135.732715] kthread+0x111/0x140 [12135.732733] ret_from_fork+0x29/0x50 [12135.732779] </TASK> Fix it by using a raw_spinlock_t for locking instead. Also move the pr_err() out of the lock critical section to avoid indeterminate latency of this call. 
Link: https://lkml.kernel.org/r/20240806164107.1044956-1-longman@redhat.com Fixes: ea8f5fb8a71f ("HWPoison: add memory_failure_queue()") Signed-off-by: Waiman Long <longman(a)redhat.com> Cc: "Huang, Ying" <ying.huang(a)intel.com> Cc: Juri Lelli <juri.lelli(a)redhat.com> Cc: Len Brown <len.brown(a)intel.com> Cc: Miaohe Lin <linmiaohe(a)huawei.com> Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/memory-failure.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) --- a/mm/memory-failure.c~mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu +++ a/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry { struct memory_failure_cpu { DECLARE_KFIFO(fifo, struct memory_failure_entry, MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; + raw_spinlock_t lock; struct work_struct work; }; @@ -2443,19 +2443,21 @@ void memory_failure_queue(unsigned long { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; + bool buffer_overflow; struct memory_failure_entry entry = { .pfn = pfn, .flags = flags, }; mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, entry)) + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); + buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); + if (!buffer_overflow) schedule_work_on(smp_processor_id(), &mf_cpu->work); - else + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + if (buffer_overflow) pr_err("buffer overflow when queuing memory failure at %#lx\n", pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); put_cpu_var(memory_failure_cpu); } EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(str mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = 
kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(vo for_each_possible_cpu(cpu) { mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); + raw_spin_lock_init(&mf_cpu->lock); INIT_KFIFO(mf_cpu->fifo); INIT_WORK(&mf_cpu->work, memory_failure_work_func); } _ Patches currently in -mm which might be from longman(a)redhat.com are padata-fix-possible-divide-by-0-panic-in-padata_mt_helper.patch mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch watchdog-handle-the-enodev-failure-case-of-lockup_detector_delay_init-separately.patch

1 year, 5 months

+ padata-fix-possible-divide-by-0-panic-in-padata_mt_helper.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: padata: Fix possible divide-by-0 panic in padata_mt_helper() has been added to the -mm mm-hotfixes-unstable branch. Its filename is padata-fix-possible-divide-by-0-panic-in-padata_mt_helper.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Waiman Long <longman(a)redhat.com> Subject: padata: Fix possible divide-by-0 panic in padata_mt_helper() Date: Tue, 6 Aug 2024 13:46:47 -0400 We are hit with a not easily reproducible divide-by-0 panic in padata.c at bootup time. [ 10.017908] Oops: divide error: 0000 1 PREEMPT SMP NOPTI [ 10.017908] CPU: 26 PID: 2627 Comm: kworker/u1666:1 Not tainted 6.10.0-15.el10.x86_64 #1 [ 10.017908] Hardware name: Lenovo ThinkSystem SR950 [7X12CTO1WW]/[7X12CTO1WW], BIOS [PSE140J-2.30] 07/20/2021 [ 10.017908] Workqueue: events_unbound padata_mt_helper [ 10.017908] RIP: 0010:padata_mt_helper+0x39/0xb0 : [ 10.017963] Call Trace: [ 10.017968] <TASK> [ 10.018004] ? 
padata_mt_helper+0x39/0xb0 [ 10.018084] process_one_work+0x174/0x330 [ 10.018093] worker_thread+0x266/0x3a0 [ 10.018111] kthread+0xcf/0x100 [ 10.018124] ret_from_fork+0x31/0x50 [ 10.018138] ret_from_fork_asm+0x1a/0x30 [ 10.018147] </TASK> Looking at the padata_mt_helper() function, the only way a divide-by-0 panic can happen is when ps->chunk_size is 0. The way that chunk_size is initialized in padata_do_multithreaded(), chunk_size can be 0 when the min_chunk in the passed-in padata_mt_job structure is 0. Fix this divide-by-0 panic by making sure that chunk_size will be at least 1 no matter what the input parameters are. Link: https://lkml.kernel.org/r/20240806174647.1050398-1-longman@redhat.com Fixes: 004ed42638f4 ("padata: add basic support for multithreaded jobs") Signed-off-by: Waiman Long <longman(a)redhat.com> Cc: Daniel Jordan <daniel.m.jordan(a)oracle.com> Cc: Steffen Klassert <steffen.klassert(a)secunet.com> Cc: Waiman Long <longman(a)redhat.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- kernel/padata.c | 7 +++++++ 1 file changed, 7 insertions(+) --- a/kernel/padata.c~padata-fix-possible-divide-by-0-panic-in-padata_mt_helper +++ a/kernel/padata.c @@ -517,6 +517,13 @@ void __init padata_do_multithreaded(stru ps.chunk_size = max(ps.chunk_size, job->min_chunk); ps.chunk_size = roundup(ps.chunk_size, job->align); + /* + * chunk_size can be 0 if the caller sets min_chunk to 0. So force it + * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().` + */ + if (!ps.chunk_size) + ps.chunk_size = 1U; + list_for_each_entry(pw, &works, pw_list) if (job->numa_aware) { int old_node = atomic_read(&last_used_nid); _ Patches currently in -mm which might be from longman(a)redhat.com are padata-fix-possible-divide-by-0-panic-in-padata_mt_helper.patch watchdog-handle-the-enodev-failure-case-of-lockup_detector_delay_init-separately.patch

1 year, 5 months

[GIT PULL] virtio: bugfix

by Michael S. Tsirkin

The following changes since commit 6d834691da474ed1c648753d3d3a3ef8379fa1c1: virtio_pci_modern: remove admin queue serialization lock (2024-07-17 05:43:21 -0400) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus for you to fetch changes up to 0823dc64586ba5ea13a7d200a5d33e4c5fa45950: vhost-vdpa: switch to use vmf_insert_pfn() in the fault handler (2024-07-26 03:26:02 -0400) ---------------------------------------------------------------- virtio: bugfix Fixes a single, long-standing issue with kick pass-through vdpa. Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com> ---------------------------------------------------------------- Jason Wang (1): vhost-vdpa: switch to use vmf_insert_pfn() in the fault handler drivers/vhost/vdpa.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-)

1 year, 5 months

[PATCH 1/2] drm/amdgpu/jpeg2: properly set atomics vmid field

by Alex Deucher

This needs to be set as well if the IB uses atomics. Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com> Cc: stable(a)vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657..98aa3ccd0d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); -- 2.45.2

1 year, 5 months

Bidding Services

by elvislehmann891＠gmail.com

Hi, Cannon Estimation, LLC brings you a major discount on Cost Estimating & Quantities Take-Off Services. We offer a 98% accuracy guarantee with a refund policy in case of any error in quantities. We use certified software such as PlanSwift, BlueBeams, Accu-Bid, Auto-Bid & RSmeans, etc. Send us your plans for a quote on our service charges before getting started. Please reply to this email so I can share some sample estimates. Thanks & Have a Great Day. Regards, Elvis Lehmann Business Development Manager Cannon Estimation, LLC

1 year, 5 months

[PATCH iwl-next 7/9] idpf: fix netdev Tx queue stop/wake

by Alexander Lobakin

From: Michal Kubiak <michal.kubiak(a)intel.com> netif_txq_maybe_stop() returns -1, 0, or 1, while idpf_tx_maybe_stop_common() says it returns 0 or -EBUSY. As a result, there sometimes are Tx queue timeout warnings despite that the queue is empty or there is at least enough space to restart it. Make idpf_tx_maybe_stop_common() inline and returning true or false, handling the return of netif_txq_maybe_stop() properly. Use a correct goto in idpf_tx_maybe_stop_splitq() to avoid stopping the queue or incrementing the stops counter twice. Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit") Fixes: a5ab9ee0df0b ("idpf: add singleq start_xmit and napi poll") Cc: stable(a)vger.kernel.org # 6.7+ Signed-off-by: Michal Kubiak <michal.kubiak(a)intel.com> Reviewed-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com> Signed-off-by: Alexander Lobakin <aleksander.lobakin(a)intel.com> --- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 9 ++++- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 4 +++ drivers/net/ethernet/intel/idpf/idpf_txrx.c | 35 +++++-------------- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 2478f71adb95..df3574ac58c2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1020,7 +1020,6 @@ void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, struct idpf_tx_buf *first, u16 ring_idx); unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, struct sk_buff *skb); -int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q); @@ -1029,4 +1028,12 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, u16 cleaned_count); int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); 
+static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, + u32 needed) +{ + return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + IDPF_DESC_UNUSED(tx_q), + needed, needed); +} + #endif /* !_IDPF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 947d3ff9677c..5ba360abbe66 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -375,6 +375,10 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, IDPF_TX_DESCS_FOR_CTX)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index fd44a65a0537..26ef064972d4 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2127,29 +2127,6 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); } -/** - * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions - * @tx_q: the queue to be checked - * @size: number of descriptors we want to assure is available - * - * Returns 0 if stop is not needed - */ -int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size) -{ - struct netdev_queue *nq; - - if (likely(IDPF_DESC_UNUSED(tx_q) >= size)) - return 0; - - u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.q_busy); - u64_stats_update_end(&tx_q->stats_sync); - - nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); - - return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); -} - /** * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions * @tx_q: the queue to be checked @@ -2161,7 
+2138,7 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, unsigned int descs_needed) { if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) - goto splitq_stop; + goto out; /* If there are too many outstanding completions expected on the * completion queue, stop the TX queue to give the device some time to @@ -2180,10 +2157,12 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, return 0; splitq_stop: + netif_stop_subqueue(tx_q->netdev, tx_q->idx); + +out: u64_stats_update_begin(&tx_q->stats_sync); u64_stats_inc(&tx_q->q_stats.q_busy); u64_stats_update_end(&tx_q->stats_sync); - netif_stop_subqueue(tx_q->netdev, tx_q->idx); return -EBUSY; } @@ -2206,7 +2185,11 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); tx_q->next_to_use = val; - idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); + if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) { + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + } /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only -- 2.45.2

1 year, 5 months

[PATCH] net: drop bad gso csum_start and offset in virtio_net_hdr

by mathieu.tortuyaux＠gmail.com

From: Willem de Bruijn <willemb(a)google.com> [ Upstream commit 89add40066f9ed9abe5f7f886fe5789ff7e0c50e ] Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb for GSO packets. The function already checks that a checksum requested with VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. But for GSO packets this might not hold for segs after segmentation. Syzkaller demonstrated to reach this warning in skb_checksum_help offset = skb_checksum_start_offset(skb); ret = -EINVAL; if (WARN_ON_ONCE(offset >= skb_headlen(skb))) By injecting a TSO packet: WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0 ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774 ip_finish_output_gso net/ipv4/ip_output.c:279 [inline] __ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301 iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4850 [inline] netdev_start_xmit include/linux/netdevice.h:4864 [inline] xmit_one net/core/dev.c:3595 [inline] dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611 __dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261 packet_snd net/packet/af_packet.c:3073 [inline] The geometry of the bad input packet at tcp_gso_segment: [ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0 [ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244 [ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0)) [ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536 ip_summed=3 complete_sw=0 valid=0 level=0) Mitigate with stricter input validation. csum_offset: for GSO packets, deduce the correct value from gso_type. This is already done for USO. Extend it to TSO. Let UFO be: udp[46]_ufo_fragment ignores these fields and always computes the checksum in software. 
csum_start: finding the real offset requires parsing to the transport header. Do not add a parser, use existing segmentation parsing. Thanks to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded. Again test both TSO and USO. Do not test UFO for the above reason, and do not test UDP tunnel offload. GSO packets are almost always CHECKSUM_PARTIAL. USO packets may be CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit from devices with no checksum offload"), but even then these fields are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So no need to test for ip_summed == CHECKSUM_PARTIAL first. This revises an existing fix mentioned in the Fixes tag, which broke small packets with GSO offload, as detected by kselftests. Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871 Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org Fixes: e269d79c7d35 ("net: missing check virtio") Cc: stable(a)vger.kernel.org Signed-off-by: Willem de Bruijn <willemb(a)google.com> Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gma… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- Hi, This patch fixes network failures on OpenStack VMs running with Kernel 6.6.44 (it was working fine with 6.6.43). ``` [ 237.422038] eth0: bad gso: type: 1, size: 1432 ``` This has been tested on Flatcar Linux CI with Kernel 6.6.44. I think it has to be backported to the Linux branches that have the "net: missing check virtio" commit. 
At this moment, I know those releases to be concerned: * 6.1.y (with 6.1.103) * 6.6.y (with 6.6.44) * 6.10.y (with 6.10.3) Thanks, Mathieu - @tormath1 include/linux/virtio_net.h | 16 +++++----------- net/ipv4/tcp_offload.c | 3 +++ net/ipv4/udp_offload.c | 4 ++++ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index d1d7825318c3..6c395a2600e8 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; - u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); - if (hdr->gso_size) { - gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); - ret = div64_u64_rem(skb->len, gso_size, &remainder); - if (!(ret && (hdr->gso_size > needed) && - ((remainder > needed) || (remainder == 0)))) { - return -EINVAL; - } - skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; - } - if (!pskb_may_pull(skb, needed)) return -EINVAL; @@ -182,6 +171,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (gso_type != SKB_GSO_UDP_L4) return -EINVAL; break; + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + if (skb->csum_offset != offsetof(struct tcphdr, check)) + return -EINVAL; + break; } /* Kernel has a special handling for GSO_BY_FRAGS. 
*/ diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 8311c38267b5..69e6012ae82f 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -73,6 +73,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (thlen < sizeof(*th)) goto out; + if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb))) + goto out; + if (!pskb_may_pull(skb, thlen)) goto out; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index e5971890d637..9cb13a50011e 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -278,6 +278,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (gso_skb->len <= sizeof(*uh) + mss) return ERR_PTR(-EINVAL); + if (unlikely(skb_checksum_start(gso_skb) != + skb_transport_header(gso_skb))) + return ERR_PTR(-EINVAL); + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), -- 2.44.2

1 year, 5 months

[PATCH v1] arm64: dts: ti: k3-am62-verdin-dahlia: Keep CTRL_SLEEP_MOCI# regulator on

by Francesco Dolcini

From: Francesco Dolcini <francesco.dolcini(a)toradex.com> This reverts commit 3935fbc87ddebea5439f3ab6a78b1e83e976bf88. CTRL_SLEEP_MOCI# is a signal that is defined for all the SoM implementing the Verdin family specification, this signal is supposed to control the power enable in the carrier board when the system is in deep sleep mode. However this is not possible with Texas Instruments AM62 SoC, IOs output buffer is disabled in deep sleep and IOs are in tri-state mode. Given that we cannot properly control this pin, force it to be always high to minimize potential issues. Fixes: 3935fbc87dde ("arm64: dts: ti: k3-am62-verdin-dahlia: support sleep-moci") Cc: <stable(a)vger.kernel.org> Link: https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1… Signed-off-by: Francesco Dolcini <francesco.dolcini(a)toradex.com> --- .../boot/dts/ti/k3-am62-verdin-dahlia.dtsi | 22 ------------------- arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi | 6 ----- 2 files changed, 28 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi index e8f4d136e5df..9202181fbd65 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi @@ -43,15 +43,6 @@ simple-audio-card,cpu { sound-dai = <&mcasp0>; }; }; - - reg_usb_hub: regulator-usb-hub { - compatible = "regulator-fixed"; - enable-active-high; - /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ - gpio = <&main_gpio0 31 GPIO_ACTIVE_HIGH>; - regulator-boot-on; - regulator-name = "HUB_PWR_EN"; - }; }; /* Verdin ETHs */ @@ -193,11 +184,6 @@ &ospi0 { status = "okay"; }; -/* Do not force CTRL_SLEEP_MOCI# always enabled */ -&reg_force_sleep_moci { - status = "disabled"; -}; - /* Verdin SD_1 */ &sdhci1 { status = "okay"; @@ -218,15 +204,7 @@ &usbss1 { }; &usb1 { - #address-cells = <1>; - #size-cells = <0>; status = "okay"; - - usb-hub@1 { - compatible = "usb424,2744"; - reg = <1>; - vdd-supply = <&reg_usb_hub>; - 
}; }; /* Verdin CTRL_WAKE1_MICO# */ diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi index 359f53f3e019..5bef31b8577b 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi @@ -138,12 +138,6 @@ reg_1v8_eth: regulator-1v8-eth { vin-supply = <&reg_1v8>; }; - /* - * By default we enable CTRL_SLEEP_MOCI#, this is required to have - * peripherals on the carrier board powered. - * If more granularity or power saving is required this can be disabled - * in the carrier board device tree files. - */ reg_force_sleep_moci: regulator-force-sleep-moci { compatible = "regulator-fixed"; enable-active-high; -- 2.39.2

1 year, 5 months

[PATCH 1/8] mm: Fix endless reclaim on machines with unaccepted memory

by Kirill A. Shutemov

Unaccepted memory is considered unusable free memory, which is not counted as free on the zone watermark check. This causes get_page_from_freelist() to accept more memory to hit the high watermark, but it creates problems in the reclaim path. The reclaim path encounters a failed zone watermark check and attempts to reclaim memory. This is usually successful, but if there is little or no reclaimable memory, it can result in endless reclaim with little to no progress. This can occur early in the boot process, just after start of the init process when the only reclaimable memory is the page cache of the init executable and its libraries. Make unaccepted memory free from watermark check point of view. This way unaccepted memory will never be the trigger of memory reclaim. Accept more memory in the get_page_from_freelist() if needed. Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Reported-by: Jianxiong Gao <jxgao(a)google.com> Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Cc: stable(a)vger.kernel.org # v6.5+ --- mm/page_alloc.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 28f80daf5c04..aa9b1eaa638c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, 
NR_UNACCEPTED); -#endif return unusable_free; } @@ -3368,6 +3365,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3393,10 +3392,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3450,10 +3447,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -6951,9 +6946,6 @@ static bool try_to_accept_memory_one(struct zone *zone) struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6979,23 +6971,29 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? 
*/ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -7038,7 +7036,7 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } -- 2.43.0

1 year, 5 months

Jump to page:

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror August 2024