If find_linux_pte() fails, IRQs will not be restored. This is unlikely
to happen in practice, since it would have shown up as hung
hosts, but it should of course be fixed anyway.
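For illustration, a minimal sketch of the control flow the fix establishes
(hypothetical helper names, not the kvmppc code itself): interrupts are now
restored once after the lookup block, so the path where the PTE lookup
returns NULL no longer exits with IRQs disabled.
	local_irq_save(flags);
	ptep = lookup_pte(pgdir, hva);		/* hypothetical lookup; may return NULL */
	if (ptep) {
		pte_t pte = READ_ONCE(*ptep);
		if (pte_present(pte)) {
			wimg = pte_wimge_bits(pte);	/* hypothetical accessor */
		} else {
			local_irq_restore(flags);
			pr_err_ratelimited("pte not present\n");
			goto out;
		}
	}
	local_irq_restore(flags);		/* reached even when ptep is NULL */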
Cc: stable(a)vger.kernel.org
Reported-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/powerpc/kvm/e500_mmu_host.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index e5a145b578a4..6824e8139801 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -479,7 +479,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
if (pte_present(pte)) {
wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
MAS2_WIMGE_MASK;
- local_irq_restore(flags);
} else {
local_irq_restore(flags);
pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
@@ -488,8 +487,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
goto out;
}
}
+ local_irq_restore(flags);
+
writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
-
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
ref, gvaddr, stlbe);
--
2.47.1
rose_setsockopt() may be passed unpredictably large values which are
then multiplied by HZ (or 60 * HZ), so integer overflows may occur.
Do the safe minimum and fix these issues by checking the value of
'opt' and returning -EINVAL if it is too large. Also, switch 'opt' to
unsigned int and remove the now-useless check for a negative 'opt'
in the ROSE_IDLE case.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
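As a side note, the guards added below follow the usual divide-before-multiply
pattern. A minimal standalone sketch (the HZ value and the helper name here are
made up for illustration; the kernel's HZ is config-dependent):
#include <limits.h>
#include <stdio.h>

#define HZ 250	/* illustrative value only; the real HZ is a kernel config option */

/* Convert a user-supplied seconds value to jiffies, rejecting inputs
 * whose multiplication by HZ would overflow an unsigned int. */
static int secs_to_jiffies_checked(unsigned int opt, unsigned int *out)
{
	if (opt < 1 || opt > UINT_MAX / HZ)
		return -1;	/* the socket code would return -EINVAL here */
	*out = opt * HZ;
	return 0;
}

int main(void)
{
	unsigned int jiffies;

	printf("opt=10:       %s\n", secs_to_jiffies_checked(10, &jiffies) ? "rejected" : "accepted");
	printf("opt=UINT_MAX: %s\n", secs_to_jiffies_checked(UINT_MAX, &jiffies) ? "rejected" : "accepted");
	return 0;
}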
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
net/rose/af_rose.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 59050caab65c..72c65d938a15 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -397,15 +397,15 @@ static int rose_setsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
struct rose_sock *rose = rose_sk(sk);
- int opt;
+ unsigned int opt;
if (level != SOL_ROSE)
return -ENOPROTOOPT;
- if (optlen < sizeof(int))
+ if (optlen < sizeof(unsigned int))
return -EINVAL;
- if (copy_from_sockptr(&opt, optval, sizeof(int)))
+ if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
return -EFAULT;
switch (optname) {
@@ -414,31 +414,31 @@ static int rose_setsockopt(struct socket *sock, int level, int optname,
return 0;
case ROSE_T1:
- if (opt < 1)
+ if (opt < 1 || opt > UINT_MAX / HZ)
return -EINVAL;
rose->t1 = opt * HZ;
return 0;
case ROSE_T2:
- if (opt < 1)
+ if (opt < 1 || opt > UINT_MAX / HZ)
return -EINVAL;
rose->t2 = opt * HZ;
return 0;
case ROSE_T3:
- if (opt < 1)
+ if (opt < 1 || opt > UINT_MAX / HZ)
return -EINVAL;
rose->t3 = opt * HZ;
return 0;
case ROSE_HOLDBACK:
- if (opt < 1)
+ if (opt < 1 || opt > UINT_MAX / HZ)
return -EINVAL;
rose->hb = opt * HZ;
return 0;
case ROSE_IDLE:
- if (opt < 0)
+ if (opt > UINT_MAX / (60 * HZ))
return -EINVAL;
rose->idle = opt * 60 * HZ;
return 0;
pci-legacy systems do not use logic_pio to manage PIO
allocations, so the generic pci_address_to_pio() does not work
when PCI_IOBASE is defined.
Override the function with the architecture's own implementation
to fix the problem.
Cc: stable(a)vger.kernel.org
Fixes: 4bfb53e7d317 ("mips: add <asm-generic/io.h> including")
Reported-by: Mateusz Jończyk <mat.jonczyk(a)o2.pl>
Closes: https://lore.kernel.org/r/99f75c66-4c2d-45dc-a808-b5ba440c7551@app.fastmail…
Signed-off-by: Jiaxun Yang <jiaxun.yang(a)flygoat.com>
---
This is a quick fix for the fixes tree and for stable backporting.
In the long term, we should make logic_pio accept fixed mappings,
and make the PCI core code aware of platforms that do not use a
vmap'd PCI_IOBASE.
---
arch/mips/pci/pci-legacy.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c
index ec2567f8efd83bff7b106cbbd9ec7a6de0308c4c..66898fd182dc1fec1d1e9ae4c908873d59777182 100644
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -29,6 +29,14 @@ static LIST_HEAD(controllers);
static int pci_initialized;
+unsigned long pci_address_to_pio(phys_addr_t address)
+{
+ if (address > IO_SPACE_LIMIT)
+ return (unsigned long)-1;
+
+ return (unsigned long) address;
+}
+
/*
* We need to avoid collisions with `mirrored' VGA ports
* and other strange ISA hardware, so we always want the
---
base-commit: dab2734f8e9ecba609d66d1dd087a392a7774c04
change-id: 20250114-malta-io-fixes-85e14b1b9f8b
Best regards,
--
Jiaxun Yang <jiaxun.yang(a)flygoat.com>
The comparison function cmpworker() violates the C standard's
requirements for qsort() comparison functions, which mandate symmetry
and transitivity:
Symmetry: If x < y, then y > x.
Transitivity: If x < y and y < z, then x < z.
In its current implementation, cmpworker() incorrectly returns 0 when
w1->tid < w2->tid, which breaks both symmetry and transitivity. This
violation causes undefined behavior, potentially leading to issues such
as memory corruption in glibc [1].
Fix the issue by returning -1 when w1->tid < w2->tid, ensuring
compliance with the C standard and preventing undefined behavior.
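For reference, a standalone demonstration of a conforming three-way
comparator (struct worker is reduced to just the tid field for illustration):
#include <stdio.h>
#include <stdlib.h>

struct worker { int tid; };

/* Returns negative, zero or positive, as the C standard requires of
 * qsort() comparison functions. */
static int cmpworker(const void *p1, const void *p2)
{
	const struct worker *w1 = p1;
	const struct worker *w2 = p2;

	if (w1->tid > w2->tid)
		return 1;
	if (w1->tid < w2->tid)
		return -1;
	return 0;
}

int main(void)
{
	struct worker w[] = { { 3 }, { 1 }, { 2 } };

	qsort(w, sizeof(w) / sizeof(w[0]), sizeof(w[0]), cmpworker);
	for (int i = 0; i < 3; i++)
		printf("%d ", w[i].tid);	/* prints: 1 2 3 */
	printf("\n");
	return 0;
}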
Link: https://www.qualys.com/2024/01/30/qsort.txt [1]
Fixes: 121dd9ea0116 ("perf bench: Add epoll parallel epoll_wait benchmark")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kuan-Wei Chiu <visitorckw(a)gmail.com>
---
Changes in v3:
- Perform a full comparison for clarity, as suggested by James.
tools/perf/bench/epoll-wait.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index ef5c4257844d..20fe4f72b4af 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -420,7 +420,12 @@ static int cmpworker(const void *p1, const void *p2)
struct worker *w1 = (struct worker *) p1;
struct worker *w2 = (struct worker *) p2;
- return w1->tid > w2->tid;
+
+ if (w1->tid > w2->tid)
+ return 1;
+ if (w1->tid < w2->tid)
+ return -1;
+ return 0;
}
int bench_epoll_wait(int argc, const char **argv)
--
2.34.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 9322d1915f9d976ee48c09d800fbd5169bc2ddcc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012050-motor-prevalent-e802@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9322d1915f9d976ee48c09d800fbd5169bc2ddcc Mon Sep 17 00:00:00 2001
From: Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
Date: Sun, 15 Dec 2024 12:39:45 +0900
Subject: [PATCH] irqchip: Plug an OF node reference leak in
platform_irqchip_probe()
platform_irqchip_probe() leaks an OF node when irq_init_cb() fails. Fix it
by declaring par_np with the __free(device_node) cleanup construct.
This bug was found by an experimental static analysis tool that I am
developing.
Fixes: f8410e626569 ("irqchip: Add IRQCHIP_PLATFORM_DRIVER_BEGIN/END and IRQCHIP_MATCH helper macros")
Signed-off-by: Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20241215033945.3414223-1-joe@pf.is.s.u-tokyo.ac…
diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c
index 1eeb0d0156ce..0ee7b6b71f5f 100644
--- a/drivers/irqchip/irqchip.c
+++ b/drivers/irqchip/irqchip.c
@@ -35,11 +35,10 @@ void __init irqchip_init(void)
int platform_irqchip_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
- struct device_node *par_np = of_irq_find_parent(np);
+ struct device_node *par_np __free(device_node) = of_irq_find_parent(np);
of_irq_init_cb_t irq_init_cb = of_device_get_match_data(&pdev->dev);
if (!irq_init_cb) {
- of_node_put(par_np);
return -EINVAL;
}
@@ -55,7 +54,6 @@ int platform_irqchip_probe(struct platform_device *pdev)
* interrupt controller can check for specific domains as necessary.
*/
if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY)) {
- of_node_put(par_np);
return -EPROBE_DEFER;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 9322d1915f9d976ee48c09d800fbd5169bc2ddcc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012049-connector-oxford-19f1@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9322d1915f9d976ee48c09d800fbd5169bc2ddcc Mon Sep 17 00:00:00 2001
From: Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
Date: Sun, 15 Dec 2024 12:39:45 +0900
Subject: [PATCH] irqchip: Plug an OF node reference leak in
platform_irqchip_probe()
platform_irqchip_probe() leaks an OF node when irq_init_cb() fails. Fix it
by declaring par_np with the __free(device_node) cleanup construct.
This bug was found by an experimental static analysis tool that I am
developing.
Fixes: f8410e626569 ("irqchip: Add IRQCHIP_PLATFORM_DRIVER_BEGIN/END and IRQCHIP_MATCH helper macros")
Signed-off-by: Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20241215033945.3414223-1-joe@pf.is.s.u-tokyo.ac…
diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c
index 1eeb0d0156ce..0ee7b6b71f5f 100644
--- a/drivers/irqchip/irqchip.c
+++ b/drivers/irqchip/irqchip.c
@@ -35,11 +35,10 @@ void __init irqchip_init(void)
int platform_irqchip_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
- struct device_node *par_np = of_irq_find_parent(np);
+ struct device_node *par_np __free(device_node) = of_irq_find_parent(np);
of_irq_init_cb_t irq_init_cb = of_device_get_match_data(&pdev->dev);
if (!irq_init_cb) {
- of_node_put(par_np);
return -EINVAL;
}
@@ -55,7 +54,6 @@ int platform_irqchip_probe(struct platform_device *pdev)
* interrupt controller can check for specific domains as necessary.
*/
if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY)) {
- of_node_put(par_np);
return -EPROBE_DEFER;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012032-buffoon-cabbie-1e42@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Tue, 7 Jan 2025 14:47:52 +0000
Subject: [PATCH] mm: clear uffd-wp PTE/PMD state on mremap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When mremap()ing a memory region previously registered with userfaultfd as
write-protected but without UFFD_FEATURE_EVENT_REMAP, an inconsistency in
flag clearing leads to a mismatch between the vma flags (which have
uffd-wp cleared) and the pte/pmd flags (which do not have uffd-wp
cleared). This mismatch causes a subsequent mprotect(PROT_WRITE) to
trigger a warning in page_table_check_pte_flags() due to setting the pte
to writable while uffd-wp is still set.
Fix this by always explicitly clearing the uffd-wp pte/pmd flags on any
such mremap() so that the values are consistent with the existing clearing
of VM_UFFD_WP. Be careful to clear the logical flag regardless of its
physical form; a PTE bit, a swap PTE bit, or a PTE marker. Cover PTE,
huge PMD and hugetlb paths.
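For context, a rough and untested userspace sketch of the scenario described
above (error handling omitted; the names and flow follow the uffd-wp API, but
treat this as an illustration rather than a reference reproducer; whether the
warning actually fires also depends on the kernel config, e.g.
CONFIG_PAGE_TABLE_CHECK):
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	size_t len = 4096;
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	/* Deliberately do not request UFFD_FEATURE_EVENT_REMAP. */
	struct uffdio_api api = { .api = UFFD_API, .features = 0 };
	ioctl(uffd, UFFDIO_API, &api);

	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	memset(p, 0, len);		/* populate the page so a PTE exists */

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)p, .len = len },
		.mode  = UFFDIO_REGISTER_MODE_WP,
	};
	ioctl(uffd, UFFDIO_REGISTER, &reg);

	struct uffdio_writeprotect wp = {
		.range = { .start = (unsigned long)p, .len = len },
		.mode  = UFFDIO_WRITEPROTECT_MODE_WP,
	};
	ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);	/* mark the PTE uffd-wp */

	/* Without EVENT_REMAP, mremap() clears VM_UFFD_WP on the vma but,
	 * before this fix, could leave the uffd-wp bit set in the PTE. */
	char *q = mremap(p, len, 2 * len, MREMAP_MAYMOVE);

	/* Making the PTE writable while uffd-wp is still set is what
	 * triggered the page_table_check warning prior to the fix. */
	mprotect(q, len, PROT_READ | PROT_WRITE);
	return 0;
}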
Link: https://lkml.kernel.org/r/20250107144755.1871363-2-ryan.roberts@arm.com
Co-developed-by: Mikołaj Lenczewski <miko.lenczewski(a)arm.com>
Signed-off-by: Mikołaj Lenczewski <miko.lenczewski(a)arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Closes: https://lore.kernel.org/linux-mm/810b44a8-d2ae-4107-b665-5a42eae2d948@arm.c…
Fixes: 63b2d4174c4a ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl")
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index cb40f1a1d081..75342022d144 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -247,6 +247,13 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
vma_is_shmem(vma);
}
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+ struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0;
+}
+
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
void dup_userfaultfd_fail(struct list_head *);
@@ -402,6 +409,11 @@ static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
return false;
}
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+ return false;
+}
+
#endif /* CONFIG_USERFAULTFD */
static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e53d83b3e5cf..db64116a4f84 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2206,6 +2206,16 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
return pmd;
}
+static pmd_t clear_uffd_wp_pmd(pmd_t pmd)
+{
+ if (pmd_present(pmd))
+ pmd = pmd_clear_uffd_wp(pmd);
+ else if (is_swap_pmd(pmd))
+ pmd = pmd_swp_clear_uffd_wp(pmd);
+
+ return pmd;
+}
+
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
@@ -2244,6 +2254,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
}
pmd = move_soft_dirty_pmd(pmd);
+ if (vma_has_uffd_without_event_remap(vma))
+ pmd = clear_uffd_wp_pmd(pmd);
set_pmd_at(mm, new_addr, new_pmd, pmd);
if (force_flush)
flush_pmd_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c498874a7170..eaaec19caa7c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5402,6 +5402,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte,
unsigned long sz)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct hstate *h = hstate_vma(vma);
struct mm_struct *mm = vma->vm_mm;
spinlock_t *src_ptl, *dst_ptl;
@@ -5418,7 +5419,18 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
- set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ huge_pte_clear(mm, new_addr, dst_pte, sz);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = huge_pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+ }
if (src_ptl != dst_ptl)
spin_unlock(src_ptl);
diff --git a/mm/mremap.c b/mm/mremap.c
index 60473413836b..cff7f552f909 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -138,6 +138,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
pmd_t dummy_pmdval;
@@ -216,7 +217,18 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
force_flush = true;
pte = move_pte(pte, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
- set_pte_at(mm, new_addr, new_pte, pte);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ pte_clear(mm, new_addr, new_pte);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_pte_at(mm, new_addr, new_pte, pte);
+ }
}
arch_leave_lazy_mmu_mode();
@@ -278,6 +290,15 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
return false;
+ /* If this pmd belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
+
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
@@ -333,6 +354,15 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pud_none(*new_pud)))
return false;
+ /* If this pud belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
+
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012006-abnormal-unnoticed-6f89@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Tue, 7 Jan 2025 14:47:52 +0000
Subject: [PATCH] mm: clear uffd-wp PTE/PMD state on mremap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When mremap()ing a memory region previously registered with userfaultfd as
write-protected but without UFFD_FEATURE_EVENT_REMAP, an inconsistency in
flag clearing leads to a mismatch between the vma flags (which have
uffd-wp cleared) and the pte/pmd flags (which do not have uffd-wp
cleared). This mismatch causes a subsequent mprotect(PROT_WRITE) to
trigger a warning in page_table_check_pte_flags() due to setting the pte
to writable while uffd-wp is still set.
Fix this by always explicitly clearing the uffd-wp pte/pmd flags on any
such mremap() so that the values are consistent with the existing clearing
of VM_UFFD_WP. Be careful to clear the logical flag regardless of its
physical form; a PTE bit, a swap PTE bit, or a PTE marker. Cover PTE,
huge PMD and hugetlb paths.
Link: https://lkml.kernel.org/r/20250107144755.1871363-2-ryan.roberts@arm.com
Co-developed-by: Mikołaj Lenczewski <miko.lenczewski(a)arm.com>
Signed-off-by: Mikołaj Lenczewski <miko.lenczewski(a)arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Closes: https://lore.kernel.org/linux-mm/810b44a8-d2ae-4107-b665-5a42eae2d948@arm.c…
Fixes: 63b2d4174c4a ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl")
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index cb40f1a1d081..75342022d144 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -247,6 +247,13 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
vma_is_shmem(vma);
}
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+ struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0;
+}
+
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
void dup_userfaultfd_fail(struct list_head *);
@@ -402,6 +409,11 @@ static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
return false;
}
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+ return false;
+}
+
#endif /* CONFIG_USERFAULTFD */
static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e53d83b3e5cf..db64116a4f84 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2206,6 +2206,16 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
return pmd;
}
+static pmd_t clear_uffd_wp_pmd(pmd_t pmd)
+{
+ if (pmd_present(pmd))
+ pmd = pmd_clear_uffd_wp(pmd);
+ else if (is_swap_pmd(pmd))
+ pmd = pmd_swp_clear_uffd_wp(pmd);
+
+ return pmd;
+}
+
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
@@ -2244,6 +2254,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
}
pmd = move_soft_dirty_pmd(pmd);
+ if (vma_has_uffd_without_event_remap(vma))
+ pmd = clear_uffd_wp_pmd(pmd);
set_pmd_at(mm, new_addr, new_pmd, pmd);
if (force_flush)
flush_pmd_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c498874a7170..eaaec19caa7c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5402,6 +5402,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte,
unsigned long sz)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct hstate *h = hstate_vma(vma);
struct mm_struct *mm = vma->vm_mm;
spinlock_t *src_ptl, *dst_ptl;
@@ -5418,7 +5419,18 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
- set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ huge_pte_clear(mm, new_addr, dst_pte, sz);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = huge_pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+ }
if (src_ptl != dst_ptl)
spin_unlock(src_ptl);
diff --git a/mm/mremap.c b/mm/mremap.c
index 60473413836b..cff7f552f909 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -138,6 +138,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
pmd_t dummy_pmdval;
@@ -216,7 +217,18 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
force_flush = true;
pte = move_pte(pte, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
- set_pte_at(mm, new_addr, new_pte, pte);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ pte_clear(mm, new_addr, new_pte);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_pte_at(mm, new_addr, new_pte, pte);
+ }
}
arch_leave_lazy_mmu_mode();
@@ -278,6 +290,15 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
return false;
+ /* If this pmd belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
+
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
@@ -333,6 +354,15 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pud_none(*new_pud)))
return false;
+ /* If this pud belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
+
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012005-persecute-bankroll-36e6@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Tue, 7 Jan 2025 14:47:52 +0000
Subject: [PATCH] mm: clear uffd-wp PTE/PMD state on mremap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When mremap()ing a memory region previously registered with userfaultfd as
write-protected but without UFFD_FEATURE_EVENT_REMAP, an inconsistency in
flag clearing leads to a mismatch between the vma flags (which have
uffd-wp cleared) and the pte/pmd flags (which do not have uffd-wp
cleared). This mismatch causes a subsequent mprotect(PROT_WRITE) to
trigger a warning in page_table_check_pte_flags() due to setting the pte
to writable while uffd-wp is still set.
Fix this by always explicitly clearing the uffd-wp pte/pmd flags on any
such mremap() so that the values are consistent with the existing clearing
of VM_UFFD_WP. Be careful to clear the logical flag regardless of its
physical form; a PTE bit, a swap PTE bit, or a PTE marker. Cover PTE,
huge PMD and hugetlb paths.
Link: https://lkml.kernel.org/r/20250107144755.1871363-2-ryan.roberts@arm.com
Co-developed-by: Mikołaj Lenczewski <miko.lenczewski(a)arm.com>
Signed-off-by: Mikołaj Lenczewski <miko.lenczewski(a)arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Closes: https://lore.kernel.org/linux-mm/810b44a8-d2ae-4107-b665-5a42eae2d948@arm.c…
Fixes: 63b2d4174c4a ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl")
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index cb40f1a1d081..75342022d144 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -247,6 +247,13 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
vma_is_shmem(vma);
}
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+ struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0;
+}
+
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
void dup_userfaultfd_fail(struct list_head *);
@@ -402,6 +409,11 @@ static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
return false;
}
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+ return false;
+}
+
#endif /* CONFIG_USERFAULTFD */
static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e53d83b3e5cf..db64116a4f84 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2206,6 +2206,16 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
return pmd;
}
+static pmd_t clear_uffd_wp_pmd(pmd_t pmd)
+{
+ if (pmd_present(pmd))
+ pmd = pmd_clear_uffd_wp(pmd);
+ else if (is_swap_pmd(pmd))
+ pmd = pmd_swp_clear_uffd_wp(pmd);
+
+ return pmd;
+}
+
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
@@ -2244,6 +2254,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
}
pmd = move_soft_dirty_pmd(pmd);
+ if (vma_has_uffd_without_event_remap(vma))
+ pmd = clear_uffd_wp_pmd(pmd);
set_pmd_at(mm, new_addr, new_pmd, pmd);
if (force_flush)
flush_pmd_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c498874a7170..eaaec19caa7c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5402,6 +5402,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte,
unsigned long sz)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct hstate *h = hstate_vma(vma);
struct mm_struct *mm = vma->vm_mm;
spinlock_t *src_ptl, *dst_ptl;
@@ -5418,7 +5419,18 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
- set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ huge_pte_clear(mm, new_addr, dst_pte, sz);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = huge_pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+ }
if (src_ptl != dst_ptl)
spin_unlock(src_ptl);
diff --git a/mm/mremap.c b/mm/mremap.c
index 60473413836b..cff7f552f909 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -138,6 +138,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
pmd_t dummy_pmdval;
@@ -216,7 +217,18 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
force_flush = true;
pte = move_pte(pte, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
- set_pte_at(mm, new_addr, new_pte, pte);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ pte_clear(mm, new_addr, new_pte);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_pte_at(mm, new_addr, new_pte, pte);
+ }
}
arch_leave_lazy_mmu_mode();
@@ -278,6 +290,15 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
return false;
+ /* If this pmd belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
+
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
@@ -333,6 +354,15 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pud_none(*new_pud)))
return false;
+ /* If this pud belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
+
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012004-applaud-dipped-9aec@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Tue, 7 Jan 2025 14:47:52 +0000
Subject: [PATCH] mm: clear uffd-wp PTE/PMD state on mremap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When mremap()ing a memory region previously registered with userfaultfd as
write-protected but without UFFD_FEATURE_EVENT_REMAP, an inconsistency in
flag clearing leads to a mismatch between the vma flags (which have
uffd-wp cleared) and the pte/pmd flags (which do not have uffd-wp
cleared). This mismatch causes a subsequent mprotect(PROT_WRITE) to
trigger a warning in page_table_check_pte_flags() due to setting the pte
to writable while uffd-wp is still set.
Fix this by always explicitly clearing the uffd-wp pte/pmd flags on any
such mremap() so that the values are consistent with the existing clearing
of VM_UFFD_WP. Be careful to clear the logical flag regardless of its
physical form; a PTE bit, a swap PTE bit, or a PTE marker. Cover PTE,
huge PMD and hugetlb paths.
Link: https://lkml.kernel.org/r/20250107144755.1871363-2-ryan.roberts@arm.com
Co-developed-by: Mikołaj Lenczewski <miko.lenczewski(a)arm.com>
Signed-off-by: Mikołaj Lenczewski <miko.lenczewski(a)arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Closes: https://lore.kernel.org/linux-mm/810b44a8-d2ae-4107-b665-5a42eae2d948@arm.c…
Fixes: 63b2d4174c4a ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl")
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index cb40f1a1d081..75342022d144 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -247,6 +247,13 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
vma_is_shmem(vma);
}
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+ struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0;
+}
+
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
void dup_userfaultfd_fail(struct list_head *);
@@ -402,6 +409,11 @@ static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
return false;
}
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+ return false;
+}
+
#endif /* CONFIG_USERFAULTFD */
static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e53d83b3e5cf..db64116a4f84 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2206,6 +2206,16 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
return pmd;
}
+static pmd_t clear_uffd_wp_pmd(pmd_t pmd)
+{
+ if (pmd_present(pmd))
+ pmd = pmd_clear_uffd_wp(pmd);
+ else if (is_swap_pmd(pmd))
+ pmd = pmd_swp_clear_uffd_wp(pmd);
+
+ return pmd;
+}
+
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
@@ -2244,6 +2254,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
}
pmd = move_soft_dirty_pmd(pmd);
+ if (vma_has_uffd_without_event_remap(vma))
+ pmd = clear_uffd_wp_pmd(pmd);
set_pmd_at(mm, new_addr, new_pmd, pmd);
if (force_flush)
flush_pmd_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c498874a7170..eaaec19caa7c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5402,6 +5402,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte,
unsigned long sz)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct hstate *h = hstate_vma(vma);
struct mm_struct *mm = vma->vm_mm;
spinlock_t *src_ptl, *dst_ptl;
@@ -5418,7 +5419,18 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
- set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ huge_pte_clear(mm, new_addr, dst_pte, sz);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = huge_pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+ }
if (src_ptl != dst_ptl)
spin_unlock(src_ptl);
diff --git a/mm/mremap.c b/mm/mremap.c
index 60473413836b..cff7f552f909 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -138,6 +138,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
pmd_t dummy_pmdval;
@@ -216,7 +217,18 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
force_flush = true;
pte = move_pte(pte, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
- set_pte_at(mm, new_addr, new_pte, pte);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ pte_clear(mm, new_addr, new_pte);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_pte_at(mm, new_addr, new_pte, pte);
+ }
}
arch_leave_lazy_mmu_mode();
@@ -278,6 +290,15 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
return false;
+ /* If this pmd belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
+
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
@@ -333,6 +354,15 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pud_none(*new_pud)))
return false;
+ /* If this pud belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
+
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 35ca53b7b0f0ffd16c6675fd76abac9409cf83e0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012030-alongside-scenic-3cc9@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 35ca53b7b0f0ffd16c6675fd76abac9409cf83e0 Mon Sep 17 00:00:00 2001
From: Leo Li <sunpeng.li(a)amd.com>
Date: Wed, 11 Dec 2024 12:06:24 -0500
Subject: [PATCH] drm/amd/display: Do not elevate mem_type change to full
update
[Why]
There should not be any need to revalidate bandwidth on a memory placement
change, since the fb is expected to be pinned to DCN-accessible memory
before scanout. For APUs that is DRAM, and for dGPUs it is VRAM. However, an
async flip combined with a memory type change needs to be rejected.
[How]
Do not set lock_and_validation_needed on mem_type change. Instead,
reject an async_flip request if the crtc's buffer(s) changed mem_type.
This may fix stuttering/corruption experienced with PSR SU and PSR1
panels, if the compositor allocates fbs in both VRAM carveout and GTT
and flips between them.
Fixes: a7c0cad0dc06 ("drm/amd/display: ensure async flips are only accepted for fast updates")
Reviewed-by: Tom Chung <chiahsuan.chung(a)amd.com>
Signed-off-by: Leo Li <sunpeng.li(a)amd.com>
Signed-off-by: Tom Chung <chiahsuan.chung(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
(cherry picked from commit 4caacd1671b7a013ad04cd8b6398f002540bdd4d)
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 56b47e02db0b..dcc5d8ded662 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -11379,6 +11379,25 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev,
return 0;
}
+static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev,
+ struct drm_atomic_state *state,
+ struct drm_crtc_state *crtc_state)
+{
+ struct drm_plane *plane;
+ struct drm_plane_state *new_plane_state, *old_plane_state;
+
+ drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) {
+ new_plane_state = drm_atomic_get_plane_state(state, plane);
+ old_plane_state = drm_atomic_get_plane_state(state, plane);
+
+ if (old_plane_state->fb && new_plane_state->fb &&
+ get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb))
+ return true;
+ }
+
+ return false;
+}
+
/**
* amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
*
@@ -11576,10 +11595,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
/* Remove exiting planes if they are modified */
for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) {
- if (old_plane_state->fb && new_plane_state->fb &&
- get_mem_type(old_plane_state->fb) !=
- get_mem_type(new_plane_state->fb))
- lock_and_validation_needed = true;
ret = dm_update_plane_state(dc, state, plane,
old_plane_state,
@@ -11874,9 +11889,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
/*
* Only allow async flips for fast updates that don't change
- * the FB pitch, the DCC state, rotation, etc.
+ * the FB pitch, the DCC state, rotation, mem_type, etc.
*/
- if (new_crtc_state->async_flip && lock_and_validation_needed) {
+ if (new_crtc_state->async_flip &&
+ (lock_and_validation_needed ||
+ amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) {
drm_dbg_atomic(crtc->dev,
"[CRTC:%d:%s] async flips are only supported for fast updates\n",
crtc->base.id, crtc->name);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 35ca53b7b0f0ffd16c6675fd76abac9409cf83e0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012030-xerox-tremor-f7d9@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 35ca53b7b0f0ffd16c6675fd76abac9409cf83e0 Mon Sep 17 00:00:00 2001
From: Leo Li <sunpeng.li(a)amd.com>
Date: Wed, 11 Dec 2024 12:06:24 -0500
Subject: [PATCH] drm/amd/display: Do not elevate mem_type change to full
update
[Why]
There should not be any need to revalidate bandwidth on a memory placement
change, since the fb is expected to be pinned to DCN-accessible memory
before scanout. For APUs that is DRAM, and for dGPUs it is VRAM. However, an
async flip combined with a memory type change needs to be rejected.
[How]
Do not set lock_and_validation_needed on mem_type change. Instead,
reject an async_flip request if the crtc's buffer(s) changed mem_type.
This may fix stuttering/corruption experienced with PSR SU and PSR1
panels, if the compositor allocates fbs in both VRAM carveout and GTT
and flips between them.
Fixes: a7c0cad0dc06 ("drm/amd/display: ensure async flips are only accepted for fast updates")
Reviewed-by: Tom Chung <chiahsuan.chung(a)amd.com>
Signed-off-by: Leo Li <sunpeng.li(a)amd.com>
Signed-off-by: Tom Chung <chiahsuan.chung(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
(cherry picked from commit 4caacd1671b7a013ad04cd8b6398f002540bdd4d)
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 56b47e02db0b..dcc5d8ded662 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -11379,6 +11379,25 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev,
return 0;
}
+static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev,
+ struct drm_atomic_state *state,
+ struct drm_crtc_state *crtc_state)
+{
+ struct drm_plane *plane;
+ struct drm_plane_state *new_plane_state, *old_plane_state;
+
+ drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) {
+ new_plane_state = drm_atomic_get_plane_state(state, plane);
+ old_plane_state = drm_atomic_get_plane_state(state, plane);
+
+ if (old_plane_state->fb && new_plane_state->fb &&
+ get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb))
+ return true;
+ }
+
+ return false;
+}
+
/**
* amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
*
@@ -11576,10 +11595,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
/* Remove exiting planes if they are modified */
for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) {
- if (old_plane_state->fb && new_plane_state->fb &&
- get_mem_type(old_plane_state->fb) !=
- get_mem_type(new_plane_state->fb))
- lock_and_validation_needed = true;
ret = dm_update_plane_state(dc, state, plane,
old_plane_state,
@@ -11874,9 +11889,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
/*
* Only allow async flips for fast updates that don't change
- * the FB pitch, the DCC state, rotation, etc.
+ * the FB pitch, the DCC state, rotation, mem_type, etc.
*/
- if (new_crtc_state->async_flip && lock_and_validation_needed) {
+ if (new_crtc_state->async_flip &&
+ (lock_and_validation_needed ||
+ amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) {
drm_dbg_atomic(crtc->dev,
"[CRTC:%d:%s] async flips are only supported for fast updates\n",
crtc->base.id, crtc->name);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 05c82ee363f64c64b87a0cfd744298e9333475f5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012056-marina-lagging-26cc@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 05c82ee363f64c64b87a0cfd744298e9333475f5 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb(a)google.com>
Date: Thu, 26 Dec 2024 13:16:39 -0800
Subject: [PATCH] alloc_tag: skip pgalloc_tag_swap if profiling is disabled
When memory allocation profiling is disabled, there is no need to swap
allocation tags during migration. Skip it to avoid unnecessary overhead.
With these checks added, the overhead of having memory profiling
compiled in but turned off at runtime went down by about 50%.
Link: https://lkml.kernel.org/r/20241226211639.1357704-2-surenb@google.com
Fixes: e0a955bf7f61 ("mm/codetag: add pgalloc_tag_copy()")
Signed-off-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: David Wang <00107082(a)163.com>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Zhenhua Huang <quic_zhenhuah(a)quicinc.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 7dcebf118a3e..65e706e1bc19 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -195,6 +195,9 @@ void pgalloc_tag_swap(struct folio *new, struct folio *old)
union codetag_ref ref_old, ref_new;
struct alloc_tag *tag_old, *tag_new;
+ if (!mem_alloc_profiling_enabled())
+ return;
+
tag_old = pgalloc_tag_get(&old->page);
if (!tag_old)
return;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b8ed9da102beb2d0926a1d7a7e652392190151c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012032-tidal-chatting-cba2@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ed9da102beb2d0926a1d7a7e652392190151c0 Mon Sep 17 00:00:00 2001
From: Meetakshi Setiya <msetiya(a)microsoft.com>
Date: Fri, 10 Jan 2025 07:10:27 -0500
Subject: [PATCH] cifs: support reconnect with alternate password for SMB1
SMB1 shares the mount and remount code paths with SMB2/3 and already
supports password rotation in some scenarios. This patch extends the
password rotation support to SMB1 reconnects as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Meetakshi Setiya <msetiya(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 6cb1e81993f8..ab0b949924d7 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -152,8 +152,17 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
spin_unlock(&ses->ses_lock);
rc = cifs_negotiate_protocol(0, ses, server);
- if (!rc)
+ if (!rc) {
rc = cifs_setup_session(0, ses, server, ses->local_nls);
+ if ((rc == -EACCES) || (rc == -EHOSTDOWN) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect if an alternate
+ * password is available.
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+ }
/* do we need to reconnect tcon? */
if (rc || !tcon->need_reconnect) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b8ed9da102beb2d0926a1d7a7e652392190151c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012032-goggles-gory-004a@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ed9da102beb2d0926a1d7a7e652392190151c0 Mon Sep 17 00:00:00 2001
From: Meetakshi Setiya <msetiya(a)microsoft.com>
Date: Fri, 10 Jan 2025 07:10:27 -0500
Subject: [PATCH] cifs: support reconnect with alternate password for SMB1
SMB1 shares the mount and remount code paths with SMB2/3 and already
supports password rotation in some scenarios. This patch extends the
password rotation support to SMB1 reconnects as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Meetakshi Setiya <msetiya(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 6cb1e81993f8..ab0b949924d7 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -152,8 +152,17 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
spin_unlock(&ses->ses_lock);
rc = cifs_negotiate_protocol(0, ses, server);
- if (!rc)
+ if (!rc) {
rc = cifs_setup_session(0, ses, server, ses->local_nls);
+ if ((rc == -EACCES) || (rc == -EHOSTDOWN) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect if an alternate
+ * password is available.
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+ }
/* do we need to reconnect tcon? */
if (rc || !tcon->need_reconnect) {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b8ed9da102beb2d0926a1d7a7e652392190151c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012031-partly-sputter-b363@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ed9da102beb2d0926a1d7a7e652392190151c0 Mon Sep 17 00:00:00 2001
From: Meetakshi Setiya <msetiya(a)microsoft.com>
Date: Fri, 10 Jan 2025 07:10:27 -0500
Subject: [PATCH] cifs: support reconnect with alternate password for SMB1
SMB1 shares the mount and remount code paths with SMB2/3 and already
supports password rotation in some scenarios. This patch extends the
password rotation support to SMB1 reconnects as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Meetakshi Setiya <msetiya(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 6cb1e81993f8..ab0b949924d7 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -152,8 +152,17 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
spin_unlock(&ses->ses_lock);
rc = cifs_negotiate_protocol(0, ses, server);
- if (!rc)
+ if (!rc) {
rc = cifs_setup_session(0, ses, server, ses->local_nls);
+ if ((rc == -EACCES) || (rc == -EHOSTDOWN) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect if an alternate
+ * password is available.
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+ }
/* do we need to reconnect tcon? */
if (rc || !tcon->need_reconnect) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b8ed9da102beb2d0926a1d7a7e652392190151c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012031-dealer-graded-ea0e@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ed9da102beb2d0926a1d7a7e652392190151c0 Mon Sep 17 00:00:00 2001
From: Meetakshi Setiya <msetiya(a)microsoft.com>
Date: Fri, 10 Jan 2025 07:10:27 -0500
Subject: [PATCH] cifs: support reconnect with alternate password for SMB1
SMB1 shares the mount and remount code paths with SMB2/3 and already
supports password rotation in some scenarios. This patch extends the
password rotation support to SMB1 reconnects as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Meetakshi Setiya <msetiya(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 6cb1e81993f8..ab0b949924d7 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -152,8 +152,17 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
spin_unlock(&ses->ses_lock);
rc = cifs_negotiate_protocol(0, ses, server);
- if (!rc)
+ if (!rc) {
rc = cifs_setup_session(0, ses, server, ses->local_nls);
+ if ((rc == -EACCES) || (rc == -EHOSTDOWN) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect if an alternate
+ * password is available.
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+ }
/* do we need to reconnect tcon? */
if (rc || !tcon->need_reconnect) {
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x b8ed9da102beb2d0926a1d7a7e652392190151c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012030-scanning-undrilled-830d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ed9da102beb2d0926a1d7a7e652392190151c0 Mon Sep 17 00:00:00 2001
From: Meetakshi Setiya <msetiya(a)microsoft.com>
Date: Fri, 10 Jan 2025 07:10:27 -0500
Subject: [PATCH] cifs: support reconnect with alternate password for SMB1
SMB1 shares the mount and remount code paths with SMB2/3 and already
supports password rotation in some scenarios. This patch extends the
password rotation support to SMB1 reconnects as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Meetakshi Setiya <msetiya(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 6cb1e81993f8..ab0b949924d7 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -152,8 +152,17 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
spin_unlock(&ses->ses_lock);
rc = cifs_negotiate_protocol(0, ses, server);
- if (!rc)
+ if (!rc) {
rc = cifs_setup_session(0, ses, server, ses->local_nls);
+ if ((rc == -EACCES) || (rc == -EHOSTDOWN) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect if an alternate
+ * password is available.
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+ }
/* do we need to reconnect tcon? */
if (rc || !tcon->need_reconnect) {
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x b8ed9da102beb2d0926a1d7a7e652392190151c0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012030-print-avert-f15d@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ed9da102beb2d0926a1d7a7e652392190151c0 Mon Sep 17 00:00:00 2001
From: Meetakshi Setiya <msetiya(a)microsoft.com>
Date: Fri, 10 Jan 2025 07:10:27 -0500
Subject: [PATCH] cifs: support reconnect with alternate password for SMB1
SMB1 shares the mount and remount code paths with SMB2/3 and already
supports password rotation in some scenarios. This patch extends the
password rotation support to SMB1 reconnects as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Meetakshi Setiya <msetiya(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 6cb1e81993f8..ab0b949924d7 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -152,8 +152,17 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
spin_unlock(&ses->ses_lock);
rc = cifs_negotiate_protocol(0, ses, server);
- if (!rc)
+ if (!rc) {
rc = cifs_setup_session(0, ses, server, ses->local_nls);
+ if ((rc == -EACCES) || (rc == -EHOSTDOWN) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect if an alternate
+ * password is available.
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+ }
/* do we need to reconnect tcon? */
if (rc || !tcon->need_reconnect) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 779b9955f64327c339a16f68055af98252fd3315
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012002-spent-fervor-ada9@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 779b9955f64327c339a16f68055af98252fd3315 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed(a)google.com>
Date: Mon, 13 Jan 2025 21:44:58 +0000
Subject: [PATCH] mm: zswap: move allocations during CPU init outside the lock
In zswap_cpu_comp_prepare(), allocations are made and assigned to various
members of acomp_ctx under acomp_ctx->mutex. However, allocations may
recurse into zswap through reclaim, trying to acquire the same mutex and
deadlocking.
Move the allocations before the mutex critical section. Only the
initialization of acomp_ctx needs to be done with the mutex held.
Link: https://lkml.kernel.org/r/20250113214458.2123410-1-yosryahmed@google.com
Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index 30f5a27a6862..b84c20d889b1 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -820,15 +820,15 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct crypto_acomp *acomp;
- struct acomp_req *req;
+ struct crypto_acomp *acomp = NULL;
+ struct acomp_req *req = NULL;
+ u8 *buffer = NULL;
int ret;
- mutex_lock(&acomp_ctx->mutex);
- acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer) {
+ buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+ if (!buffer) {
ret = -ENOMEM;
- goto buffer_fail;
+ goto fail;
}
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
@@ -836,21 +836,25 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
pr_err("could not alloc crypto acomp %s : %ld\n",
pool->tfm_name, PTR_ERR(acomp));
ret = PTR_ERR(acomp);
- goto acomp_fail;
+ goto fail;
}
- acomp_ctx->acomp = acomp;
- acomp_ctx->is_sleepable = acomp_is_async(acomp);
- req = acomp_request_alloc(acomp_ctx->acomp);
+ req = acomp_request_alloc(acomp);
if (!req) {
pr_err("could not alloc crypto acomp_request %s\n",
pool->tfm_name);
ret = -ENOMEM;
- goto req_fail;
+ goto fail;
}
- acomp_ctx->req = req;
+ /*
+ * Only hold the mutex after completing allocations, otherwise we may
+ * recurse into zswap through reclaim and attempt to hold the mutex
+ * again resulting in a deadlock.
+ */
+ mutex_lock(&acomp_ctx->mutex);
crypto_init_wait(&acomp_ctx->wait);
+
/*
* if the backend of acomp is async zip, crypto_req_done() will wakeup
* crypto_wait_req(); if the backend of acomp is scomp, the callback
@@ -859,15 +863,17 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ acomp_ctx->buffer = buffer;
+ acomp_ctx->acomp = acomp;
+ acomp_ctx->is_sleepable = acomp_is_async(acomp);
+ acomp_ctx->req = req;
mutex_unlock(&acomp_ctx->mutex);
return 0;
-req_fail:
- crypto_free_acomp(acomp_ctx->acomp);
-acomp_fail:
- kfree(acomp_ctx->buffer);
-buffer_fail:
- mutex_unlock(&acomp_ctx->mutex);
+fail:
+ if (acomp)
+ crypto_free_acomp(acomp);
+ kfree(buffer);
return ret;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 779b9955f64327c339a16f68055af98252fd3315
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012002-overstep-numbness-cf60@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 779b9955f64327c339a16f68055af98252fd3315 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed(a)google.com>
Date: Mon, 13 Jan 2025 21:44:58 +0000
Subject: [PATCH] mm: zswap: move allocations during CPU init outside the lock
In zswap_cpu_comp_prepare(), allocations are made and assigned to various
members of acomp_ctx under acomp_ctx->mutex. However, allocations may
recurse into zswap through reclaim, trying to acquire the same mutex and
deadlocking.
Move the allocations before the mutex critical section. Only the
initialization of acomp_ctx needs to be done with the mutex held.
Link: https://lkml.kernel.org/r/20250113214458.2123410-1-yosryahmed@google.com
Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index 30f5a27a6862..b84c20d889b1 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -820,15 +820,15 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct crypto_acomp *acomp;
- struct acomp_req *req;
+ struct crypto_acomp *acomp = NULL;
+ struct acomp_req *req = NULL;
+ u8 *buffer = NULL;
int ret;
- mutex_lock(&acomp_ctx->mutex);
- acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer) {
+ buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+ if (!buffer) {
ret = -ENOMEM;
- goto buffer_fail;
+ goto fail;
}
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
@@ -836,21 +836,25 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
pr_err("could not alloc crypto acomp %s : %ld\n",
pool->tfm_name, PTR_ERR(acomp));
ret = PTR_ERR(acomp);
- goto acomp_fail;
+ goto fail;
}
- acomp_ctx->acomp = acomp;
- acomp_ctx->is_sleepable = acomp_is_async(acomp);
- req = acomp_request_alloc(acomp_ctx->acomp);
+ req = acomp_request_alloc(acomp);
if (!req) {
pr_err("could not alloc crypto acomp_request %s\n",
pool->tfm_name);
ret = -ENOMEM;
- goto req_fail;
+ goto fail;
}
- acomp_ctx->req = req;
+ /*
+ * Only hold the mutex after completing allocations, otherwise we may
+ * recurse into zswap through reclaim and attempt to hold the mutex
+ * again resulting in a deadlock.
+ */
+ mutex_lock(&acomp_ctx->mutex);
crypto_init_wait(&acomp_ctx->wait);
+
/*
* if the backend of acomp is async zip, crypto_req_done() will wakeup
* crypto_wait_req(); if the backend of acomp is scomp, the callback
@@ -859,15 +863,17 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ acomp_ctx->buffer = buffer;
+ acomp_ctx->acomp = acomp;
+ acomp_ctx->is_sleepable = acomp_is_async(acomp);
+ acomp_ctx->req = req;
mutex_unlock(&acomp_ctx->mutex);
return 0;
-req_fail:
- crypto_free_acomp(acomp_ctx->acomp);
-acomp_fail:
- kfree(acomp_ctx->buffer);
-buffer_fail:
- mutex_unlock(&acomp_ctx->mutex);
+fail:
+ if (acomp)
+ crypto_free_acomp(acomp);
+ kfree(buffer);
return ret;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 779b9955f64327c339a16f68055af98252fd3315
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012001-elbow-configure-d7c7@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 779b9955f64327c339a16f68055af98252fd3315 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed(a)google.com>
Date: Mon, 13 Jan 2025 21:44:58 +0000
Subject: [PATCH] mm: zswap: move allocations during CPU init outside the lock
In zswap_cpu_comp_prepare(), allocations are made and assigned to various
members of acomp_ctx under acomp_ctx->mutex. However, allocations may
recurse into zswap through reclaim, trying to acquire the same mutex and
deadlocking.
Move the allocations before the mutex critical section. Only the
initialization of acomp_ctx needs to be done with the mutex held.
Link: https://lkml.kernel.org/r/20250113214458.2123410-1-yosryahmed@google.com
Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index 30f5a27a6862..b84c20d889b1 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -820,15 +820,15 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct crypto_acomp *acomp;
- struct acomp_req *req;
+ struct crypto_acomp *acomp = NULL;
+ struct acomp_req *req = NULL;
+ u8 *buffer = NULL;
int ret;
- mutex_lock(&acomp_ctx->mutex);
- acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer) {
+ buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+ if (!buffer) {
ret = -ENOMEM;
- goto buffer_fail;
+ goto fail;
}
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
@@ -836,21 +836,25 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
pr_err("could not alloc crypto acomp %s : %ld\n",
pool->tfm_name, PTR_ERR(acomp));
ret = PTR_ERR(acomp);
- goto acomp_fail;
+ goto fail;
}
- acomp_ctx->acomp = acomp;
- acomp_ctx->is_sleepable = acomp_is_async(acomp);
- req = acomp_request_alloc(acomp_ctx->acomp);
+ req = acomp_request_alloc(acomp);
if (!req) {
pr_err("could not alloc crypto acomp_request %s\n",
pool->tfm_name);
ret = -ENOMEM;
- goto req_fail;
+ goto fail;
}
- acomp_ctx->req = req;
+ /*
+ * Only hold the mutex after completing allocations, otherwise we may
+ * recurse into zswap through reclaim and attempt to hold the mutex
+ * again resulting in a deadlock.
+ */
+ mutex_lock(&acomp_ctx->mutex);
crypto_init_wait(&acomp_ctx->wait);
+
/*
* if the backend of acomp is async zip, crypto_req_done() will wakeup
* crypto_wait_req(); if the backend of acomp is scomp, the callback
@@ -859,15 +863,17 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ acomp_ctx->buffer = buffer;
+ acomp_ctx->acomp = acomp;
+ acomp_ctx->is_sleepable = acomp_is_async(acomp);
+ acomp_ctx->req = req;
mutex_unlock(&acomp_ctx->mutex);
return 0;
-req_fail:
- crypto_free_acomp(acomp_ctx->acomp);
-acomp_fail:
- kfree(acomp_ctx->buffer);
-buffer_fail:
- mutex_unlock(&acomp_ctx->mutex);
+fail:
+ if (acomp)
+ crypto_free_acomp(acomp);
+ kfree(buffer);
return ret;
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 779b9955f64327c339a16f68055af98252fd3315
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012057-carrousel-marbled-221b@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 779b9955f64327c339a16f68055af98252fd3315 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed(a)google.com>
Date: Mon, 13 Jan 2025 21:44:58 +0000
Subject: [PATCH] mm: zswap: move allocations during CPU init outside the lock
In zswap_cpu_comp_prepare(), allocations are made and assigned to various
members of acomp_ctx under acomp_ctx->mutex. However, allocations may
recurse into zswap through reclaim, trying to acquire the same mutex and
deadlocking.
Move the allocations before the mutex critical section. Only the
initialization of acomp_ctx needs to be done with the mutex held.
Link: https://lkml.kernel.org/r/20250113214458.2123410-1-yosryahmed@google.com
Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index 30f5a27a6862..b84c20d889b1 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -820,15 +820,15 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct crypto_acomp *acomp;
- struct acomp_req *req;
+ struct crypto_acomp *acomp = NULL;
+ struct acomp_req *req = NULL;
+ u8 *buffer = NULL;
int ret;
- mutex_lock(&acomp_ctx->mutex);
- acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer) {
+ buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+ if (!buffer) {
ret = -ENOMEM;
- goto buffer_fail;
+ goto fail;
}
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
@@ -836,21 +836,25 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
pr_err("could not alloc crypto acomp %s : %ld\n",
pool->tfm_name, PTR_ERR(acomp));
ret = PTR_ERR(acomp);
- goto acomp_fail;
+ goto fail;
}
- acomp_ctx->acomp = acomp;
- acomp_ctx->is_sleepable = acomp_is_async(acomp);
- req = acomp_request_alloc(acomp_ctx->acomp);
+ req = acomp_request_alloc(acomp);
if (!req) {
pr_err("could not alloc crypto acomp_request %s\n",
pool->tfm_name);
ret = -ENOMEM;
- goto req_fail;
+ goto fail;
}
- acomp_ctx->req = req;
+ /*
+ * Only hold the mutex after completing allocations, otherwise we may
+ * recurse into zswap through reclaim and attempt to hold the mutex
+ * again resulting in a deadlock.
+ */
+ mutex_lock(&acomp_ctx->mutex);
crypto_init_wait(&acomp_ctx->wait);
+
/*
* if the backend of acomp is async zip, crypto_req_done() will wakeup
* crypto_wait_req(); if the backend of acomp is scomp, the callback
@@ -859,15 +863,17 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ acomp_ctx->buffer = buffer;
+ acomp_ctx->acomp = acomp;
+ acomp_ctx->is_sleepable = acomp_is_async(acomp);
+ acomp_ctx->req = req;
mutex_unlock(&acomp_ctx->mutex);
return 0;
-req_fail:
- crypto_free_acomp(acomp_ctx->acomp);
-acomp_fail:
- kfree(acomp_ctx->buffer);
-buffer_fail:
- mutex_unlock(&acomp_ctx->mutex);
+fail:
+ if (acomp)
+ crypto_free_acomp(acomp);
+ kfree(buffer);
return ret;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 12dcb0ef540629a281533f9dedc1b6b8e14cfb65
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012015-siren-backyard-e676@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 12dcb0ef540629a281533f9dedc1b6b8e14cfb65 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed(a)google.com>
Date: Wed, 8 Jan 2025 22:24:41 +0000
Subject: [PATCH] mm: zswap: properly synchronize freeing resources during CPU
hotunplug
In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the
current CPU at the beginning of the operation is retrieved and used
throughout. However, since neither preemption nor migration are disabled,
it is possible that the operation continues on a different CPU.
If the original CPU is hotunplugged while the acomp_ctx is still in use,
we run into a UAF bug as some of the resources attached to the acomp_ctx
are freed during hotunplug in zswap_cpu_comp_dead() (i.e.
acomp_ctx.buffer, acomp_ctx.req, or acomp_ctx.acomp).
The problem was introduced in commit 1ec3b5fe6eec ("mm/zswap: move to use
crypto_acomp API for hardware acceleration") when the switch to the
crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was
retrieved using get_cpu_ptr() which disables preemption and makes sure the
CPU cannot go away from under us. Preemption cannot be disabled with the
crypto_acomp API as a sleepable context is needed.
Use the acomp_ctx.mutex to synchronize CPU hotplug callbacks allocating
and freeing resources with compression/decompression paths. Make sure
that acomp_ctx.req is NULL when the resources are freed. In the
compression/decompression paths, check if acomp_ctx.req is NULL after
acquiring the mutex (meaning the CPU was offlined) and retry on the new
CPU.
The initialization of acomp_ctx.mutex is moved from the CPU hotplug
callback to the pool initialization where it belongs (where the mutex is
allocated). In addition to adding clarity, this makes sure that CPU
hotplug cannot reinitialize a mutex that is already locked by
compression/decompression.
Previously a fix was attempted by holding cpus_read_lock() [1]. This
would have caused a potential deadlock as it is possible for code already
holding the lock to fall into reclaim and enter zswap (causing a
deadlock). A fix was also attempted using SRCU for synchronization, but
Johannes pointed out that synchronize_srcu() cannot be used in CPU hotplug
notifiers [2].
Alternative fixes that were considered/attempted and could have worked:
- Refcounting the per-CPU acomp_ctx. This involves complexity in
handling the race between the refcount dropping to zero in
zswap_[de]compress() and the refcount being re-initialized when the
CPU is onlined.
- Disabling migration before getting the per-CPU acomp_ctx [3], but
that's discouraged and is a much bigger hammer than needed, and could
result in subtle performance issues.
[1]https://lkml.kernel.org/20241219212437.2714151-1-yosryahmed@google.com/
[2]https://lkml.kernel.org/20250107074724.1756696-2-yosryahmed@google.com/
[3]https://lkml.kernel.org/20250107222236.2715883-2-yosryahmed@google.com/
[yosryahmed(a)google.com: remove comment]
Link: https://lkml.kernel.org/r/CAJD7tkaxS1wjn+swugt8QCvQ-rVF5RZnjxwPGX17k8x9zSMa…
Link: https://lkml.kernel.org/r/20250108222441.3622031-1-yosryahmed@google.com
Fixes: 1ec3b5fe6eec ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Reported-by: Johannes Weiner <hannes(a)cmpxchg.org>
Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/
Reported-by: Sam Sun <samsun1006219(a)gmail.com>
Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tP…
Cc: Barry Song <baohua(a)kernel.org>
Cc: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Kanchana P Sridhar <kanchana.p.sridhar(a)intel.com>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Vitaly Wool <vitalywool(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index f6316b66fb23..30f5a27a6862 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -251,7 +251,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
struct zswap_pool *pool;
char name[38]; /* 'zswap' + 32 char (max) num + \0 */
gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
- int ret;
+ int ret, cpu;
if (!zswap_has_pool) {
/* if either are unset, pool initialization failed, and we
@@ -285,6 +285,9 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
goto error;
}
+ for_each_possible_cpu(cpu)
+ mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex);
+
ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
&pool->node);
if (ret)
@@ -821,11 +824,12 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
struct acomp_req *req;
int ret;
- mutex_init(&acomp_ctx->mutex);
-
+ mutex_lock(&acomp_ctx->mutex);
acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer)
- return -ENOMEM;
+ if (!acomp_ctx->buffer) {
+ ret = -ENOMEM;
+ goto buffer_fail;
+ }
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
if (IS_ERR(acomp)) {
@@ -855,12 +859,15 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ mutex_unlock(&acomp_ctx->mutex);
return 0;
req_fail:
crypto_free_acomp(acomp_ctx->acomp);
acomp_fail:
kfree(acomp_ctx->buffer);
+buffer_fail:
+ mutex_unlock(&acomp_ctx->mutex);
return ret;
}
@@ -869,17 +876,45 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+ mutex_lock(&acomp_ctx->mutex);
if (!IS_ERR_OR_NULL(acomp_ctx)) {
if (!IS_ERR_OR_NULL(acomp_ctx->req))
acomp_request_free(acomp_ctx->req);
+ acomp_ctx->req = NULL;
if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
crypto_free_acomp(acomp_ctx->acomp);
kfree(acomp_ctx->buffer);
}
+ mutex_unlock(&acomp_ctx->mutex);
return 0;
}
+static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
+{
+ struct crypto_acomp_ctx *acomp_ctx;
+
+ for (;;) {
+ acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
+ mutex_lock(&acomp_ctx->mutex);
+ if (likely(acomp_ctx->req))
+ return acomp_ctx;
+ /*
+ * It is possible that we were migrated to a different CPU after
+ * getting the per-CPU ctx but before the mutex was acquired. If
+ * the old CPU got offlined, zswap_cpu_comp_dead() could have
+ * already freed ctx->req (among other things) and set it to
+ * NULL. Just try again on the new CPU that we ended up on.
+ */
+ mutex_unlock(&acomp_ctx->mutex);
+ }
+}
+
+static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
+{
+ mutex_unlock(&acomp_ctx->mutex);
+}
+
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
struct zswap_pool *pool)
{
@@ -893,10 +928,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
gfp_t gfp;
u8 *dst;
- acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
-
- mutex_lock(&acomp_ctx->mutex);
-
+ acomp_ctx = acomp_ctx_get_cpu_lock(pool);
dst = acomp_ctx->buffer;
sg_init_table(&input, 1);
sg_set_page(&input, page, PAGE_SIZE, 0);
@@ -949,7 +981,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
else if (alloc_ret)
zswap_reject_alloc_fail++;
- mutex_unlock(&acomp_ctx->mutex);
+ acomp_ctx_put_unlock(acomp_ctx);
return comp_ret == 0 && alloc_ret == 0;
}
@@ -960,9 +992,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
struct crypto_acomp_ctx *acomp_ctx;
u8 *src;
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
- mutex_lock(&acomp_ctx->mutex);
-
+ acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
/*
* If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
@@ -986,10 +1016,10 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
- mutex_unlock(&acomp_ctx->mutex);
if (src != acomp_ctx->buffer)
zpool_unmap_handle(zpool, entry->handle);
+ acomp_ctx_put_unlock(acomp_ctx);
}
/*********************************
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 12dcb0ef540629a281533f9dedc1b6b8e14cfb65
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025012014-sardine-hardwood-6828@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 12dcb0ef540629a281533f9dedc1b6b8e14cfb65 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed(a)google.com>
Date: Wed, 8 Jan 2025 22:24:41 +0000
Subject: [PATCH] mm: zswap: properly synchronize freeing resources during CPU
hotunplug
In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the
current CPU at the beginning of the operation is retrieved and used
throughout. However, since neither preemption nor migration are disabled,
it is possible that the operation continues on a different CPU.
If the original CPU is hotunplugged while the acomp_ctx is still in use,
we run into a UAF bug as some of the resources attached to the acomp_ctx
are freed during hotunplug in zswap_cpu_comp_dead() (i.e.
acomp_ctx.buffer, acomp_ctx.req, or acomp_ctx.acomp).
The problem was introduced in commit 1ec3b5fe6eec ("mm/zswap: move to use
crypto_acomp API for hardware acceleration") when the switch to the
crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was
retrieved using get_cpu_ptr() which disables preemption and makes sure the
CPU cannot go away from under us. Preemption cannot be disabled with the
crypto_acomp API as a sleepable context is needed.
Use the acomp_ctx.mutex to synchronize CPU hotplug callbacks allocating
and freeing resources with compression/decompression paths. Make sure
that acomp_ctx.req is NULL when the resources are freed. In the
compression/decompression paths, check if acomp_ctx.req is NULL after
acquiring the mutex (meaning the CPU was offlined) and retry on the new
CPU.
The initialization of acomp_ctx.mutex is moved from the CPU hotplug
callback to the pool initialization where it belongs (where the mutex is
allocated). In addition to adding clarity, this makes sure that CPU
hotplug cannot reinitialize a mutex that is already locked by
compression/decompression.
Previously a fix was attempted by holding cpus_read_lock() [1]. This
would have caused a potential deadlock as it is possible for code already
holding the lock to fall into reclaim and enter zswap (causing a
deadlock). A fix was also attempted using SRCU for synchronization, but
Johannes pointed out that synchronize_srcu() cannot be used in CPU hotplug
notifiers [2].
Alternative fixes that were considered/attempted and could have worked:
- Refcounting the per-CPU acomp_ctx. This involves complexity in
handling the race between the refcount dropping to zero in
zswap_[de]compress() and the refcount being re-initialized when the
CPU is onlined.
- Disabling migration before getting the per-CPU acomp_ctx [3], but
that's discouraged and is a much bigger hammer than needed, and could
result in subtle performance issues.
[1]https://lkml.kernel.org/20241219212437.2714151-1-yosryahmed@google.com/
[2]https://lkml.kernel.org/20250107074724.1756696-2-yosryahmed@google.com/
[3]https://lkml.kernel.org/20250107222236.2715883-2-yosryahmed@google.com/
[yosryahmed(a)google.com: remove comment]
Link: https://lkml.kernel.org/r/CAJD7tkaxS1wjn+swugt8QCvQ-rVF5RZnjxwPGX17k8x9zSMa…
Link: https://lkml.kernel.org/r/20250108222441.3622031-1-yosryahmed@google.com
Fixes: 1ec3b5fe6eec ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Reported-by: Johannes Weiner <hannes(a)cmpxchg.org>
Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/
Reported-by: Sam Sun <samsun1006219(a)gmail.com>
Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tP…
Cc: Barry Song <baohua(a)kernel.org>
Cc: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Kanchana P Sridhar <kanchana.p.sridhar(a)intel.com>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Vitaly Wool <vitalywool(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index f6316b66fb23..30f5a27a6862 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -251,7 +251,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
struct zswap_pool *pool;
char name[38]; /* 'zswap' + 32 char (max) num + \0 */
gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
- int ret;
+ int ret, cpu;
if (!zswap_has_pool) {
/* if either are unset, pool initialization failed, and we
@@ -285,6 +285,9 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
goto error;
}
+ for_each_possible_cpu(cpu)
+ mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex);
+
ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
&pool->node);
if (ret)
@@ -821,11 +824,12 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
struct acomp_req *req;
int ret;
- mutex_init(&acomp_ctx->mutex);
-
+ mutex_lock(&acomp_ctx->mutex);
acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer)
- return -ENOMEM;
+ if (!acomp_ctx->buffer) {
+ ret = -ENOMEM;
+ goto buffer_fail;
+ }
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
if (IS_ERR(acomp)) {
@@ -855,12 +859,15 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ mutex_unlock(&acomp_ctx->mutex);
return 0;
req_fail:
crypto_free_acomp(acomp_ctx->acomp);
acomp_fail:
kfree(acomp_ctx->buffer);
+buffer_fail:
+ mutex_unlock(&acomp_ctx->mutex);
return ret;
}
@@ -869,17 +876,45 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+ mutex_lock(&acomp_ctx->mutex);
if (!IS_ERR_OR_NULL(acomp_ctx)) {
if (!IS_ERR_OR_NULL(acomp_ctx->req))
acomp_request_free(acomp_ctx->req);
+ acomp_ctx->req = NULL;
if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
crypto_free_acomp(acomp_ctx->acomp);
kfree(acomp_ctx->buffer);
}
+ mutex_unlock(&acomp_ctx->mutex);
return 0;
}
+static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
+{
+ struct crypto_acomp_ctx *acomp_ctx;
+
+ for (;;) {
+ acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
+ mutex_lock(&acomp_ctx->mutex);
+ if (likely(acomp_ctx->req))
+ return acomp_ctx;
+ /*
+ * It is possible that we were migrated to a different CPU after
+ * getting the per-CPU ctx but before the mutex was acquired. If
+ * the old CPU got offlined, zswap_cpu_comp_dead() could have
+ * already freed ctx->req (among other things) and set it to
+ * NULL. Just try again on the new CPU that we ended up on.
+ */
+ mutex_unlock(&acomp_ctx->mutex);
+ }
+}
+
+static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
+{
+ mutex_unlock(&acomp_ctx->mutex);
+}
+
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
struct zswap_pool *pool)
{
@@ -893,10 +928,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
gfp_t gfp;
u8 *dst;
- acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
-
- mutex_lock(&acomp_ctx->mutex);
-
+ acomp_ctx = acomp_ctx_get_cpu_lock(pool);
dst = acomp_ctx->buffer;
sg_init_table(&input, 1);
sg_set_page(&input, page, PAGE_SIZE, 0);
@@ -949,7 +981,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
else if (alloc_ret)
zswap_reject_alloc_fail++;
- mutex_unlock(&acomp_ctx->mutex);
+ acomp_ctx_put_unlock(acomp_ctx);
return comp_ret == 0 && alloc_ret == 0;
}
@@ -960,9 +992,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
struct crypto_acomp_ctx *acomp_ctx;
u8 *src;
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
- mutex_lock(&acomp_ctx->mutex);
-
+ acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
/*
* If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
@@ -986,10 +1016,10 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
- mutex_unlock(&acomp_ctx->mutex);
if (src != acomp_ctx->buffer)
zpool_unmap_handle(zpool, entry->handle);
+ acomp_ctx_put_unlock(acomp_ctx);
}
/*********************************
From: lei lu <llfamsec(a)gmail.com>
commit fb63435b7c7dc112b1ae1baea5486e0a6e27b196 upstream.
There is a lack of verification of the space occupied by fixed members
of xlog_op_header in the xlog_recover_process_data.
We can create a crafted image to trigger an out of bounds read by
following these steps:
1) Mount an image of xfs, and do some file operations to leave records
2) Before umounting, copy the image for subsequent steps to simulate
abnormal exit. Because umount will ensure that tail_blk and
head_blk are the same, which will result in the inability to enter
xlog_recover_process_data
3) Write a tool to parse and modify the copied image in step 2
4) Make the end of the xlog_op_header entries only 1 byte away from
xlog_rec_header->h_size
5) xlog_rec_header->h_num_logops++
6) Modify xlog_rec_header->h_crc
Fix:
Add a check to make sure there is sufficient space to access fixed members
of xlog_op_header.
Signed-off-by: lei lu <llfamsec(a)gmail.com>
Reviewed-by: Dave Chinner <dchinner(a)redhat.com>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu(a)kernel.org>
Signed-off-by: Denis Arefev <arefev(a)swemel.ru>
---
Backport fix for CVE-2024-41014
Link: https://nvd.nist.gov/vuln/detail/cve-2024-41014
---
fs/xfs/xfs_log_recover.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e61f28ce3e44..eafe76f304ef 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2419,7 +2419,10 @@ xlog_recover_process_data(
ohead = (struct xlog_op_header *)dp;
dp += sizeof(*ohead);
- ASSERT(dp <= end);
+ if (dp > end) {
+ xfs_warn(log->l_mp, "%s: op header overrun", __func__);
+ return -EFSCORRUPTED;
+ }
/* errors will abort recovery */
error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
--
2.43.0
From: Nick Child <nnac123(a)linux.ibm.com>
commit 0983d288caf984de0202c66641577b739caad561 upstream.
Below is a summary of how the driver stores a reference to an skb during
transmit:
tx_buff[free_map[consumer_index]]->skb = new_skb;
free_map[consumer_index] = IBMVNIC_INVALID_MAP;
consumer_index ++;
Where variable data looks like this:
free_map == [4, IBMVNIC_INVALID_MAP, IBMVNIC_INVALID_MAP, 0, 3]
consumer_index^
tx_buff == [skb=null, skb=<ptr>, skb=<ptr>, skb=null, skb=null]
The driver has checks to ensure that free_map[consumer_index] pointed to
a valid index but there was no check to ensure that this index pointed
to an unused/null skb address. So, if, by some chance, our free_map and
tx_buff lists become out of sync then we were previously risking an
skb memory leak. This could then cause tcp congestion control to stop
sending packets, eventually leading to ETIMEDOUT.
Therefore, add a conditional to ensure that the skb address is null. If
not then warn the user (because this is still a bug that should be
patched) and free the old pointer to prevent memleak/tcp problems.
Signed-off-by: Nick Child <nnac123(a)linux.ibm.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
[Denis: minor fix to resolve merge conflict.]
Signed-off-by: Denis Arefev <arefev(a)swemel.ru>
---
Backport fix for CVE-2024-41066
Link: https://nvd.nist.gov/vuln/detail/CVE-2024-41066
---
drivers/net/ethernet/ibm/ibmvnic.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 84da6ccaf339..439796975cbf 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1625,6 +1625,18 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
(tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[index];
+
+ /* Sanity checks on our free map to make sure it points to an index
+ * that is not being occupied by another skb. If skb memory is
+ * not freed then we see congestion control kick in and halt tx.
+ */
+ if (unlikely(tx_buff->skb)) {
+ dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n",
+ skb_is_gso(skb) ? "tso_pool" : "tx_pool",
+ queue_num, bufidx);
+ dev_kfree_skb_any(tx_buff->skb);
+ }
+
tx_buff->skb = skb;
tx_buff->data_dma[0] = data_dma_addr;
tx_buff->data_len[0] = skb->len;
--
2.43.0
From: Willem de Bruijn <willemb(a)google.com>
commit dd89a81d850fa9a65f67b4527c0e420d15bf836c upstream.
Drop the WARN_ON_ONCE in gue_gro_receive if the encapsulated type is
not known or does not have a GRO handler.
Such a packet is easily constructed. Syzbot generates them and sets
off this warning.
Remove the warning as it is expected and not actionable.
The warning was previously reduced from WARN_ON to WARN_ON_ONCE in
commit 270136613bf7 ("fou: Do WARN_ON_ONCE in gue_gro_receive for bad
proto callbacks").
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Link: https://lore.kernel.org/r/20240614122552.1649044-1-willemdebruijn.kernel@gm…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Denis Arefev <arefev(a)swemel.ru>
---
Backport fix for CVE-2024-44940
Link: https://www.cve.org/CVERecord/?id=CVE-2024-44940
---
net/ipv4/fou.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1d67df4d8ed6..b1a8e4eec3f6 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -453,7 +453,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
- if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
+ if (!ops || !ops->callbacks.gro_receive)
goto out;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
--
2.43.0
This driver supports page faults on PCI RID since commit <9f831c16c69e>
("iommu/vt-d: Remove the pasid present check in prq_event_thread") by
allowing the reporting of page faults with the pasid_present field cleared
to the upper layer for further handling. The fundamental assumption here
is that the detach or replace operations act as a fence for page faults.
This implies that all pending page faults associated with a specific RID
or PASID are flushed when a domain is detached or replaced from a device
RID or PASID.
However, the intel_iommu_drain_pasid_prq() helper does not correctly
handle faults for RID. This leads to faults potentially remaining pending
in the iommu hardware queue even after the domain is detached, thereby
violating the aforementioned assumption.
Fix this issue by extending intel_iommu_drain_pasid_prq() to cover faults
for RID.
Fixes: 9f831c16c69e ("iommu/vt-d: Remove the pasid present check in prq_event_thread")
Cc: stable(a)vger.kernel.org
Suggested-by: Kevin Tian <kevin.tian(a)intel.com>
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
---
drivers/iommu/intel/prq.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c
index c2d792db52c3..043f02d7b460 100644
--- a/drivers/iommu/intel/prq.c
+++ b/drivers/iommu/intel/prq.c
@@ -87,7 +87,8 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid)
struct page_req_dsc *req;
req = &iommu->prq[head / sizeof(*req)];
- if (!req->pasid_present || req->pasid != pasid) {
+ if (req->rid != sid ||
+ (req->pasid_present && req->pasid != pasid)) {
head = (head + sizeof(*req)) & PRQ_RING_MASK;
continue;
}
--
2.43.0
I'm announcing the release of the 6.1.126 kernel.
Only upgrade if 6.1.125 did not build properly for you. If it did build
properly, no need to upgrade. Thanks to Ron Economos for the fix for
this issue.
The updated 6.1.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.1.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 +-
drivers/usb/host/xhci-pci.c | 4 ++++
2 files changed, 5 insertions(+), 1 deletion(-)
Greg Kroah-Hartman (1):
Linux 6.1.126
Ron Economos (1):
Partial revert of xhci: use pm_ptr() instead #ifdef for CONFIG_PM conditionals
If device_add() fails, do not use device_unregister() for error
handling. device_unregister() consists of two functions: device_del() and
put_device(). device_unregister() should only be called after
device_add() succeeds, because device_del() undoes what device_add()
does if it was successful. Change the device_unregister() call to
put_device() before returning from the function.
As the comment for device_add() says, 'if device_add() succeeds, you should
call device_del() when you want to get rid of it. If device_add() has
not succeeded, use only put_device() to drop the reference count'.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 53d2a715c240 ("phy: Add Tegra XUSB pad controller support")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- modified the bug description as suggestions.
---
drivers/phy/tegra/xusb.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 79d4814d758d..c89df95aa6ca 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -548,16 +548,16 @@ static int tegra_xusb_port_init(struct tegra_xusb_port *port,
err = dev_set_name(&port->dev, "%s-%u", name, index);
if (err < 0)
- goto unregister;
+ goto put_device;
err = device_add(&port->dev);
if (err < 0)
- goto unregister;
+ goto put_device;
return 0;
-unregister:
- device_unregister(&port->dev);
+put_device:
+ put_device(&port->dev);
return err;
}
--
2.25.1
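For illustration only, the general shape of the rule described in the commit
message above, as a minimal hypothetical sketch (foo_register_child and
"foo-child" are made-up names and are not part of the patch):

	static int foo_register_child(struct device *child)
	{
		int err;

		device_initialize(child);	/* takes the initial reference */

		err = dev_set_name(child, "foo-child");
		if (err < 0)
			goto put;	/* never added: only drop the reference */

		err = device_add(child);
		if (err < 0)
			goto put;	/* device_add() failed: do NOT call device_del() */

		return 0;	/* on success, later teardown uses device_unregister() */

	put:
		put_device(child);	/* releases the device_initialize() reference */
		return err;
	}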
This reverts commit 3e221877dd92dfeccc840700868e7fef2675181b which is
commit 7246a4520b4bf1494d7d030166a11b5226f6d508 upstream.
This patch causes a regression in cuttlefish/crossvm boot on arm64.
The patch was part of a series that when applied will not cause a regression
but this patch was backported to the 6.6 branch by itself.
The other patches do not apply cleanly to the 6.6 branch.
Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org>
---
drivers/pci/controller/pci-host-common.c | 4 ++++
drivers/pci/probe.c | 20 +++++++++++---------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
index e2602e38ae45..6be3266cd7b5 100644
--- a/drivers/pci/controller/pci-host-common.c
+++ b/drivers/pci/controller/pci-host-common.c
@@ -73,6 +73,10 @@ int pci_host_common_probe(struct platform_device *pdev)
if (IS_ERR(cfg))
return PTR_ERR(cfg);
+ /* Do not reassign resources if probe only */
+ if (!pci_has_flag(PCI_PROBE_ONLY))
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
bridge->msi_domain = true;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7e84e472b338..03b519a22840 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -3096,18 +3096,20 @@ int pci_host_probe(struct pci_host_bridge *bridge)
bus = bridge->bus;
- /* If we must preserve the resource configuration, claim now */
- if (bridge->preserve_config)
- pci_bus_claim_resources(bus);
-
/*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
+ * We insert PCI resources into the iomem_resource and
+ * ioport_resource trees in either pci_bus_claim_resources()
+ * or pci_bus_assign_resources().
*/
- pci_assign_unassigned_root_bus_resources(bus);
+ if (pci_has_flag(PCI_PROBE_ONLY)) {
+ pci_bus_claim_resources(bus);
+ } else {
+ pci_bus_size_bridges(bus);
+ pci_bus_assign_resources(bus);
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
+ }
pci_bus_add_devices(bus);
return 0;
--
2.39.5
This reverts commit f858b0fab28d8bc2d0f0e8cd4afc3216f347cfcc which is
commit 7246a4520b4bf1494d7d030166a11b5226f6d508 upstream.
This patch causes a regression in cuttlefish/crossvm boot on arm64.
The patch was part of a series that, when applied as a whole, does not cause
a regression, but this patch was backported to the 6.1 branch by itself.
The other patches do not apply cleanly to the 6.1 branch.
Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org>
---
drivers/pci/controller/pci-host-common.c | 4 ++++
drivers/pci/probe.c | 20 +++++++++++---------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
index fd3020a399cf..d3924a44db02 100644
--- a/drivers/pci/controller/pci-host-common.c
+++ b/drivers/pci/controller/pci-host-common.c
@@ -73,6 +73,10 @@ int pci_host_common_probe(struct platform_device *pdev)
if (IS_ERR(cfg))
return PTR_ERR(cfg);
+ /* Do not reassign resources if probe only */
+ if (!pci_has_flag(PCI_PROBE_ONLY))
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
bridge->msi_domain = true;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index fbb47954a96c..5c1ab9ee65eb 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -3082,18 +3082,20 @@ int pci_host_probe(struct pci_host_bridge *bridge)
bus = bridge->bus;
- /* If we must preserve the resource configuration, claim now */
- if (bridge->preserve_config)
- pci_bus_claim_resources(bus);
-
/*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
+ * We insert PCI resources into the iomem_resource and
+ * ioport_resource trees in either pci_bus_claim_resources()
+ * or pci_bus_assign_resources().
*/
- pci_assign_unassigned_root_bus_resources(bus);
+ if (pci_has_flag(PCI_PROBE_ONLY)) {
+ pci_bus_claim_resources(bus);
+ } else {
+ pci_bus_size_bridges(bus);
+ pci_bus_assign_resources(bus);
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
+ }
pci_bus_add_devices(bus);
return 0;
--
2.39.5
From: Baokun Li <libaokun1(a)huawei.com>
commit 72a6e22c604c95ddb3b10b5d3bb85b6ff4dbc34f upstream.
The fscache_cookie_lru_timer is initialized when the fscache module
is inserted, but is not deleted when the fscache module is removed.
If timer_reduce() is called before removing the fscache module,
the fscache_cookie_lru_timer will be added to the timer list of
the current cpu. Afterwards, a use-after-free will be triggered
in the softIRQ after removing the fscache module, as follows:
==================================================================
BUG: unable to handle page fault for address: fffffbfff803c9e9
PF: supervisor read access in kernel mode
PF: error_code(0x0000) - not-present page
PGD 21ffea067 P4D 21ffea067 PUD 21ffe6067 PMD 110a7c067 PTE 0
Oops: Oops: 0000 [#1] PREEMPT SMP KASAN PTI
CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G W 6.11.0-rc3 #855
Tainted: [W]=WARN
RIP: 0010:__run_timer_base.part.0+0x254/0x8a0
Call Trace:
<IRQ>
tmigr_handle_remote_up+0x627/0x810
__walk_groups.isra.0+0x47/0x140
tmigr_handle_remote+0x1fa/0x2f0
handle_softirqs+0x180/0x590
irq_exit_rcu+0x84/0xb0
sysvec_apic_timer_interrupt+0x6e/0x90
</IRQ>
<TASK>
asm_sysvec_apic_timer_interrupt+0x1a/0x20
RIP: 0010:default_idle+0xf/0x20
default_idle_call+0x38/0x60
do_idle+0x2b5/0x300
cpu_startup_entry+0x54/0x60
start_secondary+0x20d/0x280
common_startup_64+0x13e/0x148
</TASK>
Modules linked in: [last unloaded: netfs]
==================================================================
Therefore, delete fscache_cookie_lru_timer when removing the fscache module.
Fixes: 12bb21a29c19 ("fscache: Implement cookie user counting and resource pinning")
Cc: stable(a)kernel.org
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Link: https://lore.kernel.org/r/20240826112056.2458299-1-libaokun@huaweicloud.com
Acked-by: David Howells <dhowells(a)redhat.com>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
[kovalev: use the `del_timer_sync()` call instead of `timer_shutdown_sync()`]
Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org>
---
Backport to fix CVE-2024-46786
Link: https://www.cve.org/CVERecord/?id=CVE-2024-46786
---
fs/fscache/main.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index dad85fd84f6f9f..dc37c9df3bef7d 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -114,6 +114,7 @@ static void __exit fscache_exit(void)
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
+ del_timer_sync(&fscache_cookie_lru_timer);
destroy_workqueue(fscache_wq);
pr_notice("Unloaded\n");
}
--
2.33.8
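A minimal, hypothetical module (a sketch, not fscache itself) showing the
exit-path rule the patch above applies: any timer the module may have armed
must be shut down before the module text and data go away.

	static struct timer_list demo_timer;

	static void __exit demo_exit(void)
	{
		/* also waits for a callback that is currently running */
		del_timer_sync(&demo_timer);

		/* only now is it safe to free what the callback touches */
	}
	module_exit(demo_exit);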
This reverts commit 0dde3ae52a0dcc5cdfe2185ec58ec52b43fda22e which is
commit 7246a4520b4bf1494d7d030166a11b5226f6d508 upstream.
This patch causes a regression in cuttlefish/crossvm boot on arm64.
The patch was part of a series that, when applied as a whole, does not cause
a regression, but this patch was backported to the 5.10 branch by itself.
The other patches do not apply cleanly to the 5.10 branch.
Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org>
---
drivers/pci/controller/pci-host-common.c | 4 ++++
drivers/pci/probe.c | 20 +++++++++++---------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
index 2525bd043261..6ce34a1deecb 100644
--- a/drivers/pci/controller/pci-host-common.c
+++ b/drivers/pci/controller/pci-host-common.c
@@ -71,6 +71,10 @@ int pci_host_common_probe(struct platform_device *pdev)
if (IS_ERR(cfg))
return PTR_ERR(cfg);
+ /* Do not reassign resources if probe only */
+ if (!pci_has_flag(PCI_PROBE_ONLY))
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index b0ac721e047d..02a75f3b5920 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -3018,18 +3018,20 @@ int pci_host_probe(struct pci_host_bridge *bridge)
bus = bridge->bus;
- /* If we must preserve the resource configuration, claim now */
- if (bridge->preserve_config)
- pci_bus_claim_resources(bus);
-
/*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
+ * We insert PCI resources into the iomem_resource and
+ * ioport_resource trees in either pci_bus_claim_resources()
+ * or pci_bus_assign_resources().
*/
- pci_assign_unassigned_root_bus_resources(bus);
+ if (pci_has_flag(PCI_PROBE_ONLY)) {
+ pci_bus_claim_resources(bus);
+ } else {
+ pci_bus_size_bridges(bus);
+ pci_bus_assign_resources(bus);
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
+ }
pci_bus_add_devices(bus);
return 0;
--
2.39.5
This reverts commit c1a1393f7844c645389e5f1a3f1f0350e0fb9316 which is
commit 7246a4520b4bf1494d7d030166a11b5226f6d508 upstream.
This patch causes a regression in cuttlefish/crossvm boot on arm64.
The patch was part of a series that, when applied as a whole, does not cause
a regression, but this patch was backported to the 5.15 branch by itself.
The other patches do not apply cleanly to the 5.15 branch.
Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org>
---
drivers/pci/controller/pci-host-common.c | 4 ++++
drivers/pci/probe.c | 20 +++++++++++---------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
index fd3020a399cf..d3924a44db02 100644
--- a/drivers/pci/controller/pci-host-common.c
+++ b/drivers/pci/controller/pci-host-common.c
@@ -73,6 +73,10 @@ int pci_host_common_probe(struct platform_device *pdev)
if (IS_ERR(cfg))
return PTR_ERR(cfg);
+ /* Do not reassign resources if probe only */
+ if (!pci_has_flag(PCI_PROBE_ONLY))
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
bridge->msi_domain = true;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index cda6650aa3b1..dd2134c7c419 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -3048,18 +3048,20 @@ int pci_host_probe(struct pci_host_bridge *bridge)
bus = bridge->bus;
- /* If we must preserve the resource configuration, claim now */
- if (bridge->preserve_config)
- pci_bus_claim_resources(bus);
-
/*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
+ * We insert PCI resources into the iomem_resource and
+ * ioport_resource trees in either pci_bus_claim_resources()
+ * or pci_bus_assign_resources().
*/
- pci_assign_unassigned_root_bus_resources(bus);
+ if (pci_has_flag(PCI_PROBE_ONLY)) {
+ pci_bus_claim_resources(bus);
+ } else {
+ pci_bus_size_bridges(bus);
+ pci_bus_assign_resources(bus);
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
+ }
pci_bus_add_devices(bus);
return 0;
--
2.39.5
From: Zhongqiu Han <quic_zhonhan(a)quicinc.com>
[ Upstream commit 02f6b0e1ec7e0e7d059dddc893645816552039da ]
The use-after-free issue occurs as follows: when the GPIO chip device file
is being closed by invoking gpio_chrdev_release(), watched_lines is freed
by bitmap_free(), but the unregistration of the lineinfo_changed_nb notifier
chain stalls while waiting for the write rwsem. Meanwhile, one of the GPIO
chip's lines is also in the release process and holds the notifier chain's
read rwsem. Consequently, a race condition leads to the use-after-free of
watched_lines.
Here is the typical stack when the issue happens:
[free]
gpio_chrdev_release()
--> bitmap_free(cdev->watched_lines) <-- freed
--> blocking_notifier_chain_unregister()
--> down_write(&nh->rwsem) <-- waiting rwsem
--> __down_write_common()
--> rwsem_down_write_slowpath()
--> schedule_preempt_disabled()
--> schedule()
[use]
st54spi_gpio_dev_release()
--> gpio_free()
--> gpiod_free()
--> gpiod_free_commit()
--> gpiod_line_state_notify()
--> blocking_notifier_call_chain()
--> down_read(&nh->rwsem); <-- held rwsem
--> notifier_call_chain()
--> lineinfo_changed_notify()
--> test_bit(xxxx, cdev->watched_lines) <-- use after free
The side effect of the use-after-free issue is that a GPIO line event is
being generated for userspace where it shouldn't. However, since the chrdev
is being closed, userspace won't have the chance to read that event anyway.
To fix the issue, call the bitmap_free() function after the unregistration
of the lineinfo_changed_nb notifier chain.
Fixes: 51c1064e82e7 ("gpiolib: add new ioctl() for monitoring changes in line info")
Signed-off-by: Zhongqiu Han <quic_zhonhan(a)quicinc.com>
Link: https://lore.kernel.org/r/20240505141156.2944912-1-quic_zhonhan@quicinc.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Signed-off-by: Bruno VERNAY <bruno.vernay(a)se.com>
Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource(a)witekio.com>
---
drivers/gpio/gpiolib-cdev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 55f640ef3fee..897d20996a8c 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -2860,9 +2860,9 @@ static int gpio_chrdev_release(struct inode *inode, struct file *file)
struct gpio_chardev_data *cdev = file->private_data;
struct gpio_device *gdev = cdev->gdev;
- bitmap_free(cdev->watched_lines);
blocking_notifier_chain_unregister(&gdev->notifier,
&cdev->lineinfo_changed_nb);
+ bitmap_free(cdev->watched_lines);
put_device(&gdev->dev);
kfree(cdev);
--
2.43.0
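The reordering above is sufficient because blocking_notifier_chain_unregister()
takes the chain's rwsem for write and therefore returns only after every
in-flight callback, which holds the rwsem for read, has finished. A sketch of
the resulting release order, using the names from the description above:

	blocking_notifier_chain_unregister(&gdev->notifier,
					   &cdev->lineinfo_changed_nb);
	/* no lineinfo_changed_notify() can be running past this point */
	bitmap_free(cdev->watched_lines);
	put_device(&gdev->dev);
	kfree(cdev);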
This reverts commit d02f02c28f5561cf5b2345f8b4c910bd98d18553.
I tried to upgrade a RasPi 3B+ with Waveshare 7inch HDMI LCD
from 6.1.y to 6.6.y but found that the display is broken with
this log message:
[ 17.776315] vc4-drm soc:gpu: bound 3f400000.hvs (ops vc4_drm_unregister [vc4])
[ 17.784034] platform 3f806000.vec: deferred probe pending
Some tests revealed that while 6.1.y works, 6.2-rc1 is already broken, as are
all newer kernels. A bisect led me to this patch.
I could repair several versions up to 6.13-rc7 by applying this revert.
Newer kernels just have to take care of
commit f702475b839c ("ARM: dts: bcm2835-rpi: Move duplicate firmware-clocks to bcm2835-rpi.dtsi"),
but that is straightforward.
Fixes: d02f02c28f55 ("ARM: dts: bcm2835-rpi: Use firmware clocks for display")
Signed-off-by: H. Nikolaus Schaller <hns(a)goldelico.com>
---
arch/arm/boot/dts/bcm2835-rpi-common.dtsi | 17 -----------------
1 file changed, 17 deletions(-)
diff --git a/arch/arm/boot/dts/bcm2835-rpi-common.dtsi b/arch/arm/boot/dts/bcm2835-rpi-common.dtsi
index 4e7b4a592da7c..8a55b6cded592 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-common.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi-common.dtsi
@@ -7,23 +7,6 @@
#include <dt-bindings/power/raspberrypi-power.h>
-&firmware {
- firmware_clocks: clocks {
- compatible = "raspberrypi,firmware-clocks";
- #clock-cells = <1>;
- };
-};
-
-&hdmi {
- clocks = <&firmware_clocks 9>,
- <&firmware_clocks 13>;
- clock-names = "pixel", "hdmi";
-};
-
&v3d {
power-domains = <&power RPI_POWER_DOMAIN_V3D>;
};
-
-&vec {
- clocks = <&firmware_clocks 15>;
-};
--
2.47.0
This is the start of the stable review cycle for the 6.1.125 release.
There are 92 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 17 Jan 2025 10:34:58 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.1.125-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.1.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.1.125-rc1
Biju Das <biju.das.jz(a)bp.renesas.com>
drm: adv7511: Fix use-after-free in adv7533_attach_dsi()
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
drm: bridge: adv7511: use dev_err_probe in probe function
Dennis Lam <dennis.lamerice(a)gmail.com>
ocfs2: fix slab-use-after-free due to dangling pointer dqi_priv
Joseph Qi <joseph.qi(a)linux.alibaba.com>
ocfs2: correct return value of ocfs2_local_free_info()
Andrea della Porta <andrea.porta(a)suse.com>
of: address: Preserve the flags portion on 1:1 dma-ranges mapping
Rob Herring <robh(a)kernel.org>
of: address: Store number of bus flag cells rather than bool
Herve Codina <herve.codina(a)bootlin.com>
of: address: Remove duplicated functions
Herve Codina <herve.codina(a)bootlin.com>
of: address: Fix address translation when address-size is greater than 2
Rob Herring <robh(a)kernel.org>
of/address: Add support for 3 address cell bus
Rob Herring <robh(a)kernel.org>
of: unittest: Add bus address range parsing tests
Peter Geis <pgwipeout(a)gmail.com>
arm64: dts: rockchip: add hevc power domain clock to rk3328
Yu Kuai <yukuai3(a)huawei.com>
block, bfq: fix waker_bfqq UAF after bfq_split_bfqq()
Jesse Taube <Mr.Bossman075(a)gmail.com>
ARM: dts: imxrt1050: Fix clocks for mmc
Jens Axboe <axboe(a)kernel.dk>
io_uring/eventfd: ensure io_eventfd_signal() defers another RCU period
Uwe Kleine-König <u.kleine-koenig(a)baylibre.com>
iio: adc: ad7124: Disable all channels at probe time
Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
iio: inkern: call iio_device_put() only on mapped devices
Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
iio: adc: at91: call input_free_device() on allocated iio_dev
Fabio Estevam <festevam(a)gmail.com>
iio: adc: ti-ads124s08: Use gpiod_set_value_cansleep()
Carlos Song <carlos.song(a)nxp.com>
iio: gyro: fxas21002c: Fix missing data update in trigger handler
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
iio: adc: ti-ads8688: fix information leak in triggered buffer
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
iio: imu: kmx61: fix information leak in triggered buffer
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
iio: light: vcnl4035: fix information leak in triggered buffer
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
iio: dummy: iio_simply_dummy_buffer: fix information leak in triggered buffer
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
iio: pressure: zpa2326: fix information leak in triggered buffer
Akash M <akash.m5(a)samsung.com>
usb: gadget: f_fs: Remove WARN_ON in functionfs_bind
Prashanth K <quic_prashk(a)quicinc.com>
usb: gadget: f_uac2: Fix incorrect setting of bNumEndpoints
Ma Ke <make_ruc2021(a)163.com>
usb: fix reference leak in usb_new_device()
Kai-Heng Feng <kaihengf(a)nvidia.com>
USB: core: Disable LPM only for non-suspended ports
Jun Yan <jerrysteve1101(a)gmail.com>
USB: usblp: return error when setting unsupported protocol
Prashanth K <quic_prashk(a)quicinc.com>
usb: dwc3-am62: Disable autosuspend during remove
Lianqin Hu <hulianqin(a)vivo.com>
usb: gadget: u_serial: Disable ep before setting port to null to fix the crash caused by port being null
Rengarajan S <rengarajan.s(a)microchip.com>
misc: microchip: pci1xxxx: Resolve return code mismatch during GPIO set config
Rengarajan S <rengarajan.s(a)microchip.com>
misc: microchip: pci1xxxx: Resolve kernel panic during GPIO IRQ handling
Li Huafei <lihuafei1(a)huawei.com>
topology: Keep the cpumask unchanged when printing cpumap
André Draszik <andre.draszik(a)linaro.org>
usb: dwc3: gadget: fix writing NYET threshold
Johan Hovold <johan(a)kernel.org>
USB: serial: cp210x: add Phoenix Contact UPS Device
Lubomir Rintel <lrintel(a)redhat.com>
usb-storage: Add max sectors quirk for Nokia 208
Zicheng Qu <quzicheng(a)huawei.com>
staging: iio: ad9832: Correct phase range check
Zicheng Qu <quzicheng(a)huawei.com>
staging: iio: ad9834: Correct phase range check
Michal Hrusecky <michal.hrusecky(a)turris.com>
USB: serial: option: add Neoway N723-EA support
Chukun Pan <amadeus(a)jmu.edu.cn>
USB: serial: option: add MeiG Smart SRM815
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Fix overloading of MEM_UNINIT's meaning
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Add MEM_WRITE attribute
Milan Broz <gmazyland(a)gmail.com>
dm-verity FEC: Fix RS FEC repair for roots unaligned to block size (take 2)
Melissa Wen <mwen(a)igalia.com>
drm/amd/display: increase MAX_SURFACES to the value supported by hw
Hans de Goede <hdegoede(a)redhat.com>
ACPI: resource: Add Asus Vivobook X1504VAP to irq1_level_low_skip_override[]
Hans de Goede <hdegoede(a)redhat.com>
ACPI: resource: Add TongFang GM5HG0A to irq1_edge_low_force_override[]
Nam Cao <namcao(a)linutronix.de>
riscv: Fix sleeping in invalid context in die()
Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
thermal: of: fix OF node leak in of_thermal_zone_find()
Roman Li <Roman.Li(a)amd.com>
drm/amd/display: Add check for granularity in dml ceil/floor helpers
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
sctp: sysctl: plpmtud_probe_interval: avoid using current->nsproxy
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
sctp: sysctl: udp_port: avoid using current->nsproxy
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
sctp: sysctl: auth_enable: avoid using current->nsproxy
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
sctp: sysctl: rto_min/max: avoid using current->nsproxy
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
sctp: sysctl: cookie_hmac_alg: avoid using current->nsproxy
Mikulas Patocka <mpatocka(a)redhat.com>
dm-ebs: don't set the flag DM_TARGET_PASSES_INTEGRITY
Krister Johansen <kjlx(a)templeofstupid.com>
dm thin: make get_first_thin use rcu-safe list first function
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
cpuidle: riscv-sbi: fix device node release in early exit of for_each_possible_cpu
He Wang <xw897002528(a)gmail.com>
ksmbd: fix unexpectedly changed path in ksmbd_vfs_kern_path_locked
David Howells <dhowells(a)redhat.com>
afs: Fix the maximum cell name length
Wentao Liang <liangwentao(a)iscas.ac.cn>
ksmbd: fix a missing return value check bug
Liankun Yang <liankun.yang(a)mediatek.com>
drm/mediatek: Add return value check when reading DPCD
Liankun Yang <liankun.yang(a)mediatek.com>
drm/mediatek: Fix mode valid issue for dp
Liankun Yang <liankun.yang(a)mediatek.com>
drm/mediatek: Fix YCbCr422 color format issue for DP
Arnd Bergmann <arnd(a)arndb.de>
drm/mediatek: stop selecting foreign drivers
Chenguang Zhao <zhaochenguang(a)kylinos.cn>
net/mlx5: Fix variable not being completed when function returns
Toke Høiland-Jørgensen <toke(a)redhat.com>
sched: sch_cake: add bounds checks to host bulk flow fairness counts
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: conntrack: clamp maximum hashtable size to INT_MAX
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: imbalance in flowtable binding
Daniel Borkmann <daniel(a)iogearbox.net>
tcp: Annotate data-race around sk->sk_mark in tcp_v4_send_reset
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_sync: Fix not setting Random Address when required
Benjamin Coddington <bcodding(a)redhat.com>
tls: Fix tls_sw_sendmsg error handling
Przemyslaw Korba <przemyslaw.korba(a)intel.com>
ice: fix incorrect PHY settings for 100 GB/s
Anumula Murali Mohan Reddy <anumula(a)chelsio.com>
cxgb4: Avoid removal of uninserted tid
Kalesh AP <kalesh-anakkur.purayil(a)broadcom.com>
bnxt_en: Fix possible memory leak when hwrm_req_replace fails
Eric Dumazet <edumazet(a)google.com>
net_sched: cls_flow: validate TCA_FLOW_RSHIFT attribute
Zhongqiu Duan <dzq.aishenghu0(a)gmail.com>
tcp/dccp: allow a connection when sk_max_ack_backlog is zero
Jason Xing <kernelxing(a)tencent.com>
tcp/dccp: complete lockless accesses to sk->sk_max_ack_backlog
Antonio Pastor <antonio.pastor(a)gmail.com>
net: 802: LLC+SNAP OID:PID lookup on start of skb data
Keisuke Nishimura <keisuke.nishimura(a)inria.fr>
ieee802154: ca8210: Add missing check for kfifo_alloc() in ca8210_probe()
Chen-Yu Tsai <wenst(a)chromium.org>
ASoC: mediatek: disable buffer pre-allocation
Kuan-Wei Chiu <visitorckw(a)gmail.com>
scripts/sorttable: fix orc_sort_cmp() to maintain symmetry and transitivity
Yuezhang Mo <Yuezhang.Mo(a)sony.com>
exfat: fix the infinite loop in __exfat_free_cluster()
Yuezhang Mo <Yuezhang.Mo(a)sony.com>
exfat: fix the infinite loop in exfat_readdir()
Ming-Hung Tsai <mtsai(a)redhat.com>
dm array: fix cursor index when skipping across block boundaries
Ming-Hung Tsai <mtsai(a)redhat.com>
dm array: fix unreleased btree blocks on closing a faulty array cursor
Ming-Hung Tsai <mtsai(a)redhat.com>
dm array: fix releasing a faulty array block twice in dm_array_cursor_end
Zhang Yi <yi.zhang(a)huawei.com>
jbd2: flush filesystem device before updating tail sequence
Zhang Yi <yi.zhang(a)huawei.com>
jbd2: increase IO priority for writing revoke records
Qun-Wei Lin <qun-wei.lin(a)mediatek.com>
sched/task_stack: fix object_is_on_stack() for KASAN tagged pointers
Michal Luczaj <mhal(a)rbox.co>
bpf, sockmap: Fix race between element replace and close()
Max Kellermann <max.kellermann(a)ionos.com>
ceph: give up on paths longer than PATH_MAX
-------------
Diffstat:
Makefile | 4 +-
arch/arm/boot/dts/imxrt1050.dtsi | 2 +-
arch/arm64/boot/dts/rockchip/rk3328.dtsi | 1 +
arch/riscv/kernel/traps.c | 6 +-
block/bfq-iosched.c | 12 +-
drivers/acpi/resource.c | 18 +++
drivers/base/topology.c | 24 +++-
drivers/cpuidle/cpuidle-riscv-sbi.c | 4 +-
drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
.../gpu/drm/amd/display/dc/dml/dml_inline_defs.h | 8 ++
drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 8 +-
drivers/gpu/drm/bridge/adv7511/adv7533.c | 22 ++--
drivers/gpu/drm/mediatek/Kconfig | 5 -
drivers/gpu/drm/mediatek/mtk_dp.c | 46 ++++---
drivers/iio/adc/ad7124.c | 3 +
drivers/iio/adc/at91_adc.c | 2 +-
drivers/iio/adc/ti-ads124s08.c | 4 +-
drivers/iio/adc/ti-ads8688.c | 2 +-
drivers/iio/dummy/iio_simple_dummy_buffer.c | 2 +-
drivers/iio/gyro/fxas21002c_core.c | 11 +-
drivers/iio/imu/kmx61.c | 2 +-
drivers/iio/inkern.c | 2 +-
drivers/iio/light/vcnl4035.c | 2 +-
drivers/iio/pressure/zpa2326.c | 2 +
drivers/md/dm-ebs-target.c | 2 +-
drivers/md/dm-thin.c | 5 +-
drivers/md/dm-verity-fec.c | 39 ++++--
drivers/md/persistent-data/dm-array.c | 19 +--
drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c | 4 +-
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 3 +-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 5 +-
drivers/net/ethernet/intel/ice/ice_ptp_consts.h | 4 +-
drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1 +
drivers/net/ieee802154/ca8210.c | 6 +-
drivers/of/address.c | 76 ++++++++---
drivers/of/unittest-data/tests-address.dtsi | 9 +-
drivers/of/unittest.c | 109 ++++++++++++++++
drivers/staging/iio/frequency/ad9832.c | 2 +-
drivers/staging/iio/frequency/ad9834.c | 2 +-
drivers/thermal/thermal_of.c | 1 +
drivers/usb/class/usblp.c | 7 +-
drivers/usb/core/hub.c | 6 +-
drivers/usb/core/port.c | 7 +-
drivers/usb/dwc3/core.h | 1 +
drivers/usb/dwc3/dwc3-am62.c | 1 +
drivers/usb/dwc3/gadget.c | 4 +-
drivers/usb/gadget/function/f_fs.c | 2 +-
drivers/usb/gadget/function/f_uac2.c | 1 +
drivers/usb/gadget/function/u_serial.c | 8 +-
drivers/usb/serial/cp210x.c | 1 +
drivers/usb/serial/option.c | 4 +-
drivers/usb/storage/unusual_devs.h | 7 ++
fs/afs/afs.h | 2 +-
fs/afs/afs_vl.h | 1 +
fs/afs/vl_alias.c | 8 +-
fs/afs/vlclient.c | 2 +-
fs/ceph/mds_client.c | 9 +-
fs/exfat/dir.c | 3 +-
fs/exfat/fatent.c | 10 ++
fs/jbd2/commit.c | 4 +-
fs/jbd2/revoke.c | 2 +-
fs/ocfs2/quota_global.c | 2 +-
fs/ocfs2/quota_local.c | 10 +-
fs/smb/server/smb2pdu.c | 3 +
fs/smb/server/vfs.c | 3 +-
include/linux/bpf.h | 14 ++-
include/linux/sched/task_stack.h | 2 +
include/net/inet_connection_sock.h | 2 +-
io_uring/io_uring.c | 13 +-
kernel/bpf/helpers.c | 10 +-
kernel/bpf/ringbuf.c | 2 +-
kernel/bpf/syscall.c | 2 +-
kernel/bpf/verifier.c | 76 ++++++-----
kernel/trace/bpf_trace.c | 4 +-
net/802/psnap.c | 4 +-
net/bluetooth/hci_sync.c | 11 +-
net/core/filter.c | 4 +-
net/core/sock_map.c | 6 +-
net/ipv4/tcp_ipv4.c | 2 +-
net/netfilter/nf_conntrack_core.c | 5 +-
net/netfilter/nf_tables_api.c | 15 ++-
net/sched/cls_flow.c | 3 +-
net/sched/sch_cake.c | 140 +++++++++++----------
net/sctp/sysctl.c | 14 ++-
net/tls/tls_sw.c | 2 +-
scripts/sorttable.h | 6 +-
.../soc/mediatek/common/mtk-afe-platform-driver.c | 4 +-
87 files changed, 624 insertions(+), 306 deletions(-)
The CPU PMU in Apple SoCs can be configured to fire its interrupt in one
of several ways, and since the Apple A11 one of those methods is FIQ. Only handle
the PMC interrupt as a FIQ when the CPU PMU has been configured to fire
FIQs.
Cc: stable(a)vger.kernel.org
Fixes: c7708816c944 ("irqchip/apple-aic: Wire PMU interrupts")
Signed-off-by: Nick Chan <towinchenmi(a)gmail.com>
---
drivers/irqchip/irq-apple-aic.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index da5250f0155c..c3d435103d6d 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -577,7 +577,8 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT));
}
- if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) {
+ if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) &
+ (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
int irq;
if (cpumask_test_cpu(smp_processor_id(),
&aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff))
base-commit: 40384c840ea1944d7c5a392e8975ed088ecf0b37
--
2.48.1
This is on Debian testing with the following kernel, built from
the tarball on kernel.org:
Linux sappc1 6.12.10 #4 SMP PREEMPT_DYNAMIC Fri Jan 17 22:17:45 CET 2025 x86_64 GNU/Linux
It is running on an 12th gen Intel Framework laptop, with monitor
connected through Framework's USB-C-to-HDMI adapter (the 3rd gen
one):
https://knowledgebase.frame.work/hdmi-expansion-card-generations-Sk7AQKUv2
Since my kernel got upgraded to version 6.12.*, I frequently get
journal messages like these:
Jan 15 15:24:51 host01 kernel: Registered IR keymap rc-cec
Jan 15 15:24:51 host01 kernel: rc rc0: DP-3 as /devices/pci0000:00/0000:00:02.0/rc/rc0
Jan 15 15:24:51 host01 kernel: input: DP-3 as /devices/pci0000:00/0000:00:02.0/rc/rc0/input146
Jan 15 15:24:51 host01 systemd-logind[1456]: Watching system buttons on /dev/input/event11 (DP-3)
Jan 15 15:24:51 host01 Xorg[1663]: (II) config/udev: Adding input device DP-3 (/dev/input/event11)
I tried to ignore these as long as they showed up only during boot
or when the screen resolution was changed using xrandr, but today I
noticed that they repeat every 30 seconds or so when my screen saver is
active, which then kind of floods the journal.
With the previous 6.11.* kernel series I would see these messages
only once, during startup. When using a DisplayPort adapter instead
of the HDMI adapter, these messages do not show up either.
I guess that it is not the CEC subsystem being responsible here, but
rather some other component which triggers it more frequently than
earlier. Any help on how to find more about this issue appreciated.
Please CC me in replies.
Thanks!
The two provided max_scale_nano values must be multiplied by 100 and 10,
respectively, to express true nano units. According to the comments:
Max scale for apds0306 is 16.326432 → the fractional part is 0.326432,
which is 326432000 in NANO. The current value is 3264320.
Max scale for apds0306-065 is 14.09712 → the fractional part is 0.09712,
which is 97120000 in NANO. The current value is 9712000.
Update max_scale_nano initialization to use the right NANO fractional
parts.
Cc: stable(a)vger.kernel.org
Fixes: 620d1e6c7a3f ("iio: light: Add support for APDS9306 Light Sensor")
Signed-off-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
---
drivers/iio/light/apds9306.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/light/apds9306.c b/drivers/iio/light/apds9306.c
index 69a0d609cffc91cc3daba160f309f511270be385..5ed7e17f49e76206609aba83c85e8144c536d17d 100644
--- a/drivers/iio/light/apds9306.c
+++ b/drivers/iio/light/apds9306.c
@@ -108,11 +108,11 @@ static const struct part_id_gts_multiplier apds9306_gts_mul[] = {
{
.part_id = 0xB1,
.max_scale_int = 16,
- .max_scale_nano = 3264320,
+ .max_scale_nano = 326432000,
}, {
.part_id = 0xB3,
.max_scale_int = 14,
- .max_scale_nano = 9712000,
+ .max_scale_nano = 97120000,
},
};
---
base-commit: 577a66e2e634f712384c57a98f504c44ea4b47da
change-id: 20241218-apds9306_nano_vals-d880219a82f2
Best regards,
--
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Please backport
0b2c29fb68f8bf3e87a9
efi/zboot: Limit compression options to GZIP and ZSTD
to v6.12. Future work on kexec and EFI zboot will only support those
compression methods, and currently, only Loongarch on Debian uses this
with a different compression method (XZ) and so now is the time to
make this change.
Thanks,
Ard.
Fixed a possible UAF problem in driver_override_show() in drivers/cdx/cdx.c
This function driver_override_show() is part of DEVICE_ATTR_RW, which
includes both driver_override_show() and driver_override_store().
These functions can be executed concurrently in sysfs.
The driver_override_store() function uses driver_set_override() to
update the driver_override value, and driver_set_override() internally
locks the device (device_lock(dev)). If driver_override_show() reads
cdx_dev->driver_override without locking, it could potentially access
a freed pointer if driver_override_store() frees the string
concurrently. This could lead to printing a kernel address, which is a
security risk since DEVICE_ATTR can be read by all users.
Additionally, a similar pattern is used in drivers/amba/bus.c, as well
as many other bus drivers, where device_lock() is taken in the show
function, and it has been working without issues.
This potential bug was detected by our experimental static analysis
tool, which analyzes locking APIs and paired functions to identify
data races and atomicity violations.
Fixes: 1f86a00c1159 ("bus/fsl-mc: add support for 'driver_override' in the mc-bus")
Cc: stable(a)vger.kernel.org
Signed-off-by: Qiu-ji Chen <chenqiuji666(a)gmail.com>
---
V2:
Modified the title and description.
Removed the changes to cdx_bus_match().
V3:
Revised the description to make it clearer.
Thanks Greg KH for helpful suggestions.
V4:
Fixed the incorrect code logic.
---
drivers/cdx/cdx.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/cdx/cdx.c b/drivers/cdx/cdx.c
index 76eac3653b1c..daa0fb62a1f7 100644
--- a/drivers/cdx/cdx.c
+++ b/drivers/cdx/cdx.c
@@ -470,8 +470,12 @@ static ssize_t driver_override_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cdx_device *cdx_dev = to_cdx_device(dev);
+ ssize_t len;
- return sysfs_emit(buf, "%s\n", cdx_dev->driver_override);
+ device_lock(dev);
+ len = sysfs_emit(buf, "%s\n", cdx_dev->driver_override);
+ device_unlock(dev);
+ return len;
}
static DEVICE_ATTR_RW(driver_override);
--
2.43.0
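For context, a simplified sketch of the writer side that driver_override_show()
races against, based on the description above rather than copied from the
driver: driver_set_override() swaps the string under device_lock() and then
frees the old one, which is why the reader must take the same lock before
dereferencing the pointer.

	static ssize_t driver_override_store(struct device *dev,
					     struct device_attribute *attr,
					     const char *buf, size_t count)
	{
		struct cdx_device *cdx_dev = to_cdx_device(dev);
		int ret;

		ret = driver_set_override(dev, &cdx_dev->driver_override,
					  buf, count);

		return ret ? ret : count;
	}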
The current implementation sets the wMaxPacketSize of the bulk in/out
endpoints to 1024 bytes at the end of the f_midi_bind function. However,
if the first midi bind attempt fails and a rebind is attempted, that
rebind may fail in f_midi_bind because the previous bind left the bulk
endpoints' wMaxPacketSize at 1024 bytes, which exceeds ep->maxpacket_limit
on controllers whose TX/RX FIFOs are configured with a maxpacket size of
512 bytes for IN/OUT endpoints and which support HS speed only.
This commit addresses the issue by resetting wMaxPacketSize before the
endpoints are claimed.
Fixes: 46decc82ffd5 ("usb: gadget: unconditionally allocate hs/ss descriptor in bind operation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
---
drivers/usb/gadget/function/f_midi.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 837fcdfa3840..5caa0e4eb07e 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -907,6 +907,15 @@ static int f_midi_bind(struct usb_configuration *c, struct usb_function *f)
status = -ENODEV;
+ /*
+ * Reset wMaxPacketSize with maximum packet size of FS bulk transfer before
+ * endpoint claim. This ensures that the wMaxPacketSize does not exceed the
+ * limit during bind retries where configured TX/RX FIFO's maxpacket size
+ * of 512 bytes for IN/OUT endpoints in support HS speed only.
+ */
+ bulk_in_desc.wMaxPacketSize = cpu_to_le16(64);
+ bulk_out_desc.wMaxPacketSize = cpu_to_le16(64);
+
/* allocate instance-specific endpoints */
midi->in_ep = usb_ep_autoconfig(cdev->gadget, &bulk_in_desc);
if (!midi->in_ep)
--
2.17.1
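A hedged illustration of the failure mode described above (simplified; the
actual check lives in the UDC core's endpoint matching): a descriptor still
carrying the 1024-byte value from the previous bind cannot be matched against
an endpoint whose FIFO is limited to 512 bytes.

	/* roughly what endpoint matching boils down to for this case */
	if (usb_endpoint_maxp(&bulk_in_desc) > ep->maxpacket_limit) {
		/*
		 * 1024 > 512 on an HS-only UDC: the endpoint is not
		 * claimed, and f_midi_bind() fails with -ENODEV on rebind.
		 */
		return 0;
	}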
External interrupts (EVENT_TYPE_EXTINT) and system calls (EVENT_TYPE_OTHER)
occur more frequently than other events in a typical system. Prioritizing
these events saves CPU cycles and optimizes the efficiency of performance-
critical paths.
When examining the compiler-generated assembly code for event dispatching
in the functions fred_entry_from_user() and fred_entry_from_kernel(), it
was observed that the compiler intelligently uses a binary search to match
all event type values (0-7) and perform dispatching. As a result, even if
the following cases:
case EVENT_TYPE_EXTINT:
return fred_extint(regs);
case EVENT_TYPE_OTHER:
return fred_other(regs);
are placed at the beginning of the switch() statement, the generated
assembly code would remain the same, and the expected prioritization would
not be achieved.
Command line to check the assembly code generated by the compiler for
fred_entry_from_user():
$objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--'
00000000000015a0 <fred_entry_from_user>:
15a0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
15a7: 48 8b 77 78 mov 0x78(%rdi),%rsi
15ab: 55 push %rbp
15ac: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi)
15b3: ff
15b4: 83 e0 0f and $0xf,%eax
15b7: 48 89 e5 mov %rsp,%rbp
15ba: 3c 04 cmp $0x4,%al
-->> /* match 4(EVENT_TYPE_SWINT) first */
15bc: 74 78 je 1636 <fred_entry_from_user+0x96>
15be: 77 15 ja 15d5 <fred_entry_from_user+0x35>
15c0: 3c 02 cmp $0x2,%al
15c2: 74 53 je 1617 <fred_entry_from_user+0x77>
15c4: 77 65 ja 162b <fred_entry_from_user+0x8b>
15c6: 84 c0 test %al,%al
15c8: 75 42 jne 160c <fred_entry_from_user+0x6c>
15ca: e8 71 fc ff ff callq 1240 <fred_extint>
15cf: 5d pop %rbp
15d0: e9 00 00 00 00 jmpq 15d5 <fred_entry_from_user+0x35>
15d5: 3c 06 cmp $0x6,%al
15d7: 74 7c je 1655 <fred_entry_from_user+0xb5>
15d9: 72 66 jb 1641 <fred_entry_from_user+0xa1>
15db: 3c 07 cmp $0x7,%al
15dd: 75 2d jne 160c <fred_entry_from_user+0x6c>
15df: 8b 87 a4 00 00 00 mov 0xa4(%rdi),%eax
15e5: 25 ff 00 00 02 and $0x20000ff,%eax
15ea: 3d 01 00 00 02 cmp $0x2000001,%eax
15ef: 75 6f jne 1660 <fred_entry_from_user+0xc0>
15f1: 48 8b 77 50 mov 0x50(%rdi),%rsi
15f5: 48 c7 47 50 da ff ff movq $0xffffffffffffffda,0x50(%rdi)
... ...
Command line to check the assembly code generated by the compiler for
fred_entry_from_kernel():
$objdump -d vmlinux.o | awk '/<fred_entry_from_kernel>:/{c=65} c&&c--'
00000000000016b0 <fred_entry_from_kernel>:
16b0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
16b7: 48 8b 77 78 mov 0x78(%rdi),%rsi
16bb: 55 push %rbp
16bc: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi)
16c3: ff
16c4: 83 e0 0f and $0xf,%eax
16c7: 48 89 e5 mov %rsp,%rbp
16ca: 3c 03 cmp $0x3,%al
-->> /* match 3(EVENT_TYPE_HWEXC) first */
16cc: 74 3c je 170a <fred_entry_from_kernel+0x5a>
16ce: 76 13 jbe 16e3 <fred_entry_from_kernel+0x33>
16d0: 3c 05 cmp $0x5,%al
16d2: 74 41 je 1715 <fred_entry_from_kernel+0x65>
16d4: 3c 06 cmp $0x6,%al
16d6: 75 27 jne 16ff <fred_entry_from_kernel+0x4f>
16d8: e8 73 fe ff ff callq 1550 <fred_swexc.isra.3>
16dd: 5d pop %rbp
... ...
Therefore, it is necessary to handle EVENT_TYPE_EXTINT and EVENT_TYPE_OTHER
before the switch statement using if-else syntax to ensure the compiler
generates the desired code. After applying the patch, the verification
results are as follows:
$objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--'
00000000000015a0 <fred_entry_from_user>:
15a0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
15a7: 48 8b 77 78 mov 0x78(%rdi),%rsi
15ab: 55 push %rbp
15ac: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi)
15b3: ff
15b4: 48 89 e5 mov %rsp,%rbp
15b7: 83 e0 0f and $0xf,%eax
15ba: 74 34 je 15f0 <fred_entry_from_user+0x50>
-->> /* match 0(EVENT_TYPE_EXTINT) first */
15bc: 3c 07 cmp $0x7,%al
-->> /* match 7(EVENT_TYPE_OTHER) second *
15be: 74 6e je 162e <fred_entry_from_user+0x8e>
15c0: 3c 04 cmp $0x4,%al
15c2: 0f 84 93 00 00 00 je 165b <fred_entry_from_user+0xbb>
15c8: 76 13 jbe 15dd <fred_entry_from_user+0x3d>
15ca: 3c 05 cmp $0x5,%al
15cc: 74 41 je 160f <fred_entry_from_user+0x6f>
15ce: 3c 06 cmp $0x6,%al
15d0: 75 51 jne 1623 <fred_entry_from_user+0x83>
15d2: e8 79 ff ff ff callq 1550 <fred_swexc.isra.3>
15d7: 5d pop %rbp
15d8: e9 00 00 00 00 jmpq 15dd <fred_entry_from_user+0x3d>
15dd: 3c 02 cmp $0x2,%al
15df: 74 1a je 15fb <fred_entry_from_user+0x5b>
15e1: 3c 03 cmp $0x3,%al
15e3: 75 3e jne 1623 <fred_entry_from_user+0x83>
... ...
The similarly improved output for fred_entry_from_kernel() is not repeated here.
While the C code with if-else placed before switch() may appear ugly, it
works. Additionally, using a jump table is not advisable; even if the jump
table resides in the L1 cache, the cost of loading it is over 10 times the
latency of a cmp instruction.
Signed-off-by: Ethan Zhao <haifeng.zhao(a)linux.intel.com>
---
base commit: 619f0b6fad524f08d493a98d55bac9ab8895e3a6
---
arch/x86/entry/entry_fred.c | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index f004a4dc74c2..591f47771ecf 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -228,9 +228,18 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
/* Invalidate orig_ax so that syscall_get_nr() works correctly */
regs->orig_ax = -1;
- switch (regs->fred_ss.type) {
- case EVENT_TYPE_EXTINT:
+ if (regs->fred_ss.type == EVENT_TYPE_EXTINT)
return fred_extint(regs);
+ else if (regs->fred_ss.type == EVENT_TYPE_OTHER)
+ return fred_other(regs);
+
+ /*
+ * Dispatch EVENT_TYPE_EXTINT and EVENT_TYPE_OTHER(syscall) type events
+ * first due to their high probability and let the compiler create binary search
+ * dispatching for the remaining events
+ */
+
+ switch (regs->fred_ss.type) {
case EVENT_TYPE_NMI:
if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
return fred_exc_nmi(regs);
@@ -245,8 +254,6 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
break;
case EVENT_TYPE_SWEXC:
return fred_swexc(regs, error_code);
- case EVENT_TYPE_OTHER:
- return fred_other(regs);
default: break;
}
@@ -260,9 +267,15 @@ __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
/* Invalidate orig_ax so that syscall_get_nr() works correctly */
regs->orig_ax = -1;
- switch (regs->fred_ss.type) {
- case EVENT_TYPE_EXTINT:
+ if (regs->fred_ss.type == EVENT_TYPE_EXTINT)
return fred_extint(regs);
+
+ /*
+ * Dispatch EVENT_TYPE_EXTINT type event first due to its high probability
+ * and let the compiler do binary search dispatching for the other events
+ */
+
+ switch (regs->fred_ss.type) {
case EVENT_TYPE_NMI:
if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
return fred_exc_nmi(regs);
--
2.31.1
Fixed a possible UAF problem in driver_override_show() in drivers/cdx/cdx.c
This function driver_override_show() is part of DEVICE_ATTR_RW, which
includes both driver_override_show() and driver_override_store().
These functions can be executed concurrently in sysfs.
The driver_override_store() function uses driver_set_override() to
update the driver_override value, and driver_set_override() internally
locks the device (device_lock(dev)). If driver_override_show() reads
cdx_dev->driver_override without locking, it could potentially access
a freed pointer if driver_override_store() frees the string
concurrently. This could lead to printing a kernel address, which is a
security risk since DEVICE_ATTR can be read by all users.
Additionally, a similar pattern is used in drivers/amba/bus.c, as well
as many other bus drivers, where device_lock() is taken in the show
function, and it has been working without issues.
This possible bug was found by an experimental static analysis tool
developed by our team. The tool analyzes locking APIs to extract
function pairs that can be executed concurrently, and then analyzes the
instructions in the paired functions to identify possible concurrency bugs,
including data races and atomicity violations.
Fixes: 1f86a00c1159 ("bus/fsl-mc: add support for 'driver_override' in the mc-bus")
Cc: stable(a)vger.kernel.org
Signed-off-by: Qiu-ji Chen <chenqiuji666(a)gmail.com>
---
V2:
Modified the title and description.
Removed the changes to cdx_bus_match().
V3:
Revised the description to reduce the confusion it caused.
---
drivers/cdx/cdx.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/cdx/cdx.c b/drivers/cdx/cdx.c
index 07371cb653d3..e696ba0b573e 100644
--- a/drivers/cdx/cdx.c
+++ b/drivers/cdx/cdx.c
@@ -470,8 +470,11 @@ static ssize_t driver_override_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cdx_device *cdx_dev = to_cdx_device(dev);
+ ssize_t len;
- return sysfs_emit(buf, "%s\n", cdx_dev->driver_override);
+ device_lock(dev);
+ len = sysfs_emit(buf, "%s\n", cdx_dev->driver_override);
+ device_unlock(dev);
}
static DEVICE_ATTR_RW(driver_override);
--
2.34.1
If the socket has been de-assigned or assigned to another transport,
we must discard any packets received because they are not expected
and would cause issues when we access vsk->transport.
A possible scenario is described by Hyunwoo Kim in the linked report,
where, after a first connect() is interrupted by a signal and a second
connect() fails, we can find `vsk->transport` set to NULL, leading to a
NULL pointer dereference.
Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
Cc: stable(a)vger.kernel.org
Reported-by: Hyunwoo Kim <v4bel(a)theori.io>
Reported-by: Wongi Lee <qwerty(a)theori.io>
Closes: https://lore.kernel.org/netdev/Z2LvdTTQR7dBmPb5@v4bel-B760M-AORUS-ELITE-AX/
Signed-off-by: Stefano Garzarella <sgarzare(a)redhat.com>
---
net/vmw_vsock/virtio_transport_common.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 9acc13ab3f82..51a494b69be8 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1628,8 +1628,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
lock_sock(sk);
- /* Check if sk has been closed before lock_sock */
- if (sock_flag(sk, SOCK_DONE)) {
+ /* Check if sk has been closed or assigned to another transport before
+ * lock_sock (note: listener sockets are not assigned to any transport)
+ */
+ if (sock_flag(sk, SOCK_DONE) ||
+ (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
(void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk);
sock_put(sk);
--
2.47.1
From: Sean Christopherson <seanjc(a)google.com>
[ Upstream commit 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 ]
Ignore the userspace provided x2APIC ID when fixing up APIC state for
KVM_SET_LAPIC, i.e. make the x2APIC fully readonly in KVM. Commit
a92e2543d6a8 ("KVM: x86: use hardware-compatible format for APIC ID
register"), which added the fixup, didn't intend to allow userspace to
modify the x2APIC ID. In fact, that commit is when KVM first started
treating the x2APIC ID as readonly, apparently to fix some race:
static inline u32 kvm_apic_id(struct kvm_lapic *apic)
{
- return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+ /* To avoid a race between apic_base and following APIC_ID update when
+ * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
+ */
+ if (apic_x2apic_mode(apic))
+ return apic->vcpu->vcpu_id;
+
+ return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
}
Furthermore, KVM doesn't support delivering interrupts to vCPUs with a
modified x2APIC ID, but KVM *does* return the modified value on a guest
RDMSR and for KVM_GET_LAPIC. I.e. no remotely sane setup can actually
work with a modified x2APIC ID.
Making the x2APIC ID fully readonly fixes a WARN in KVM's optimized map
calculation, which expects the LDR to align with the x2APIC ID.
WARNING: CPU: 2 PID: 958 at arch/x86/kvm/lapic.c:331 kvm_recalculate_apic_map+0x609/0xa00 [kvm]
CPU: 2 PID: 958 Comm: recalc_apic_map Not tainted 6.4.0-rc3-vanilla+ #35
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.2-1-1 04/01/2014
RIP: 0010:kvm_recalculate_apic_map+0x609/0xa00 [kvm]
Call Trace:
<TASK>
kvm_apic_set_state+0x1cf/0x5b0 [kvm]
kvm_arch_vcpu_ioctl+0x1806/0x2100 [kvm]
kvm_vcpu_ioctl+0x663/0x8a0 [kvm]
__x64_sys_ioctl+0xb8/0xf0
do_syscall_64+0x56/0x80
entry_SYSCALL_64_after_hwframe+0x46/0xb0
RIP: 0033:0x7fade8b9dd6f
Unfortunately, the WARN can still trigger for other CPUs than the current
one by racing against KVM_SET_LAPIC, so remove it completely.
Reported-by: Michal Luczaj <mhal(a)rbox.co>
Closes: https://lore.kernel.org/all/814baa0c-1eaa-4503-129f-059917365e80@rbox.co
Reported-by: Haoyu Wu <haoyuwu254(a)gmail.com>
Closes: https://lore.kernel.org/all/20240126161633.62529-1-haoyuwu254@gmail.com
Reported-by: syzbot+545f1326f405db4e1c3e(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/000000000000c2a6b9061cbca3c3@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-ID: <20240802202941.344889-2-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Gavin Guo <gavinguo(a)igalia.com>
---
arch/x86/kvm/lapic.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 34766abbabd8..cd9c1e1f6fd3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -338,10 +338,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
* reversing the LDR calculation to get cluster of APICs, i.e. no
* additional work is required.
*/
- if (apic_x2apic_mode(apic)) {
- WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
+ if (apic_x2apic_mode(apic))
return;
- }
if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
&cluster, &mask))) {
@@ -2964,18 +2962,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s, bool set)
{
if (apic_x2apic_mode(vcpu->arch.apic)) {
+ u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
u32 *id = (u32 *)(s->regs + APIC_ID);
u32 *ldr = (u32 *)(s->regs + APIC_LDR);
u64 icr;
if (vcpu->kvm->arch.x2apic_format) {
- if (*id != vcpu->vcpu_id)
+ if (*id != x2apic_id)
return -EINVAL;
} else {
+ /*
+ * Ignore the userspace value when setting APIC state.
+ * KVM's model is that the x2APIC ID is readonly, e.g.
+ * KVM only supports delivering interrupts to KVM's
+ * version of the x2APIC ID. However, for backwards
+ * compatibility, don't reject attempts to set a
+ * mismatched ID for userspace that hasn't opted into
+ * x2apic_format.
+ */
if (set)
- *id >>= 24;
+ *id = x2apic_id;
else
- *id <<= 24;
+ *id = x2apic_id << 24;
}
/*
@@ -2984,7 +2992,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
* split to ICR+ICR2 in userspace for backwards compatibility.
*/
if (set) {
- *ldr = kvm_apic_calc_x2apic_ldr(*id);
+ *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
(u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
--
2.43.0
From: Stefan Eichenberger <stefan.eichenberger(a)toradex.com>
Currently, the flags for the ACM clocks are set to 0. This configuration
causes the fsl-sai audio driver to fail when attempting to set the
sysclk, returning an EINVAL error. The following error messages
highlight the issue:
fsl-sai 59090000.sai: ASoC: error at snd_soc_dai_set_sysclk on 59090000.sai: -22
imx-hdmi sound-hdmi: failed to set cpu sysclk: -22
By setting the flag CLK_SET_RATE_NO_REPARENT, we signal that the ACM
driver does not support reparenting and instead relies on the clock tree
as defined in the device tree. This change resolves the issue with the
fsl-sai audio driver.
CC: stable(a)vger.kernel.org
Fixes: d3a0946d7ac9 ("clk: imx: imx8: add audio clock mux driver")
Signed-off-by: Stefan Eichenberger <stefan.eichenberger(a)toradex.com>
---
drivers/clk/imx/clk-imx8-acm.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/clk/imx/clk-imx8-acm.c b/drivers/clk/imx/clk-imx8-acm.c
index c169fe53a35f..f20832a17ea3 100644
--- a/drivers/clk/imx/clk-imx8-acm.c
+++ b/drivers/clk/imx/clk-imx8-acm.c
@@ -371,7 +371,8 @@ static int imx8_acm_clk_probe(struct platform_device *pdev)
for (i = 0; i < priv->soc_data->num_sels; i++) {
hws[sels[i].clkid] = devm_clk_hw_register_mux_parent_data_table(dev,
sels[i].name, sels[i].parents,
- sels[i].num_parents, 0,
+ sels[i].num_parents,
+ CLK_SET_RATE_NO_REPARENT,
base + sels[i].reg,
sels[i].shift, sels[i].width,
0, NULL, NULL);
--
2.45.2
Hi all,
We've seen a regression on several LTS branches (at least 5.10, 5.15, and 6.1)
causing boot issues with cuttlefish/crossvm Android emulation on arm64.
The offending patch is:
PCI: Use preserve_config in place of pci_flags
It looks like this patch was added to stable by AUTOSEL but without the other
3 patches in the series:
https://lore.kernel.org/all/20240508174138.3630283-1-vidyas@nvidia.com/
Applying the missing patches resolves the issue but they do not apply cleanly.
Can we revert this patch on the lts branches?
> -----Original Message-----
> From: Sasha Levin <sashal(a)kernel.org>
> Sent: Thursday, January 2, 2025 7:42 PM
> To: stable-commits(a)vger.kernel.org; oushixiong(a)kylinos.cn
> Cc: Deucher, Alexander <Alexander.Deucher(a)amd.com>; Koenig, Christian
> <Christian.Koenig(a)amd.com>; Pan, Xinhui <Xinhui.Pan(a)amd.com>; David Airlie
> <airlied(a)gmail.com>; Simona Vetter <simona(a)ffwll.ch>
> Subject: Patch "drm/radeon: Delay Connector detecting when HPD singals is
> unstable" has been added to the 6.6-stable tree
>
> This is a note to let you know that I've just added the patch titled
>
> drm/radeon: Delay Connector detecting when HPD singals is unstable
>
> to the 6.6-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> drm-radeon-delay-connector-detecting-when-hpd-singal.patch
> and it can be found in the queue-6.6 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree, please let
> <stable(a)vger.kernel.org> know about it.
>
>
>
> commit 20430c3e75a06c4736598de02404f768653d953a
> Author: Shixiong Ou <oushixiong(a)kylinos.cn>
> Date: Thu May 9 16:57:58 2024 +0800
>
> drm/radeon: Delay Connector detecting when HPD singals is unstable
>
> [ Upstream commit 949658cb9b69ab9d22a42a662b2fdc7085689ed8 ]
>
> In some causes, HPD signals will jitter when plugging in
> or unplugging HDMI.
>
> Rescheduling the hotplug work for a second when EDID may still be
> readable but HDP is disconnected, and fixes this issue.
>
> Signed-off-by: Shixiong Ou <oushixiong(a)kylinos.cn>
> Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
> Stable-dep-of: 979bfe291b5b ("Revert "drm/radeon: Delay Connector detecting
> when HPD singals is unstable"")
Please drop both of these patches. There is no need to pull back a patch just so that you can apply the revert.
Thanks,
Alex
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c
> b/drivers/gpu/drm/radeon/radeon_connectors.c
> index b84b58926106..cf0114ca59a4 100644
> --- a/drivers/gpu/drm/radeon/radeon_connectors.c
> +++ b/drivers/gpu/drm/radeon/radeon_connectors.c
> @@ -1267,6 +1267,16 @@ radeon_dvi_detect(struct drm_connector *connector,
> bool force)
> goto exit;
> }
> }
> +
> + if (dret && radeon_connector->hpd.hpd != RADEON_HPD_NONE &&
> + !radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) &&
> + connector->connector_type == DRM_MODE_CONNECTOR_HDMIA) {
> + DRM_DEBUG_KMS("EDID is readable when HPD
> disconnected\n");
> + schedule_delayed_work(&rdev->hotplug_work,
> msecs_to_jiffies(1000));
> + ret = connector_status_disconnected;
> + goto exit;
> + }
> +
> if (dret) {
> radeon_connector->detected_by_load = false;
> radeon_connector_free_edid(connector);
This patch series is to fix bugs and improve codes for drivers/of/*.
Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com>
---
Changes in v5:
- Drop 12 patches and add 1 patch
- Correct based on Rob's comments
- Link to v4: https://lore.kernel.org/r/20250109-of_core_fix-v4-0-db8a72415b8c@quicinc.com
Changes in v4:
- Remove 2 modalias relevant patches, and add more patches.
- Link to v3: https://lore.kernel.org/r/20241217-of_core_fix-v3-0-3bc49a2e8bda@quicinc.com
Changes in v3:
- Drop 2 applied patches and pick up patch 4/7 again
- Fix build error for patch 6/7.
- Include of_private.h instead of function declaration for patch 2/7
- Correct tile and commit messages.
- Link to v2: https://lore.kernel.org/r/20241216-of_core_fix-v2-0-e69b8f60da63@quicinc.com
Changes in v2:
- Drop applied/conflict/TBD patches.
- Correct based on Rob's comments.
- Link to v1: https://lore.kernel.org/r/20241206-of_core_fix-v1-0-dc28ed56bec3@quicinc.com
---
Zijun Hu (3):
of: Do not expose of_alias_scan() and correct its comments
of: reserved-memory: Warn for missing static reserved memory regions
of: Correct element count for two arrays in API of_parse_phandle_with_args_map()
drivers/of/base.c | 7 +++----
drivers/of/of_private.h | 2 ++
drivers/of/of_reserved_mem.c | 5 +++++
drivers/of/pdt.c | 2 ++
include/linux/of.h | 1 -
5 files changed, 12 insertions(+), 5 deletions(-)
---
base-commit: c141ecc3cecd764799e17c8251026336cab86800
change-id: 20241206-of_core_fix-dc3021a06418
Best regards,
--
Zijun Hu <quic_zijuhu(a)quicinc.com>
Hi!
I was recently pointed to this driver as an example of how consumers
can get a pointer to the supplier's driver data, and I noticed a leak.
Callers of of_qcom_ice_get() leak the device reference taken by
of_find_device_by_node(). Introduce a devm variant of of_qcom_ice_get()
to spare consumers an extra call to put the dev reference.
This set touches the mmc and scsi subsystems. Since the fix is trivial for
them, I'd suggest taking everything through the SoC tree with Acked-by
tags if people consider this useful. Thanks!
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)linaro.org>
---
Tudor Ambarus (4):
soc: qcom: ice: introduce devm_of_qcom_ice_get
mmc: sdhci-msm: fix dev reference leaked through of_qcom_ice_get
scsi: ufs: qcom: fix dev reference leaked through of_qcom_ice_get
soc: qcom: ice: make of_qcom_ice_get() static
drivers/mmc/host/sdhci-msm.c | 2 +-
drivers/soc/qcom/ice.c | 37 +++++++++++++++++++++++++++++++++++--
drivers/ufs/host/ufs-qcom.c | 2 +-
include/soc/qcom/ice.h | 3 ++-
4 files changed, 39 insertions(+), 5 deletions(-)
---
base-commit: b323d8e7bc03d27dec646bfdccb7d1a92411f189
change-id: 20250110-qcom-ice-fix-dev-leak-bbff59a964fb
Best regards,
--
Tudor Ambarus <tudor.ambarus(a)linaro.org>
I am using an Acer Aspire A115-31 laptop. When running newer kernel
versions, sound played through headphones is distorted, but when running
older versions, it is not.
Kernel version: Linux version 6.12.5 (user@hostname) (gcc (Debian
14.2.0-8) 14.2.0, GNU ld (GNU Binutils for Debian) 2.43.50.20241210) #1
SMP PREEMPT_DYNAMIC Sun Dec 15 05:09:16 IST 2024
Operating System: Debian GNU/Linux trixie/sid
No special actions are needed to reproduce the issue. The sound is
distorted all the time, and it doesn't depend on anything besides using
an affected kernel version.
It seems to be caused by commit 34ab5bbc6e82214d7f7393eba26d164b303ebb4e
(ALSA: hda/realtek - Add Headset Mic supported Acer NB platform).
Indeed, if I remove the entry that this commit adds, the issue disappears.
lspci output for the device in question:
00:0e.0 Multimedia audio controller [0401]: Intel Corporation
Celeron/Pentium Silver Processor High Definition Audio [8086:3198] (rev 06)
Subsystem: Acer Incorporated [ALI] Device [1025:1360]
Flags: bus master, fast devsel, latency 0, IRQ 130
Memory at a1214000 (64-bit, non-prefetchable) [size=16K]
Memory at a1000000 (64-bit, non-prefetchable) [size=1M]
Capabilities: [50] Power Management version 3
Capabilities: [80] Vendor Specific Information: Len=14 <?>
Capabilities: [60] MSI: Enable+ Count=1/1 Maskable- 64bit+
Capabilities: [70] Express Root Complex Integrated Endpoint,
IntMsgNum 0
Kernel driver in use: snd_hda_intel
Kernel modules: snd_hda_intel, snd_soc_avs, snd_sof_pci_intel_apl
This is a note to let you know that I've just added the patch titled
USB: serial: quatech2: fix null-ptr-deref in qt2_process_read_urb()
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From 575a5adf48b06a2980c9eeffedf699ed5534fade Mon Sep 17 00:00:00 2001
From: Qasim Ijaz <qasdev00(a)gmail.com>
Date: Mon, 13 Jan 2025 18:00:34 +0000
Subject: USB: serial: quatech2: fix null-ptr-deref in qt2_process_read_urb()
This patch addresses a null-ptr-deref in qt2_process_read_urb() due to
an incorrect bounds check in the following:
if (newport > serial->num_ports) {
dev_err(&port->dev,
"%s - port change to invalid port: %i\n",
__func__, newport);
break;
}
The condition doesn't account for the valid range of the serial->port
buffer, which is from 0 to serial->num_ports - 1. When newport is equal
to serial->num_ports, the assignment of "port" in the
following code is out-of-bounds and NULL:
serial_priv->current_port = newport;
port = serial->port[serial_priv->current_port];
The fix checks if newport is greater than or equal to serial->num_ports
indicating it is out-of-bounds.
Reported-by: syzbot <syzbot+506479ebf12fe435d01a(a)syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=506479ebf12fe435d01a
Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver")
Cc: <stable(a)vger.kernel.org> # 3.5
Signed-off-by: Qasim Ijaz <qasdev00(a)gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/usb/serial/quatech2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
index a317bdbd00ad..72fe83a6c978 100644
--- a/drivers/usb/serial/quatech2.c
+++ b/drivers/usb/serial/quatech2.c
@@ -503,7 +503,7 @@ static void qt2_process_read_urb(struct urb *urb)
newport = *(ch + 3);
- if (newport > serial->num_ports) {
+ if (newport >= serial->num_ports) {
dev_err(&port->dev,
"%s - port change to invalid port: %i\n",
__func__, newport);
--
2.48.1
This is a note to let you know that I've just added the patch titled
USB: serial: quatech2: fix null-ptr-deref in qt2_process_read_urb()
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the usb-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
From 575a5adf48b06a2980c9eeffedf699ed5534fade Mon Sep 17 00:00:00 2001
From: Qasim Ijaz <qasdev00(a)gmail.com>
Date: Mon, 13 Jan 2025 18:00:34 +0000
Subject: USB: serial: quatech2: fix null-ptr-deref in qt2_process_read_urb()
This patch addresses a null-ptr-deref in qt2_process_read_urb() due to
an incorrect bounds check in the following:
if (newport > serial->num_ports) {
dev_err(&port->dev,
"%s - port change to invalid port: %i\n",
__func__, newport);
break;
}
The condition doesn't account for the valid range of the serial->port
buffer, which is from 0 to serial->num_ports - 1. When newport is equal
to serial->num_ports, the assignment of "port" in the
following code is out-of-bounds and NULL:
serial_priv->current_port = newport;
port = serial->port[serial_priv->current_port];
The fix checks if newport is greater than or equal to serial->num_ports
indicating it is out-of-bounds.
Reported-by: syzbot <syzbot+506479ebf12fe435d01a(a)syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=506479ebf12fe435d01a
Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver")
Cc: <stable(a)vger.kernel.org> # 3.5
Signed-off-by: Qasim Ijaz <qasdev00(a)gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/usb/serial/quatech2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
index a317bdbd00ad..72fe83a6c978 100644
--- a/drivers/usb/serial/quatech2.c
+++ b/drivers/usb/serial/quatech2.c
@@ -503,7 +503,7 @@ static void qt2_process_read_urb(struct urb *urb)
newport = *(ch + 3);
- if (newport > serial->num_ports) {
+ if (newport >= serial->num_ports) {
dev_err(&port->dev,
"%s - port change to invalid port: %i\n",
__func__, newport);
--
2.48.1
On our Marvell OCTEON CN96XX board, we observed the following panic on
the latest kernel:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000080
Mem abort info:
ESR = 0x0000000096000005
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
FSC = 0x05: level 1 translation fault
Data abort info:
ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
CM = 0, WnR = 0, TnD = 0, TagAccess = 0
GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[0000000000000080] user address but active_mm is swapper
Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
Modules linked in:
CPU: 9 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.13.0-rc7-00149-g9bffa1ad25b8 #1
Hardware name: Marvell OcteonTX CN96XX board (DT)
pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : of_pci_add_properties+0x278/0x4c8
lr : of_pci_add_properties+0x258/0x4c8
sp : ffff8000822ef9b0
x29: ffff8000822ef9b0 x28: ffff000106dd8000 x27: ffff800081bc3b30
x26: ffff800081540118 x25: ffff8000813d2be0 x24: 0000000000000000
x23: ffff00010528a800 x22: ffff000107c50000 x21: ffff0001039c2630
x20: ffff0001039c2630 x19: 0000000000000000 x18: ffffffffffffffff
x17: 00000000a49c1b85 x16: 0000000084c07b58 x15: ffff000103a10f98
x14: ffffffffffffffff x13: ffff000103a10f96 x12: 0000000000000003
x11: 0101010101010101 x10: 000000000000002c x9 : ffff800080ca7acc
x8 : ffff0001038fd900 x7 : 0000000000000000 x6 : 0000000000696370
x5 : 0000000000000000 x4 : 0000000000000002 x3 : ffff8000822efa40
x2 : ffff800081341000 x1 : ffff000107c50000 x0 : 0000000000000000
Call trace:
of_pci_add_properties+0x278/0x4c8 (P)
of_pci_make_dev_node+0xe0/0x158
pci_bus_add_device+0x158/0x210
pci_bus_add_devices+0x40/0x98
pci_host_probe+0x94/0x118
pci_host_common_probe+0x120/0x1a0
platform_probe+0x70/0xf0
really_probe+0xb4/0x2a8
__driver_probe_device+0x80/0x140
driver_probe_device+0x48/0x170
__driver_attach+0x9c/0x1b0
bus_for_each_dev+0x7c/0xe8
driver_attach+0x2c/0x40
bus_add_driver+0xec/0x218
driver_register+0x68/0x138
__platform_driver_register+0x2c/0x40
gen_pci_driver_init+0x24/0x38
do_one_initcall+0x4c/0x278
kernel_init_freeable+0x1f4/0x3d0
kernel_init+0x28/0x1f0
ret_from_fork+0x10/0x20
Code: aa1603e1 f0005522 d2800044 91000042 (f94040a0)
This regression was introduced by commit 7246a4520b4b ("PCI: Use
preserve_config in place of pci_flags"). On our board, the 0002:00:07.0
bridge is misconfigured by the bootloader. Both its secondary and
subordinate bus numbers are initialized to 0, while its fixed secondary
bus number is set to 8. However, bus number 8 is also assigned to another
bridge (0002:00:0f.0). Although this is a bootloader issue, before the
change in commit 7246a4520b4b, the PCI_REASSIGN_ALL_BUS flag was
set by default when PCI_PROBE_ONLY was enabled, ensuring that all the
bus numbers for these bridges were reassigned, avoiding any conflicts.
After the change introduced in commit 7246a4520b4b, the bus numbers
assigned by the bootloader are reused by all other bridges, except
the misconfigured 0002:00:07.0 bridge. The kernel attempts to reconfigure
0002:00:07.0 by reusing the fixed secondary bus number 8 assigned by the
bootloader. However, since a pci_bus has already been allocated for
bus 8 due to the probe of 0002:00:0f.0, no new pci_bus is allocated for
0002:00:07.0. This results in a PCI bridge device without a pci_bus
attached (pdev->subordinate == NULL). Consequently, accessing
pdev->subordinate in of_pci_prop_bus_range() leads to a NULL pointer
dereference.
To summarize, we need to restore the PCI_REASSIGN_ALL_BUS flag when
PCI_PROBE_ONLY is enabled in order to work around issues like the one
described above.
Fixes: 7246a4520b4b ("PCI: Use preserve_config in place of pci_flags")
Signed-off-by: Bo Sun <Bo.Sun.CN(a)windriver.com>
---
drivers/pci/controller/pci-host-common.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
index cf5f59a745b3..615923acbc3e 100644
--- a/drivers/pci/controller/pci-host-common.c
+++ b/drivers/pci/controller/pci-host-common.c
@@ -73,6 +73,10 @@ int pci_host_common_probe(struct platform_device *pdev)
if (IS_ERR(cfg))
return PTR_ERR(cfg);
+ /* Do not reassign resources if probe only */
+ if (!pci_has_flag(PCI_PROBE_ONLY))
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
bridge->msi_domain = true;
--
2.48.1
From: Steven Rostedt <rostedt(a)goodmis.org>
Tracing tools like perf and trace-cmd read the /sys/kernel/tracing/events/*/*/format
files to know how to parse the data and also how to print it. For the
"print fmt" portion of that file, if anything uses an enum that is not
exported to the tracing system, user space will not be able to parse it.
The GFP flags used to be defines, and defines get translated in the print
fmt sections. But now they are converted to use enums, which are not.
The mm_page_alloc trace event format used to have:
print fmt: "page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s",
REC->pfn != -1UL ? (((struct page *)vmemmap_base) + (REC->pfn)) : ((void
*)0), REC->pfn != -1UL ? REC->pfn : 0, REC->order, REC->migratetype,
(REC->gfp_flags) ? __print_flags(REC->gfp_flags, "|", {( unsigned
long)(((((((( gfp_t)(0x400u|0x800u)) | (( gfp_t)0x40u) | (( gfp_t)0x80u) |
(( gfp_t)0x100000u)) | (( gfp_t)0x02u)) | (( gfp_t)0x08u) | (( gfp_t)0)) |
(( gfp_t)0x40000u) | (( gfp_t)0x80000u) | (( gfp_t)0x2000u)) & ~((
gfp_t)(0x400u|0x800u))) | (( gfp_t)0x400u)), "GFP_TRANSHUGE"}, {( unsigned
long)((((((( gfp_t)(0x400u|0x800u)) | (( gfp_t)0x40u) | (( gfp_t)0x80u) |
(( gfp_t)0x100000u)) | (( gfp_t)0x02u)) | (( gfp_t)0x08u) | (( gfp_t)0)) ...
Where the GFP values are shown and not their names. But after the GFP
flags were converted to use enums, it has:
print fmt: "page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s",
REC->pfn != -1UL ? (vmemmap + (REC->pfn)) : ((void *)0), REC->pfn != -1UL
? REC->pfn : 0, REC->order, REC->migratetype, (REC->gfp_flags) ?
__print_flags(REC->gfp_flags, "|", {( unsigned long)((((((((
gfp_t)(((((1UL))) << (___GFP_DIRECT_RECLAIM_BIT))|((((1UL))) <<
(___GFP_KSWAPD_RECLAIM_BIT)))) | (( gfp_t)((((1UL))) << (___GFP_IO_BIT)))
| (( gfp_t)((((1UL))) << (___GFP_FS_BIT))) | (( gfp_t)((((1UL))) <<
(___GFP_HARDWALL_BIT)))) | (( gfp_t)((((1UL))) << (___GFP_HIGHMEM_BIT))))
| (( gfp_t)((((1UL))) << (___GFP_MOVABLE_BIT))) | (( gfp_t)0)) | ((
gfp_t)((((1UL))) << (___GFP_COMP_BIT))) ...
Where the enum names like ___GFP_KSWAPD_RECLAIM_BIT are shown and not their
values. User space has no way to convert these names to their values and
the output will fail to parse. What is shown is now:
mm_page_alloc: page=0xffffffff981685f3 pfn=0x1d1ac1 order=0 migratetype=1 gfp_flags=0x140cca
The TRACE_DEFINE_ENUM() macro was created to handle enums in the print fmt
files. This causes them to be replaced at boot up with the numbers, so
that user space tooling can parse it. By using this macro, the output is
back to the human readable:
mm_page_alloc: page=0xffffffff981685f3 pfn=0x122233 order=0 migratetype=1 gfp_flags=GFP_HIGHUSER_MOVABLE|__GFP_COMP
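As a rough illustration (not part of the patch; the exact expansion depends
on the ftrace TRACE_DEFINE_ENUM() plumbing), the TRACE_GFP_EM() wrapper
added below is expected to unroll roughly like this:

  #define TRACE_GFP_EM(a) TRACE_DEFINE_ENUM(___GFP_##a##_BIT);
  TRACE_GFP_FLAGS
  /* ...which, for the GENERAL list, expands to: */
  TRACE_DEFINE_ENUM(___GFP_DMA_BIT);
  TRACE_DEFINE_ENUM(___GFP_HIGHMEM_BIT);
  /* ...one TRACE_DEFINE_ENUM() per listed flag, so the format file ends
   * up carrying the numeric bit values instead of the enum names. */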
Cc: stable(a)vger.kernel.org
Reported-by: Michael Petlan <mpetlan(a)redhat.com>
Closes: https://lore.kernel.org/all/87be5f7c-1a0-dad-daa0-54e342efaea7@redhat.com/
Fixes: 772dd0342727c ("mm: enumerate all gfp flags")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
Changes since v1: https://lore.kernel.org/20250116132359.1f20cdec@gandalf.local.home
- Moved the updates to only include/trace/events/mmflags.h
- Removed the macro call in include/trace/events/kmem.h
- Use an internal TRACE_GFP_EM() macro in TRACE_GFP_FLAGS to allow
it to be expanded later for use with the __def_gfpflags_names()
macro
include/trace/events/mmflags.h | 63 ++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index bb8a59c6caa2..d36c857dd249 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -13,6 +13,69 @@
* Thus most bits set go first.
*/
+/* These define the values that are enums (the bits) */
+#define TRACE_GFP_FLAGS_GENERAL \
+ TRACE_GFP_EM(DMA) \
+ TRACE_GFP_EM(HIGHMEM) \
+ TRACE_GFP_EM(DMA32) \
+ TRACE_GFP_EM(MOVABLE) \
+ TRACE_GFP_EM(RECLAIMABLE) \
+ TRACE_GFP_EM(HIGH) \
+ TRACE_GFP_EM(IO) \
+ TRACE_GFP_EM(FS) \
+ TRACE_GFP_EM(ZERO) \
+ TRACE_GFP_EM(DIRECT_RECLAIM) \
+ TRACE_GFP_EM(KSWAPD_RECLAIM) \
+ TRACE_GFP_EM(WRITE) \
+ TRACE_GFP_EM(NOWARN) \
+ TRACE_GFP_EM(RETRY_MAYFAIL) \
+ TRACE_GFP_EM(NOFAIL) \
+ TRACE_GFP_EM(NORETRY) \
+ TRACE_GFP_EM(MEMALLOC) \
+ TRACE_GFP_EM(COMP) \
+ TRACE_GFP_EM(NOMEMALLOC) \
+ TRACE_GFP_EM(HARDWALL) \
+ TRACE_GFP_EM(THISNODE) \
+ TRACE_GFP_EM(ACCOUNT) \
+ TRACE_GFP_EM(ZEROTAGS)
+
+#ifdef CONFIG_KASAN_HW_TAGS
+# define TRACE_GFP_FLAGS_KASAN \
+ TRACE_GFP_EM(SKIP_ZERO) \
+ TRACE_GFP_EM(SKIP_KASAN)
+#else
+# define TRACE_GFP_FLAGS_KASAN
+#endif
+
+#ifdef CONFIG_LOCKDEP
+# define TRACE_GFP_FLAGS_LOCKDEP \
+ TRACE_GFP_EM(NOLOCKDEP)
+#else
+# define TRACE_GFP_FLAGS_LOCKDEP
+#endif
+
+#ifdef CONFIG_SLAB_OBJ_EXT
+# define TRACE_GFP_FLAGS_SLAB \
+ TRACE_GFP_EM(NO_OBJ_EXT)
+#else
+# define TRACE_GFP_FLAGS_SLAB
+#endif
+
+#define TRACE_GFP_FLAGS \
+ TRACE_GFP_FLAGS_GENERAL \
+ TRACE_GFP_FLAGS_KASAN \
+ TRACE_GFP_FLAGS_LOCKDEP \
+ TRACE_GFP_FLAGS_SLAB
+
+#undef TRACE_GFP_EM
+#define TRACE_GFP_EM(a) TRACE_DEFINE_ENUM(___GFP_##a##_BIT);
+
+TRACE_GFP_FLAGS
+
+/* Just in case these are ever used */
+TRACE_DEFINE_ENUM(___GFP_UNUSED_BIT);
+TRACE_DEFINE_ENUM(___GFP_LAST_BIT);
+
#define gfpflag_string(flag) {(__force unsigned long)flag, #flag}
#define __def_gfpflag_names \
--
2.45.2
On removal of the device or unloading of the kernel module a potential NULL
pointer dereference occurs.
The following sequence deletes the interface:
brcmf_detach()
brcmf_remove_interface()
brcmf_del_if()
Inside the brcmf_del_if() function the drvr->if2bss[ifidx] is updated to
BRCMF_BSSIDX_INVALID (-1) if the bsscfgidx matches.
After the brcmf_remove_interface() call, the brcmf_proto_detach() function is
called, providing the following sequence:
brcmf_detach()
brcmf_proto_detach()
brcmf_proto_msgbuf_detach()
brcmf_flowring_detach()
brcmf_msgbuf_delete_flowring()
brcmf_msgbuf_remove_flowring()
brcmf_flowring_delete()
brcmf_get_ifp()
brcmf_txfinalize()
Since brcmf_get_ifp() can and actually will return NULL in this case, the
call to brcmf_txfinalize() will result in a NULL pointer dereference inside
brcmf_txfinalize() when trying to update ifp->ndev->stats.tx_errors.
This will only happen if a flowring still has an skb.
Although the NULL pointer dereference has only been seen when trying to
update the tx statistic, all other uses of the ifp pointer have been
guarded as well with an early return if ifp is NULL.
Cc: stable(a)vger.kernel.org
Signed-off-by: Marcel Hamer <marcel.hamer(a)windriver.com>
Link: https://lore.kernel.org/all/b519e746-ddfd-421f-d897-7620d229e4b2@gmail.com/
---
v3:
- after review return early from brcmf_txfinalize() if ifp is NULL
- after review improve the subject line
v2: https://lore.kernel.org/linux-wireless/20250110134502.824722-1-marcel.hamer…
- guard all uses of the ifp pointer inside brcmf_txfinalize()
v1: https://lore.kernel.org/linux-wireless/20250109155201.3661298-1-marcel.hame…
- initial version
---
drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index c3a57e30c855..3d63010ae079 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -540,6 +540,11 @@ void brcmf_txfinalize(struct brcmf_if *ifp, struct sk_buff *txp, bool success)
struct ethhdr *eh;
u16 type;
+ if (!ifp) {
+ brcmu_pkt_buf_free_skb(txp);
+ return;
+ }
+
eh = (struct ethhdr *)(txp->data);
type = ntohs(eh->h_proto);
--
2.34.1
From: Gui-Dong Han <2045gemini(a)gmail.com>
[ Upstream commit dfd2bf436709b2bccb78c2dda550dde93700efa7 ]
In raid5_cache_count():
if (conf->max_nr_stripes < conf->min_nr_stripes)
return 0;
return conf->max_nr_stripes - conf->min_nr_stripes;
The current check is ineffective, as the values could change immediately
after being checked.
In raid5_set_cache_size():
...
conf->min_nr_stripes = size;
...
while (size > conf->max_nr_stripes)
conf->min_nr_stripes = conf->max_nr_stripes;
...
Due to intermediate value updates in raid5_set_cache_size(), concurrent
execution of raid5_cache_count() and raid5_set_cache_size() may lead to
inconsistent reads of conf->max_nr_stripes and conf->min_nr_stripes.
The current checks are ineffective as values could change immediately
after being checked, raising the risk of conf->min_nr_stripes exceeding
conf->max_nr_stripes and potentially causing an integer overflow.
This possible bug is found by an experimental static analysis tool
developed by our team. This tool analyzes the locking APIs to extract
function pairs that can be concurrently executed, and then analyzes the
instructions in the paired functions to identify possible concurrency bugs
including data races and atomicity violations. The above possible bug is
reported when our tool analyzes the source code of Linux 6.2.
To resolve this issue, it is suggested to introduce local variables
'min_stripes' and 'max_stripes' in raid5_cache_count() to ensure the
values remain stable throughout the check. Adding locks in
raid5_cache_count() fails to resolve atomicity violations, as
raid5_set_cache_size() may hold intermediate values of
conf->min_nr_stripes while unlocked. With this patch applied, our tool no
longer reports the bug, with the kernel configuration allyesconfig for
x86_64. Due to the lack of associated hardware, we cannot test the patch
in runtime testing, and just verify it according to the code logic.
Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <2045gemini(a)gmail.com>
Reviewed-by: Yu Kuai <yukuai3(a)huawei.com>
Signed-off-by: Song Liu <song(a)kernel.org>
Link: https://lore.kernel.org/r/20240112071017.16313-1-2045gemini@gmail.com
Signed-off-by: Song Liu <song(a)kernel.org>
Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org>
---
Backport to fix CVE-2024-23307
Link: https://www.cve.org/CVERecord/?id=CVE-2024-23307
---
drivers/md/raid5.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7cdc6f20f50436..b1f038d71a4015 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2349,7 +2349,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
atomic_inc(&conf->active_stripes);
raid5_release_stripe(sh);
- conf->max_nr_stripes++;
+ WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1);
return 1;
}
@@ -2646,7 +2646,7 @@ static int drop_one_stripe(struct r5conf *conf)
shrink_buffers(sh);
free_stripe(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes);
- conf->max_nr_stripes--;
+ WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1);
return 1;
}
@@ -6575,7 +6575,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
if (size <= 16 || size > 32768)
return -EINVAL;
- conf->min_nr_stripes = size;
+ WRITE_ONCE(conf->min_nr_stripes, size);
mutex_lock(&conf->cache_size_mutex);
while (size < conf->max_nr_stripes &&
drop_one_stripe(conf))
@@ -6587,7 +6587,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
if (!grow_one_stripe(conf, GFP_KERNEL)) {
- conf->min_nr_stripes = conf->max_nr_stripes;
+ WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes);
result = -ENOMEM;
break;
}
@@ -7151,11 +7151,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+ int max_stripes = READ_ONCE(conf->max_nr_stripes);
+ int min_stripes = READ_ONCE(conf->min_nr_stripes);
- if (conf->max_nr_stripes < conf->min_nr_stripes)
+ if (max_stripes < min_stripes)
/* unlikely, but not impossible */
return 0;
- return conf->max_nr_stripes - conf->min_nr_stripes;
+ return max_stripes - min_stripes;
}
static struct r5conf *setup_conf(struct mddev *mddev)
--
2.33.8
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
In the sh-sci driver, serial ports are mapped to the sci_ports[] array,
with earlycon mapped at index zero.
The uart_add_one_port() function eventually calls __device_attach(),
which, in turn, calls pm_request_idle(). The identified code path is as
follows:
uart_add_one_port() ->
serial_ctrl_register_port() ->
serial_core_register_port() ->
serial_core_port_device_add() ->
serial_base_port_add() ->
device_add() ->
bus_probe_device() ->
device_initial_probe() ->
__device_attach() ->
// ...
if (dev->p->dead) {
// ...
} else if (dev->driver) {
// ...
} else {
// ...
pm_request_idle(dev);
// ...
}
The earlycon device clocks are enabled by the bootloader. However, the
pm_request_idle() call in __device_attach() disables the SCI port clocks
while earlycon is still active.
The earlycon write function, serial_console_write(), calls
sci_poll_put_char() via serial_console_putchar(). If the SCI port clocks
are disabled, writing to earlycon may sometimes cause the SR.TDFE bit to
remain unset indefinitely, causing the while loop in sci_poll_put_char()
to never exit. On single-core SoCs, this can result in the system being
blocked during boot when this issue occurs.
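For context, a simplified sketch of the kind of busy-wait described above
(the names read_status(), TX_FIFO_EMPTY and write_char() are placeholders,
not the actual driver code):

  /* earlycon character output: wait for the TX FIFO empty flag */
  while (!(read_status(port) & TX_FIFO_EMPTY))
      cpu_relax();        /* never becomes true once the port clock is gated */
  write_char(port, c);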
To resolve this, increment the runtime PM usage counter for the earlycon
SCI device before registering the UART port.
Fixes: 0b0cced19ab1 ("serial: sh-sci: Add CONFIG_SERIAL_EARLYCON support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
Changes since RFT:
- used spaces instead of tabs in the call trace in the patch description
- moved the comment in the code block started by
if (sci_uart_earlycon && sci_ports[0].port.mapbase == sci_res->start)
- still kept the sci_ports[0].port.mapbase == sci_res->start check
as I haven't managed to find a better way
drivers/tty/serial/sh-sci.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index e64d59888ecd..b1ea48f38248 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -3436,6 +3436,22 @@ static int sci_probe_single(struct platform_device *dev,
}
if (sci_uart_earlycon && sci_ports[0].port.mapbase == sci_res->start) {
+ /*
+ * In case:
+ * - this is the earlycon port (mapped on index 0 in sci_ports[]) and
+ * - it now maps to an alias other than zero and
+ * - the earlycon is still alive (e.g., "earlycon keep_bootcon" is
+ * available in bootargs)
+ *
+ * we need to avoid disabling clocks and PM domains through the runtime
+ * PM APIs called in __device_attach(). For this, increment the runtime
+ * PM reference counter (the clocks and PM domains were already enabled
+ * by the bootloader). Otherwise the earlycon may access the HW when it
+ * has no clocks enabled leading to failures (infinite loop in
+ * sci_poll_put_char()).
+ */
+ pm_runtime_get_noresume(&dev->dev);
+
/*
* Skip cleanup the sci_port[0] in early_console_exit(), this
* port is the same as the earlycon one.
--
2.43.0
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
The early_console_setup() function initializes sci_ports[0].port with an
object of type struct uart_port obtained from the struct earlycon_device
passed as an argument to early_console_setup().
Later, during serial port probing, the serial port used as earlycon
(e.g., port A) might be remapped to a different position in the sci_ports[]
array, and a different serial port (e.g., port B) might be assigned to slot
0. For example:
sci_ports[0] = port B
sci_ports[X] = port A
In this scenario, the new port mapped at index zero (port B) retains the
data associated with the earlycon configuration. Consequently, after the
Linux boot process, any access to the serial port now mapped to
sci_ports[0] (port B) will block the original earlycon port (port A).
To address this, introduce an early_console_exit() function to clean up
sci_ports[0] when earlycon is exited.
To prevent the cleanup of sci_ports[0] while the serial device is still
being used by earlycon, introduce the struct sci_port::probing flag and
account for it in early_console_exit().
Fixes: 0b0cced19ab1 ("serial: sh-sci: Add CONFIG_SERIAL_EARLYCON support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
Changes since RFT:
- converted "probing" member of struct sci_port to a local variable
(named sci_uart_earlycon_dev_probing)
- used sci_uart_earlycon instead of sci_port::earlycon from RFT
version
- dropped the double "up" in the added comment
- changed the cleanup condition in early_console_exit() to
if (!sci_uart_earlycon_dev_probing)
- set sci_uart_earlycon = false in early_console_exit()
drivers/tty/serial/sh-sci.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index b85a9d425f7e..e64d59888ecd 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -166,6 +166,7 @@ static struct sci_port sci_ports[SCI_NPORTS];
static unsigned long sci_ports_in_use;
static struct uart_driver sci_uart_driver;
static bool sci_uart_earlycon;
+static bool sci_uart_earlycon_dev_probing;
static inline struct sci_port *
to_sci_port(struct uart_port *uart)
@@ -3386,7 +3387,8 @@ static struct plat_sci_port *sci_parse_dt(struct platform_device *pdev,
static int sci_probe_single(struct platform_device *dev,
unsigned int index,
struct plat_sci_port *p,
- struct sci_port *sciport)
+ struct sci_port *sciport,
+ struct resource *sci_res)
{
int ret;
@@ -3433,6 +3435,14 @@ static int sci_probe_single(struct platform_device *dev,
sciport->port.flags |= UPF_HARD_FLOW;
}
+ if (sci_uart_earlycon && sci_ports[0].port.mapbase == sci_res->start) {
+ /*
+ * Skip cleanup the sci_port[0] in early_console_exit(), this
+ * port is the same as the earlycon one.
+ */
+ sci_uart_earlycon_dev_probing = true;
+ }
+
return uart_add_one_port(&sci_uart_driver, &sciport->port);
}
@@ -3491,7 +3501,7 @@ static int sci_probe(struct platform_device *dev)
platform_set_drvdata(dev, sp);
- ret = sci_probe_single(dev, dev_id, p, sp);
+ ret = sci_probe_single(dev, dev_id, p, sp, res);
if (ret)
return ret;
@@ -3574,6 +3584,22 @@ sh_early_platform_init_buffer("earlyprintk", &sci_driver,
#ifdef CONFIG_SERIAL_SH_SCI_EARLYCON
static struct plat_sci_port port_cfg;
+static int early_console_exit(struct console *co)
+{
+ struct sci_port *sci_port = &sci_ports[0];
+
+ /*
+ * Clean the slot used by earlycon. A new SCI device might
+ * map to this slot.
+ */
+ if (!sci_uart_earlycon_dev_probing) {
+ memset(sci_port, 0, sizeof(*sci_port));
+ sci_uart_earlycon = false;
+ }
+
+ return 0;
+}
+
static int __init early_console_setup(struct earlycon_device *device,
int type)
{
@@ -3591,6 +3617,8 @@ static int __init early_console_setup(struct earlycon_device *device,
SCSCR_RE | SCSCR_TE | port_cfg.scscr);
device->con->write = serial_console_write;
+ device->con->exit = early_console_exit;
+
return 0;
}
static int __init sci_early_console_setup(struct earlycon_device *device,
--
2.43.0
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
In the sh-sci driver, sci_ports[0] is used by earlycon. If the earlycon is
still active when sci_probe() is called and the new serial port is supposed
to map to sci_ports[0], return -EBUSY to prevent breaking the earlycon.
This situation should only occur in debug scenarios, and users should be
aware of the potential conflict.
Fixes: 0b0cced19ab1 ("serial: sh-sci: Add CONFIG_SERIAL_EARLYCON support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
Changes since RFT:
- converted the earlycon member of struct sci_port to a local variable
- added sp == &sci_ports[0] check in sci_probe() to be sure the code
is checking against the sci_port used as earlycon
- changed res->start != sp->port.mapbase condition to
sp->port.mapbase != res->start to use the same pattern as used in
patch 4/5
drivers/tty/serial/sh-sci.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 51382e354a2d..b85a9d425f7e 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -165,6 +165,7 @@ struct sci_port {
static struct sci_port sci_ports[SCI_NPORTS];
static unsigned long sci_ports_in_use;
static struct uart_driver sci_uart_driver;
+static bool sci_uart_earlycon;
static inline struct sci_port *
to_sci_port(struct uart_port *uart)
@@ -3438,6 +3439,7 @@ static int sci_probe_single(struct platform_device *dev,
static int sci_probe(struct platform_device *dev)
{
struct plat_sci_port *p;
+ struct resource *res;
struct sci_port *sp;
unsigned int dev_id;
int ret;
@@ -3467,6 +3469,26 @@ static int sci_probe(struct platform_device *dev)
}
sp = &sci_ports[dev_id];
+
+ /*
+ * In case:
+ * - the probed port alias is zero (as the one used by earlycon), and
+ * - the earlycon is still active (e.g., "earlycon keep_bootcon" in
+ * bootargs)
+ *
+ * defer the probe of this serial. This is a debug scenario and the user
+ * must be aware of it.
+ *
+ * Except when the probed port is the same as the earlycon port.
+ */
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ if (sci_uart_earlycon && sp == &sci_ports[0] && sp->port.mapbase != res->start)
+ return dev_err_probe(&dev->dev, -EBUSY, "sci_port[0] is used by earlycon!\n");
+
platform_set_drvdata(dev, sp);
ret = sci_probe_single(dev, dev_id, p, sp);
@@ -3563,6 +3585,7 @@ static int __init early_console_setup(struct earlycon_device *device,
port_cfg.type = type;
sci_ports[0].cfg = &port_cfg;
sci_ports[0].params = sci_probe_regmap(&port_cfg);
+ sci_uart_earlycon = true;
port_cfg.scscr = sci_serial_in(&sci_ports[0].port, SCSCR);
sci_serial_out(&sci_ports[0].port, SCSCR,
SCSCR_RE | SCSCR_TE | port_cfg.scscr);
--
2.43.0
msi_db_mask is of type 'u64', yet standard 'int' arithmetic is
performed to compute its value.
While most of the ntb_hw drivers actually don't utilize the higher 32
bits of the doorbell mask now, this may be the case for Switchtec - see
switchtec_ntb_init_db().
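As a minimal illustration (hypothetical doorbell index, not taken from the
driver) of why a plain int shift is not enough for a u64 mask:

  u64 mask;
  unsigned int qp_count = 32;     /* hypothetical wide-doorbell case          */
  mask = 1 << qp_count;           /* int shift: undefined, upper bits lost    */
  mask = BIT_ULL(qp_count);       /* 1ULL << 32 == 0x100000000, as intended   */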
Found by Linux Verification Center (linuxtesting.org) with SVACE static
analysis tool.
Fixes: 2b0569b3b7e6 ("NTB: Add MSI interrupt support to ntb_transport")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
drivers/ntb/ntb_transport.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index a22ea4a4b202..4f775c3e218f 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -1353,7 +1353,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
qp_count = ilog2(qp_bitmap);
if (nt->use_msi) {
qp_count -= 1;
- nt->msi_db_mask = 1 << qp_count;
+ nt->msi_db_mask = BIT_ULL(qp_count);
ntb_db_clear_mask(ndev, nt->msi_db_mask);
}
--
2.39.5
From: Kang Yang <quic_kangyang(a)quicinc.com>
commit 95c38953cb1ecf40399a676a1f85dfe2b5780a9a upstream.
When running 'rmmod ath10k', ath10k_sdio_remove() will free sdio
workqueue by destroy_workqueue(). But if CONFIG_INIT_ON_FREE_DEFAULT_ON
is set to yes, kernel panic will happen:
Call trace:
destroy_workqueue+0x1c/0x258
ath10k_sdio_remove+0x84/0x94
sdio_bus_remove+0x50/0x16c
device_release_driver_internal+0x188/0x25c
device_driver_detach+0x20/0x2c
This is because during 'rmmod ath10k', ath10k_sdio_remove() will call
ath10k_core_destroy() before destroy_workqueue(). wiphy_dev_release()
will finally be called in ath10k_core_destroy(). This function will free
struct cfg80211_registered_device *rdev and all its members, including
wiphy, dev and the pointer of sdio workqueue. Then the pointer of sdio
workqueue will be set to NULL due to CONFIG_INIT_ON_FREE_DEFAULT_ON.
After device release, destroy_workqueue() will use the NULL pointer and the
kernel panic happens.
Call trace:
ath10k_sdio_remove
->ath10k_core_unregister
……
->ath10k_core_stop
->ath10k_hif_stop
->ath10k_sdio_irq_disable
->ath10k_hif_power_down
->del_timer_sync(&ar_sdio->sleep_timer)
->ath10k_core_destroy
->ath10k_mac_destroy
->ieee80211_free_hw
->wiphy_free
……
->wiphy_dev_release
->destroy_workqueue
destroy_workqueue() needs to be called before ath10k_core_destroy(): free
the workqueue buffer first, and then free the pointer to the workqueue in
ath10k_core_destroy(). This order matches the error path order in
ath10k_sdio_probe().
No work will be queued on the sdio workqueue between the time it is
destroyed and ath10k_core_destroy() is called. Based on the call stack above, the
reason is:
Only ath10k_sdio_sleep_timer_handler(), ath10k_sdio_hif_tx_sg() and
ath10k_sdio_irq_disable() will queue work on sdio workqueue.
Sleep timer will be deleted before ath10k_core_destroy() in
ath10k_hif_power_down().
ath10k_sdio_irq_disable() only be called in ath10k_hif_stop().
ath10k_core_unregister() will call ath10k_hif_power_down() to stop hif
bus, so ath10k_sdio_hif_tx_sg() won't be called anymore.
Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00189
Signed-off-by: Kang Yang <quic_kangyang(a)quicinc.com>
Tested-by: David Ruth <druth(a)chromium.org>
Reviewed-by: David Ruth <druth(a)chromium.org>
Link: https://patch.msgid.link/20241008022246.1010-1-quic_kangyang@quicinc.com
Signed-off-by: Jeff Johnson <quic_jjohnson(a)quicinc.com>
Signed-off-by: Bin Lan <lanbincn(a)qq.com>
---
drivers/net/wireless/ath/ath10k/sdio.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index 79e09c7a82b3..886070b2a722 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -3,6 +3,7 @@
* Copyright (c) 2004-2011 Atheros Communications Inc.
* Copyright (c) 2011-2012,2017 Qualcomm Atheros, Inc.
* Copyright (c) 2016-2017 Erik Stromdahl <erik.stromdahl(a)gmail.com>
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/module.h>
@@ -2647,9 +2648,9 @@ static void ath10k_sdio_remove(struct sdio_func *func)
netif_napi_del(&ar->napi);
- ath10k_core_destroy(ar);
-
destroy_workqueue(ar_sdio->workqueue);
+
+ ath10k_core_destroy(ar);
}
static const struct sdio_device_id ath10k_sdio_devices[] = {
--
2.43.0
From: Roberto Sassu <roberto.sassu(a)huawei.com>
Commit 11c60f23ed13 ("integrity: Remove unused macro
IMA_ACTION_RULE_FLAGS") removed the IMA_ACTION_RULE_FLAGS mask, due to it
not being used after commit 0d73a55208e9 ("ima: re-introduce own integrity
cache lock").
However, it seems that the latter commit mistakenly used the wrong mask
when moving the code from ima_inode_post_setattr() to
process_measurement(). There is no mention in the commit message about this
change and it looks quite important, since changing from IMA_ACTIONS_FLAGS
(later renamed to IMA_NONACTION_FLAGS) to IMA_ACTION_RULE_FLAGS was done by
commit 42a4c603198f0 ("ima: fix ima_inode_post_setattr").
Restore the original change, but with new mask 0xfb000000 since the
policy-specific flags changed meanwhile, and rename IMA_ACTION_RULE_FLAGS
to IMA_NONACTION_RULE_FLAGS, to be consistent with IMA_NONACTION_FLAGS.
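For reference, a hedged reading of the two masks (bit values taken from the
defines visible in the hunk below; the interpretation is mine, not the
author's wording):

  /* IMA_NONACTION_FLAGS       0xff000000  - clears every policy-rule bit  */
  /* IMA_NONACTION_RULE_FLAGS  0xfb000000  - the same, minus 0x04000000    */
  /* 0xff000000 & ~0xfb000000 == 0x04000000 == IMA_NEW_FILE                */
  /* i.e. process_measurement() now preserves IMA_NEW_FILE across the
   * appraisal-flag reset done after ima_inode_post_setattr().             */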
Cc: stable(a)vger.kernel.org # v4.16.x
Fixes: 11c60f23ed13 ("integrity: Remove unused macro IMA_ACTION_RULE_FLAGS")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
security/integrity/ima/ima.h | 1 +
security/integrity/ima/ima_main.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 22c3b87cfcac..32ffef2cc92a 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -141,6 +141,7 @@ struct ima_kexec_hdr {
/* IMA iint policy rule cache flags */
#define IMA_NONACTION_FLAGS 0xff000000
+#define IMA_NONACTION_RULE_FLAGS 0xfb000000
#define IMA_DIGSIG_REQUIRED 0x01000000
#define IMA_PERMIT_DIRECTIO 0x02000000
#define IMA_NEW_FILE 0x04000000
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 712c3a522e6c..83e467ad18d4 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -277,7 +277,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
/* reset appraisal flags if ima_inode_post_setattr was called */
iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED |
IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK |
- IMA_NONACTION_FLAGS);
+ IMA_NONACTION_RULE_FLAGS);
/*
* Re-evaulate the file if either the xattr has changed or the
--
2.47.0.118.gfd3785337b
This is a note to let you know that I've just added the patch titled
iio: dac: ad3552r-hs: clear reset status flag
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From 012b8276f08a67b9f2e2fd0f35363ae4a75e5267 Mon Sep 17 00:00:00 2001
From: Angelo Dureghello <adureghello(a)baylibre.com>
Date: Wed, 8 Jan 2025 18:29:16 +0100
Subject: iio: dac: ad3552r-hs: clear reset status flag
Clear the reset status flag, to keep the error status register
clean after reset (ad3552r manual, rev B, table 38).
The reset error flag was left set to 1, so when debugging registers the
"Error Status Register" appeared dirty (0x01). It is important
to clear this bit so that, if any reset event occurs during normal
operation, it is possible to detect it.
Fixes: 0b4d9fe58be8 ("iio: dac: ad3552r: add high-speed platform driver")
Signed-off-by: Angelo Dureghello <adureghello(a)baylibre.com>
Reviewed-by: David Lechner <dlechner(a)baylibre.com>
Link: https://patch.msgid.link/20250108-wip-bl-ad3552r-axi-v0-iio-testing-carlos-…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/dac/ad3552r-hs.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/iio/dac/ad3552r-hs.c b/drivers/iio/dac/ad3552r-hs.c
index 216c634f3eaf..8974df625670 100644
--- a/drivers/iio/dac/ad3552r-hs.c
+++ b/drivers/iio/dac/ad3552r-hs.c
@@ -329,6 +329,12 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st)
dev_info(st->dev, "Chip ID error. Expected 0x%x, Read 0x%x\n",
AD3552R_ID, id);
+ /* Clear reset error flag, see ad3552r manual, rev B table 38. */
+ ret = st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_ERR_STATUS,
+ AD3552R_MASK_RESET_STATUS, 1);
+ if (ret)
+ return ret;
+
ret = st->data->bus_reg_write(st->back,
AD3552R_REG_ADDR_SH_REFERENCE_CONFIG,
0, 1);
--
2.48.1
This is a note to let you know that I've just added the patch titled
iio: dac: ad3552r-common: fix ad3541/2r ranges
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From 1e758b613212b6964518a67939535910b5aee831 Mon Sep 17 00:00:00 2001
From: Angelo Dureghello <adureghello(a)baylibre.com>
Date: Wed, 8 Jan 2025 18:29:15 +0100
Subject: iio: dac: ad3552r-common: fix ad3541/2r ranges
Fix ad3541/2r voltage ranges to be as per ad3542r datasheet,
rev. C, table 38 (page 57).
The wrong ad354xr ranges were generating erroneous Vpp output.
In more details:
- fix wrong number of ranges, they are 5 ranges, not 6,
- remove non-existent 0-3V range,
- adjust order, since ad3552r_find_range() gets a wrong index,
producing a wrong Vpp as output.
Retested all the ranges on real hardware, EVALAD3542RFMCZ:
adi,output-range-microvolt (fdt):
<(000000) (2500000)>; ok (Rfbx1, switch 10)
<(000000) (5000000)>; ok (Rfbx1, switch 10)
<(000000) (10000000)>; ok (Rfbx1, switch 10)
<(-5000000) (5000000)>; ok (Rfbx2, switch +/- 5)
<(-2500000) (7500000)>; ok (Rfbx2, switch -2.5/7.5)
Fixes: 8f2b54824b28 ("drivers:iio:dac: Add AD3552R driver support")
Signed-off-by: Angelo Dureghello <adureghello(a)baylibre.com>
Reviewed-by: David Lechner <dlechner(a)baylibre.com>
Link: https://patch.msgid.link/20250108-wip-bl-ad3552r-axi-v0-iio-testing-carlos-…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/dac/ad3552r-common.c | 5 ++---
drivers/iio/dac/ad3552r.h | 8 +++-----
2 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/drivers/iio/dac/ad3552r-common.c b/drivers/iio/dac/ad3552r-common.c
index 0f495df2e5ce..03e0864f5084 100644
--- a/drivers/iio/dac/ad3552r-common.c
+++ b/drivers/iio/dac/ad3552r-common.c
@@ -22,11 +22,10 @@ EXPORT_SYMBOL_NS_GPL(ad3552r_ch_ranges, "IIO_AD3552R");
const s32 ad3542r_ch_ranges[AD3542R_MAX_RANGES][2] = {
[AD3542R_CH_OUTPUT_RANGE_0__2P5V] = { 0, 2500 },
- [AD3542R_CH_OUTPUT_RANGE_0__3V] = { 0, 3000 },
[AD3542R_CH_OUTPUT_RANGE_0__5V] = { 0, 5000 },
[AD3542R_CH_OUTPUT_RANGE_0__10V] = { 0, 10000 },
- [AD3542R_CH_OUTPUT_RANGE_NEG_2P5__7P5V] = { -2500, 7500 },
- [AD3542R_CH_OUTPUT_RANGE_NEG_5__5V] = { -5000, 5000 }
+ [AD3542R_CH_OUTPUT_RANGE_NEG_5__5V] = { -5000, 5000 },
+ [AD3542R_CH_OUTPUT_RANGE_NEG_2P5__7P5V] = { -2500, 7500 }
};
EXPORT_SYMBOL_NS_GPL(ad3542r_ch_ranges, "IIO_AD3552R");
diff --git a/drivers/iio/dac/ad3552r.h b/drivers/iio/dac/ad3552r.h
index fd5a3dfd1d1c..4b5581039ae9 100644
--- a/drivers/iio/dac/ad3552r.h
+++ b/drivers/iio/dac/ad3552r.h
@@ -131,7 +131,7 @@
#define AD3552R_CH1_ACTIVE BIT(1)
#define AD3552R_MAX_RANGES 5
-#define AD3542R_MAX_RANGES 6
+#define AD3542R_MAX_RANGES 5
#define AD3552R_QUAD_SPI 2
extern const s32 ad3552r_ch_ranges[AD3552R_MAX_RANGES][2];
@@ -189,16 +189,14 @@ enum ad3552r_ch_vref_select {
enum ad3542r_ch_output_range {
/* Range from 0 V to 2.5 V. Requires Rfb1x connection */
AD3542R_CH_OUTPUT_RANGE_0__2P5V,
- /* Range from 0 V to 3 V. Requires Rfb1x connection */
- AD3542R_CH_OUTPUT_RANGE_0__3V,
/* Range from 0 V to 5 V. Requires Rfb1x connection */
AD3542R_CH_OUTPUT_RANGE_0__5V,
/* Range from 0 V to 10 V. Requires Rfb2x connection */
AD3542R_CH_OUTPUT_RANGE_0__10V,
- /* Range from -2.5 V to 7.5 V. Requires Rfb2x connection */
- AD3542R_CH_OUTPUT_RANGE_NEG_2P5__7P5V,
/* Range from -5 V to 5 V. Requires Rfb2x connection */
AD3542R_CH_OUTPUT_RANGE_NEG_5__5V,
+ /* Range from -2.5 V to 7.5 V. Requires Rfb2x connection */
+ AD3542R_CH_OUTPUT_RANGE_NEG_2P5__7P5V,
};
enum ad3552r_ch_output_range {
--
2.48.1
This is a note to let you know that I've just added the patch titled
iio: chemical: bme680: Fix uninitialized variable in __bme680_read_raw()
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From 20eb1fae4145bc45717aa8a6d05fcd6a64ed856a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Wed, 8 Jan 2025 12:37:22 +0300
Subject: iio: chemical: bme680: Fix uninitialized variable in
__bme680_read_raw()
The bme680_read_temp() function takes a pointer to s16 but we're passing
an int pointer to it. This will not work on big endian systems and it
also means that the other 16 bits are uninitialized.
Pass an s16 type variable.
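As a small illustration (an assumption-laden sketch, not driver code) of why
the old cast breaks on big-endian machines:

  int chan_val;                  /* 4 bytes, only 2 of them ever written       */
  s16 *p = (s16 *)&chan_val;     /* same cast as the old code; on big endian
                                  * this aliases the most significant half     */
  *p = 0x1234;                   /* low 16 bits of chan_val stay uninitialized */
  /* reading chan_val as an int now yields 0x1234???? instead of 0x1234 */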
Fixes: f51171ce2236 ("iio: chemical: bme680: Add SCALE and RAW channels")
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Acked-by: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Link: https://patch.msgid.link/4addb68c-853a-49fc-8d40-739e78db5fa1@stanley.mount…
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/chemical/bme680_core.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c
index d12270409c8a..a2949daf9467 100644
--- a/drivers/iio/chemical/bme680_core.c
+++ b/drivers/iio/chemical/bme680_core.c
@@ -874,11 +874,11 @@ static int bme680_read_raw(struct iio_dev *indio_dev,
case IIO_CHAN_INFO_RAW:
switch (chan->type) {
case IIO_TEMP:
- ret = bme680_read_temp(data, (s16 *)&chan_val);
+ ret = bme680_read_temp(data, &temp_chan_val);
if (ret)
return ret;
- *val = chan_val;
+ *val = temp_chan_val;
return IIO_VAL_INT;
case IIO_PRESSURE:
ret = bme680_read_press(data, &chan_val);
--
2.48.1
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 2f8dea1692eef2b7ba6a256246ed82c365fdc686
Gitweb: https://git.kernel.org/tip/2f8dea1692eef2b7ba6a256246ed82c365fdc686
Author: Koichiro Den <koichiro.den(a)canonical.com>
AuthorDate: Fri, 20 Dec 2024 22:44:21 +09:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 16 Jan 2025 13:06:14 +01:00
hrtimers: Handle CPU state correctly on hotplug
Consider a scenario where a CPU transitions from CPUHP_ONLINE to halfway
through a CPU hotunplug down to CPUHP_HRTIMERS_PREPARE, and then back to
CPUHP_ONLINE:
Since hrtimers_prepare_cpu() does not run, cpu_base.hres_active remains set
to 1 throughout. However, during a CPU unplug operation, the tick and the
clockevents are shut down at CPUHP_AP_TICK_DYING. On return to the online
state, for instance CFS incorrectly assumes that the hrtick is already
active, and the chance of the clockevent device to transition to oneshot
mode is also lost forever for the CPU, unless it goes back to a lower state
than CPUHP_HRTIMERS_PREPARE once.
This round-trip reveals another issue; cpu_base.online is not set to 1
after the transition, which appears as a WARN_ON_ONCE in enqueue_hrtimer().
Aside of that, the bulk of the per CPU state is not reset either, which
means there are dangling pointers in the worst case.
Address this by adding a corresponding startup() callback, which resets the
stale per CPU state and sets the online flag.
[ tglx: Make the new callback unconditionally available, remove the online
modification in the prepare() callback and clear the remaining
state in the starting callback instead of the prepare callback ]
Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
Signed-off-by: Koichiro Den <koichiro.den(a)canonical.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20241220134421.3809834-1-koichiro.den@canonical…
---
include/linux/hrtimer.h | 1 +
kernel/cpu.c | 2 +-
kernel/time/hrtimer.c | 11 ++++++++++-
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7ef5f7e..f7bfdcf 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -386,6 +386,7 @@ extern void __init hrtimers_init(void);
extern void sysrq_timer_list_show(void);
int hrtimers_prepare_cpu(unsigned int cpu);
+int hrtimers_cpu_starting(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
int hrtimers_cpu_dying(unsigned int cpu);
#else
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b605334..0509a97 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2179,7 +2179,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
},
[CPUHP_AP_HRTIMERS_DYING] = {
.name = "hrtimers:dying",
- .startup.single = NULL,
+ .startup.single = hrtimers_cpu_starting,
.teardown.single = hrtimers_cpu_dying,
},
[CPUHP_AP_TICK_DYING] = {
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 80fe374..030426c 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2202,6 +2202,15 @@ int hrtimers_prepare_cpu(unsigned int cpu)
}
cpu_base->cpu = cpu;
+ hrtimer_cpu_base_init_expiry_lock(cpu_base);
+ return 0;
+}
+
+int hrtimers_cpu_starting(unsigned int cpu)
+{
+ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+
+ /* Clear out any left over state from a CPU down operation */
cpu_base->active_bases = 0;
cpu_base->hres_active = 0;
cpu_base->hang_detected = 0;
@@ -2210,7 +2219,6 @@ int hrtimers_prepare_cpu(unsigned int cpu)
cpu_base->expires_next = KTIME_MAX;
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->online = 1;
- hrtimer_cpu_base_init_expiry_lock(cpu_base);
return 0;
}
@@ -2286,5 +2294,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
void __init hrtimers_init(void)
{
hrtimers_prepare_cpu(smp_processor_id());
+ hrtimers_cpu_starting(smp_processor_id());
open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
}
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: b729cc1ec21a5899b7879ccfbe1786664928d597
Gitweb: https://git.kernel.org/tip/b729cc1ec21a5899b7879ccfbe1786664928d597
Author: Frederic Weisbecker <frederic(a)kernel.org>
AuthorDate: Wed, 15 Jan 2025 00:15:04 +01:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 16 Jan 2025 12:47:11 +01:00
timers/migration: Fix another race between hotplug and idle entry/exit
Commit 10a0e6f3d3db ("timers/migration: Move hierarchy setup into
cpuhotplug prepare callback") fixed a race between idle exit and CPU
hotplug up leading to a wrong "0" value migrator assigned to the top
level. However there is still a situation that remains unhandled:
[GRP0:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 0
/ \ \
0 1 2..7
idle idle idle
0) The system is fully idle.
[GRP0:0]
migrator = CPU 0
active = CPU 0
groupmask = 0
/ \ \
0 1 2..7
active idle idle
1) CPU 0 is activating. It has done the cmpxchg on the top's ->migr_state
but it hasn't yet returned to __walk_groups().
[GRP0:0]
migrator = CPU 0
active = CPU 0, CPU 1
groupmask = 0
/ \ \
0 1 2..7
active active idle
2) CPU 1 is activating. CPU 0 stays the migrator (still stuck in
__walk_groups(), delayed by #VMEXIT for example).
[GRP1:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 0
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 2 groupmask = 1
/ \ \
0 1 2..7 8
active active idle !online
3) CPU 8 is preparing to boot. CPUHP_TMIGR_PREPARE is being run by CPU 1
which has created the GRP0:1 and the new top GRP1:0 connected to GRP0:1
and GRP0:0. The groupmask of GRP0:0 is now 2. CPU 1 hasn't yet
propagated its activation up to GRP1:0.
[GRP1:0]
migrator = 0 (!!!)
active = NONE
groupmask = 0
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 2 groupmask = 1
/ \ \
0 1 2..7 8
active active idle !online
4) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
returning from tmigr_cpu_active(). The new top GRP1:0 is visible and
fetched but the freshly updated groupmask of GRP0:0 may not be visible
due to lack of ordering! As a result tmigr_active_up() is called to
GRP0:0 with a child's groupmask of "0". This buggy "0" groupmask then
becomes the migrator for GRP1:0 forever. As a result, timers on a fully
idle system get ignored.
One possible fix would be to define TMIGR_NONE as "0" so that such a
race would have no effect. And after all TMIGR_NONE doesn't need to be
anything else. However this would leave an uncomfortable state machine
where gears happen not to break by chance but are vulnerable to future
modifications.
Keep TMIGR_NONE as is instead and pre-initialize to "1" the groupmask of
any newly created top level. This groupmask is guaranteed to be visible
upon fetching the corresponding group for the 1st time:
_ By the upcoming CPU thanks to CPU hotplug synchronization between the
control CPU (BP) and the booting one (AP).
_ By the control CPU since the groupmask and parent pointers are
initialized locally.
_ By all CPUs belonging to the same group as the control CPU because
they must wait for it to ever become idle before needing to walk to
the new top. The cmpxchg() on ->migr_state then makes sure its
groupmask is visible.
With this pre-initialization, it is guaranteed that if a future top level
is linked to an old one, it is walked through with a valid groupmask.
Fixes: 10a0e6f3d3db ("timers/migration: Move hierarchy setup into cpuhotplug prepare callback")
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250114231507.21672-2-frederic@kernel.org
---
kernel/time/timer_migration.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 8d57f76..c8a8ea2 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -1487,6 +1487,21 @@ static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl,
s.seq = 0;
atomic_set(&group->migr_state, s.state);
+ /*
+ * If this is a new top-level, prepare its groupmask in advance.
+ * This avoids accidents where yet another new top-level is
+ * created in the future and made visible before the current groupmask.
+ */
+ if (list_empty(&tmigr_level_list[lvl])) {
+ group->groupmask = BIT(0);
+ /*
+ * The previous top level has prepared its groupmask already,
+ * simply account it as the first child.
+ */
+ if (lvl > 0)
+ group->num_children = 1;
+ }
+
timerqueue_init_head(&group->events);
timerqueue_init(&group->groupevt.nextevt);
group->groupevt.nextevt.expires = KTIME_MAX;
@@ -1550,8 +1565,20 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
raw_spin_lock_irq(&child->lock);
raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
+ if (activate) {
+ /*
+ * @child is the old top and @parent the new one. In this
+ * case groupmask is pre-initialized and @child already
+ * accounted, along with its new sibling corresponding to the
+ * CPU going up.
+ */
+ WARN_ON_ONCE(child->groupmask != BIT(0) || parent->num_children != 2);
+ } else {
+ /* Adding @child for the CPU going up to @parent. */
+ child->groupmask = BIT(parent->num_children++);
+ }
+
child->parent = parent;
- child->groupmask = BIT(parent->num_children++);
raw_spin_unlock(&parent->lock);
raw_spin_unlock_irq(&child->lock);
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: de3ced72a79280fefd680e5e101d8b9f03cfa1d7
Gitweb: https://git.kernel.org/tip/de3ced72a79280fefd680e5e101d8b9f03cfa1d7
Author: Frederic Weisbecker <frederic(a)kernel.org>
AuthorDate: Wed, 15 Jan 2025 00:15:05 +01:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 16 Jan 2025 12:47:11 +01:00
timers/migration: Enforce group initialization visibility to tree walkers
Commit 2522c84db513 ("timers/migration: Fix another race between hotplug
and idle entry/exit") fixed yet another race between idle exit and CPU
hotplug up leading to a wrong "0" value migrator assigned to the top
level. However there is yet another situation that remains unhandled:
[GRP0:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 1
/ \ \
0 1 2..7
idle idle idle
0) The system is fully idle.
[GRP0:0]
migrator = CPU 0
active = CPU 0
groupmask = 1
/ \ \
0 1 2..7
active idle idle
1) CPU 0 is activating. It has done the cmpxchg on the top's ->migr_state
but it hasn't yet returned to __walk_groups().
[GRP0:0]
migrator = CPU 0
active = CPU 0, CPU 1
groupmask = 1
/ \ \
0 1 2..7
active active idle
2) CPU 1 is activating. CPU 0 stays the migrator (still stuck in
__walk_groups(), delayed by #VMEXIT for example).
[GRP1:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 1
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 1 groupmask = 2
/ \ \
0 1 2..7 8
active active idle !online
3) CPU 8 is preparing to boot. CPUHP_TMIGR_PREPARE is being run by CPU 1
which has created the GRP0:1 and the new top GRP1:0 connected to GRP0:1
and GRP0:0. CPU 1 hasn't yet propagated its activation up to GRP1:0.
[GRP1:0]
migrator = GRP0:0
active = GRP0:0
groupmask = 1
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 1 groupmask = 2
/ \ \
0 1 2..7 8
active active idle !online
4) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
returning from tmigr_cpu_active(). The new top GRP1:0 is visible and
fetched and the pre-initialized groupmask of GRP0:0 is also visible.
As a result tmigr_active_up() is called to GRP1:0 with GRP0:0 as active
and migrator. CPU 0 is returning to __walk_groups() but suffers again
a #VMEXIT.
[GRP1:0]
migrator = GRP0:0
active = GRP0:0
groupmask = 1
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 1 groupmask = 2
/ \ \
0 1 2..7 8
active active idle !online
5) CPU 1 propagates its activation of GRP0:0 to GRP1:0. This has no
effect since CPU 0 did it already.
[GRP1:0]
migrator = GRP0:0
active = GRP0:0, GRP0:1
groupmask = 1
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = CPU 8
active = CPU 0, CPU1 active = CPU 8
groupmask = 1 groupmask = 2
/ \ \ \
0 1 2..7 8
active active idle active
6) CPU 1 links CPU 8 to its group. CPU 8 boots and goes through
CPUHP_AP_TMIGR_ONLINE which propagates activation.
[GRP2:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 1
/ \
[GRP1:0] [GRP1:1]
migrator = GRP0:0 migrator = TMIGR_NONE
active = GRP0:0, GRP0:1 active = NONE
groupmask = 1 groupmask = 2
/ \
[GRP0:0] [GRP0:1] [GRP0:2]
migrator = CPU 0 migrator = CPU 8 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = CPU 8 active = NONE
groupmask = 1 groupmask = 2 groupmask = 0
/ \ \ \
0 1 2..7 8 64
active active idle active !online
7) CPU 64 is booting. CPUHP_TMIGR_PREPARE is being run by CPU 1
which has created the GRP1:1, GRP0:2 and the new top GRP2:0 connected to
GRP1:1 and GRP1:0. CPU 1 hasn't yet propagated its activation up to
GRP2:0.
[GRP2:0]
migrator = 0 (!!!)
active = NONE
groupmask = 1
/ \
[GRP1:0] [GRP1:1]
migrator = GRP0:0 migrator = TMIGR_NONE
active = GRP0:0, GRP0:1 active = NONE
groupmask = 1 groupmask = 2
/ \
[GRP0:0] [GRP0:1] [GRP0:2]
migrator = CPU 0 migrator = CPU 8 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = CPU 8 active = NONE
groupmask = 1 groupmask = 2 groupmask = 0
/ \ \ \
0 1 2..7 8 64
active active idle active !online
8) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
returning from tmigr_cpu_active(). The new top GRP2:0 is visible and
fetched but the pre-initialized groupmask of GRP1:0 is not because no
ordering made its initialization visible. As a result tmigr_active_up()
may be called to GRP2:0 with a "0" child's groupmask, leaving the timers
ignored forever when the system is fully idle.
The race is highly theoretical and perhaps impossible in practice but
the groupmask of the child is not the only concern here as the whole
initialization of the child is not guaranteed to be visible to any
tree walker racing against hotplug (idle entry/exit, remote handling,
etc.). Although the current code layout seems to be resilient to such
hazards, this doesn't tell much about the future.
Fix this by enforcing an address dependency between the group initialization
and the write/read of the group's parent pointer. Fortunately this
doesn't involve adding any barrier to the fast paths.
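As a rough sketch of the publish/consume pattern relied upon here (the
names new_group, child, parent and mask are illustrative, not the actual
tmigr code):

  /* writer side: initialize the object completely, then publish it */
  new_group->groupmask = BIT(0);
  smp_store_release(&child->parent, new_group);

  /* reader side: the READ_ONCE() load plus the later dereference form an
   * address dependency, so the writes above are guaranteed to be visible */
  parent = READ_ONCE(child->parent);
  if (parent)
          mask = parent->groupmask;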
Fixes: 10a0e6f3d3db ("timers/migration: Move hierarchy setup into cpuhotplug prepare callback")
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250114231507.21672-3-frederic@kernel.org
---
kernel/time/timer_migration.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index c8a8ea2..371a62a 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -534,8 +534,13 @@ static void __walk_groups(up_f up, struct tmigr_walk *data,
break;
child = group;
- group = group->parent;
+ /*
+ * Pairs with the store release on group connection
+ * to make sure group initialization is visible.
+ */
+ group = READ_ONCE(group->parent);
data->childmask = child->groupmask;
+ WARN_ON_ONCE(!data->childmask);
} while (group);
}
@@ -1578,7 +1583,12 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
child->groupmask = BIT(parent->num_children++);
}
- child->parent = parent;
+ /*
+ * Make sure parent initialization is visible before publishing it to a
+ * racing CPU entering/exiting idle. This RELEASE barrier enforces an
+ * address dependency that pairs with the READ_ONCE() in __walk_groups().
+ */
+ smp_store_release(&child->parent, parent);
raw_spin_unlock(&parent->lock);
raw_spin_unlock_irq(&child->lock);
From: Jos Wang <joswang(a)lenovo.com>
As per the PD 2.0 spec ("8.3.3.2.3 PE_SRC_Send_Capabilities state"), after the
Source receives the GoodCRC Message from the Sink in response to the
Source_Capabilities message, it should start the SenderResponseTimer.
After the timer times out, the state machine transitions to the
HARD_RESET state.
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
---
drivers/usb/typec/tcpm/tcpm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 460dbde9fe22..57fae1118ac9 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4821,7 +4821,7 @@ static void run_state_machine(struct tcpm_port *port)
port->caps_count = 0;
port->pd_capable = true;
tcpm_set_state_cond(port, SRC_SEND_CAPABILITIES_TIMEOUT,
- PD_T_SEND_SOURCE_CAP);
+ PD_T_SENDER_RESPONSE);
}
break;
case SRC_SEND_CAPABILITIES_TIMEOUT:
--
2.17.1
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: de7cf2540a9b69e8e057ac25aa3c6b33fca81978
Gitweb: https://git.kernel.org/tip/de7cf2540a9b69e8e057ac25aa3c6b33fca81978
Author: Frederic Weisbecker <frederic(a)kernel.org>
AuthorDate: Wed, 15 Jan 2025 00:15:04 +01:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 16 Jan 2025 12:04:47 +01:00
timers/migration: Fix another race between hotplug and idle entry/exit
Commit 10a0e6f3d3db ("timers/migration: Move hierarchy setup into
cpuhotplug prepare callback") fixed a race between idle exit and CPU
hotplug up leading to a wrong "0" value migrator assigned to the top
level. However there is still a situation that remains unhandled:
[GRP0:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 0
/ \ \
0 1 2..7
idle idle idle
0) The system is fully idle.
[GRP0:0]
migrator = CPU 0
active = CPU 0
groupmask = 0
/ \ \
0 1 2..7
active idle idle
1) CPU 0 is activating. It has done the cmpxchg on the top's ->migr_state
but it hasn't yet returned to __walk_groups().
[GRP0:0]
migrator = CPU 0
active = CPU 0, CPU 1
groupmask = 0
/ \ \
0 1 2..7
active active idle
2) CPU 1 is activating. CPU 0 stays the migrator (still stuck in
__walk_groups(), delayed by #VMEXIT for example).
[GRP1:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 0
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 2 groupmask = 1
/ \ \
0 1 2..7 8
active active idle !online
3) CPU 8 is preparing to boot. CPUHP_TMIGR_PREPARE is being run by CPU 1
which has created the GRP0:1 and the new top GRP1:0 connected to GRP0:1
and GRP0:0. The groupmask of GRP0:0 is now 2. CPU 1 hasn't yet
propagated its activation up to GRP1:0.
[GRP1:0]
migrator = 0 (!!!)
active = NONE
groupmask = 0
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 2 groupmask = 1
/ \ \
0 1 2..7 8
active active idle !online
4) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
returning from tmigr_cpu_active(). The new top GRP1:0 is visible and
fetched but the freshly updated groupmask of GRP0:0 may not be visible
due to lack of ordering! As a result tmigr_active_up() is called to
GRP0:0 with a child's groupmask of "0". This buggy "0" groupmask then
becomes the migrator for GRP1:0 forever. As a result, timers on a fully
idle system get ignored.
One possible fix would be to define TMIGR_NONE as "0" so that such a
race would have no effect. And after all TMIGR_NONE doesn't need to be
anything else. However this would leave an uncomfortable state machine
where the gears happen not to break only by chance and remain vulnerable
to future modifications.
Keep TMIGR_NONE as is instead and pre-initialize to "1" the groupmask of
any newly created top level. This groupmask is guaranteed to be visible
upon fetching the corresponding group for the 1st time:
_ By the upcoming CPU thanks to CPU hotplug synchronization between the
control CPU (BP) and the booting one (AP).
_ By the control CPU since the groupmask and parent pointers are
initialized locally.
_ By all CPUs belonging to the same group as the control CPU because
they must wait for it to become idle before they ever need to walk up to
the new top. The cmpxchg() on ->migr_state then makes sure its
groupmask is visible.
With this pre-initialization, it is guaranteed that if a future top level
is linked to an old one, it is walked through with a valid groupmask.
Fixes: 10a0e6f3d3db ("timers/migration: Move hierarchy setup into cpuhotplug prepare callback")
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250114231507.21672-2-frederic@kernel.org
---
kernel/time/timer_migration.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 8d57f76..c8a8ea2 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -1487,6 +1487,21 @@ static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl,
s.seq = 0;
atomic_set(&group->migr_state, s.state);
+ /*
+ * If this is a new top-level, prepare its groupmask in advance.
+ * This avoids accidents where yet another new top-level is
+ * created in the future and made visible before the current groupmask.
+ */
+ if (list_empty(&tmigr_level_list[lvl])) {
+ group->groupmask = BIT(0);
+ /*
+ * The previous top level has prepared its groupmask already,
+ * simply account it as the first child.
+ */
+ if (lvl > 0)
+ group->num_children = 1;
+ }
+
timerqueue_init_head(&group->events);
timerqueue_init(&group->groupevt.nextevt);
group->groupevt.nextevt.expires = KTIME_MAX;
@@ -1550,8 +1565,20 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
raw_spin_lock_irq(&child->lock);
raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
+ if (activate) {
+ /*
+ * @child is the old top and @parent the new one. In this
+ * case groupmask is pre-initialized and @child already
+ * accounted, along with its new sibling corresponding to the
+ * CPU going up.
+ */
+ WARN_ON_ONCE(child->groupmask != BIT(0) || parent->num_children != 2);
+ } else {
+ /* Adding @child for the CPU going up to @parent. */
+ child->groupmask = BIT(parent->num_children++);
+ }
+
child->parent = parent;
- child->groupmask = BIT(parent->num_children++);
raw_spin_unlock(&parent->lock);
raw_spin_unlock_irq(&child->lock);
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 4a6e60740a304765285446aa260f88b7c5e51c1b
Gitweb: https://git.kernel.org/tip/4a6e60740a304765285446aa260f88b7c5e51c1b
Author: Frederic Weisbecker <frederic(a)kernel.org>
AuthorDate: Wed, 15 Jan 2025 00:15:05 +01:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 16 Jan 2025 12:04:47 +01:00
timers/migration: Enforce group initialization visibility to tree walkers
Commit 2522c84db513 ("timers/migration: Fix another race between hotplug
and idle entry/exit") fixed yet another race between idle exit and CPU
hotplug up leading to a wrong "0" value migrator assigned to the top
level. However there is yet another situation that remains unhandled:
[GRP0:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 1
/ \ \
0 1 2..7
idle idle idle
0) The system is fully idle.
[GRP0:0]
migrator = CPU 0
active = CPU 0
groupmask = 1
/ \ \
0 1 2..7
active idle idle
1) CPU 0 is activating. It has done the cmpxchg on the top's ->migr_state
but it hasn't yet returned to __walk_groups().
[GRP0:0]
migrator = CPU 0
active = CPU 0, CPU 1
groupmask = 1
/ \ \
0 1 2..7
active active idle
2) CPU 1 is activating. CPU 0 stays the migrator (still stuck in
__walk_groups(), delayed by #VMEXIT for example).
[GRP1:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 1
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 1 groupmask = 2
/ \ \
0 1 2..7 8
active active idle !online
3) CPU 8 is preparing to boot. CPUHP_TMIGR_PREPARE is being run by CPU 1
which has created the GRP0:1 and the new top GRP1:0 connected to GRP0:1
and GRP0:0. CPU 1 hasn't yet propagated its activation up to GRP1:0.
[GRP1:0]
migrator = GRP0:0
active = GRP0:0
groupmask = 1
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 1 groupmask = 2
/ \ \
0 1 2..7 8
active active idle !online
4) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
returning from tmigr_cpu_active(). The new top GRP1:0 is visible and
fetched and the pre-initialized groupmask of GRP0:0 is also visible.
As a result tmigr_active_up() is called to GRP1:0 with GRP0:0 as active
and migrator. CPU 0 is returning to __walk_groups() but suffers again
a #VMEXIT.
[GRP1:0]
migrator = GRP0:0
active = GRP0:0
groupmask = 1
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = NONE
groupmask = 1 groupmask = 2
/ \ \
0 1 2..7 8
active active idle !online
5) CPU 1 propagates its activation of GRP0:0 to GRP1:0. This has no
effect since CPU 0 did it already.
[GRP1:0]
migrator = GRP0:0
active = GRP0:0, GRP0:1
groupmask = 1
/ \
[GRP0:0] [GRP0:1]
migrator = CPU 0 migrator = CPU 8
active = CPU 0, CPU1 active = CPU 8
groupmask = 1 groupmask = 2
/ \ \ \
0 1 2..7 8
active active idle active
6) CPU 1 links CPU 8 to its group. CPU 8 boots and goes through
CPUHP_AP_TMIGR_ONLINE which propagates activation.
[GRP2:0]
migrator = TMIGR_NONE
active = NONE
groupmask = 1
/ \
[GRP1:0] [GRP1:1]
migrator = GRP0:0 migrator = TMIGR_NONE
active = GRP0:0, GRP0:1 active = NONE
groupmask = 1 groupmask = 2
/ \
[GRP0:0] [GRP0:1] [GRP0:2]
migrator = CPU 0 migrator = CPU 8 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = CPU 8 active = NONE
groupmask = 1 groupmask = 2 groupmask = 0
/ \ \ \
0 1 2..7 8 64
active active idle active !online
7) CPU 64 is booting. CPUHP_TMIGR_PREPARE is being run by CPU 1
which has created the GRP1:1, GRP0:2 and the new top GRP2:0 connected to
GRP1:1 and GRP1:0. CPU 1 hasn't yet propagated its activation up to
GRP2:0.
[GRP2:0]
migrator = 0 (!!!)
active = NONE
groupmask = 1
/ \
[GRP1:0] [GRP1:1]
migrator = GRP0:0 migrator = TMIGR_NONE
active = GRP0:0, GRP0:1 active = NONE
groupmask = 1 groupmask = 2
/ \
[GRP0:0] [GRP0:1] [GRP0:2]
migrator = CPU 0 migrator = CPU 8 migrator = TMIGR_NONE
active = CPU 0, CPU1 active = CPU 8 active = NONE
groupmask = 1 groupmask = 2 groupmask = 0
/ \ \ \
0 1 2..7 8 64
active active idle active !online
8) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
returning from tmigr_cpu_active(). The new top GRP2:0 is visible and
fetched but the pre-initialized groupmask of GRP1:0 is not because no
ordering made its initialization visible. As a result tmigr_active_up()
may be called to GRP2:0 with a "0" child's groupmask, leaving the timers
ignored forever when the system is fully idle.
The race is highly theoretical and perhaps impossible in practice but
the groupmask of the child is not the only concern here as the whole
initialization of the child is not guaranteed to be visible to any
tree walker racing against hotplug (idle entry/exit, remote handling,
etc.). Although the current code layout seems to be resilient to such
hazards, this doesn't tell much about the future.
Fix this by enforcing an address dependency between the group initialization
and the write/read of the group's parent pointer. Fortunately this
doesn't involve adding any barrier to the fast paths.
Fixes: 10a0e6f3d3db ("timers/migration: Move hierarchy setup into cpuhotplug prepare callback")
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250114231507.21672-3-frederic@kernel.org
---
kernel/time/timer_migration.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index c8a8ea2..371a62a 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -534,8 +534,13 @@ static void __walk_groups(up_f up, struct tmigr_walk *data,
break;
child = group;
- group = group->parent;
+ /*
+ * Pairs with the store release on group connection
+ * to make sure group initialization is visible.
+ */
+ group = READ_ONCE(group->parent);
data->childmask = child->groupmask;
+ WARN_ON_ONCE(!data->childmask);
} while (group);
}
@@ -1578,7 +1583,12 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
child->groupmask = BIT(parent->num_children++);
}
- child->parent = parent;
+ /*
+ * Make sure parent initialization is visible before publishing it to a
+ * racing CPU entering/exiting idle. This RELEASE barrier enforces an
+ * address dependency that pairs with the READ_ONCE() in __walk_groups().
+ */
+ smp_store_release(&child->parent, parent);
raw_spin_unlock(&parent->lock);
raw_spin_unlock_irq(&child->lock);
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: e00954d8b0a2d54075131fbad4a11b2b7355eee1
Gitweb: https://git.kernel.org/tip/e00954d8b0a2d54075131fbad4a11b2b7355eee1
Author: Koichiro Den <koichiro.den(a)canonical.com>
AuthorDate: Fri, 20 Dec 2024 22:44:21 +09:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 16 Jan 2025 10:39:25 +01:00
hrtimers: Handle CPU state correctly on hotplug
Consider a scenario where a CPU transitions from CPUHP_ONLINE to halfway
through a CPU hotunplug down to CPUHP_HRTIMERS_PREPARE, and then back to
CPUHP_ONLINE:
Since hrtimers_prepare_cpu() does not run, cpu_base.hres_active remains set
to 1 throughout. However, during a CPU unplug operation, the tick and the
clockevents are shut down at CPUHP_AP_TICK_DYING. On return to the online
state, for instance CFS incorrectly assumes that the hrtick is already
active, and the chance of the clockevent device to transition to oneshot
mode is also lost forever for the CPU, unless it goes back to a lower state
than CPUHP_HRTIMERS_PREPARE once.
This round-trip reveals another issue; cpu_base.online is not set to 1
after the transition, which appears as a WARN_ON_ONCE in enqueue_hrtimer().
Aside from that, the bulk of the per CPU state is not reset either, which
means there are dangling pointers in the worst case.
Address this by adding a corresponding startup() callback, which resets the
stale per CPU state and sets the online flag.
[ tglx: Make the new callback unconditionally available, remove the online
modification in the prepare() callback and clear the remaining
state in the starting callback instead of the prepare callback ]
Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
Signed-off-by: Koichiro Den <koichiro.den(a)canonical.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20241220134421.3809834-1-koichiro.den@canonical…
---
include/linux/hrtimer.h | 1 +
kernel/cpu.c | 2 +-
kernel/time/hrtimer.c | 10 +++++++++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7ef5f7e..f7bfdcf 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -386,6 +386,7 @@ extern void __init hrtimers_init(void);
extern void sysrq_timer_list_show(void);
int hrtimers_prepare_cpu(unsigned int cpu);
+int hrtimers_cpu_starting(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
int hrtimers_cpu_dying(unsigned int cpu);
#else
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b605334..0509a97 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2179,7 +2179,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
},
[CPUHP_AP_HRTIMERS_DYING] = {
.name = "hrtimers:dying",
- .startup.single = NULL,
+ .startup.single = hrtimers_cpu_starting,
.teardown.single = hrtimers_cpu_dying,
},
[CPUHP_AP_TICK_DYING] = {
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 80fe374..edb04af 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2202,6 +2202,15 @@ int hrtimers_prepare_cpu(unsigned int cpu)
}
cpu_base->cpu = cpu;
+ hrtimer_cpu_base_init_expiry_lock(cpu_base);
+ return 0;
+}
+
+int hrtimers_cpu_starting(unsigned int cpu)
+{
+ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+
+ /* Clear out any left over state from a CPU down operation */
cpu_base->active_bases = 0;
cpu_base->hres_active = 0;
cpu_base->hang_detected = 0;
@@ -2210,7 +2219,6 @@ int hrtimers_prepare_cpu(unsigned int cpu)
cpu_base->expires_next = KTIME_MAX;
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->online = 1;
- hrtimer_cpu_base_init_expiry_lock(cpu_base);
return 0;
}
From: Kan Liang <kan.liang(a)linux.intel.com>
The WARN_ON(this_cpu_read(cpu_hw_events.enabled)) in the
intel_pmu_save_and_restart_reload() is triggered when doing a sampling
read of topdown events.
In an NMI handler, cpu_hw_events.enabled is set and used to indicate
the status of the core PMU. The generic pmu->pmu_disable_count, updated in
the perf_pmu_disable/enable pair, is not touched.
However, the perf_pmu_disable/enable pair is invoked during a sampling
read in an NMI handler, so cpuc->enabled is mistakenly set by
perf_pmu_enable().
Avoid perf_pmu_disable/enable() if the core PMU is already disabled.
Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics")
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
A new patch to fix the issue found on a legacy platform.
(Not related to the counters snapshotting feature.)
But since it also touches the sampling read code, the patches that enable
the counters snapshotting feature must be applied on top of this patch.
The patch itself can be applied separately.
arch/x86/events/intel/core.c | 7 +++++--
arch/x86/events/intel/ds.c | 9 ++++++---
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2a2824e9c50d..bce423ad3fad 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2778,15 +2778,18 @@ DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
static void intel_pmu_read_topdown_event(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int pmu_enabled = cpuc->enabled;
/* Only need to call update_topdown_event() once for group read. */
if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
!is_slots_event(event))
return;
- perf_pmu_disable(event->pmu);
+ if (pmu_enabled)
+ perf_pmu_disable(event->pmu);
static_call(intel_pmu_update_topdown_event)(event);
- perf_pmu_enable(event->pmu);
+ if (pmu_enabled)
+ perf_pmu_enable(event->pmu);
}
static void intel_pmu_read_event(struct perf_event *event)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ba74e1198328..81b6ec8e824e 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2096,11 +2096,14 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
void intel_pmu_auto_reload_read(struct perf_event *event)
{
- WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
+ int pmu_enabled = this_cpu_read(cpu_hw_events.enabled);
- perf_pmu_disable(event->pmu);
+ WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
+ if (pmu_enabled)
+ perf_pmu_disable(event->pmu);
intel_pmu_drain_pebs_buffer();
- perf_pmu_enable(event->pmu);
+ if (pmu_enabled)
+ perf_pmu_enable(event->pmu);
}
/*
--
2.38.1
The comparison function cmpworker() violates the C standard's
requirements for qsort() comparison functions, which mandate symmetry
and transitivity:
Symmetry: If x < y, then y > x.
Transitivity: If x < y and y < z, then x < z.
In its current implementation, cmpworker() incorrectly returns 0 when
w1->tid < w2->tid, which breaks both symmetry and transitivity. This
violation causes undefined behavior, potentially leading to issues such
as memory corruption in glibc [1].
Fix the issue by returning -1 when w1->tid < w2->tid, ensuring
compliance with the C standard and preventing undefined behavior.
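For reference, a fully three-way comparator (a sketch, not the hunk below)
that also handles equal tids would look like:

  static int cmpworker(const void *p1, const void *p2)
  {
          const struct worker *w1 = p1;
          const struct worker *w2 = p2;

          if (w1->tid < w2->tid)
                  return -1;
          if (w1->tid > w2->tid)
                  return 1;
          return 0;
  }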
Link: https://www.qualys.com/2024/01/30/qsort.txt [1]
Fixes: 121dd9ea0116 ("perf bench: Add epoll parallel epoll_wait benchmark")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kuan-Wei Chiu <visitorckw(a)gmail.com>
---
Changes in v2:
- Rewrite commit message
tools/perf/bench/epoll-wait.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index ef5c4257844d..4868d610e9bf 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -420,7 +420,7 @@ static int cmpworker(const void *p1, const void *p2)
struct worker *w1 = (struct worker *) p1;
struct worker *w2 = (struct worker *) p2;
- return w1->tid > w2->tid;
+ return w1->tid > w2->tid ? 1 : -1;
}
int bench_epoll_wait(int argc, const char **argv)
--
2.34.1
From: Srinivasan Shanmugam <srinivasan.shanmugam(a)amd.com>
[ Upstream commit ac2140449184a26eac99585b7f69814bd3ba8f2d ]
This commit addresses a potential null pointer dereference issue in the
`dcn32_acquire_idle_pipe_for_head_pipe_in_layer` function. The issue
could occur when `head_pipe` is null.
The fix adds a check to ensure `head_pipe` is not null before asserting
it. If `head_pipe` is null, the function returns NULL to prevent a
potential null pointer dereference.
Reported by smatch:
drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn32/dcn32_resource.c:2690 dcn32_acquire_idle_pipe_for_head_pipe_in_layer() error: we previously assumed 'head_pipe' could be null (see line 2681)
Cc: Tom Chung <chiahsuan.chung(a)amd.com>
Cc: Rodrigo Siqueira <Rodrigo.Siqueira(a)amd.com>
Cc: Roman Li <roman.li(a)amd.com>
Cc: Alex Hung <alex.hung(a)amd.com>
Cc: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Cc: Hamza Mahfooz <hamza.mahfooz(a)amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam(a)amd.com>
Reviewed-by: Tom Chung <chiahsuan.chung(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Frank.C <tom.and.jerry.official.mail(a)gmail.com>
---
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index ef47fb2f6905..2b5bd3aa3229 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -2558,8 +2558,10 @@ struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx;
int head_index;
- if (!head_pipe)
+ if (!head_pipe) {
ASSERT(0);
+ return NULL;
+ }
/*
* Modified from dcn20_acquire_idle_pipe_for_layer
--
2.34.1
From: Kuniyuki Iwashima <kuniyu(a)amazon.com>
commit e8c526f2bdf1845bedaf6a478816a3d06fa78b8f upstream.
Martin KaFai Lau reported use-after-free [0] in reqsk_timer_handler().
"""
We are seeing a use-after-free from a bpf prog attached to
trace_tcp_retransmit_synack. The program passes the req->sk to the
bpf_sk_storage_get_tracing kernel helper which does check for null
before using it.
"""
The commit 83fccfc3940c ("inet: fix potential deadlock in
reqsk_queue_unlink()") added timer_pending() in reqsk_queue_unlink() not
to call del_timer_sync() from reqsk_timer_handler(), but it introduced a
small race window.
Before the timer is called, expire_timers() calls detach_timer(timer, true)
to clear timer->entry.pprev and marks it as not pending.
If reqsk_queue_unlink() checks timer_pending() just after expire_timers()
calls detach_timer(), TCP will miss del_timer_sync(); the reqsk timer will
continue running and send multiple SYN+ACKs until it expires.
The reported UAF could happen if req->sk is close()d earlier than the timer
expiration, which is 63s by default.
The scenario would be
1. inet_csk_complete_hashdance() calls inet_csk_reqsk_queue_drop(),
but del_timer_sync() is missed
2. reqsk timer is executed and scheduled again
3. req->sk is accept()ed and reqsk_put() decrements rsk_refcnt, but
the reqsk timer still holds another reference, and inet_csk_accept() does
not clear req->sk for non-TFO sockets
4. sk is close()d
5. reqsk timer is executed again, and BPF touches req->sk
Let's not use timer_pending() by passing the caller context to
__inet_csk_reqsk_queue_drop().
Note that reqsk timer is pinned, so the issue does not happen in most
use cases. [1]
[0]
BUG: KFENCE: use-after-free read in bpf_sk_storage_get_tracing+0x2e/0x1b0
Use-after-free read at 0x00000000a891fb3a (in kfence-#1):
bpf_sk_storage_get_tracing+0x2e/0x1b0
bpf_prog_5ea3e95db6da0438_tcp_retransmit_synack+0x1d20/0x1dda
bpf_trace_run2+0x4c/0xc0
tcp_rtx_synack+0xf9/0x100
reqsk_timer_handler+0xda/0x3d0
run_timer_softirq+0x292/0x8a0
irq_exit_rcu+0xf5/0x320
sysvec_apic_timer_interrupt+0x6d/0x80
asm_sysvec_apic_timer_interrupt+0x16/0x20
intel_idle_irq+0x5a/0xa0
cpuidle_enter_state+0x94/0x273
cpu_startup_entry+0x15e/0x260
start_secondary+0x8a/0x90
secondary_startup_64_no_verify+0xfa/0xfb
kfence-#1: 0x00000000a72cc7b6-0x00000000d97616d9, size=2376, cache=TCPv6
allocated by task 0 on cpu 9 at 260507.901592s:
sk_prot_alloc+0x35/0x140
sk_clone_lock+0x1f/0x3f0
inet_csk_clone_lock+0x15/0x160
tcp_create_openreq_child+0x1f/0x410
tcp_v6_syn_recv_sock+0x1da/0x700
tcp_check_req+0x1fb/0x510
tcp_v6_rcv+0x98b/0x1420
ipv6_list_rcv+0x2258/0x26e0
napi_complete_done+0x5b1/0x2990
mlx5e_napi_poll+0x2ae/0x8d0
net_rx_action+0x13e/0x590
irq_exit_rcu+0xf5/0x320
common_interrupt+0x80/0x90
asm_common_interrupt+0x22/0x40
cpuidle_enter_state+0xfb/0x273
cpu_startup_entry+0x15e/0x260
start_secondary+0x8a/0x90
secondary_startup_64_no_verify+0xfa/0xfb
freed by task 0 on cpu 9 at 260507.927527s:
rcu_core_si+0x4ff/0xf10
irq_exit_rcu+0xf5/0x320
sysvec_apic_timer_interrupt+0x6d/0x80
asm_sysvec_apic_timer_interrupt+0x16/0x20
cpuidle_enter_state+0xfb/0x273
cpu_startup_entry+0x15e/0x260
start_secondary+0x8a/0x90
secondary_startup_64_no_verify+0xfa/0xfb
Fixes: 83fccfc3940c ("inet: fix potential deadlock in reqsk_queue_unlink()")
Reported-by: Martin KaFai Lau <martin.lau(a)kernel.org>
Closes: https://lore.kernel.org/netdev/eb6684d0-ffd9-4bdc-9196-33f690c25824@linux.d…
Link: https://lore.kernel.org/netdev/b55e2ca0-42f2-4b7c-b445-6ffd87ca74a0@linux.d… [1]
Signed-off-by: Kuniyuki Iwashima <kuniyu(a)amazon.com>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Reviewed-by: Martin KaFai Lau <martin.lau(a)kernel.org>
Link: https://patch.msgid.link/20241014223312.4254-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[d.privalov: adapt calling __inet_csk_reqsk_queue_drop()]
Signed-off-by: Dmitriy Privalov <d.privalov(a)omp.ru>
---
Backport fix for CVE-2024-50154
Link: https://nvd.nist.gov/vuln/detail/CVE-2024-50154
---
v2: Fix patch applying
net/ipv4/inet_connection_sock.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1dfa561e8f981..d912894ad4adc 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -700,21 +700,31 @@ static bool reqsk_queue_unlink(struct request_sock *req)
found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
spin_unlock(lock);
}
- if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
- reqsk_put(req);
+
return found;
}
-bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+static bool __inet_csk_reqsk_queue_drop(struct sock *sk,
+ struct request_sock *req,
+ bool from_timer)
{
bool unlinked = reqsk_queue_unlink(req);
+ if (!from_timer && del_timer_sync(&req->rsk_timer))
+ reqsk_put(req);
+
if (unlinked) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
reqsk_put(req);
}
+
return unlinked;
}
+
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+{
+ return __inet_csk_reqsk_queue_drop(sk, req, false);
+}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
@@ -781,7 +791,8 @@ static void reqsk_timer_handler(struct timer_list *t)
return;
}
drop:
- inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
+ __inet_csk_reqsk_queue_drop(sk_listener, req, true);
+ reqsk_put(req);
}
static void reqsk_queue_hash_req(struct request_sock *req,
--
2.34.1
Once device_register() has failed, we should call put_device() to
decrement the reference count for cleanup. Otherwise it could cause a
memory leak.
As comment of device_register() says, 'NOTE: _Never_ directly free
@dev after calling this function, even if it returned an error! Always
use put_device() to give up the reference initialized in this function
instead.'
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v5:
- modified the bug description as suggestions;
Changes in v4:
- deleted the redundant initialization;
Changes in v3:
- modified the patch as suggestions;
Changes in v2:
- modified the patch as suggestions.
---
arch/arm/common/locomo.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index cb6ef449b987..45106066a17f 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -223,10 +223,8 @@ locomo_init_one_child(struct locomo *lchip, struct locomo_dev_info *info)
int ret;
dev = kzalloc(sizeof(struct locomo_dev), GFP_KERNEL);
- if (!dev) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!dev)
+ return -ENOMEM;
/*
* If the parent device has a DMA mask associated with it,
@@ -254,10 +252,9 @@ locomo_init_one_child(struct locomo *lchip, struct locomo_dev_info *info)
NO_IRQ : lchip->irq_base + info->irq[0];
ret = device_register(&dev->dev);
- if (ret) {
- out:
- kfree(dev);
- }
+ if (ret)
+ put_device(&dev->dev);
+
return ret;
}
--
2.25.1
From: BH Hsieh <bhsieh(a)nvidia.com>
It has been observed that VBUS_OVERRIDE & ID_OVERRIDE might be programmed
with unexpected values prior to the XUSB PADCTL driver taking over; this
can also occur in virtualization scenarios.
For example, UEFI firmware programs ID_OVERRIDE=GROUNDED to set
a type-c port to host mode and hands the value over to the kernel.
If the type-c port is connected to a USB host, the errors below can be
observed right after the USB host mode driver gets probed. The errors
persist until the usb role class driver detects the type-c port
as device mode and notifies the usb device mode driver to set both
ID_OVERRIDE and VBUS_OVERRIDE to the correct values through the XUSB
PADCTL driver.
[ 173.765814] usb usb3-port2: Cannot enable. Maybe the USB cable is bad?
[ 173.765837] usb usb3-port2: config error
Taking virtualization into account, asserting an XUSB PADCTL
reset would break XUSB functions used by other guest OSes,
hence only reset the VBUS & ID OVERRIDE of the port in
utmi_phy_init.
Fixes: bbf711682cd5 ("phy: tegra: xusb: Add Tegra186 support")
Cc: stable(a)vger.kernel.org
Signed-off-by: BH Hsieh <bhsieh(a)nvidia.com>
Signed-off-by: Henry Lin <henryl(a)nvidia.com>
---
drivers/phy/tegra/xusb-tegra186.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c
index 0f60d5d1c167..34c6d424a3e8 100644
--- a/drivers/phy/tegra/xusb-tegra186.c
+++ b/drivers/phy/tegra/xusb-tegra186.c
@@ -928,6 +928,7 @@ static int tegra186_utmi_phy_init(struct phy *phy)
unsigned int index = lane->index;
struct device *dev = padctl->dev;
int err;
+ u32 reg;
port = tegra_xusb_find_usb2_port(padctl, index);
if (!port) {
@@ -935,6 +936,13 @@ static int tegra186_utmi_phy_init(struct phy *phy)
return -ENODEV;
}
+ /* reset VBUS&ID OVERRIDE */
+ reg = padctl_readl(padctl, USB2_VBUS_ID);
+ reg &= ~VBUS_OVERRIDE;
+ reg &= ~ID_OVERRIDE(~0);
+ reg |= ID_OVERRIDE_FLOATING;
+ padctl_writel(padctl, reg, USB2_VBUS_ID);
+
if (port->supply && port->mode == USB_DR_MODE_HOST) {
err = regulator_enable(port->supply);
if (err) {
--
2.25.1
From: Ming Yen Hsieh <mingyen.hsieh(a)mediatek.com>
Due to the increase in the number of power tables for 6GHz in CLC,
the variable nr_country is no longer sufficient to represent the
total count. Therefore, switch to calculating the length of the
clc buffer to obtain the correct power table.
Cc: stable(a)vger.kernel.org
Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips")
Signed-off-by: Ming Yen Hsieh <mingyen.hsieh(a)mediatek.com>
---
drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
index 15815ad84713..6b83a1b17140 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
@@ -3158,13 +3158,14 @@ __mt7925_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2,
.acpi_conf = mt792x_acpi_get_flags(&dev->phy),
};
int ret, valid_cnt = 0;
- u8 i, *pos;
+ u8 *pos, *last_pos;
if (!clc)
return 0;
pos = clc->data + sizeof(*seg) * clc->nr_seg;
- for (i = 0; i < clc->nr_country; i++) {
+ last_pos = clc->data + le32_to_cpu(*(__le32 *)(clc->data + 4));
+ while (pos < last_pos) {
struct mt7925_clc_rule *rule = (struct mt7925_clc_rule *)pos;
pos += sizeof(*rule);
--
2.45.2
Commit 32dd4f9c506b ("xfs: remove a superflous hash lookup when inserting
new buffers") converted xfs_buf_find_insert to use
rhashtable_lookup_get_insert_fast and thus an operation that returns the
existing buffer when an insert would duplicate the hash key. But this
code path misses the check for a buffer with a reference count of zero,
which could lead to reusing an about to be freed buffer. Fix this by
using the same atomic_inc_not_zero pattern as xfs_buf_insert.
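The general shape of the safe lookup pattern (lookup_buffer(), cache and
key are hypothetical names, not the actual xfs code) is:

  bp = lookup_buffer(cache, key);
  if (bp && atomic_inc_not_zero(&bp->b_hold)) {
          /* got a live reference: bp cannot be freed under us */
  } else {
          /* b_hold already dropped to zero: bp is being torn down, so
           * treat the lookup as a miss and set up a fresh buffer */
  }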
Fixes: 32dd4f9c506b ("xfs: remove a superflous hash lookup when inserting new buffers")
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Dave Chinner <dchinner(a)redhat.com>
Reviewed-by: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: <stable(a)vger.kernel.org> # v6.0
---
fs/xfs/xfs_buf.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 6f313fbf7669..f80e39fde53b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -664,9 +664,8 @@ xfs_buf_find_insert(
spin_unlock(&bch->bc_lock);
goto out_free_buf;
}
- if (bp) {
+ if (bp && atomic_inc_not_zero(&bp->b_hold)) {
/* found an existing buffer */
- atomic_inc(&bp->b_hold);
spin_unlock(&bch->bc_lock);
error = xfs_buf_find_lock(bp, flags);
if (error)
--
2.45.2
The quilt patch titled
Subject: memcg: fix soft lockup in the OOM process
has been removed from the -mm tree. Its filename was
memcg-fix-soft-lockup-in-the-oom-process.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Chen Ridong <chenridong(a)huawei.com>
Subject: memcg: fix soft lockup in the OOM process
Date: Tue, 24 Dec 2024 02:52:38 +0000
A soft lockup issue was found in a product where about 56,000 tasks were
in the OOM cgroup; the kernel was traversing them when the soft lockup was
triggered.
watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [VM Thread:1503066]
CPU: 2 PID: 1503066 Comm: VM Thread Kdump: loaded Tainted: G
Hardware name: Huawei Cloud OpenStack Nova, BIOS
RIP: 0010:console_unlock+0x343/0x540
RSP: 0000:ffffb751447db9a0 EFLAGS: 00000247 ORIG_RAX: ffffffffffffff13
RAX: 0000000000000001 RBX: 0000000000000000 RCX: 00000000ffffffff
RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000247
RBP: ffffffffafc71f90 R08: 0000000000000000 R09: 0000000000000040
R10: 0000000000000080 R11: 0000000000000000 R12: ffffffffafc74bd0
R13: ffffffffaf60a220 R14: 0000000000000247 R15: 0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f2fe6ad91f0 CR3: 00000004b2076003 CR4: 0000000000360ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
vprintk_emit+0x193/0x280
printk+0x52/0x6e
dump_task+0x114/0x130
mem_cgroup_scan_tasks+0x76/0x100
dump_header+0x1fe/0x210
oom_kill_process+0xd1/0x100
out_of_memory+0x125/0x570
mem_cgroup_out_of_memory+0xb5/0xd0
try_charge+0x720/0x770
mem_cgroup_try_charge+0x86/0x180
mem_cgroup_try_charge_delay+0x1c/0x40
do_anonymous_page+0xb5/0x390
handle_mm_fault+0xc4/0x1f0
This is because thousands of processes are in the OOM cgroup and it takes a
long time to traverse all of them. As a result, this leads to a soft lockup
in the OOM process.
To fix this issue, call 'cond_resched' in the 'mem_cgroup_scan_tasks'
function every 1000 iterations. For global OOM, call
'touch_softlockup_watchdog' every 1000 iterations to avoid this issue.
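The pattern boils down to something like the sketch below (a condensed
illustration, not the exact hunks; in contexts that cannot sleep, such as
under rcu_read_lock(), touch_softlockup_watchdog() is used instead of
cond_resched()):

  int i = 0;

  while (!ret && (task = css_task_iter_next(&it))) {
          /* avoid a soft lockup warning on huge task lists */
          if ((++i & 1023) == 0)
                  cond_resched();
          ret = fn(task, arg);
  }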
Link: https://lkml.kernel.org/r/20241224025238.3768787-1-chenridong@huaweicloud.c…
Fixes: 9cbb78bb3143 ("mm, memcg: introduce own oom handler to iterate only over its own threads")
Signed-off-by: Chen Ridong <chenridong(a)huawei.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: Michal Koutný <mkoutny(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 7 ++++++-
mm/oom_kill.c | 8 +++++++-
2 files changed, 13 insertions(+), 2 deletions(-)
--- a/mm/memcontrol.c~memcg-fix-soft-lockup-in-the-oom-process
+++ a/mm/memcontrol.c
@@ -1161,6 +1161,7 @@ void mem_cgroup_scan_tasks(struct mem_cg
{
struct mem_cgroup *iter;
int ret = 0;
+ int i = 0;
BUG_ON(mem_cgroup_is_root(memcg));
@@ -1169,8 +1170,12 @@ void mem_cgroup_scan_tasks(struct mem_cg
struct task_struct *task;
css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
- while (!ret && (task = css_task_iter_next(&it)))
+ while (!ret && (task = css_task_iter_next(&it))) {
+ /* Avoid potential softlockup warning */
+ if ((++i & 1023) == 0)
+ cond_resched();
ret = fn(task, arg);
+ }
css_task_iter_end(&it);
if (ret) {
mem_cgroup_iter_break(memcg, iter);
--- a/mm/oom_kill.c~memcg-fix-soft-lockup-in-the-oom-process
+++ a/mm/oom_kill.c
@@ -44,6 +44,7 @@
#include <linux/init.h>
#include <linux/mmu_notifier.h>
#include <linux/cred.h>
+#include <linux/nmi.h>
#include <asm/tlb.h>
#include "internal.h"
@@ -430,10 +431,15 @@ static void dump_tasks(struct oom_contro
mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
else {
struct task_struct *p;
+ int i = 0;
rcu_read_lock();
- for_each_process(p)
+ for_each_process(p) {
+ /* Avoid potential softlockup warning */
+ if ((++i & 1023) == 0)
+ touch_softlockup_watchdog();
dump_task(p, oc);
+ }
rcu_read_unlock();
}
}
_
Patches currently in -mm which might be from chenridong(a)huawei.com are
The quilt patch titled
Subject: mm: zswap: move allocations during CPU init outside the lock
has been removed from the -mm tree. Its filename was
mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Yosry Ahmed <yosryahmed(a)google.com>
Subject: mm: zswap: move allocations during CPU init outside the lock
Date: Mon, 13 Jan 2025 21:44:58 +0000
In zswap_cpu_comp_prepare(), allocations are made and assigned to various
members of acomp_ctx under acomp_ctx->mutex. However, allocations may
recurse into zswap through reclaim, trying to acquire the same mutex and
deadlocking.
Move the allocations before the mutex critical section. Only the
initialization of acomp_ctx needs to be done with the mutex held.
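In other words (a condensed sketch with simplified error handling, not
the full hunk):

  buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
  if (!buffer)
          return -ENOMEM;         /* allocation done outside the mutex */

  mutex_lock(&acomp_ctx->mutex);  /* short critical section: no allocation,
                                   * hence no reclaim recursion into zswap */
  acomp_ctx->buffer = buffer;
  mutex_unlock(&acomp_ctx->mutex);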
Link: https://lkml.kernel.org/r/20250113214458.2123410-1-yosryahmed@google.com
Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/zswap.c | 42 ++++++++++++++++++++++++------------------
1 file changed, 24 insertions(+), 18 deletions(-)
--- a/mm/zswap.c~mm-zswap-move-allocations-during-cpu-init-outside-the-lock
+++ a/mm/zswap.c
@@ -820,15 +820,15 @@ static int zswap_cpu_comp_prepare(unsign
{
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct crypto_acomp *acomp;
- struct acomp_req *req;
+ struct crypto_acomp *acomp = NULL;
+ struct acomp_req *req = NULL;
+ u8 *buffer = NULL;
int ret;
- mutex_lock(&acomp_ctx->mutex);
- acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer) {
+ buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+ if (!buffer) {
ret = -ENOMEM;
- goto buffer_fail;
+ goto fail;
}
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
@@ -836,21 +836,25 @@ static int zswap_cpu_comp_prepare(unsign
pr_err("could not alloc crypto acomp %s : %ld\n",
pool->tfm_name, PTR_ERR(acomp));
ret = PTR_ERR(acomp);
- goto acomp_fail;
+ goto fail;
}
- acomp_ctx->acomp = acomp;
- acomp_ctx->is_sleepable = acomp_is_async(acomp);
- req = acomp_request_alloc(acomp_ctx->acomp);
+ req = acomp_request_alloc(acomp);
if (!req) {
pr_err("could not alloc crypto acomp_request %s\n",
pool->tfm_name);
ret = -ENOMEM;
- goto req_fail;
+ goto fail;
}
- acomp_ctx->req = req;
+ /*
+ * Only hold the mutex after completing allocations, otherwise we may
+ * recurse into zswap through reclaim and attempt to hold the mutex
+ * again resulting in a deadlock.
+ */
+ mutex_lock(&acomp_ctx->mutex);
crypto_init_wait(&acomp_ctx->wait);
+
/*
* if the backend of acomp is async zip, crypto_req_done() will wakeup
* crypto_wait_req(); if the backend of acomp is scomp, the callback
@@ -859,15 +863,17 @@ static int zswap_cpu_comp_prepare(unsign
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ acomp_ctx->buffer = buffer;
+ acomp_ctx->acomp = acomp;
+ acomp_ctx->is_sleepable = acomp_is_async(acomp);
+ acomp_ctx->req = req;
mutex_unlock(&acomp_ctx->mutex);
return 0;
-req_fail:
- crypto_free_acomp(acomp_ctx->acomp);
-acomp_fail:
- kfree(acomp_ctx->buffer);
-buffer_fail:
- mutex_unlock(&acomp_ctx->mutex);
+fail:
+ if (acomp)
+ crypto_free_acomp(acomp);
+ kfree(buffer);
return ret;
}
_
Patches currently in -mm which might be from yosryahmed(a)google.com are
The quilt patch titled
Subject: alloc_tag: skip pgalloc_tag_swap if profiling is disabled
has been removed from the -mm tree. Its filename was
alloc_tag-skip-pgalloc_tag_swap-if-profiling-is-disabled.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Suren Baghdasaryan <surenb(a)google.com>
Subject: alloc_tag: skip pgalloc_tag_swap if profiling is disabled
Date: Thu, 26 Dec 2024 13:16:39 -0800
When memory allocation profiling is disabled, there is no need to swap
allocation tags during migration. Skip it to avoid unnecessary overhead.
Once I added these checks, the overhead of the mode where memory profiling
is compiled in but turned off went down by about 50%.
Link: https://lkml.kernel.org/r/20241226211639.1357704-2-surenb@google.com
Fixes: e0a955bf7f61 ("mm/codetag: add pgalloc_tag_copy()")
Signed-off-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: David Wang <00107082(a)163.com>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Zhenhua Huang <quic_zhenhuah(a)quicinc.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/alloc_tag.c | 3 +++
1 file changed, 3 insertions(+)
--- a/lib/alloc_tag.c~alloc_tag-skip-pgalloc_tag_swap-if-profiling-is-disabled
+++ a/lib/alloc_tag.c
@@ -195,6 +195,9 @@ void pgalloc_tag_swap(struct folio *new,
union codetag_ref ref_old, ref_new;
struct alloc_tag *tag_old, *tag_new;
+ if (!mem_alloc_profiling_enabled())
+ return;
+
tag_old = pgalloc_tag_get(&old->page);
if (!tag_old)
return;
_
Patches currently in -mm which might be from surenb(a)google.com are
alloc_tag-avoid-current-alloc_tag-manipulations-when-profiling-is-disabled.patch
From: Darrick J. Wong <djwong(a)kernel.org>
I received a report from the release engineering side of the house that
xfs_scrub without the -n flag (aka fix it mode) would try to fix a
broken filesystem even on a kernel that doesn't have online repair built
into it:
# xfs_scrub -dTvn /mnt/test
EXPERIMENTAL xfs_scrub program in use! Use at your own risk!
Phase 1: Find filesystem geometry.
/mnt/test: using 1 threads to scrub.
Phase 1: Memory used: 132k/0k (108k/25k), time: 0.00/ 0.00/ 0.00s
<snip>
Phase 4: Repair filesystem.
<snip>
Info: /mnt/test/some/victimdir directory entries: Attempting repair. (repair.c line 351)
Corruption: /mnt/test/some/victimdir directory entries: Repair unsuccessful; offline repair required. (repair.c line 204)
Source: https://blogs.oracle.com/linux/post/xfs-online-filesystem-repair
It is strange that xfs_scrub doesn't refuse to run, because the kernel
is supposed to return EOPNOTSUPP if we actually needed to run a repair,
and xfs_io's repair subcommand will perror that. And yet:
# xfs_io -x -c 'repair probe' /mnt/test
#
The first problem is commit dcb660f9222fd9 (4.15) which should have had
xchk_probe set the CORRUPT OFLAG so that any of the repair machinery
will get called at all.
It turns out that some refactoring that happened in the 6.6-6.8 era
broke the operation of this corner case. What we *really* want to
happen is that all the predicates that would steer xfs_scrub_metadata()
towards calling xrep_attempt() should function the same way that they do
when repair is compiled in; and then xrep_attempt gets to return the
fatal EOPNOTSUPP error code that causes the probe to fail.
Instead, commit 8336a64eb75cba (6.6) started the failwhale swimming by
hoisting OFLAG checking logic into a helper whose non-repair stub always
returns false, causing scrub to return "repair not needed" when in fact
the repair is not supported. Prior to that commit, the oflag checking
that was open-coded in scrub.c worked correctly.
Similarly, in commit 4bdfd7d15747b1 (6.8) we hoisted the IFLAG_REPAIR
and ALREADY_FIXED logic into a helper whose non-repair stub always
returns false, so we never enter the if test body that would have called
xrep_attempt, let alone fail to decode the OFLAGs correctly.
The final insult (yes, we're doing The Naked Gun now) is commit
48a72f60861f79 (6.8) in which we hoisted the "are we going to try a
repair?" predicate into yet another function with a non-repair stub
always returns false.
Fix xchk_probe to trigger xrep_probe if repair is enabled, or return
EOPNOTSUPP directly if it is not. For all the other scrub types, we
need to fix the header predicates so that the ->repair functions (which
are all xrep_notsupported) get called to return EOPNOTSUPP. Commit
48a72 is tagged here because the scrub code prior to LTS 6.12 is
incomplete and not worth patching.
Reported-by: David Flynn <david.flynn(a)oracle.com>
Cc: <stable(a)vger.kernel.org> # v6.8
Fixes: 48a72f60861f79 ("xfs: don't complain about unfixed metadata when repairs were injected")
Signed-off-by: "Darrick J. Wong" <djwong(a)kernel.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
---
fs/xfs/scrub/common.h | 5 -----
fs/xfs/scrub/repair.h | 11 ++++++++++-
fs/xfs/scrub/scrub.c | 12 ++++++++++++
3 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 546be550b221b6..4722cb51fd1522 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -225,7 +225,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
bool xchk_dir_looks_zapped(struct xfs_inode *dp);
bool xchk_pptr_looks_zapped(struct xfs_inode *ip);
-#ifdef CONFIG_XFS_ONLINE_REPAIR
/* Decide if a repair is required. */
static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
{
@@ -245,10 +244,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc)
return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
!(sc->flags & XREP_ALREADY_FIXED);
}
-#else
-# define xchk_needs_repair(sc) (false)
-# define xchk_could_repair(sc) (false)
-#endif /* CONFIG_XFS_ONLINE_REPAIR */
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 823c00d1a50262..af0a3a9e5ed978 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -191,7 +191,16 @@ int xrep_reset_metafile_resv(struct xfs_scrub *sc);
#else
#define xrep_ino_dqattach(sc) (0)
-#define xrep_will_attempt(sc) (false)
+
+/*
+ * When online repair is not built into the kernel, we still want to attempt
+ * the repair so that the stub xrep_attempt below will return EOPNOTSUPP.
+ */
+static inline bool xrep_will_attempt(const struct xfs_scrub *sc)
+{
+ return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+ xchk_needs_repair(sc->sm);
+}
static inline int
xrep_attempt(
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index d3a4ddd918f621..01fdbbc7adf30e 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -149,6 +149,18 @@ xchk_probe(
if (xchk_should_terminate(sc, &error))
return error;
+ /*
+ * If the caller is probing to see if repair works but repair isn't
+ * built into the kernel, return EOPNOTSUPP because that's the signal
+ * that userspace expects. If online repair is built in, set the
+ * CORRUPT flag (without any of the usual tracing/logging) to force us
+ * into xrep_probe.
+ */
+ if (xchk_could_repair(sc)) {
+ if (!IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR))
+ return -EOPNOTSUPP;
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ }
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 38724591364e1e3b278b4053f102b49ea06ee17c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025011310-ruckus-ceramics-7ebc@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 38724591364e1e3b278b4053f102b49ea06ee17c Mon Sep 17 00:00:00 2001
From: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Date: Mon, 25 Nov 2024 22:16:12 +0100
Subject: [PATCH] iio: adc: rockchip_saradc: fix information leak in triggered
buffer
The 'data' local struct is used to push data to user space from a
triggered buffer, but it does not set values for inactive channels, as
it only uses iio_for_each_active_channel() to assign new values.
Initialize the struct to zero before using it to avoid pushing
uninitialized information to userspace.
Cc: stable(a)vger.kernel.org
Fixes: 4e130dc7b413 ("iio: adc: rockchip_saradc: Add support iio buffers")
Signed-off-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Link: https://patch.msgid.link/20241125-iio_memset_scan_holes-v1-4-0cb6e98d895c@g…
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index 240cfa391674..dfd47a6e1f4a 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -368,6 +368,8 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p)
int ret;
int i, j = 0;
+ memset(&data, 0, sizeof(data));
+
mutex_lock(&info->lock);
iio_for_each_active_channel(i_dev, i) {
From: Dario Binacchi <dario.binacchi(a)amarulasolutions.com>
[ Upstream commit 9ad86d377ef4a19c75a9c639964879a5b25a433b ]
The commit a22bd630cfff ("can: hi311x: do not report txerr and rxerr
during bus-off") removed the reporting of rxerr and txerr even in case
of correct operation (i.e., not bus-off).
The error count information added to the CAN frame after netif_rx() is
a potential use after free, since there is no guarantee that the skb
is in the same state. It might be freed or reused.
Fix the issue by postponing the netif_rx() call in case of txerr and
rxerr reporting.
Fixes: a22bd630cfff ("can: hi311x: do not report txerr and rxerr during bus-off")
Signed-off-by: Dario Binacchi <dario.binacchi(a)amarulasolutions.com>
Link: https://patch.msgid.link/20241122221650.633981-5-dario.binacchi@amarulasolu…
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
[kovalev: changed the call order of netif_rx_ni()
according to netif_rx() of the upstream patch]
Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org>
---
Backport to fix CVE-2024-56651
Link: https://www.cve.org/CVERecord/?id=CVE-2024-56651
---
drivers/net/can/spi/hi311x.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index 28273e84171a25..1cdc05475cda61 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -673,9 +673,9 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id)
tx_state = txerr >= rxerr ? new_state : 0;
rx_state = txerr <= rxerr ? new_state : 0;
can_change_state(net, cf, tx_state, rx_state);
- netif_rx_ni(skb);
if (new_state == CAN_STATE_BUS_OFF) {
+ netif_rx_ni(skb);
can_bus_off(net);
if (priv->can.restart_ms == 0) {
priv->force_quit = 1;
@@ -685,6 +685,7 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id)
} else {
cf->data[6] = txerr;
cf->data[7] = rxerr;
+ netif_rx_ni(skb);
}
}
--
2.33.8
From: Kuniyuki Iwashima <kuniyu(a)amazon.com>
commit e8c526f2bdf1845bedaf6a478816a3d06fa78b8f upstream.
Martin KaFai Lau reported use-after-free [0] in reqsk_timer_handler().
"""
We are seeing a use-after-free from a bpf prog attached to
trace_tcp_retransmit_synack. The program passes the req->sk to the
bpf_sk_storage_get_tracing kernel helper which does check for null
before using it.
"""
The commit 83fccfc3940c ("inet: fix potential deadlock in
reqsk_queue_unlink()") added timer_pending() in reqsk_queue_unlink() not
to call del_timer_sync() from reqsk_timer_handler(), but it introduced a
small race window.
Before the timer is called, expire_timers() calls detach_timer(timer, true)
to clear timer->entry.pprev and marks it as not pending.
If reqsk_queue_unlink() checks timer_pending() just after expire_timers()
calls detach_timer(), TCP will miss del_timer_sync(); the reqsk timer will
continue running and send multiple SYN+ACKs until it expires.
The reported UAF could happen if req->sk is close()d earlier than the timer
expiration, which is 63s by default.
The scenario would be
1. inet_csk_complete_hashdance() calls inet_csk_reqsk_queue_drop(),
but del_timer_sync() is missed
2. reqsk timer is executed and scheduled again
3. req->sk is accept()ed and reqsk_put() decrements rsk_refcnt, but
reqsk timer still has another one, and inet_csk_accept() does not
clear req->sk for non-TFO sockets
4. sk is close()d
5. reqsk timer is executed again, and BPF touches req->sk
Let's not use timer_pending() by passing the caller context to
__inet_csk_reqsk_queue_drop().
Note that reqsk timer is pinned, so the issue does not happen in most
use cases. [1]
[0]
BUG: KFENCE: use-after-free read in bpf_sk_storage_get_tracing+0x2e/0x1b0
Use-after-free read at 0x00000000a891fb3a (in kfence-#1):
bpf_sk_storage_get_tracing+0x2e/0x1b0
bpf_prog_5ea3e95db6da0438_tcp_retransmit_synack+0x1d20/0x1dda
bpf_trace_run2+0x4c/0xc0
tcp_rtx_synack+0xf9/0x100
reqsk_timer_handler+0xda/0x3d0
run_timer_softirq+0x292/0x8a0
irq_exit_rcu+0xf5/0x320
sysvec_apic_timer_interrupt+0x6d/0x80
asm_sysvec_apic_timer_interrupt+0x16/0x20
intel_idle_irq+0x5a/0xa0
cpuidle_enter_state+0x94/0x273
cpu_startup_entry+0x15e/0x260
start_secondary+0x8a/0x90
secondary_startup_64_no_verify+0xfa/0xfb
kfence-#1: 0x00000000a72cc7b6-0x00000000d97616d9, size=2376, cache=TCPv6
allocated by task 0 on cpu 9 at 260507.901592s:
sk_prot_alloc+0x35/0x140
sk_clone_lock+0x1f/0x3f0
inet_csk_clone_lock+0x15/0x160
tcp_create_openreq_child+0x1f/0x410
tcp_v6_syn_recv_sock+0x1da/0x700
tcp_check_req+0x1fb/0x510
tcp_v6_rcv+0x98b/0x1420
ipv6_list_rcv+0x2258/0x26e0
napi_complete_done+0x5b1/0x2990
mlx5e_napi_poll+0x2ae/0x8d0
net_rx_action+0x13e/0x590
irq_exit_rcu+0xf5/0x320
common_interrupt+0x80/0x90
asm_common_interrupt+0x22/0x40
cpuidle_enter_state+0xfb/0x273
cpu_startup_entry+0x15e/0x260
start_secondary+0x8a/0x90
secondary_startup_64_no_verify+0xfa/0xfb
freed by task 0 on cpu 9 at 260507.927527s:
rcu_core_si+0x4ff/0xf10
irq_exit_rcu+0xf5/0x320
sysvec_apic_timer_interrupt+0x6d/0x80
asm_sysvec_apic_timer_interrupt+0x16/0x20
cpuidle_enter_state+0xfb/0x273
cpu_startup_entry+0x15e/0x260
start_secondary+0x8a/0x90
secondary_startup_64_no_verify+0xfa/0xfb
Fixes: 83fccfc3940c ("inet: fix potential deadlock in reqsk_queue_unlink()")
Reported-by: Martin KaFai Lau <martin.lau(a)kernel.org>
Closes: https://lore.kernel.org/netdev/eb6684d0-ffd9-4bdc-9196-33f690c25824@linux.d…
Link: https://lore.kernel.org/netdev/b55e2ca0-42f2-4b7c-b445-6ffd87ca74a0@linux.d… [1]
Signed-off-by: Kuniyuki Iwashima <kuniyu(a)amazon.com>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Reviewed-by: Martin KaFai Lau <martin.lau(a)kernel.org>
Link: https://patch.msgid.link/20241014223312.4254-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[d.privalov: adapt calling __inet_csk_reqsk_queue_drop()]
Signed-off-by: Dmitriy Privalov <d.privalov(a)omp.ru>
---
Backport fix for CVE-2024-50154
Link: https://nvd.nist.gov/vuln/detail/CVE-2024-50154
---
net/ipv4/inet_connection_sock.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6ebe43b4d28f..332133aae3e1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -722,21 +722,31 @@ static bool reqsk_queue_unlink(struct request_sock *req)
found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
spin_unlock(lock);
}
- if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
- reqsk_put(req);
+
return found;
}
-bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+static bool __inet_csk_reqsk_queue_drop(struct sock *sk,
+ struct request_sock *req,
+ bool from_timer)
{
bool unlinked = reqsk_queue_unlink(req);
+ if (!from_timer && del_timer_sync(&req->rsk_timer))
+ reqsk_put(req);
+
if (unlinked) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
reqsk_put(req);
}
+
return unlinked;
}
+
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+{
+ return __inet_csk_reqsk_queue_drop(sk, req, false);
+}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
@@ -804,7 +814,8 @@ static void reqsk_timer_handler(struct timer_list *t)
return;
}
drop:
- inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
+ __inet_csk_reqsk_queue_drop(sk_listener, req, true);
+ reqsk_put(req);
}
static void reqsk_queue_hash_req(struct request_sock *req,
--
2.34.1
From: Tejun Heo <tj(a)kernel.org>
commit 86e6ca55b83c575ab0f2e105cf08f98e58d3d7af upstream.
blkcg_unpin_online() walks up the blkcg hierarchy putting the online pin. To
walk up, it uses blkcg_parent(blkcg) but it was calling that after
blkcg_destroy_blkgs(blkcg) which could free the blkcg, leading to the
following UAF:
==================================================================
BUG: KASAN: slab-use-after-free in blkcg_unpin_online+0x15a/0x270
Read of size 8 at addr ffff8881057678c0 by task kworker/9:1/117
CPU: 9 UID: 0 PID: 117 Comm: kworker/9:1 Not tainted 6.13.0-rc1-work-00182-gb8f52214c61a-dirty #48
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS unknown 02/02/2022
Workqueue: cgwb_release cgwb_release_workfn
Call Trace:
<TASK>
dump_stack_lvl+0x27/0x80
print_report+0x151/0x710
kasan_report+0xc0/0x100
blkcg_unpin_online+0x15a/0x270
cgwb_release_workfn+0x194/0x480
process_scheduled_works+0x71b/0xe20
worker_thread+0x82a/0xbd0
kthread+0x242/0x2c0
ret_from_fork+0x33/0x70
ret_from_fork_asm+0x1a/0x30
</TASK>
...
Freed by task 1944:
kasan_save_track+0x2b/0x70
kasan_save_free_info+0x3c/0x50
__kasan_slab_free+0x33/0x50
kfree+0x10c/0x330
css_free_rwork_fn+0xe6/0xb30
process_scheduled_works+0x71b/0xe20
worker_thread+0x82a/0xbd0
kthread+0x242/0x2c0
ret_from_fork+0x33/0x70
ret_from_fork_asm+0x1a/0x30
Note that the UAF is not easy to trigger, as the free path is indirected
behind a couple of RCU grace periods and a work item execution. I could only
trigger it with an artificial msleep() injected in blkcg_unpin_online().
Fix it by reading the parent pointer before destroying the blkcg's blkg's.
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Reported-by: Abagail ren <renzezhongucas(a)gmail.com>
Suggested-by: Linus Torvalds <torvalds(a)linuxfoundation.org>
Fixes: 4308a434e5e0 ("blkcg: don't offline parent blkcg first")
Cc: stable(a)vger.kernel.org # v5.7+
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
[kovalev: in versions 5.10 and 5.15, the blkcg_unpin_online()
function was not moved to the block/blk-cgroup.c file]
Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org>
---
Backport to fix CVE-2024-56672
Link: https://www.cve.org/CVERecord/?id=CVE-2024-56672
---
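The shape of the bug and of the fix, reduced to a generic sketch (assumed
types, not the blkcg code): read the parent pointer before the call that may
free the current node.

struct hnode {
        struct hnode *parent;
};

/* May free @n (in blkcg this happens via deferred work and RCU). */
void hnode_teardown(struct hnode *n);

static void hnode_unwind(struct hnode *n)
{
        while (n) {
                struct hnode *parent = n->parent;  /* read before teardown */

                hnode_teardown(n);
                n = parent;                        /* never touch n again */
        }
}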
include/linux/blk-cgroup.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 0e6e84db06f674..b89099360a8673 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -428,10 +428,14 @@ static inline void blkcg_pin_online(struct blkcg *blkcg)
static inline void blkcg_unpin_online(struct blkcg *blkcg)
{
do {
+ struct blkcg *parent;
+
if (!refcount_dec_and_test(&blkcg->online_pin))
break;
+
+ parent = blkcg_parent(blkcg);
blkcg_destroy_blkgs(blkcg);
- blkcg = blkcg_parent(blkcg);
+ blkcg = parent;
} while (blkcg);
}
--
2.33.8
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 38724591364e1e3b278b4053f102b49ea06ee17c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025011310-spider-motor-0ef7@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 38724591364e1e3b278b4053f102b49ea06ee17c Mon Sep 17 00:00:00 2001
From: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Date: Mon, 25 Nov 2024 22:16:12 +0100
Subject: [PATCH] iio: adc: rockchip_saradc: fix information leak in triggered
buffer
The 'data' local struct is used to push data to user space from a
triggered buffer, but it does not set values for inactive channels, as
it only uses iio_for_each_active_channel() to assign new values.
Initialize the struct to zero before using it to avoid pushing
uninitialized information to userspace.
Cc: stable(a)vger.kernel.org
Fixes: 4e130dc7b413 ("iio: adc: rockchip_saradc: Add support iio buffers")
Signed-off-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Link: https://patch.msgid.link/20241125-iio_memset_scan_holes-v1-4-0cb6e98d895c@g…
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index 240cfa391674..dfd47a6e1f4a 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -368,6 +368,8 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p)
int ret;
int i, j = 0;
+ memset(&data, 0, sizeof(data));
+
mutex_lock(&info->lock);
iio_for_each_active_channel(i_dev, i) {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 38724591364e1e3b278b4053f102b49ea06ee17c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025011309-swab-bootie-b4f4@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 38724591364e1e3b278b4053f102b49ea06ee17c Mon Sep 17 00:00:00 2001
From: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Date: Mon, 25 Nov 2024 22:16:12 +0100
Subject: [PATCH] iio: adc: rockchip_saradc: fix information leak in triggered
buffer
The 'data' local struct is used to push data to user space from a
triggered buffer, but it does not set values for inactive channels, as
it only uses iio_for_each_active_channel() to assign new values.
Initialize the struct to zero before using it to avoid pushing
uninitialized information to userspace.
Cc: stable(a)vger.kernel.org
Fixes: 4e130dc7b413 ("iio: adc: rockchip_saradc: Add support iio buffers")
Signed-off-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Link: https://patch.msgid.link/20241125-iio_memset_scan_holes-v1-4-0cb6e98d895c@g…
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index 240cfa391674..dfd47a6e1f4a 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -368,6 +368,8 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p)
int ret;
int i, j = 0;
+ memset(&data, 0, sizeof(data));
+
mutex_lock(&info->lock);
iio_for_each_active_channel(i_dev, i) {
Currently, DWC3 driver attempts to acquire the USB power supply only
once during the probe. If the USB power supply is not ready at that
time, the driver simply ignores the failure and continues the probe,
leaving the gadget vbus_draw callback permanently non-functional.
Address this problem by delaying the dwc3 driver initialization until
the USB power supply is registered.
Fixes: 6f0764b5adea ("usb: dwc3: add a power supply for current control")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kyle Tso <kyletso(a)google.com>
---
v1 -> v2:
- get the power supply in a dedicated function
---
drivers/usb/dwc3/core.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 7578c5133568..dfa1b5fe48dc 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1684,8 +1684,6 @@ static void dwc3_get_properties(struct dwc3 *dwc)
u8 tx_thr_num_pkt_prd = 0;
u8 tx_max_burst_prd = 0;
u8 tx_fifo_resize_max_num;
- const char *usb_psy_name;
- int ret;
/* default to highest possible threshold */
lpm_nyet_threshold = 0xf;
@@ -1720,13 +1718,6 @@ static void dwc3_get_properties(struct dwc3 *dwc)
dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
- ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
- if (ret >= 0) {
- dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
- if (!dwc->usb_psy)
- dev_err(dev, "couldn't get usb power supply\n");
- }
-
dwc->has_lpm_erratum = device_property_read_bool(dev,
"snps,has-lpm-erratum");
device_property_read_u8(dev, "snps,lpm-nyet-threshold",
@@ -2129,6 +2120,23 @@ static int dwc3_get_num_ports(struct dwc3 *dwc)
return 0;
}
+static struct power_supply *dwc3_get_usb_power_supply(struct dwc3 *dwc)
+{
+ struct power_supply *usb_psy;
+ const char *usb_psy_name;
+ int ret;
+
+ ret = device_property_read_string(dwc->dev, "usb-psy-name", &usb_psy_name);
+ if (ret < 0)
+ return NULL;
+
+ usb_psy = power_supply_get_by_name(usb_psy_name);
+ if (!usb_psy)
+ return ERR_PTR(-EPROBE_DEFER);
+
+ return usb_psy;
+}
+
static int dwc3_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -2185,6 +2193,10 @@ static int dwc3_probe(struct platform_device *pdev)
dwc3_get_software_properties(dwc);
+ dwc->usb_psy = dwc3_get_usb_power_supply(dwc);
+ if (IS_ERR(dwc->usb_psy))
+ return dev_err_probe(dev, PTR_ERR(dwc->usb_psy), "couldn't get usb power supply\n");
+
dwc->reset = devm_reset_control_array_get_optional_shared(dev);
if (IS_ERR(dwc->reset)) {
ret = PTR_ERR(dwc->reset);
--
2.48.0.rc2.279.g1de40edade-goog
The orig_a0 is missing in struct user_regs_struct of riscv, and there is
no way to add it without breaking UAPI. (See Link tag below)
Like NT_ARM_SYSTEM_CALL does, we add a new regset named NT_RISCV_ORIG_A0 to
access the original a0 register from userspace via the ptrace API.
Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
Signed-off-by: Celeste Liu <uwu(a)coelacanthus.name>
---
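As a usage illustration only (not part of this series): a tracer could read
the new regset roughly as below, assuming the NT_RISCV_ORIG_A0 constant added
by patch 1 to include/uapi/linux/elf.h and a single 64-bit payload.

#include <elf.h>
#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

/*
 * NT_RISCV_ORIG_A0 comes from the patched include/uapi/linux/elf.h;
 * build against updated UAPI headers to get its value.
 */
static long read_orig_a0(pid_t pid, uint64_t *orig_a0)
{
        struct iovec iov = {
                .iov_base = orig_a0,
                .iov_len  = sizeof(*orig_a0),
        };

        /* The tracee must already be ptrace-attached and stopped. */
        return ptrace(PTRACE_GETREGSET, pid, NT_RISCV_ORIG_A0, &iov);
}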
Changes in v6:
- Fix obsolete comment.
- Copy include/linux/stddef.h to tools/include to use offsetofend in
selftests.
- Link to v5: https://lore.kernel.org/r/20250115-riscv-new-regset-v5-0-d0e6ec031a23@coela…
Changes in v5:
- Fix wrong usage in selftests.
- Link to v4: https://lore.kernel.org/r/20241226-riscv-new-regset-v4-0-4496a29d0436@coela…
Changes in v4:
- Fix a copy paste error in selftest. (Forget to commit...)
- Link to v3: https://lore.kernel.org/r/20241226-riscv-new-regset-v3-0-f5b96465826b@coela…
Changes in v3:
- Use return 0 directly for readability.
- Fix test for modify a0.
- Add Fixes: tag
- Remove useless Cc: stable.
- Selftest will check both a0 and orig_a0, but depends on the
correctness of PTRACE_GET_SYSCALL_INFO.
- Link to v2: https://lore.kernel.org/r/20241203-riscv-new-regset-v2-0-d37da8c0cba6@coela…
Changes in v2:
- Fix integer width.
- Add selftest.
- Link to v1: https://lore.kernel.org/r/20241201-riscv-new-regset-v1-1-c83c58abcc7b@coela…
---
Celeste Liu (3):
riscv/ptrace: add new regset to access original a0 register
tools: copy include/linux/stddef.h to tools/include
riscv: selftests: Add a ptrace test to verify a0 and orig_a0 access
arch/riscv/kernel/ptrace.c | 32 +++++
include/uapi/linux/elf.h | 1 +
tools/include/linux/stddef.h | 85 ++++++++++++
tools/include/uapi/linux/stddef.h | 6 +-
tools/testing/selftests/riscv/abi/.gitignore | 1 +
tools/testing/selftests/riscv/abi/Makefile | 6 +-
tools/testing/selftests/riscv/abi/ptrace.c | 193 +++++++++++++++++++++++++++
7 files changed, 319 insertions(+), 5 deletions(-)
---
base-commit: 0e287d31b62bb53ad81d5e59778384a40f8b6f56
change-id: 20241201-riscv-new-regset-d529b952ad0d
Best regards,
--
Celeste Liu <uwu(a)coelacanthus.name>
The quilt patch titled
Subject: mm/vmscan: fix pgdemote_* accounting with lru_gen_enabled
has been removed from the -mm tree. Its filename was
mm-vmscan-fix-pgdemote_-accounting-with-lru_gen_enabled.patch
This patch was dropped because it is obsolete
------------------------------------------------------
From: Li Zhijian <lizhijian(a)fujitsu.com>
Subject: mm/vmscan: fix pgdemote_* accounting with lru_gen_enabled
Date: Fri, 10 Jan 2025 20:21:33 +0800
Commit f77f0c751478 ("mm,memcg: provide per-cgroup counters for NUMA
balancing operations") moved the accounting of PGDEMOTE_* statistics to
shrink_inactive_list(). However, shrink_inactive_list() is not called
when lru_gen_enabled() is true, leading to incorrect demotion statistics
despite actual demotion events occurring.
Add the PGDEMOTE_* accounting in evict_folios(), ensuring that demotion
statistics are correctly updated regardless of the lru_gen_enabled state.
This fix is crucial for systems that rely on accurate NUMA balancing
metrics for performance tuning and resource management.
Link: https://lkml.kernel.org/r/20250110122133.423481-2-lizhijian@fujitsu.com
Fixes: f77f0c751478 ("mm,memcg: provide per-cgroup counters for NUMA balancing operations")
Signed-off-by: Li Zhijian <lizhijian(a)fujitsu.com>
Cc: Kaiyang Zhao <kaiyang2(a)cs.cmu.edu>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmscan.c | 2 ++
1 file changed, 2 insertions(+)
--- a/mm/vmscan.c~mm-vmscan-fix-pgdemote_-accounting-with-lru_gen_enabled
+++ a/mm/vmscan.c
@@ -4650,6 +4650,8 @@ retry:
__mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(),
stat.nr_demoted);
+ __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(),
+ stat.nr_demoted);
item = PGSTEAL_KSWAPD + reclaimer_offset();
if (!cgroup_reclaim(sc))
__count_vm_events(item, reclaimed);
_
Patches currently in -mm which might be from lizhijian(a)fujitsu.com are
mm-vmscan-accumulate-nr_demoted-for-accurate-demotion-statistics.patch
mm-vmscan-accumulate-nr_demoted-for-accurate-demotion-statistics-v2.patch
When porting librseq commit:
commit c7b45750fa85 ("Adapt to glibc __rseq_size feature detection")
from librseq to the kernel selftests, the following line was missed
at the end of rseq_init():
rseq_size = get_rseq_kernel_feature_size();
which effectively leaves rseq_size initialized to -1U when glibc does not
have rseq support. glibc supports rseq from version 2.35 onwards.
In a following librseq commit
commit c67d198627c2 ("Only set 'rseq_size' on first thread registration")
to mimic the libc behavior, a new approach is taken: don't set the
feature size in 'rseq_size' until at least one thread has successfully
registered. This allows using 'rseq_size' in fast-paths to test for both
registration status and available features. The caveat is that on libc
either all threads are registered or none are, while with bare librseq
it is the responsability of the user to register all threads using rseq.
This combines the changes from the following librseq commits:
commit c7b45750fa85 ("Adapt to glibc __rseq_size feature detection")
commit c67d198627c2 ("Only set 'rseq_size' on first thread registration")
Fixes: 73a4f5a704a2 ("selftests/rseq: Fix mm_cid test failure")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Raghavendra Rao Ananta <rananta(a)google.com>
Cc: Shuah Khan <skhan(a)linuxfoundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Boqun Feng <boqun.feng(a)gmail.com>
Cc: "Paul E. McKenney" <paulmck(a)kernel.org>
Cc: Carlos O'Donell <carlos(a)redhat.com>
Cc: Florian Weimer <fweimer(a)redhat.com>
Cc: Michael Jeanson <mjeanson(a)efficios.com>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
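As an illustration of the intended fast-path usage after this change (a
sketch only, mirroring the feature helpers in the selftests' rseq.h and
assuming struct rseq_abi from rseq-abi.h): since rseq_size is now non-zero
only after a successful registration, a single comparison covers both "this
process has registered" and "the mm_cid field is within the active feature
set".

#include <stdbool.h>
#include <stddef.h>
#include "rseq.h"       /* rseq_size, struct rseq_abi */

static inline bool mm_cid_usable(void)
{
        /* 0 means no thread registered yet (or rseq unavailable). */
        return rseq_size >= offsetof(struct rseq_abi, mm_cid) +
                            sizeof(((struct rseq_abi *)NULL)->mm_cid);
}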
tools/testing/selftests/rseq/rseq.c | 32 ++++++++++++++++++++++-------
tools/testing/selftests/rseq/rseq.h | 9 +++++++-
2 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 5b9772cdf265..f6156790c3b4 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -61,7 +61,6 @@ unsigned int rseq_size = -1U;
unsigned int rseq_flags;
static int rseq_ownership;
-static int rseq_reg_success; /* At least one rseq registration has succeded. */
/* Allocate a large area for the TLS. */
#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024
@@ -152,14 +151,27 @@ int rseq_register_current_thread(void)
}
rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
if (rc) {
- if (RSEQ_READ_ONCE(rseq_reg_success)) {
+ /*
+ * After at least one thread has registered successfully
+ * (rseq_size > 0), the registration of other threads should
+ * never fail.
+ */
+ if (RSEQ_READ_ONCE(rseq_size) > 0) {
/* Incoherent success/failure within process. */
abort();
}
return -1;
}
assert(rseq_current_cpu_raw() >= 0);
- RSEQ_WRITE_ONCE(rseq_reg_success, 1);
+
+ /*
+ * The first thread to register sets the rseq_size to mimic the libc
+ * behavior.
+ */
+ if (RSEQ_READ_ONCE(rseq_size) == 0) {
+ RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size());
+ }
+
return 0;
}
@@ -235,12 +247,18 @@ void rseq_init(void)
return;
}
rseq_ownership = 1;
- if (!rseq_available()) {
- rseq_size = 0;
- return;
- }
+
+ /* Calculate the offset of the rseq area from the thread pointer. */
rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
+
+ /* rseq flags are deprecated, always set to 0. */
rseq_flags = 0;
+
+ /*
+ * Set the size to 0 until at least one thread registers to mimic the
+ * libc behavior.
+ */
+ rseq_size = 0;
}
static __attribute__((destructor))
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 4e217b620e0c..062d10925a10 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -60,7 +60,14 @@
extern ptrdiff_t rseq_offset;
/*
- * Size of the registered rseq area. 0 if the registration was
+ * The rseq ABI is composed of extensible feature fields. The extensions
+ * are done by appending additional fields at the end of the structure.
+ * The rseq_size defines the size of the active feature set which can be
+ * used by the application for the current rseq registration. Features
+ * starting at offset >= rseq_size are inactive and should not be used.
+ *
+ * The rseq_size is the intersection between the available allocation
+ * size for the rseq area and the feature size supported by the kernel.
* unsuccessful.
*/
extern unsigned int rseq_size;
--
2.39.5
On removal of the device or unloading of the kernel module a potential
NULL pointer dereference occurs.
The following sequence deletes the interface:
brcmf_detach()
  brcmf_remove_interface()
    brcmf_del_if()
Inside the brcmf_del_if() function the drvr->if2bss[ifidx] is updated to
BRCMF_BSSIDX_INVALID (-1) if the bsscfgidx matches.
After brcmf_remove_interface() call the brcmf_proto_detach() function is
called providing the following sequence:
brcmf_detach()
  brcmf_proto_detach()
    brcmf_proto_msgbuf_detach()
      brcmf_flowring_detach()
        brcmf_msgbuf_delete_flowring()
          brcmf_msgbuf_remove_flowring()
            brcmf_flowring_delete()
              brcmf_get_ifp()
              brcmf_txfinalize()
Since brcmf_get_ifp() can and actually will return NULL in this case, the
call to brcmf_txfinalize() will result in a NULL pointer dereference
inside brcmf_txfinalize() when trying to update
ifp->ndev->stats.tx_errors.
This will only happen if a flowring still has an skb.
Although the NULL pointer dereference has only been seen when trying to update
the tx statistic, all other uses of the ifp pointer have been guarded as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Marcel Hamer <marcel.hamer(a)windriver.com>
Link: https://lore.kernel.org/all/b519e746-ddfd-421f-d897-7620d229e4b2@gmail.com/
---
v1 -> v2: guard all uses of the ifp pointer inside brcmf_txfinalize()
---
drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index c3a57e30c855..791757a3ec13 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -543,13 +543,13 @@ void brcmf_txfinalize(struct brcmf_if *ifp, struct sk_buff *txp, bool success)
eh = (struct ethhdr *)(txp->data);
type = ntohs(eh->h_proto);
- if (type == ETH_P_PAE) {
+ if (type == ETH_P_PAE && ifp) {
atomic_dec(&ifp->pend_8021x_cnt);
if (waitqueue_active(&ifp->pend_8021x_wait))
wake_up(&ifp->pend_8021x_wait);
}
- if (!success)
+ if (!success && ifp)
ifp->ndev->stats.tx_errors++;
brcmu_pkt_buf_free_skb(txp);
--
2.34.1
The PE Reset State "0" returned by RTAS calls
"ibm_read_slot_reset_[state|state2]" indicates that the reset is
deactivated and the PE is in a state where MMIO and DMA are allowed.
However, the current implementation of "pseries_eeh_get_state()" does
not reflect this, causing drivers to incorrectly assume that MMIO and
DMA operations cannot be resumed.
Userspace drivers performing EEH recovery through VFIO ioctls therefore fail
to detect when the recovery process is complete. The VFIO_EEH_PE_GET_STATE
ioctl does not report the expected EEH_PE_STATE_NORMAL state, preventing
userspace drivers from functioning properly on pseries systems.
The patch addresses this issue by updating 'pseries_eeh_get_state()'
to include "EEH_STATE_MMIO_ENABLED" and "EEH_STATE_DMA_ENABLED" in
the result mask for PE Reset State "0". This ensures correct state
reporting to the callers, aligning the behavior with the PAPR specification
and fixing the bug in EEH recovery for VFIO user workflows.
Fixes: 00ba05a12b3c ("powerpc/pseries: Cleanup on pseries_eeh_get_state()")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Narayana Murty N <nnmlinux(a)linux.ibm.com>
---
Changelog:
V1:https://lore.kernel.org/all/20241107042027.338065-1-nnmlinux@linux.ibm.c…
--added Fixes tag for "powerpc/pseries: Cleanup on
pseries_eeh_get_state()".
V2:https://lore.kernel.org/stable/20241212075044.10563-1-nnmlinux%40linux.i…
--Updated the patch description to include it in the stable kernel tree.
V3:https://lore.kernel.org/all/87v7vm8pwz.fsf@gmail.com/
--Updated commit description.
---
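For reference, the userspace recovery check that this fixes amounts to
roughly the following sketch against the standard VFIO EEH API (error
handling trimmed; VFIO_EEH_PE_OP is issued on the container fd). Before this
patch the loop below would never observe VFIO_EEH_PE_STATE_NORMAL on pseries
once the reset had been deactivated.

#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vfio.h>

/* Poll the PE state until the PE reports fully recovered. */
static int wait_for_pe_normal(int container_fd)
{
        struct vfio_eeh_pe_op op;

        memset(&op, 0, sizeof(op));
        op.argsz = sizeof(op);
        op.op = VFIO_EEH_PE_GET_STATE;

        for (;;) {
                int state = ioctl(container_fd, VFIO_EEH_PE_OP, &op);

                if (state < 0)
                        return -errno;
                if (state == VFIO_EEH_PE_STATE_NORMAL)
                        return 0;
                usleep(10 * 1000);
        }
}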
arch/powerpc/platforms/pseries/eeh_pseries.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 1893f66371fa..b12ef382fec7 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -580,8 +580,10 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
switch(rets[0]) {
case 0:
- result = EEH_STATE_MMIO_ACTIVE |
- EEH_STATE_DMA_ACTIVE;
+ result = EEH_STATE_MMIO_ACTIVE |
+ EEH_STATE_DMA_ACTIVE |
+ EEH_STATE_MMIO_ENABLED |
+ EEH_STATE_DMA_ENABLED;
break;
case 1:
result = EEH_STATE_RESET_ACTIVE |
--
2.47.1
On 1/14/25 09:06, Tom Lendacky wrote:
> On 1/14/25 08:44, Kirill A. Shutemov wrote:
>> On Tue, Jan 14, 2025 at 08:33:39AM -0600, Tom Lendacky wrote:
>>> On 1/14/25 01:27, Kirill A. Shutemov wrote:
>>>> On Mon, Jan 13, 2025 at 02:47:56PM -0600, Tom Lendacky wrote:
>>>>> On 1/13/25 07:14, Kirill A. Shutemov wrote:
>>>>>> Currently memremap(MEMREMAP_WB) can produce decrypted/shared mapping:
>>>>>>
>>>>>> memremap(MEMREMAP_WB)
>>>>>> arch_memremap_wb()
>>>>>> ioremap_cache()
>>>>>> __ioremap_caller(.encrytped = false)
>>>>>>
>>>>>> In such cases, the IORES_MAP_ENCRYPTED flag on the memory will determine
>>>>>> if the resulting mapping is encrypted or decrypted.
>>>>>>
>>>>>> Creating a decrypted mapping without explicit request from the caller is
>>>>>> risky:
>>>>>>
>>>>>> - It can inadvertently expose the guest's data and compromise the
>>>>>> guest.
>>>>>>
>>>>>> - Accessing private memory via shared/decrypted mapping on TDX will
>>>>>> either trigger implicit conversion to shared or #VE (depending on
>>>>>> VMM implementation).
>>>>>>
>>>>>> Implicit conversion is destructive: subsequent access to the same
>>>>>> memory via private mapping will trigger a hard-to-debug #VE crash.
>>>>>>
>>>>>> The kernel already provides a way to request decrypted mapping
>>>>>> explicitly via the MEMREMAP_DEC flag.
>>>>>>
>>>>>> Modify memremap(MEMREMAP_WB) to produce encrypted/private mapping by
>>>>>> default unless MEMREMAP_DEC is specified.
>>>>>>
>>>>>> Fix the crash due to #VE on kexec in TDX guests if CONFIG_EISA is enabled.
>>>>>
>>>>> This patch causes my bare-metal system to crash during boot when using
>>>>> mem_encrypt=on:
>>>>>
>>>>> [ 2.392934] efi: memattr: Entry type should be RuntimeServiceCode/Data
>>>>> [ 2.393731] efi: memattr: ! 0x214c42f01f1162a-0xee70ac7bd1a9c629 [type=2028324321|attr=0x6590648fa4209879]
>>>>
>>>> Could you try if this helps?
>>>>
>>>> diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c
>>>> index c38b1a335590..b5051dcb7c1d 100644
>>>> --- a/drivers/firmware/efi/memattr.c
>>>> +++ b/drivers/firmware/efi/memattr.c
>>>> @@ -160,7 +160,7 @@ int __init efi_memattr_apply_permissions(struct mm_struct *mm,
>>>> if (WARN_ON(!efi_enabled(EFI_MEMMAP)))
>>>> return 0;
>>>>
>>>> - tbl = memremap(efi_mem_attr_table, tbl_size, MEMREMAP_WB);
>>>> + tbl = memremap(efi_mem_attr_table, tbl_size, MEMREMAP_WB | MEMREMAP_DEC);
>>>
>>> Well that would work for SME where EFI tables/data are not encrypted,
>>> but will break for SEV where EFI tables/data are encrypted.
>>
>> Hm. Why would it break for SEV? It brings the situation back to what it
>> was before the patch.
>
> Ah, true. I can try it and see how much further SME gets. Hopefully it
> doesn't turn into a whack-a-mole thing.
Unfortunately, it is turning into a whack-a-mole thing.
But it looks the following works for SME:
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3c36f3f5e688..ff3cd5fc8508 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -505,7 +505,7 @@ EXPORT_SYMBOL(iounmap);
void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags)
{
- if (flags & MEMREMAP_DEC)
+ if (flags & MEMREMAP_DEC || cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
return (void __force *)ioremap_cache(phys_addr, size);
return (void __force *)ioremap_encrypted(phys_addr, size);
I haven't had a chance to test the series on SEV, yet.
Thanks,
Tom
>
> Thanks,
> Tom
>
>>
>> Note that that __ioremap_caller() would still check io_desc.flags before
>> mapping it as decrypted.
>>
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 9322d1915f9d976ee48c09d800fbd5169bc2ddcc
Gitweb: https://git.kernel.org/tip/9322d1915f9d976ee48c09d800fbd5169bc2ddcc
Author: Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
AuthorDate: Sun, 15 Dec 2024 12:39:45 +09:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Wed, 15 Jan 2025 10:38:43 +01:00
irqchip: Plug a OF node reference leak in platform_irqchip_probe()
platform_irqchip_probe() leaks an OF node when irq_init_cb() fails. Fix it
by declaring par_np with the __free(device_node) cleanup construct.
This bug was found by an experimental static analysis tool that I am
developing.
Fixes: f8410e626569 ("irqchip: Add IRQCHIP_PLATFORM_DRIVER_BEGIN/END and IRQCHIP_MATCH helper macros")
Signed-off-by: Joe Hattori <joe(a)pf.is.s.u-tokyo.ac.jp>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20241215033945.3414223-1-joe@pf.is.s.u-tokyo.ac…
---
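For context, the scope-based cleanup used in this fix works as in the
following generic sketch (not part of the patch): the __free(device_node)
annotation arranges for of_node_put() to run automatically when the variable
goes out of scope, on every return path.

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/of.h>

static int example_parent_user(struct device_node *np)
{
        struct device_node *parent __free(device_node) = of_get_parent(np);

        if (!parent)
                return -ENODEV;

        /* ... use parent; no explicit of_node_put() on any path ... */
        return 0;
}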
drivers/irqchip/irqchip.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c
index 1eeb0d0..0ee7b6b 100644
--- a/drivers/irqchip/irqchip.c
+++ b/drivers/irqchip/irqchip.c
@@ -35,11 +35,10 @@ void __init irqchip_init(void)
int platform_irqchip_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
- struct device_node *par_np = of_irq_find_parent(np);
+ struct device_node *par_np __free(device_node) = of_irq_find_parent(np);
of_irq_init_cb_t irq_init_cb = of_device_get_match_data(&pdev->dev);
if (!irq_init_cb) {
- of_node_put(par_np);
return -EINVAL;
}
@@ -55,7 +54,6 @@ int platform_irqchip_probe(struct platform_device *pdev)
* interrupt controller can check for specific domains as necessary.
*/
if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY)) {
- of_node_put(par_np);
return -EPROBE_DEFER;
}
Fix a possible use-after-free (UAF) in driver_override_show() in
drivers/bus/fsl-mc/fsl-mc-bus.c.
The driver_override_show() function is part of DEVICE_ATTR_RW, which
includes both driver_override_show() and driver_override_store().
These functions can be executed concurrently in sysfs.
The driver_override_store() function uses driver_set_override() to
update the driver_override value, and driver_set_override() internally
locks the device (device_lock(dev)). If driver_override_show() reads
cdx_dev->driver_override without locking, it could potentially access
a freed pointer if driver_override_store() frees the string
concurrently. This could lead to printing a kernel address, which is a
security risk since DEVICE_ATTR can be read by all users.
Additionally, a similar pattern is used in drivers/amba/bus.c, as well
as many other bus drivers, where device_lock() is taken in the show
function, and it has been working without issues.
This potential bug was detected by our experimental static analysis tool,
which analyzes locking APIs and paired functions to identify data races
and atomicity violations.
Fixes: 1f86a00c1159 ("bus/fsl-mc: add support for 'driver_override' in the mc-bus")
Cc: stable(a)vger.kernel.org
Signed-off-by: Qiu-ji Chen <chenqiuji666(a)gmail.com>
---
V2:
Revised the description to reduce the confusion it caused.
---
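For context, the store side that races with this show callback looks roughly
like the sketch below (a simplification of the existing code, not part of
this patch). driver_set_override() takes device_lock(dev) internally and
kfree()s the previous string, which is why the reader must hold the same
lock.

static ssize_t driver_override_store(struct device *dev,
                                     struct device_attribute *attr,
                                     const char *buf, size_t count)
{
        struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
        int ret;

        /* Frees the old mc_dev->driver_override under device_lock(dev). */
        ret = driver_set_override(dev, &mc_dev->driver_override, buf, count);
        if (ret)
                return ret;

        return count;
}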
drivers/bus/fsl-mc/fsl-mc-bus.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 930d8a3ba722..62a9da88b4c9 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -201,8 +201,12 @@ static ssize_t driver_override_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
+ ssize_t len;
- return snprintf(buf, PAGE_SIZE, "%s\n", mc_dev->driver_override);
+ device_lock(dev);
+ len = snprintf(buf, PAGE_SIZE, "%s\n", mc_dev->driver_override);
+ device_unlock(dev);
+ return len;
}
static DEVICE_ATTR_RW(driver_override);
--
2.34.1
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: de31b3cd706347044e1a57d68c3a683d58e8cca4
Gitweb: https://git.kernel.org/tip/de31b3cd706347044e1a57d68c3a683d58e8cca4
Author: Xin Li (Intel) <xin(a)zytor.com>
AuthorDate: Fri, 10 Jan 2025 09:46:39 -08:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Tue, 14 Jan 2025 14:16:36 -08:00
x86/fred: Fix the FRED RSP0 MSR out of sync with its per-CPU cache
The FRED RSP0 MSR is only used for delivering events when running
userspace. Linux leverages this property to reduce expensive MSR
writes and optimize context switches. The kernel only writes the
MSR when about to run userspace *and* when the MSR has actually
changed since the last time userspace ran.
This optimization is implemented by maintaining a per-CPU cache of
FRED RSP0 and then checking that against the value for the top of
current task stack before running userspace.
However cpu_init_fred_exceptions() writes the MSR without updating
the per-CPU cache. This means that the kernel might return to
userspace with MSR_IA32_FRED_RSP0==0 when it needed to point to the
top of current task stack. This would induce a double fault (#DF),
which is bad.
A context switch after cpu_init_fred_exceptions() can paper over
the issue since it updates the cached value. That evidently
happens most of the time explaining how this bug got through.
Fix the bug by resynchronizing the FRED RSP0 MSR with its
per-CPU cache in cpu_init_fred_exceptions().
Fixes: fe85ee391966 ("x86/entry: Set FRED RSP0 on return to userspace instead of context switch")
Signed-off-by: Xin Li (Intel) <xin(a)zytor.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Acked-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250110174639.1250829-1-xin%40zytor.com
---
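Stated as a hypothetical helper (a sketch of the invariant, not the actual
patch, which instead seeds the MSR from the existing cache): any write of
MSR_IA32_FRED_RSP0 must keep the per-CPU cache that elides redundant writes
in sync.

#include <asm/msr.h>
#include <linux/percpu.h>

/* Assumed visible here, as it is in arch/x86/kernel/fred.c. */
DECLARE_PER_CPU(unsigned long, fred_rsp0);

static inline void fred_write_rsp0_synced(unsigned long rsp0)
{
        __this_cpu_write(fred_rsp0, rsp0);
        wrmsrl(MSR_IA32_FRED_RSP0, rsp0);
}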
arch/x86/kernel/fred.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
index 8d32c3f..5e2cd10 100644
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -50,7 +50,13 @@ void cpu_init_fred_exceptions(void)
FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
wrmsrl(MSR_IA32_FRED_STKLVLS, 0);
- wrmsrl(MSR_IA32_FRED_RSP0, 0);
+
+ /*
+ * After a CPU offline/online cycle, the FRED RSP0 MSR should be
+ * resynchronized with its per-CPU cache.
+ */
+ wrmsrl(MSR_IA32_FRED_RSP0, __this_cpu_read(fred_rsp0));
+
wrmsrl(MSR_IA32_FRED_RSP1, 0);
wrmsrl(MSR_IA32_FRED_RSP2, 0);
wrmsrl(MSR_IA32_FRED_RSP3, 0);
The following call-chain leads to misuse of spinlock_irq
when spinlock_irqsave was held.
irq_set_vcpu_affinity
-> irq_get_desc_lock (spinlock_irqsave)
-> its_irq_set_vcpu_affinity
-> guard(raw_spin_lock_irq) <--- this enables interrupts
-> irq_put_desc_unlock // <--- WARN IRQs enabled
Fix the issue by using guard(raw_spinlock), since the function is
already called with interrupts disabled (irqsave), and plain raw_spin_lock
was used before commit
b97e8a2f7130 ("irqchip/gic-v3-its: Fix potential race condition in its_vlpi_prop_update()")
introduced the guard.
This was discovered through lock debugging, and the corresponding
log is as follows:
raw_local_irq_restore() called with IRQs enabled
WARNING: CPU: 38 PID: 444 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x2c/0x38
Call trace:
warn_bogus_irq_restore+0x2c/0x38
_raw_spin_unlock_irqrestore+0x68/0x88
__irq_put_desc_unlock+0x1c/0x48
irq_set_vcpu_affinity+0x74/0xc0
its_map_vlpi+0x44/0x88
kvm_vgic_v4_set_forwarding+0x148/0x230
kvm_arch_irq_bypass_add_producer+0x20/0x28
__connect+0x98/0xb8
irq_bypass_register_consumer+0x150/0x178
kvm_irqfd+0x6dc/0x744
kvm_vm_ioctl+0xe44/0x16b0
Fixes: b97e8a2f7130 ("irqchip/gic-v3-its: Fix potential race condition in its_vlpi_prop_update()")
Signed-off-by: Tomas Krcka <krckatom(a)amazon.de>
Reviewed-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/irqchip/irq-gic-v3-its.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 92244cfa0464..8c3ec5734f1e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2045,7 +2045,7 @@ static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
if (!is_v4(its_dev->its))
return -EINVAL;
- guard(raw_spinlock_irq)(&its_dev->event_map.vlpi_lock);
+ guard(raw_spinlock)(&its_dev->event_map.vlpi_lock);
/* Unmap request? */
if (!info)
--
2.40.1
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 0d62a49ab55c99e8deb4593b8d9f923de1ab5c18
Gitweb: https://git.kernel.org/tip/0d62a49ab55c99e8deb4593b8d9f923de1ab5c18
Author: Yogesh Lal <quic_ylal(a)quicinc.com>
AuthorDate: Fri, 20 Dec 2024 15:09:07 +05:30
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Wed, 15 Jan 2025 09:42:44 +01:00
irqchip/gic-v3: Handle CPU_PM_ENTER_FAILED correctly
When a CPU attempts to enter low power mode, it disables the redistributor
and Group 1 interrupts and reinitializes the system registers upon wakeup.
If the transition into low power mode fails, then the CPU_PM framework
invokes the PM notifier callback with CPU_PM_ENTER_FAILED to allow the
drivers to undo the state changes.
The GIC V3 driver ignores CPU_PM_ENTER_FAILED, which leaves the GIC in
disabled state.
Handle CPU_PM_ENTER_FAILED in the same way as CPU_PM_EXIT to restore normal
operation.
[ tglx: Massage change log, add Fixes tag ]
Fixes: 3708d52fc6bb ("irqchip: gic-v3: Implement CPU PM notifier")
Signed-off-by: Yogesh Lal <quic_ylal(a)quicinc.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20241220093907.2747601-1-quic_ylal@quicinc.com
---
drivers/irqchip/irq-gic-v3.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 79d8cc8..76dce0a 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1522,7 +1522,7 @@ static int gic_retrigger(struct irq_data *data)
static int gic_cpu_pm_notifier(struct notifier_block *self,
unsigned long cmd, void *v)
{
- if (cmd == CPU_PM_EXIT) {
+ if (cmd == CPU_PM_EXIT || cmd == CPU_PM_ENTER_FAILED) {
if (gic_dist_security_disabled())
gic_enable_redist(true);
gic_cpu_sys_reg_enable();
There is a data race between the functions driver_override_show() and
driver_override_store(). In the driver_override_store() function, the
assignment to ret calls driver_set_override(), which frees the old value
while writing the new value to dev. If a race occurs, it may cause a
use-after-free (UAF) error in driver_override_show().
To fix this issue, adopt logic similar to the driver_override_show()
function in vmbus_drv.c: hold the device lock while reading so that the
driver_override string cannot be freed concurrently.
This possible bug is found by an experimental static analysis tool
developed by our team. This tool analyzes the locking APIs to extract
function pairs that can be concurrently executed, and then analyzes the
instructions in the paired functions to identify possible concurrency bugs
including data races and atomicity violations.
Fixes: 48a6c7bced2a ("cdx: add device attributes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Qiu-ji Chen <chenqiuji666(a)gmail.com>
---
V2:
Modified the title and description.
Removed the changes to cdx_bus_match().
---
drivers/cdx/cdx.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/cdx/cdx.c b/drivers/cdx/cdx.c
index 07371cb653d3..4af1901c9d52 100644
--- a/drivers/cdx/cdx.c
+++ b/drivers/cdx/cdx.c
@@ -470,8 +470,12 @@ static ssize_t driver_override_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cdx_device *cdx_dev = to_cdx_device(dev);
+ ssize_t len;
- return sysfs_emit(buf, "%s\n", cdx_dev->driver_override);
+ device_lock(dev);
+ len = sysfs_emit(buf, "%s\n", cdx_dev->driver_override);
+ device_unlock(dev);
+ return len;
}
static DEVICE_ATTR_RW(driver_override);
--
2.34.1
From: Cheng Ming Lin <chengminglin(a)mxic.com.tw>
The default dummy cycle count for Macronix SPI NOR flash in Octal Output
Read Mode (1-1-8) is 20.
Currently, the dummy buswidth is set according to the address bus width.
In the 1-1-8 mode, this means the dummy buswidth is 1. When converting
dummy cycles to bytes, this results in 20 x 1 / 8 = 2 bytes, causing the
host to read data 4 cycles too early.
Since the protocol data buswidth is always greater than or equal to the
address buswidth, setting the dummy buswidth to match the data buswidth
increases the likelihood that the dummy cycle-to-byte conversion will be
divisible, preventing the host from reading data prematurely.
Fixes: 0e30f47232ab5 ("mtd: spi-nor: add support for DTR protocol")
Cc: stable(a)vger.kernel.org
Reviewed-by: Pratyush Yadav <pratyush(a)kernel.org>
Signed-off-by: Cheng Ming Lin <chengminglin(a)mxic.com.tw>
---
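To illustrate the arithmetic (a standalone sketch, not kernel code): the core
stores dummy cycles as bytes, roughly nbytes = cycles * buswidth / 8, and the
controller later converts those bytes back into clock cycles on the dummy
bus.

#include <stdio.h>

static unsigned int dummy_nbytes(unsigned int cycles, unsigned int buswidth)
{
        return cycles * buswidth / 8;
}

static unsigned int dummy_cycles(unsigned int nbytes, unsigned int buswidth)
{
        return nbytes * 8 / buswidth;
}

int main(void)
{
        /* 1-1-8 Octal Output Read with 20 dummy cycles */
        printf("buswidth 1: %u bytes -> %u cycles (4 cycles short)\n",
               dummy_nbytes(20, 1), dummy_cycles(dummy_nbytes(20, 1), 1));
        printf("buswidth 8: %u bytes -> %u cycles (exact)\n",
               dummy_nbytes(20, 8), dummy_cycles(dummy_nbytes(20, 8), 8));
        return 0;
}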
drivers/mtd/spi-nor/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index f9c189ed7353..c7aceaa8a43f 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -89,7 +89,7 @@ void spi_nor_spimem_setup_op(const struct spi_nor *nor,
op->addr.buswidth = spi_nor_get_protocol_addr_nbits(proto);
if (op->dummy.nbytes)
- op->dummy.buswidth = spi_nor_get_protocol_addr_nbits(proto);
+ op->dummy.buswidth = spi_nor_get_protocol_data_nbits(proto);
if (op->data.nbytes)
op->data.buswidth = spi_nor_get_protocol_data_nbits(proto);
--
2.25.1
From: Darrick J. Wong <djwong(a)kernel.org>
I received a report from the release engineering side of the house that
xfs_scrub without the -n flag (aka fix it mode) would try to fix a
broken filesystem even on a kernel that doesn't have online repair built
into it:
# xfs_scrub -dTvn /mnt/test
EXPERIMENTAL xfs_scrub program in use! Use at your own risk!
Phase 1: Find filesystem geometry.
/mnt/test: using 1 threads to scrub.
Phase 1: Memory used: 132k/0k (108k/25k), time: 0.00/ 0.00/ 0.00s
<snip>
Phase 4: Repair filesystem.
<snip>
Info: /mnt/test/some/victimdir directory entries: Attempting repair. (repair.c line 351)
Corruption: /mnt/test/some/victimdir directory entries: Repair unsuccessful; offline repair required. (repair.c line 204)
Source: https://blogs.oracle.com/linux/post/xfs-online-filesystem-repair
It is strange that xfs_scrub doesn't refuse to run, because the kernel
is supposed to return EOPNOTSUPP if we actually needed to run a repair,
and xfs_io's repair subcommand will perror that. And yet:
# xfs_io -x -c 'repair probe' /mnt/test
#
The first problem is commit dcb660f9222fd9 (4.15) which should have had
xchk_probe set the CORRUPT OFLAG so that any of the repair machinery
will get called at all.
It turns out that some refactoring that happened in the 6.6-6.8 era
broke the operation of this corner case. What we *really* want to
happen is that all the predicates that would steer xfs_scrub_metadata()
towards calling xrep_attempt() should function the same way that they do
when repair is compiled in; and then xrep_attempt gets to return the
fatal EOPNOTSUPP error code that causes the probe to fail.
Instead, commit 8336a64eb75cba (6.6) started the failwhale swimming by
hoisting OFLAG checking logic into a helper whose non-repair stub always
returns false, causing scrub to return "repair not needed" when in fact
the repair is not supported. Prior to that commit, the oflag checking
that was open-coded in scrub.c worked correctly.
Similarly, in commit 4bdfd7d15747b1 (6.8) we hoisted the IFLAG_REPAIR
and ALREADY_FIXED logic into a helper whose non-repair stub always
returns false, so we never enter the if test body that would have called
xrep_attempt, let alone fail to decode the OFLAGs correctly.
The final insult (yes, we're doing The Naked Gun now) is commit
48a72f60861f79 (6.8) in which we hoisted the "are we going to try a
repair?" predicate into yet another function with a non-repair stub
always returns false.
So. Fix xchk_probe to trigger xrep_probe, then fix all the other helper
predicates in scrub.h so that we always point to xrep_attempt, even if
online repair is disabled. Commit 48a72 is tagged here because the
scrub code prior to LTS 6.12 is incomplete and not worth patching.
Reported-by: David Flynn <david.flynn(a)oracle.com>
Cc: <stable(a)vger.kernel.org> # v6.8
Fixes: 48a72f60861f79 ("xfs: don't complain about unfixed metadata when repairs were injected")
Signed-off-by: "Darrick J. Wong" <djwong(a)kernel.org>
---
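For reference, the "repair probe" check that should now fail on
CONFIG_XFS_ONLINE_REPAIR=n kernels amounts to roughly the following from
userspace (a sketch built on the scrub ioctl ABI; the header path may differ
between xfsprogs installs):

#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>    /* XFS_IOC_SCRUB_METADATA, struct xfs_scrub_metadata */

/*
 * Returns 0 if online repair is usable on the filesystem behind @fd,
 * -EOPNOTSUPP if repair is not built into the running kernel.
 */
static int probe_online_repair(int fd)
{
        struct xfs_scrub_metadata sm;

        memset(&sm, 0, sizeof(sm));
        sm.sm_type  = XFS_SCRUB_TYPE_PROBE;
        sm.sm_flags = XFS_SCRUB_IFLAG_REPAIR;

        if (ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm) < 0)
                return -errno;
        return 0;
}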
fs/xfs/scrub/common.h | 5 -----
fs/xfs/scrub/repair.h | 11 ++++++++++-
fs/xfs/scrub/scrub.c | 9 +++++++++
3 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index eecb6d54c0f953..53a6b34ce54271 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -212,7 +212,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
bool xchk_dir_looks_zapped(struct xfs_inode *dp);
bool xchk_pptr_looks_zapped(struct xfs_inode *ip);
-#ifdef CONFIG_XFS_ONLINE_REPAIR
/* Decide if a repair is required. */
static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
{
@@ -232,10 +231,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc)
return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
!(sc->flags & XREP_ALREADY_FIXED);
}
-#else
-# define xchk_needs_repair(sc) (false)
-# define xchk_could_repair(sc) (false)
-#endif /* CONFIG_XFS_ONLINE_REPAIR */
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index b649da1a93eb8c..b3b1fe62814e7b 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -173,7 +173,16 @@ bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
#else
#define xrep_ino_dqattach(sc) (0)
-#define xrep_will_attempt(sc) (false)
+
+/*
+ * When online repair is not built into the kernel, we still want to attempt
+ * the repair so that the stub xrep_attempt below will return EOPNOTSUPP.
+ */
+static inline bool xrep_will_attempt(const struct xfs_scrub *sc)
+{
+ return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+ xchk_needs_repair(sc->sm);
+}
static inline int
xrep_attempt(
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 950f5a58dcd967..09468f50781b24 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -149,6 +149,15 @@ xchk_probe(
if (xchk_should_terminate(sc, &error))
return error;
+ /*
+ * If the caller is probing to see if repair works, set the CORRUPT
+ * flag (without any of the usual tracing/logging) to force us into
+ * the repair codepaths. If repair is compiled into the kernel, we'll
+ * call xrep_probe and simulate a repair; otherwise, the repair
+ * codepaths return EOPNOTSUPP.
+ */
+ if (xchk_could_repair(sc))
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
return 0;
}
Currently, DWC3 driver attempts to acquire the USB power supply only
once during the probe. If the USB power supply is not ready at that
time, the driver simply ignores the failure and continues the probe,
leaving the gadget vbus_draw callback permanently non-functional.
Address this problem by delaying the dwc3 driver initialization until
the USB power supply is registered.
Fixes: 6f0764b5adea ("usb: dwc3: add a power supply for current control")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kyle Tso <kyletso(a)google.com>
---
Note: This is a follow-up of https://lore.kernel.org/all/20240804084612.2561230-1-kyletso@google.com/
---
drivers/usb/dwc3/core.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 7578c5133568..1550c39e792a 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1669,7 +1669,7 @@ static void dwc3_get_software_properties(struct dwc3 *dwc)
}
}
-static void dwc3_get_properties(struct dwc3 *dwc)
+static int dwc3_get_properties(struct dwc3 *dwc)
{
struct device *dev = dwc->dev;
u8 lpm_nyet_threshold;
@@ -1724,7 +1724,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
if (!dwc->usb_psy)
- dev_err(dev, "couldn't get usb power supply\n");
+ return dev_err_probe(dev, -EPROBE_DEFER, "couldn't get usb power supply\n");
}
dwc->has_lpm_erratum = device_property_read_bool(dev,
@@ -1847,6 +1847,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
dwc->imod_interval = 0;
dwc->tx_fifo_resize_max_num = tx_fifo_resize_max_num;
+
+ return 0;
}
/* check whether the core supports IMOD */
@@ -2181,7 +2183,9 @@ static int dwc3_probe(struct platform_device *pdev)
dwc->regs = regs;
dwc->regs_size = resource_size(&dwc_res);
- dwc3_get_properties(dwc);
+ ret = dwc3_get_properties(dwc);
+ if (ret)
+ return ret;
dwc3_get_software_properties(dwc);
--
2.47.1.688.g23fc6f90ad-goog
It is sometimes observed, when tethering is used over NCM with Windows 11
as the host, that the gadget_giveback has one byte appended at
the end of a proper NTB. When the NTB is parsed, the unwrap call looks for
any leftover bytes in the SKB provided by u_ether, and if there are any pending
bytes, it treats them as a separate NTB and parses it. But in case the
second NTB (as per unwrap call) is faulty/corrupt, all the datagrams that
were parsed properly in the first NTB and saved in rx_list are dropped.
Adding a few custom traces showed the following:
[002] d..1 7828.532866: dwc3_gadget_giveback: ep1out:
req 000000003868811a length 1025/16384 zsI ==> 0
[002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb toprocess: 1025
[002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342
[002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb seq: 0xce67
[002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x400
[002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb ndp_len: 0x10
[002] d..1 7828.532869: ncm_unwrap_ntb: K: Parsed NTB with 1 frames
In this case, the giveback is of 1025 bytes and the block length is 1024.
The remaining 1 byte (which is 0x00) won't be parsed, resulting in all
datagrams in rx_list being dropped.
The same is the case with packets of size 2048:
[002] d..1 7828.557948: dwc3_gadget_giveback: ep1out:
req 0000000011dfd96e length 2049/16384 zsI ==> 0
[002] d..1 7828.557949: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342
[002] d..1 7828.557950: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x800
A LeCroy trace shows one extra byte coming in, confirming that the byte
comes from the PC:
Transfer 2959 - Bytes Transferred(1025) Timestamp((18.524 843 590)
- Transaction 8391 - Data(1025 bytes) Timestamp(18.524 843 590)
--- Packet 4063861
Data(1024 bytes)
Duration(2.117us) Idle(14.700ns) Timestamp(18.524 843 590)
--- Packet 4063863
Data(1 byte)
Duration(66.160ns) Time(282.000ns) Timestamp(18.524 845 722)
According to the Windows driver, no ZLP is needed if wBlockLength is
non-zero, because the non-zero wBlockLength has already told the function
side the size of the transfer to expect. However, there are in-market NCM
devices that rely on a ZLP as long as wBlockLength is a multiple of
wMaxPacketSize. To deal with such devices, the Windows driver pads an
extra 0 at the end so that the transfer is no longer a multiple of
wMaxPacketSize, as illustrated in the sketch below.
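For illustration only, a minimal sketch of the assumed host-side padding
decision; this is not the actual Windows driver code, and the helper name
and parameters are hypothetical:

static unsigned int host_pad_len(unsigned int block_len,
                                 unsigned int max_packet)
{
        /*
         * e.g. block_len = 1024, wMaxPacketSize = 512: the transfer would
         * end on a packet boundary, so one zero byte is appended and 1025
         * bytes arrive at the gadget instead of a ZLP.
         */
        return (block_len % max_packet == 0) ? 1 : 0;
}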
Cc: <stable(a)vger.kernel.org>
Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added")
Signed-off-by: Krishna Kurapati <quic_kriskura(a)quicinc.com>
---
Link to v2:
https://lore.kernel.org/all/20240131150332.1326523-1-quic_kriskura@quicinc.…
Changes in v2:
Added check to see if the padded byte is 0x00.
Changes in v3:
Removed wMaxPacketSize check from v2.
drivers/usb/gadget/function/f_ncm.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index ca5d5f564998..e2a059cfda2c 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1338,7 +1338,15 @@ static int ncm_unwrap_ntb(struct gether *port,
"Parsed NTB with %d frames\n", dgram_counter);
to_process -= block_len;
- if (to_process != 0) {
+
+ /*
+ * Windows NCM driver avoids USB ZLPs by adding a 1-byte
+ * zero pad as needed.
+ */
+ if (to_process == 1 &&
+ (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) {
+ to_process--;
+ } else if (to_process > 0) {
ntb_ptr = (unsigned char *)(ntb_ptr + block_len);
goto parse_ntb;
}
--
2.34.1
Last week, Jakub reported [1] that the MPTCP Connect selftest was
unstable. It looked like this started after the introduction of some fixes
[2]. After analysis from Paolo, it turned out those patches revealed
existing bugs, which should be fixed by the following patches.
- Patch 1: Make sure ACK are sent when MPTCP-level window re-opens. In
some corner cases, the other peer was not notified when more data
could be sent. A fix for v5.11, but depending on a feature introduced
in v5.19.
- Patch 2: Fix spurious wake-up under memory pressure. In this
situation, userspace could be woken up to read data that is not there
yet. A fix for v6.7.
- Patch 3: Fix a false positive error when running the MPTCP Connect
selftest with the "disconnect" cases. The userspace could disconnect
the socket too soon, which would reset (MP_FASTCLOSE) the connection,
interpreted as an error by the test. A fix for v5.17.
Link: https://lore.kernel.org/20250107131845.5e5de3c5@kernel.org [1]
Link: https://lore.kernel.org/20241230-net-mptcp-rbuf-fixes-v1-0-8608af434ceb@ker… [2]
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Paolo Abeni (3):
mptcp: be sure to send ack when mptcp-level window re-opens
mptcp: fix spurious wake-up on under memory pressure
selftests: mptcp: avoid spurious errors on disconnect
net/mptcp/options.c | 6 ++--
net/mptcp/protocol.h | 9 +++--
tools/testing/selftests/net/mptcp/mptcp_connect.c | 43 +++++++++++++++++------
3 files changed, 43 insertions(+), 15 deletions(-)
---
base-commit: 76201b5979768500bca362871db66d77cb4c225e
change-id: 20250113-net-mptcp-connect-st-flakes-4af6389808de
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
The orig_a0 is missing from riscv's struct user_regs_struct, and there is
no way to add it without breaking UAPI (see the Link tag below).
Like NT_ARM_SYSTEM_CALL does, add a new regset named NT_RISCV_ORIG_A0 to
access the original a0 register from userspace via the ptrace API.
Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
Signed-off-by: Celeste Liu <uwu(a)coelacanthus.name>
---
Changes in v5:
- Fix wrong usage in selftests.
- Link to v4: https://lore.kernel.org/r/20241226-riscv-new-regset-v4-0-4496a29d0436@coela…
Changes in v4:
- Fix a copy paste error in selftest. (Forget to commit...)
- Link to v3: https://lore.kernel.org/r/20241226-riscv-new-regset-v3-0-f5b96465826b@coela…
Changes in v3:
- Use return 0 directly for readability.
- Fix test for modify a0.
- Add Fixes: tag
- Remove useless Cc: stable.
- Selftest will check both a0 and orig_a0, but depends on the
correctness of PTRACE_GET_SYSCALL_INFO.
- Link to v2: https://lore.kernel.org/r/20241203-riscv-new-regset-v2-0-d37da8c0cba6@coela…
Changes in v2:
- Fix integer width.
- Add selftest.
- Link to v1: https://lore.kernel.org/r/20241201-riscv-new-regset-v1-1-c83c58abcc7b@coela…
---
Celeste Liu (2):
riscv/ptrace: add new regset to access original a0 register
riscv: selftests: Add a ptrace test to verify a0 and orig_a0 access
arch/riscv/kernel/ptrace.c | 32 +++++
include/uapi/linux/elf.h | 1 +
tools/testing/selftests/riscv/abi/.gitignore | 1 +
tools/testing/selftests/riscv/abi/Makefile | 6 +-
tools/testing/selftests/riscv/abi/ptrace.c | 201 +++++++++++++++++++++++++++
5 files changed, 240 insertions(+), 1 deletion(-)
---
base-commit: 0e287d31b62bb53ad81d5e59778384a40f8b6f56
change-id: 20241201-riscv-new-regset-d529b952ad0d
Best regards,
--
Celeste Liu <uwu(a)coelacanthus.name>
Since commit 02fb4f008433 ("clk: clk-loongson2: Fix potential buffer
overflow in flexible-array member access"), clk provider registration
fails.
The count of `clks_num` is shown below:
for (p = data; p->name; p++)
clks_num++;
In fact, `clks_num` is used to size the clk provider's hw array, so it
should be the maximum clock binding id in use plus one (p->id + 1), not
just the number of populated SoC clock entries.
Now we fix it to avoid the following error when trying to register a clk
provider:
[ 13.409595] of_clk_hw_onecell_get: invalid index 17
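As a minimal sketch of the sizing issue (the clock names and ids below are
hypothetical, and max() is the kernel helper from <linux/minmax.h>):
counting entries would size the array at 3 here, while the largest binding
id in use needs 18 slots.

struct clk_entry { const char *name; int id; };

static const struct clk_entry data[] = {
        { "pll0", 0 }, { "gmac", 5 }, { "node", 17 }, { NULL, 0 },
};

static int count_clks(void)
{
        const struct clk_entry *p;
        int clks_num = 0;

        for (p = data; p->name; p++)
                clks_num = max(clks_num, p->id + 1);

        /* 18 slots, so of_clk_hw_onecell_get(..., 17) stays in bounds */
        return clks_num;
}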
Cc: stable(a)vger.kernel.org
Cc: Gustavo A. R. Silva <gustavoars(a)kernel.org>
Fixes: 02fb4f008433 ("clk: clk-loongson2: Fix potential buffer overflow in flexible-array member access")
Signed-off-by: Binbin Zhou <zhoubinbin(a)loongson.cn>
---
drivers/clk/clk-loongson2.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/clk/clk-loongson2.c b/drivers/clk/clk-loongson2.c
index 6bf51d5a49a1..27e632edd484 100644
--- a/drivers/clk/clk-loongson2.c
+++ b/drivers/clk/clk-loongson2.c
@@ -294,7 +294,7 @@ static int loongson2_clk_probe(struct platform_device *pdev)
return -EINVAL;
for (p = data; p->name; p++)
- clks_num++;
+ clks_num = max(clks_num, p->id + 1);
clp = devm_kzalloc(dev, struct_size(clp, clk_data.hws, clks_num),
GFP_KERNEL);
@@ -309,6 +309,9 @@ static int loongson2_clk_probe(struct platform_device *pdev)
clp->clk_data.num = clks_num;
clp->dev = dev;
+ /* Avoid returning NULL for unused id */
+ memset_p((void **)clp->clk_data.hws, ERR_PTR(-ENOENT), clks_num);
+
for (i = 0; i < clks_num; i++) {
p = &data[i];
switch (p->type) {
--
2.43.5
When IORING_REGISTER_CLONE_BUFFERS is used to clone buffers from uring
instance A to uring instance B, where A and B use different MMs for
accounting, the accounting can go wrong:
If uring instance A is closed before uring instance B, the pinned memory
counters for uring instance B will be decremented, even though the pinned
memory was originally accounted through uring instance A; so the MM of
uring instance B can end up with negative locked memory.
Cc: stable(a)vger.kernel.org
Closes: https://lore.kernel.org/r/CAG48ez1zez4bdhmeGLEFxtbFADY4Czn3CV0u9d_TMcbvRA01…
Fixes: 7cc2a6eadcd7 ("io_uring: add IORING_REGISTER_COPY_BUFFERS method")
Signed-off-by: Jann Horn <jannh(a)google.com>
---
To be clear, I think this is a very minor issue, feel free to take your
time landing it.
I put a stable marker on this, but I'm ambivalent about whether this
issue even warrants landing a fix in stable - feel free to remove the
Cc stable marker if you think it's unnecessary.
---
io_uring/rsrc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 077f84684c18a0b3f5e622adb4978b6a00353b2f..caecc18dd5be03054ae46179bc0918887bf609a4 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -931,6 +931,13 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
int i, ret, off, nr;
unsigned int nbufs;
+ /*
+ * Accounting state is shared between the two rings; that only works if
+ * both rings are accounted towards the same counters.
+ */
+ if (ctx->user != src_ctx->user || ctx->mm_account != src_ctx->mm_account)
+ return -EINVAL;
+
/* if offsets are given, must have nr specified too */
if (!arg->nr && (arg->dst_off || arg->src_off))
return -EINVAL;
---
base-commit: c45323b7560ec87c37c729b703c86ee65f136d75
change-id: 20250114-uring-check-accounting-4356f8b91c37
--
Jann Horn <jannh(a)google.com>
From: Zijun Hu <quic_zijuhu(a)quicinc.com>
For WCN6855, board ID specific NVM needs to be downloaded once board ID
is available, but the default NVM is always downloaded currently.
The wrong NVM causes poor RF performance and affects the user experience
on several types of laptops with WCN6855 on the market.
Fix by downloading board ID specific NVM if board ID is available.
Fixes: 095327fede00 ("Bluetooth: hci_qca: Add support for QTI Bluetooth chip wcn6855")
Cc: stable(a)vger.kernel.org # 6.4
Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com>
---
Changes in v3:
- Rework over tip of bluetooth-next tree.
- Remove both Reviewed-by and Tested-by tags.
- Link to v2: https://lore.kernel.org/r/20241116-x13s_wcn6855_fix-v2-1-c08c298d5fbf@quici…
Changes in v2:
- Correct subject and commit message
- Temporarily add nvm fallback logic to speed up backport.
- Add fix/stable tags as suggested by Luiz and Johan
- Link to v1: https://lore.kernel.org/r/20241113-x13s_wcn6855_fix-v1-1-15af0aa2549c@quici…
---
drivers/bluetooth/btqca.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index a6b53d1f23dbd4666b93e10635f5f154f38d80a5..cdf09d9a9ad27c080f27c5fe8d61d76085e1fd2c 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -909,8 +909,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
"qca/msnv%02x.bin", rom_ver);
break;
case QCA_WCN6855:
- snprintf(config.fwname, sizeof(config.fwname),
- "qca/hpnv%02x.bin", rom_ver);
+ qca_read_fw_board_id(hdev, &boardid);
+ qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname),
+ "hpnv", soc_type, ver, rom_ver, boardid);
break;
case QCA_WCN7850:
qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname),
---
base-commit: a723753d039fd9a6c5998340ac65f4d9e2966ba8
change-id: 20250113-wcn6855_fix-036ca2fa5559
Best regards,
--
Zijun Hu <quic_zijuhu(a)quicinc.com>
Commit 9604eea5bd3a ("scsi: st: Add third party poweron reset handling")
in v6.6 added new code to handle the Power On/Reset Unit Attention
(POR UA) sense data. This was in addition to the existing method. When
this Unit Attention is received, the driver blocks attempts to read,
write and some other operations, because the reset may have rewound
the tape. Because of the added code, the initial POR UA also resulted
in blocking operations, including those used to set the driver
options after the device is recognized. Also, reading and writing are
refused, whereas they succeeded before this commit.
Add code to not set pos_unknown (which blocks operations) if the
POR UA is received from the first test_ready() call after the st device
has been created. This restores the behavior before v6.6.
Signed-off-by: Kai Mäkisara <Kai.Makisara(a)kolumbus.fi>
Fixes: 9604eea5bd3a ("scsi: st: Add third party poweron reset handling")
Closes: https://lore.kernel.org/linux-scsi/2201CF73-4795-4D3B-9A79-6EE5215CF58D@kol…
CC: stable(a)vger.kernel.org
---
drivers/scsi/st.c | 6 ++++++
drivers/scsi/st.h | 1 +
2 files changed, 7 insertions(+)
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index e8ef27d7ef61..ebbd50ec0cda 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -1030,6 +1030,11 @@ static int test_ready(struct scsi_tape *STp, int do_wait)
retval = new_session ? CHKRES_NEW_SESSION : CHKRES_READY;
break;
}
+ if (STp->first_tur) {
+ /* Don't set pos_unknown right after device recognition */
+ STp->pos_unknown = 0;
+ STp->first_tur = 0;
+ }
if (SRpnt != NULL)
st_release_request(SRpnt);
@@ -4328,6 +4333,7 @@ static int st_probe(struct device *dev)
blk_queue_rq_timeout(tpnt->device->request_queue, ST_TIMEOUT);
tpnt->long_timeout = ST_LONG_TIMEOUT;
tpnt->try_dio = try_direct_io;
+ tpnt->first_tur = 1;
for (i = 0; i < ST_NBR_MODES; i++) {
STm = &(tpnt->modes[i]);
diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h
index 7a68eaba7e81..1aaaf5369a40 100644
--- a/drivers/scsi/st.h
+++ b/drivers/scsi/st.h
@@ -170,6 +170,7 @@ struct scsi_tape {
unsigned char rew_at_close; /* rewind necessary at close */
unsigned char inited;
unsigned char cleaning_req; /* cleaning requested? */
+ unsigned char first_tur; /* first TEST UNIT READY */
int block_size;
int min_block;
int max_block;
--
2.43.0
Currently the rseq constructor, rseq_init(), assumes that glibc always
has support for the rseq symbols (__rseq_size, for instance). However,
glibc only supports rseq from version 2.35 onwards. As a result, on
systems that run a glibc older than 2.35, the global rseq_size remains
initialized to -1U. When a thread then tries to register for rseq,
get_rseq_min_alloc_size() ends up returning -1U, which is incorrect.
Hence, initialize rseq_size for the cases where glibc doesn't have
support for the rseq symbols.
Cc: stable(a)vger.kernel.org
Fixes: 73a4f5a704a2 ("selftests/rseq: Fix mm_cid test failure")
Signed-off-by: Raghavendra Rao Ananta <rananta(a)google.com>
---
tools/testing/selftests/rseq/rseq.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 5b9772cdf265..9eb5356f25fa 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -142,6 +142,16 @@ unsigned int get_rseq_kernel_feature_size(void)
return ORIG_RSEQ_FEATURE_SIZE;
}
+static void set_default_rseq_size(void)
+{
+ unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size();
+
+ if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE)
+ rseq_size = rseq_kernel_feature_size;
+ else
+ rseq_size = ORIG_RSEQ_ALLOC_SIZE;
+}
+
int rseq_register_current_thread(void)
{
int rc;
@@ -219,12 +229,7 @@ void rseq_init(void)
fallthrough;
case ORIG_RSEQ_ALLOC_SIZE:
{
- unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size();
-
- if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE)
- rseq_size = rseq_kernel_feature_size;
- else
- rseq_size = ORIG_RSEQ_ALLOC_SIZE;
+ set_default_rseq_size();
break;
}
default:
@@ -239,8 +244,10 @@ void rseq_init(void)
rseq_size = 0;
return;
}
+
rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
rseq_flags = 0;
+ set_default_rseq_size();
}
static __attribute__((destructor))
base-commit: 40384c840ea1944d7c5a392e8975ed088ecf0b37
--
2.47.0.338.g60cca15819-goog
[ upstream commit bd2703b42decebdcddf76e277ba76b4c4a142d73 ]
With IORING_SETUP_SQPOLL all requests are created by the SQPOLL task,
which means that req->task should always match sqd->thread. Since
accesses to sqd->thread should be separately protected, use req->task
in io_req_normal_work_add() instead.
Note, in the eyes of io_req_normal_work_add(), the SQPOLL task struct
is always pinned and alive, and sqd->thread can either be the task or
NULL. It's only problematic if the compiler decides to reload the value
after the null check, which is not so likely.
Cc: stable(a)vger.kernel.org
Cc: Bui Quang Minh <minhquangbui99(a)gmail.com>
Reported-by: lizetao <lizetao1(a)huawei.com>
Fixes: 78f9b61bd8e54 ("io_uring: wake SQPOLL task when task_work is added to an empty queue")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/1cbbe72cf32c45a8fee96026463024cd8564a7d7.17365413…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
---
io_uring/io_uring.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9849da128364..21f1bcba2f52 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1244,10 +1244,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
/* SQPOLL doesn't need the task_work added, it'll run it itself */
if (ctx->flags & IORING_SETUP_SQPOLL) {
- struct io_sq_data *sqd = ctx->sq_data;
-
- if (sqd->thread)
- __set_notify_signal(sqd->thread);
+ __set_notify_signal(req->task);
return;
}
--
2.47.1
Currently memremap(MEMREMAP_WB) can produce a decrypted/shared mapping:
memremap(MEMREMAP_WB)
arch_memremap_wb()
ioremap_cache()
__ioremap_caller(.encrypted = false)
In such cases, the IORES_MAP_ENCRYPTED flag on the memory will determine
if the resulting mapping is encrypted or decrypted.
Creating a decrypted mapping without an explicit request from the caller
is risky:
- It can inadvertently expose the guest's data and compromise the
guest.
- Accessing private memory via shared/decrypted mapping on TDX will
either trigger implicit conversion to shared or #VE (depending on
VMM implementation).
Implicit conversion is destructive: subsequent access to the same
memory via private mapping will trigger a hard-to-debug #VE crash.
The kernel already provides a way to request decrypted mapping
explicitly via the MEMREMAP_DEC flag.
Modify memremap(MEMREMAP_WB) to produce an encrypted/private mapping by
default unless MEMREMAP_DEC is specified (see the usage sketch below).
Fix the crash due to #VE on kexec in TDX guests if CONFIG_EISA is enabled.
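As a caller-side usage sketch of the new default (the wrapper functions
are hypothetical; memremap(), MEMREMAP_WB and MEMREMAP_DEC are the
existing interfaces):

#include <linux/io.h>

/* With this change, a plain write-back remap stays encrypted/private in a
 * confidential guest; a shared/decrypted view must be requested explicitly. */
static void *map_private(phys_addr_t phys, size_t size)
{
        return memremap(phys, size, MEMREMAP_WB);
}

static void *map_shared(phys_addr_t phys, size_t size)
{
        return memremap(phys, size, MEMREMAP_WB | MEMREMAP_DEC);
}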
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # 6.11+
Cc: Tom Lendacky <thomas.lendacky(a)amd.com>
Cc: Ashish Kalra <ashish.kalra(a)amd.com>
Cc: "Maciej W. Rozycki" <macro(a)orcam.me.uk>
---
arch/x86/include/asm/io.h | 3 +++
arch/x86/mm/ioremap.c | 8 ++++++++
2 files changed, 11 insertions(+)
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index ed580c7f9d0a..1a0dc2b2bf5b 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -175,6 +175,9 @@ extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, un
extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size);
#define ioremap_encrypted ioremap_encrypted
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags);
+#define arch_memremap_wb arch_memremap_wb
+
/**
* ioremap - map bus memory into CPU space
* @offset: bus address of the memory
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 8d29163568a7..3c36f3f5e688 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -503,6 +503,14 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags)
+{
+ if (flags & MEMREMAP_DEC)
+ return (void __force *)ioremap_cache(phys_addr, size);
+
+ return (void __force *)ioremap_encrypted(phys_addr, size);
+}
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
--
2.45.2
On the SoCFPGA/Sodia board, the mdio bus cannot be probed, so the PHY
cannot be found and the network device does not work.
```
stmmaceth ff702000.ethernet eth0: __stmmac_open: Cannot attach to PHY (error: -19)
```
To probe the mdio bus, add "snps,dwmac-mdio" as the compatible string of
the mdio bus. Also, the PHY on this board is connected at address 4, so
change the PHY node address and reg to 4.
Cc: stable(a)vger.kernel.org # 6.3+
Signed-off-by: Nobuhiro Iwamatsu <iwamatsu(a)nigauri.org>
---
v2: Update commit message from 'ID' to 'address'.
Drop Fixes tag, because that commit is not the cause.
arch/arm/boot/dts/intel/socfpga/socfpga_cyclone5_sodia.dts | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/arm/boot/dts/intel/socfpga/socfpga_cyclone5_sodia.dts b/arch/arm/boot/dts/intel/socfpga/socfpga_cyclone5_sodia.dts
index ce0d6514eeb571..e4794ccb8e413f 100644
--- a/arch/arm/boot/dts/intel/socfpga/socfpga_cyclone5_sodia.dts
+++ b/arch/arm/boot/dts/intel/socfpga/socfpga_cyclone5_sodia.dts
@@ -66,8 +66,10 @@ &gmac1 {
mdio0 {
#address-cells = <1>;
#size-cells = <0>;
- phy0: ethernet-phy@0 {
- reg = <0>;
+ compatible = "snps,dwmac-mdio";
+
+ phy0: ethernet-phy@4 {
+ reg = <4>;
rxd0-skew-ps = <0>;
rxd1-skew-ps = <0>;
rxd2-skew-ps = <0>;
--
2.45.2
Since commit 02fb4f008433 ("clk: clk-loongson2: Fix potential buffer
overflow in flexible-array member access"), clk provider registration
fails.
The count of `clks_num` is shown below:
for (p = data; p->name; p++)
clks_num++;
In fact, `clks_num` is used to size the clk provider's hw array, so it
should be the maximum clock binding id in use plus one (p->id + 1), not
just the number of populated SoC clock entries.
Now we fix it to avoid the following error when trying to register a clk
provider:
[ 13.409595] of_clk_hw_onecell_get: invalid index 17
Cc: stable(a)vger.kernel.org
Cc: Gustavo A. R. Silva <gustavoars(a)kernel.org>
Fixes: 02fb4f008433 ("clk: clk-loongson2: Fix potential buffer overflow in flexible-array member access")
Signed-off-by: Binbin Zhou <zhoubinbin(a)loongson.cn>
---
V2:
- Add Gustavo A. R. Silva to cc list;
- Populate the onecell data with -ENOENT error pointers to avoid
returning NULL, for it is a valid clock.
Link to V1:
https://lore.kernel.org/all/20241225060600.3094154-1-zhoubinbin@loongson.cn/
drivers/clk/clk-loongson2.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/clk/clk-loongson2.c b/drivers/clk/clk-loongson2.c
index 6bf51d5a49a1..9c240a2308f5 100644
--- a/drivers/clk/clk-loongson2.c
+++ b/drivers/clk/clk-loongson2.c
@@ -294,7 +294,7 @@ static int loongson2_clk_probe(struct platform_device *pdev)
return -EINVAL;
for (p = data; p->name; p++)
- clks_num++;
+ clks_num = max(clks_num, p->id + 1);
clp = devm_kzalloc(dev, struct_size(clp, clk_data.hws, clks_num),
GFP_KERNEL);
@@ -309,6 +309,9 @@ static int loongson2_clk_probe(struct platform_device *pdev)
clp->clk_data.num = clks_num;
clp->dev = dev;
+ /* Avoid returning NULL for unused id */
+ memset_p((void **)&clp->clk_data.hws, ERR_PTR(-ENOENT), clks_num);
+
for (i = 0; i < clks_num; i++) {
p = &data[i];
switch (p->type) {
--
2.43.5
Hi, I'm experiencing UBSAN array-index-out-of-bounds errors while using
my Framework 13" AMD laptop with its Mediatek MT7922 wifi adapter
(mt7921e).
It seems to happen only once on boot, and occurs with both kernel
versions 6.12.7 and 6.13-rc4, both compiled from vanilla upstream kernel
sources on Fedora 41 using the kernel.org LLVM toolchain (19.1.6).
I can try some other kernel series if necessary, and also a bisect if I
find a working version, but that may take me a while.
I wasn't sure if I should mark this as a regression, as I'm not sure
which/if there is a working kernel version at this point.
Thanks.
----
[ 17.754417] UBSAN: array-index-out-of-bounds in /data/linux/net/wireless/scan.c:766:2
[ 17.754423] index 0 is out of range for type 'struct ieee80211_channel *[] __counted_by(n_channels)' (aka 'struct ieee80211_channel *[]')
[ 17.754427] CPU: 13 UID: 0 PID: 620 Comm: kworker/u64:10 Tainted: G T 6.13.0-rc4 #9
[ 17.754433] Tainted: [T]=RANDSTRUCT
[ 17.754435] Hardware name: Framework Laptop 13 (AMD Ryzen 7040Series)/FRANMDCP07, BIOS 03.05 03/29/2024
[ 17.754438] Workqueue: events_unbound cfg80211_wiphy_work
[ 17.754446] Call Trace:
[ 17.754449] <TASK>
[ 17.754452] dump_stack_lvl+0x82/0xc0
[ 17.754459] __ubsan_handle_out_of_bounds+0xe7/0x110
[ 17.754464] ? srso_alias_return_thunk+0x5/0xfbef5
[ 17.754470] ? __kmalloc_noprof+0x1a7/0x280
[ 17.754477] cfg80211_scan_6ghz+0x3bb/0xfd0
[ 17.754482] ? srso_alias_return_thunk+0x5/0xfbef5
[ 17.754486] ? try_to_wake_up+0x368/0x4c0
[ 17.754491] ? try_to_wake_up+0x1a9/0x4c0
[ 17.754496] ___cfg80211_scan_done+0xa9/0x1e0
[ 17.754500] cfg80211_wiphy_work+0xb7/0xe0
[ 17.754504] process_scheduled_works+0x205/0x3a0
[ 17.754509] worker_thread+0x24a/0x300
[ 17.754514] ? __cfi_worker_thread+0x10/0x10
[ 17.754519] kthread+0x158/0x180
[ 17.754524] ? __cfi_kthread+0x10/0x10
[ 17.754528] ret_from_fork+0x40/0x50
[ 17.754534] ? __cfi_kthread+0x10/0x10
[ 17.754538] ret_from_fork_asm+0x11/0x30
[ 17.754544] </TASK>
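For context, the warning points at a flexible array annotated with
__counted_by(n_channels) (presumably struct cfg80211_scan_request). Below
is a minimal sketch of the pattern that trips this check, with hypothetical
struct and function names; storing into the array before the counter
covers the index is reported as out of bounds:

struct ieee80211_channel;

struct scan_req_sketch {
        int n_channels;
        struct ieee80211_channel *channels[] __counted_by(n_channels);
};

/* Assumes req was allocated with room for at least one element. */
static void add_first_channel(struct scan_req_sketch *req,
                              struct ieee80211_channel *chan)
{
        req->n_channels = 1;            /* counter must cover the index... */
        req->channels[0] = chan;        /* ...before the element is stored */
}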
On Tue, Jan 14, 2025 at 10:47:33AM +0100, Johan Hovold wrote:
> On Mon, Jan 13, 2025 at 06:00:34PM +0000, Qasim Ijaz wrote:
> > This patch addresses a null-ptr-deref in qt2_process_read_urb() due to
> > an incorrect bounds check in the following:
> >
> > if (newport > serial->num_ports) {
> > dev_err(&port->dev,
> > "%s - port change to invalid port: %i\n",
> > __func__, newport);
> > break;
> > }
> >
> > The condition doesn't account for the valid range of the serial->port
> > buffer, which is from 0 to serial->num_ports - 1. When newport is equal
> > to serial->num_ports, the assignment of "port" in the
> > following code is out-of-bounds and NULL:
> >
> > serial_priv->current_port = newport;
> > port = serial->port[serial_priv->current_port];
> >
> > The fix checks if newport is greater than or equal to serial->num_ports
> > indicating it is out-of-bounds.
> >
> > Reported-by: syzbot <syzbot+506479ebf12fe435d01a(a)syzkaller.appspotmail.com>
> > Closes: https://syzkaller.appspot.com/bug?extid=506479ebf12fe435d01a
> > Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver")
> > Cc: <stable(a)vger.kernel.org> # 3.5
> > Signed-off-by: Qasim Ijaz <qasdev00(a)gmail.com>
> > ---
>
> Thanks for the update. I've applied the patch now after adding Greg's
> Reviewed-by tag (for v2).
>
> For your future contributions, try to remember to include any
> Reviewed-by or Tested-by tags when updating the patch unless the changes
> are non-trivial.
>
> There should typically also be a short change log here under the ---
> line to indicate what changes from previous versions.
>
> It is also encouraged to write the commit message in imperative mood
> (add, change, fix) and to avoid the phrase "this patch". There are some
> more details in
>
> Documentation/process/submitting-patches.rst
>
> Something to keep in mind for the future, but this patch already looks
> really good.
>
> Johan
Hi Johan,
Thanks for reviewing and applying the patch. I appreciate the guidance on patch style and process, and I'll incorporate your suggestions in future submissions.
Best regards,
Qasim
Hi Carlos,
Please pull this branch with changes for xfs.
As usual, I did a test-merge with the main upstream branch as of a few
minutes ago, and didn't see any conflicts. Please let me know if you
encounter any problems.
--D
The following changes since commit 05290bd5c6236b8ad659157edb36bd2d38f46d3e:
xfs: allow inode-based btrees to reserve space in the data device (2024-12-23 13:06:03 -0800)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git tags/realtime-rmap_2024-12-23
for you to fetch changes up to c2358439af374cad47f771797875d0beb8256738:
xfs: enable realtime rmap btree (2024-12-23 13:06:09 -0800)
----------------------------------------------------------------
xfs: realtime reverse-mapping support [v6.2 04/14]
This is the latest revision of a patchset that adds to XFS kernel
support for reverse mapping for the realtime device. This time around
I've fixed some of the bitrot that I've noticed over the past few
months, and most notably have converted rtrmapbt to use the metadata
inode directory feature instead of burning more space in the superblock.
At the beginning of the set are patches to implement storing B+tree
leaves in an inode root, since the realtime rmapbt is rooted in an
inode, unlike the regular rmapbt which is rooted in an AG block.
Prior to this, the only btree that could be rooted in the inode fork
was the block mapping btree; if all the extent records fit in the
inode, format would be switched from 'btree' to 'extents'.
The next few patches enhance the reverse mapping routines to handle
the parts that are specific to rtgroups -- adding the new btree type,
adding a new log intent item type, and wiring up the metadata directory
tree entries.
Finally, implement GETFSMAP with the rtrmapbt and scrub functionality
for the rtrmapbt and rtbitmap and online fsck functionality.
This has been running on the djcloud for months with no problems. Enjoy!
Signed-off-by: "Darrick J. Wong" <djwong(a)kernel.org>
----------------------------------------------------------------
Darrick J. Wong (37):
xfs: add some rtgroup inode helpers
xfs: prepare rmap btree cursor tracepoints for realtime
xfs: simplify the xfs_rmap_{alloc,free}_extent calling conventions
xfs: introduce realtime rmap btree ondisk definitions
xfs: realtime rmap btree transaction reservations
xfs: add realtime rmap btree operations
xfs: prepare rmap functions to deal with rtrmapbt
xfs: add a realtime flag to the rmap update log redo items
xfs: support recovering rmap intent items targetting realtime extents
xfs: pretty print metadata file types in error messages
xfs: support file data forks containing metadata btrees
xfs: add realtime reverse map inode to metadata directory
xfs: add metadata reservations for realtime rmap btrees
xfs: wire up a new metafile type for the realtime rmap
xfs: wire up rmap map and unmap to the realtime rmapbt
xfs: create routine to allocate and initialize a realtime rmap btree inode
xfs: wire up getfsmap to the realtime reverse mapping btree
xfs: check that the rtrmapbt maxlevels doesn't increase when growing fs
xfs: report realtime rmap btree corruption errors to the health system
xfs: allow queued realtime intents to drain before scrubbing
xfs: scrub the realtime rmapbt
xfs: cross-reference realtime bitmap to realtime rmapbt scrubber
xfs: cross-reference the realtime rmapbt
xfs: scan rt rmap when we're doing an intense rmap check of bmbt mappings
xfs: scrub the metadir path of rt rmap btree files
xfs: walk the rt reverse mapping tree when rebuilding rmap
xfs: online repair of realtime file bmaps
xfs: repair inodes that have realtime extents
xfs: repair rmap btree inodes
xfs: online repair of realtime bitmaps for a realtime group
xfs: support repairing metadata btrees rooted in metadir inodes
xfs: online repair of the realtime rmap btree
xfs: create a shadow rmap btree during realtime rmap repair
xfs: hook live realtime rmap operations during a repair operation
xfs: don't shut down the filesystem for media failures beyond end of log
xfs: react to fsdax failure notifications on the rt device
xfs: enable realtime rmap btree
fs/xfs/Makefile | 3 +
fs/xfs/libxfs/xfs_btree.c | 73 +++
fs/xfs/libxfs/xfs_btree.h | 8 +-
fs/xfs/libxfs/xfs_btree_mem.c | 1 +
fs/xfs/libxfs/xfs_btree_staging.c | 1 +
fs/xfs/libxfs/xfs_defer.h | 1 +
fs/xfs/libxfs/xfs_exchmaps.c | 4 +-
fs/xfs/libxfs/xfs_format.h | 28 +-
fs/xfs/libxfs/xfs_fs.h | 7 +-
fs/xfs/libxfs/xfs_health.h | 4 +-
fs/xfs/libxfs/xfs_inode_buf.c | 32 +-
fs/xfs/libxfs/xfs_inode_fork.c | 25 +
fs/xfs/libxfs/xfs_log_format.h | 6 +-
fs/xfs/libxfs/xfs_log_recover.h | 2 +
fs/xfs/libxfs/xfs_metafile.c | 18 +
fs/xfs/libxfs/xfs_metafile.h | 2 +
fs/xfs/libxfs/xfs_ondisk.h | 2 +
fs/xfs/libxfs/xfs_refcount.c | 6 +-
fs/xfs/libxfs/xfs_rmap.c | 171 +++++--
fs/xfs/libxfs/xfs_rmap.h | 12 +-
fs/xfs/libxfs/xfs_rtbitmap.c | 2 +-
fs/xfs/libxfs/xfs_rtbitmap.h | 9 +
fs/xfs/libxfs/xfs_rtgroup.c | 53 +-
fs/xfs/libxfs/xfs_rtgroup.h | 49 +-
fs/xfs/libxfs/xfs_rtrmap_btree.c | 1011 +++++++++++++++++++++++++++++++++++++
fs/xfs/libxfs/xfs_rtrmap_btree.h | 210 ++++++++
fs/xfs/libxfs/xfs_sb.c | 6 +
fs/xfs/libxfs/xfs_shared.h | 14 +
fs/xfs/libxfs/xfs_trans_resv.c | 12 +-
fs/xfs/libxfs/xfs_trans_space.h | 13 +
fs/xfs/scrub/alloc_repair.c | 5 +-
fs/xfs/scrub/bmap.c | 108 +++-
fs/xfs/scrub/bmap_repair.c | 129 ++++-
fs/xfs/scrub/common.c | 160 +++++-
fs/xfs/scrub/common.h | 23 +-
fs/xfs/scrub/health.c | 1 +
fs/xfs/scrub/inode.c | 10 +-
fs/xfs/scrub/inode_repair.c | 136 ++++-
fs/xfs/scrub/metapath.c | 3 +
fs/xfs/scrub/newbt.c | 42 ++
fs/xfs/scrub/newbt.h | 1 +
fs/xfs/scrub/reap.c | 41 ++
fs/xfs/scrub/reap.h | 2 +
fs/xfs/scrub/repair.c | 191 +++++++
fs/xfs/scrub/repair.h | 17 +
fs/xfs/scrub/rgsuper.c | 6 +-
fs/xfs/scrub/rmap_repair.c | 84 ++-
fs/xfs/scrub/rtbitmap.c | 75 ++-
fs/xfs/scrub/rtbitmap.h | 55 ++
fs/xfs/scrub/rtbitmap_repair.c | 429 +++++++++++++++-
fs/xfs/scrub/rtrmap.c | 271 ++++++++++
fs/xfs/scrub/rtrmap_repair.c | 903 +++++++++++++++++++++++++++++++++
fs/xfs/scrub/rtsummary.c | 17 +-
fs/xfs/scrub/rtsummary_repair.c | 3 +-
fs/xfs/scrub/scrub.c | 11 +-
fs/xfs/scrub/scrub.h | 14 +
fs/xfs/scrub/stats.c | 1 +
fs/xfs/scrub/tempexch.h | 2 +-
fs/xfs/scrub/tempfile.c | 20 +-
fs/xfs/scrub/trace.c | 1 +
fs/xfs/scrub/trace.h | 228 ++++++++-
fs/xfs/xfs_buf.c | 1 +
fs/xfs/xfs_buf_item_recover.c | 4 +
fs/xfs/xfs_drain.c | 20 +-
fs/xfs/xfs_drain.h | 7 +-
fs/xfs/xfs_fsmap.c | 174 ++++++-
fs/xfs/xfs_fsops.c | 11 +
fs/xfs/xfs_health.c | 1 +
fs/xfs/xfs_inode.c | 19 +-
fs/xfs/xfs_inode_item.c | 2 +
fs/xfs/xfs_inode_item_recover.c | 44 +-
fs/xfs/xfs_log_recover.c | 2 +
fs/xfs/xfs_mount.c | 5 +-
fs/xfs/xfs_mount.h | 9 +
fs/xfs/xfs_notify_failure.c | 230 +++++----
fs/xfs/xfs_notify_failure.h | 11 +
fs/xfs/xfs_qm.c | 8 +-
fs/xfs/xfs_rmap_item.c | 216 +++++++-
fs/xfs/xfs_rtalloc.c | 82 ++-
fs/xfs/xfs_rtalloc.h | 10 +
fs/xfs/xfs_stats.c | 4 +-
fs/xfs/xfs_stats.h | 2 +
fs/xfs/xfs_super.c | 6 -
fs/xfs/xfs_super.h | 1 -
fs/xfs/xfs_trace.h | 104 ++--
85 files changed, 5381 insertions(+), 366 deletions(-)
create mode 100644 fs/xfs/libxfs/xfs_rtrmap_btree.c
create mode 100644 fs/xfs/libxfs/xfs_rtrmap_btree.h
create mode 100644 fs/xfs/scrub/rtrmap.c
create mode 100644 fs/xfs/scrub/rtrmap_repair.c
create mode 100644 fs/xfs/xfs_notify_failure.h
Hi Carlos,
Please pull this branch with changes for xfs.
As usual, I did a test-merge with the main upstream branch as of a few
minutes ago, and didn't see any conflicts. Please let me know if you
encounter any problems.
--D
The following changes since commit 4bbf9020becbfd8fc2c3da790855b7042fad455b:
Linux 6.13-rc4 (2024-12-22 13:22:21 -0800)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git tags/xfs-6.13-fixes_2024-12-23
for you to fetch changes up to 1aacd3fac248902ea1f7607f2d12b93929a4833b:
xfs: release the dquot buf outside of qli_lock (2024-12-23 13:06:01 -0800)
----------------------------------------------------------------
xfs: bug fixes for 6.13 [01/14]
Bug fixes for 6.13.
This has been running on the djcloud for months with no problems. Enjoy!
Signed-off-by: "Darrick J. Wong" <djwong(a)kernel.org>
----------------------------------------------------------------
Darrick J. Wong (2):
xfs: don't over-report free space or inodes in statvfs
xfs: release the dquot buf outside of qli_lock
fs/xfs/xfs_dquot.c | 12 ++++++++----
fs/xfs/xfs_qm_bhv.c | 27 +++++++++++++++++----------
2 files changed, 25 insertions(+), 14 deletions(-)
This patch addresses a null-ptr-deref in qt2_process_read_urb() due to
an incorrect bounds check in the following:
if (newport > serial->num_ports) {
dev_err(&port->dev,
"%s - port change to invalid port: %i\n",
__func__, newport);
break;
}
The condition doesn't account for the valid range of the serial->port
buffer, which is from 0 to serial->num_ports - 1. When newport is equal
to serial->num_ports, the assignment of "port" in the
following code is out-of-bounds and NULL:
serial_priv->current_port = newport;
port = serial->port[serial_priv->current_port];
The fix checks if newport is greater than or equal to serial->num_ports
indicating it is out-of-bounds.
Reported-by: syzbot <syzbot+506479ebf12fe435d01a(a)syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=506479ebf12fe435d01a
Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver")
Cc: <stable(a)vger.kernel.org> # 3.5
Signed-off-by: Qasim Ijaz <qasdev00(a)gmail.com>
---
drivers/usb/serial/quatech2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
index a317bdbd00ad..72fe83a6c978 100644
--- a/drivers/usb/serial/quatech2.c
+++ b/drivers/usb/serial/quatech2.c
@@ -503,7 +503,7 @@ static void qt2_process_read_urb(struct urb *urb)
newport = *(ch + 3);
- if (newport > serial->num_ports) {
+ if (newport >= serial->num_ports) {
dev_err(&port->dev,
"%s - port change to invalid port: %i\n",
__func__, newport);
--
2.39.5
This is a resend of the patchset discussed here[1] for the 5.15 tree.
[1] https://lore.kernel.org/r/2025011052-backpedal-coat-2fec@gregkh
I've picked the "do not keep dangling zcomp pointer" patch from the
linux-rc tree at the time, so kept Sasha's SOB and added mine on top
-- please let me know if it wasn't appropriate.
I've also prepared the 5.10 patches, but I hadn't realized there were so
many stable deps (8 patches total!); I'm honestly not sure the problem is
worth the churn but since it's done and tested I'll send the patches if
there is no problem with this 5.15 version.
Thanks!
Dominique Martinet (1):
zram: check comp is non-NULL before calling comp_destroy
Kairui Song (1):
zram: fix uninitialized ZRAM not releasing backing device
Sergey Senozhatsky (1):
drivers/block/zram/zram_drv.c: do not keep dangling zcomp pointer
after zram reset
drivers/block/zram/zram_drv.c | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
--
2.39.5
The quilt patch titled
Subject: mm/hugetlb: fix avoid_reserve to allow taking folio from subpool
has been removed from the -mm tree. Its filename was
mm-hugetlb-fix-avoid_reserve-to-allow-taking-folio-from-subpool.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/hugetlb: fix avoid_reserve to allow taking folio from subpool
Date: Tue, 7 Jan 2025 15:39:56 -0500
Patch series "mm/hugetlb: Refactor hugetlb allocation resv accounting",
v2.
This is a follow up on Ackerley's series here as replacement:
https://lore.kernel.org/r/cover.1728684491.git.ackerleytng@google.com
The goal of this series is to clean up hugetlb resv accounting, especially
during folio allocation, in order to decouple a few things:
- Hugetlb folios v.s. Hugetlbfs: IOW, the hope is in the future hugetlb
folios can be allocated completely without hugetlbfs.
- Decouple VMA v.s. hugetlb folio allocations: allocating a hugetlb folio
should not always require a hugetlbfs VMA. For example, either it got
allocated from the inode level (see hugetlbfs_fallocate() where it used
a pseudo VMA for allocation), or it can be allocated by other kernel
subsystems.
It paves the way for other users to allocate hugetlb folios out of either
system reservations, or subpools (instead of hugetlbfs, as a file system).
For the longer term, this prepares hugetlb as a separate concept from
hugetlbfs, so that hugetlb folios can be allocated not only by hugetlbfs
but also by other subsystems.
Tests I've done:
- I had a reproducer in patch 1 for the bug I found, this will start to
work after patch 1 or the whole set applied.
- Hugetlb regression tests (on x86_64 2MBs), includes:
- All vmtests on hugetlbfs
- libhugetlbfs test suite (which may fail some tests, but no new failures
are introduced by this series; all such failures predate it and so
aren't relevant).
This patch (of 7):
Since commit 04f2cbe35699 ("hugetlb: guarantee that COW faults for a
process that called mmap(MAP_PRIVATE) on hugetlbfs will succeed"),
avoid_reserve was introduced for a special case of CoW on hugetlb private
mappings, and only if the owner VMA is trying to allocate yet another
hugetlb folio that is not reserved within the private vma reserved map.
Later on, in commit d85f69b0b533 ("mm/hugetlb: alloc_huge_page handle
areas hole punched by fallocate"), alloc_huge_page() was made to not
consume any global reservation as long as avoid_reserve=true. This
doesn't look correct: even though it forces the allocation to not use
the global reservation directly, it will still try to take one
reservation from the spool (if the subpool exists). Since the spool's
reserved pages are taken from the global reservation, it ends up taking
one reservation globally anyway.
Logically it can cause global reservation to go wrong.
The reproducer below triggers this special path; every run of the program
causes the global reservation count to increment by one, until it hits the
number of free pages:
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#define MSIZE (2UL << 20)
int main(int argc, char *argv[])
{
        const char *path;
        int *buf;
        int fd, ret;
        pid_t child;

        if (argc < 2) {
                printf("usage: %s <hugetlb_file>\n", argv[0]);
                return -1;
        }

        path = argv[1];

        fd = open(path, O_RDWR | O_CREAT, 0666);
        if (fd < 0) {
                perror("open failed");
                return -1;
        }

        ret = fallocate(fd, 0, 0, MSIZE);
        if (ret != 0) {
                perror("fallocate");
                return -1;
        }

        buf = mmap(NULL, MSIZE, PROT_READ|PROT_WRITE,
                   MAP_PRIVATE, fd, 0);
        if (buf == MAP_FAILED) {
                perror("mmap() failed");
                return -1;
        }

        /* Allocate a page */
        *buf = 1;

        child = fork();
        if (child == 0) {
                /* child doesn't need to do anything */
                exit(0);
        }

        /* Trigger CoW from owner */
        *buf = 2;

        munmap(buf, MSIZE);
        close(fd);
        unlink(path);

        return 0;
}
It can only be reproduced with a sub-mount when there are reserved pages
in the spool, like:
# sysctl vm.nr_hugepages=128
# mkdir ./hugetlb-pool
# mount -t hugetlbfs -o min_size=8M,pagesize=2M none ./hugetlb-pool
Then run the reproducer on the mountpoint:
# ./reproducer ./hugetlb-pool/test
Fix it by taking the reservation from the spool if available. In general,
avoid_reserve is IMHO more about "avoid the vma resv map", not the spool's.
I copied stable; however, I have no intention of backporting this if it's
not a clean cherry-pick, because a private hugetlb mapping with a fork()
on top of it is too rare to hit.
Link: https://lkml.kernel.org/r/20250107204002.2683356-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20250107204002.2683356-2-peterx@redhat.com
Fixes: d85f69b0b533 ("mm/hugetlb: alloc_huge_page handle areas hole punched by fallocate")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reviewed-by: Ackerley Tng <ackerleytng(a)google.com>
Tested-by: Ackerley Tng <ackerleytng(a)google.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 22 +++-------------------
1 file changed, 3 insertions(+), 19 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlb-fix-avoid_reserve-to-allow-taking-folio-from-subpool
+++ a/mm/hugetlb.c
@@ -1398,8 +1398,7 @@ static unsigned long available_huge_page
static struct folio *dequeue_hugetlb_folio_vma(struct hstate *h,
struct vm_area_struct *vma,
- unsigned long address, int avoid_reserve,
- long chg)
+ unsigned long address, long chg)
{
struct folio *folio = NULL;
struct mempolicy *mpol;
@@ -1415,10 +1414,6 @@ static struct folio *dequeue_hugetlb_fol
if (!vma_has_reserves(vma, chg) && !available_huge_pages(h))
goto err;
- /* If reserves cannot be used, ensure enough pages are in the pool */
- if (avoid_reserve && !available_huge_pages(h))
- goto err;
-
gfp_mask = htlb_alloc_mask(h);
nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
@@ -1434,7 +1429,7 @@ static struct folio *dequeue_hugetlb_fol
folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask,
nid, nodemask);
- if (folio && !avoid_reserve && vma_has_reserves(vma, chg)) {
+ if (folio && vma_has_reserves(vma, chg)) {
folio_set_hugetlb_restore_reserve(folio);
h->resv_huge_pages--;
}
@@ -3051,17 +3046,6 @@ struct folio *alloc_hugetlb_folio(struct
gbl_chg = hugepage_subpool_get_pages(spool, 1);
if (gbl_chg < 0)
goto out_end_reservation;
-
- /*
- * Even though there was no reservation in the region/reserve
- * map, there could be reservations associated with the
- * subpool that can be used. This would be indicated if the
- * return value of hugepage_subpool_get_pages() is zero.
- * However, if avoid_reserve is specified we still avoid even
- * the subpool reservations.
- */
- if (avoid_reserve)
- gbl_chg = 1;
}
/* If this allocation is not consuming a reservation, charge it now.
@@ -3084,7 +3068,7 @@ struct folio *alloc_hugetlb_folio(struct
* from the global free pool (global change). gbl_chg == 0 indicates
* a reservation exists for the allocation.
*/
- folio = dequeue_hugetlb_folio_vma(h, vma, addr, avoid_reserve, gbl_chg);
+ folio = dequeue_hugetlb_folio_vma(h, vma, addr, gbl_chg);
if (!folio) {
spin_unlock_irq(&hugetlb_lock);
folio = alloc_buddy_hugetlb_folio_with_mpol(h, vma, addr);
_
Patches currently in -mm which might be from peterx(a)redhat.com are
The quilt patch titled
Subject: maple_tree: simplify split calculation
has been removed from the -mm tree. Its filename was
maple_tree-simplify-split-calculation.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Wei Yang <richard.weiyang(a)gmail.com>
Subject: maple_tree: simplify split calculation
Date: Wed, 13 Nov 2024 03:16:14 +0000
Patch series "simplify split calculation", v3.
This patch (of 3):
The current calculation for splitting nodes tries to enforce a minimum
span on the leaf nodes. This code is complex and never worked correctly
to begin with, due to the min value being passed as 0 for all leaves.
The calculation should just split the data as equally as possible
between the new nodes. Note that b_end will be one more than the data,
so the left side is still favoured in the calculation.
The current code may also lead to a deficient node by not leaving enough
data for the right side of the split. This issue is also addressed with
the split calculation change.
[Liam.Howlett(a)Oracle.com: rephrase the change log]
Link: https://lkml.kernel.org/r/20241113031616.10530-1-richard.weiyang@gmail.com
Link: https://lkml.kernel.org/r/20241113031616.10530-2-richard.weiyang@gmail.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Wei Yang <richard.weiyang(a)gmail.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
Cc: Sidhartha Kumar <sidhartha.kumar(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/maple_tree.c | 23 ++++++-----------------
1 file changed, 6 insertions(+), 17 deletions(-)
--- a/lib/maple_tree.c~maple_tree-simplify-split-calculation
+++ a/lib/maple_tree.c
@@ -1863,11 +1863,11 @@ static inline int mab_no_null_split(stru
* Return: The first split location. The middle split is set in @mid_split.
*/
static inline int mab_calc_split(struct ma_state *mas,
- struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
+ struct maple_big_node *bn, unsigned char *mid_split)
{
unsigned char b_end = bn->b_end;
int split = b_end / 2; /* Assume equal split. */
- unsigned char slot_min, slot_count = mt_slots[bn->type];
+ unsigned char slot_count = mt_slots[bn->type];
/*
* To support gap tracking, all NULL entries are kept together and a node cannot
@@ -1900,18 +1900,7 @@ static inline int mab_calc_split(struct
split = b_end / 3;
*mid_split = split * 2;
} else {
- slot_min = mt_min_slots[bn->type];
-
*mid_split = 0;
- /*
- * Avoid having a range less than the slot count unless it
- * causes one node to be deficient.
- * NOTE: mt_min_slots is 1 based, b_end and split are zero.
- */
- while ((split < slot_count - 1) &&
- ((bn->pivot[split] - min) < slot_count - 1) &&
- (b_end - split > slot_min))
- split++;
}
/* Avoid ending a node on a NULL entry */
@@ -2377,7 +2366,7 @@ static inline struct maple_enode
static inline unsigned char mas_mab_to_node(struct ma_state *mas,
struct maple_big_node *b_node, struct maple_enode **left,
struct maple_enode **right, struct maple_enode **middle,
- unsigned char *mid_split, unsigned long min)
+ unsigned char *mid_split)
{
unsigned char split = 0;
unsigned char slot_count = mt_slots[b_node->type];
@@ -2390,7 +2379,7 @@ static inline unsigned char mas_mab_to_n
if (b_node->b_end < slot_count) {
split = b_node->b_end;
} else {
- split = mab_calc_split(mas, b_node, mid_split, min);
+ split = mab_calc_split(mas, b_node, mid_split);
*right = mas_new_ma_node(mas, b_node);
}
@@ -2877,7 +2866,7 @@ static void mas_spanning_rebalance(struc
mast->bn->b_end--;
mast->bn->type = mte_node_type(mast->orig_l->node);
split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
- &mid_split, mast->orig_l->min);
+ &mid_split);
mast_set_split_parents(mast, left, middle, right, split,
mid_split);
mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
@@ -3365,7 +3354,7 @@ static void mas_split(struct ma_state *m
if (mas_push_data(mas, height, &mast, false))
break;
- split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
+ split = mab_calc_split(mas, b_node, &mid_split);
mast_split_data(&mast, mas, split);
/*
* Usually correct, mab_mas_cp in the above call overwrites
_
Patches currently in -mm which might be from richard.weiyang(a)gmail.com are
On Mon, 13 Jan 2025 06:03:53 -0800, Sasha Levin wrote:
>
Hello Sasha,
> This is a note to let you know that I've just added the patch titled
>
> drm/xe/oa: Separate batch submission from waiting for completion
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> drm-xe-oa-separate-batch-submission-from-waiting-for.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
I am writing about 3 emails I received (including this one) about 3 commits
being added to stable. These are the 3 commits I am referring to (all
commit SHA's refer to Linus' tree and first commit is at the bottom,
everywhere in this email):
2fb4350a283af drm/xe/oa: Add input fence dependencies
c8507a25cebd1 drm/xe/oa/uapi: Define and parse OA sync properties
dddcb19ad4d4b drm/xe/oa: Separate batch submission from waiting for completion
So none of these commits had any "Fixes:" tag or "Cc: stable" so not sure
why they are being added to stable. Also, they are part of a 7 commit
series so not sure why only 3 of the 7 commits are being added to stable?
In addition, for this commit which is also added to stable:
f0ed39830e606 xe/oa: Fix query mode of operation for OAR/OAC
We modified this commit for stable because it would otherwise conflict
with the previous 3 commits mentioned above, which we were assuming would
not be in stable.
Now, if we want all these commits in stable (I actually prefer it), we
should just pick them straight from Linus' tree. This would be all these
commits:
f0ed39830e606 xe/oa: Fix query mode of operation for OAR/OAC
c0403e4ceecae drm/xe/oa: Fix "Missing outer runtime PM protection" warning
85d3f9e84e062 drm/xe/oa: Allow only certain property changes from config
9920c8b88c5cf drm/xe/oa: Add syncs support to OA config ioctl
cc4e6994d5a23 drm/xe/oa: Move functions up so they can be reused for config ioctl
343dd246fd9b5 drm/xe/oa: Signal output fences
2fb4350a283af drm/xe/oa: Add input fence dependencies
c8507a25cebd1 drm/xe/oa/uapi: Define and parse OA sync properties
dddcb19ad4d4b drm/xe/oa: Separate batch submission from waiting for completion
So: we should either drop the 3 patches at the top, or just pick all 9
patches above. Doing the latter will probably be the simplest and I don't
expect any conflicts, or if there are, I can help to resolve those.
The above list can be generated by running the following in Linus' tree:
git log --oneline -- drivers/gpu/drm/xe/xe_oa.c
Thanks.
--
Ashutosh
>
>
>
> commit 9aeced687e728b9de067a502a0780f8029e61763
> Author: Ashutosh Dixit <ashutosh.dixit(a)intel.com>
> Date: Tue Oct 22 13:03:46 2024 -0700
>
> drm/xe/oa: Separate batch submission from waiting for completion
>
> [ Upstream commit dddcb19ad4d4bbe943a72a1fb3266c6e8aa8d541 ]
>
> When we introduce xe_syncs, we don't wait for internal OA programming
> batches to complete. That is, xe_syncs are signaled asynchronously. In
> anticipation for this, separate out batch submission from waiting for
> completion of those batches.
>
> v2: Change return type of xe_oa_submit_bb to "struct dma_fence *" (Matt B)
> v3: Retain init "int err = 0;" in xe_oa_submit_bb (Jose)
>
> Reviewed-by: Jonathan Cavitt <jonathan.cavitt(a)intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit(a)intel.com>
> Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-2-ashu…
> Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC")
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> index 78823f53d290..4962c9eb9a81 100644
> --- a/drivers/gpu/drm/xe/xe_oa.c
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -567,11 +567,10 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
> return ret;
> }
>
> -static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
> +static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
> {
> struct xe_sched_job *job;
> struct dma_fence *fence;
> - long timeout;
> int err = 0;
>
> /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
> @@ -585,14 +584,9 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
> fence = dma_fence_get(&job->drm.s_fence->finished);
> xe_sched_job_push(job);
>
> - timeout = dma_fence_wait_timeout(fence, false, HZ);
> - dma_fence_put(fence);
> - if (timeout < 0)
> - err = timeout;
> - else if (!timeout)
> - err = -ETIME;
> + return fence;
> exit:
> - return err;
> + return ERR_PTR(err);
> }
>
> static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
> @@ -656,6 +650,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
> static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
> const struct flex *flex, u32 count)
> {
> + struct dma_fence *fence;
> struct xe_bb *bb;
> int err;
>
> @@ -667,7 +662,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
>
> xe_oa_store_flex(stream, lrc, bb, flex, count);
>
> - err = xe_oa_submit_bb(stream, bb);
> + fence = xe_oa_submit_bb(stream, bb);
> + if (IS_ERR(fence)) {
> + err = PTR_ERR(fence);
> + goto free_bb;
> + }
> + xe_bb_free(bb, fence);
> + dma_fence_put(fence);
> +
> + return 0;
> +free_bb:
> xe_bb_free(bb, NULL);
> exit:
> return err;
> @@ -675,6 +679,7 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
>
> static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
> {
> + struct dma_fence *fence;
> struct xe_bb *bb;
> int err;
>
> @@ -686,7 +691,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
>
> write_cs_mi_lri(bb, reg_lri, 1);
>
> - err = xe_oa_submit_bb(stream, bb);
> + fence = xe_oa_submit_bb(stream, bb);
> + if (IS_ERR(fence)) {
> + err = PTR_ERR(fence);
> + goto free_bb;
> + }
> + xe_bb_free(bb, fence);
> + dma_fence_put(fence);
> +
> + return 0;
> +free_bb:
> xe_bb_free(bb, NULL);
> exit:
> return err;
> @@ -914,15 +928,32 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
> {
> #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
> struct xe_oa_config_bo *oa_bo;
> - int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
> + int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
> + struct dma_fence *fence;
> + long timeout;
>
> + /* Emit OA configuration batch */
> oa_bo = xe_oa_alloc_config_buffer(stream, config);
> if (IS_ERR(oa_bo)) {
> err = PTR_ERR(oa_bo);
> goto exit;
> }
>
> - err = xe_oa_submit_bb(stream, oa_bo->bb);
> + fence = xe_oa_submit_bb(stream, oa_bo->bb);
> + if (IS_ERR(fence)) {
> + err = PTR_ERR(fence);
> + goto exit;
> + }
> +
> + /* Wait till all previous batches have executed */
> + timeout = dma_fence_wait_timeout(fence, false, 5 * HZ);
> + dma_fence_put(fence);
> + if (timeout < 0)
> + err = timeout;
> + else if (!timeout)
> + err = -ETIME;
> + if (err)
> + drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err);
>
> /* Additional empirical delay needed for NOA programming after registers are written */
> usleep_range(us, 2 * us);
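For anyone carrying this backport, the fence-ownership convention after the
change is worth spelling out: xe_oa_submit_bb() now returns a fence that
already holds a reference (taken via dma_fence_get() in the hunk above), and
every caller is expected to drop it. Below is a minimal hedged sketch, not a
literal copy of the driver, of a caller that only frees the BB and does not
wait, mirroring the xe_oa_modify_ctx_image()/xe_oa_load_with_lri() hunks; the
helper name submit_and_release() is made up for illustration.

/*
 * Hedged sketch: submit a BB, hand the returned fence to xe_bb_free() so the
 * BB is released once the batch completes, then drop the reference the
 * submission path took on our behalf.
 */
static int submit_and_release(struct xe_oa_stream *stream, struct xe_bb *bb)
{
	struct dma_fence *fence;

	fence = xe_oa_submit_bb(stream, bb);
	if (IS_ERR(fence)) {
		xe_bb_free(bb, NULL);	/* nothing in flight, free immediately */
		return PTR_ERR(fence);
	}

	xe_bb_free(bb, fence);		/* deferred free, tied to the fence */
	dma_fence_put(fence);		/* drop the caller's reference */
	return 0;
}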
On Fri, 10 Jan 2025 06:02:04 +0000 Potin Lai (賴柏廷) wrote:
> > Neat!
> > Potin, please give this a test ASAP.
>
> Thanks for the new patch.
> I am currently tied up with other tasks, but I’ll make sure to test
> it as soon as possible and share the results with you.
Understood, would you be able to test it by January 13th?
Depending on how long we need to wait, we may be better off
either applying the patch now or holding off on committing.
The quilt patch titled
Subject: selftests/mm: virtual_address_range: avoid reading VVAR mappings
has been removed from the -mm tree. Its filename was
selftests-mm-virtual_address_range-avoid-reading-vvar-mappings.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Subject: selftests/mm: virtual_address_range: avoid reading VVAR mappings
Date: Tue, 07 Jan 2025 16:14:46 +0100
The virtual_address_range selftest reads from the start of each mapping
listed in /proc/self/maps.
However, not all mappings are valid to be arbitrarily accessed. For
example, the vvar data used for virtual clocks on x86 can only be accessed
if 1) the kernel configuration enables virtual clocks and 2) the
hypervisor provided the data for it, which can only be determined by the VDSO
code itself.
Since commit e93d2521b27f ("x86/vdso: Split virtual clock pages into
dedicated mapping") the virtual clock data was split out into its own
mapping, triggering faulting accesses by virtual_address_range.
Skip the various vvar mappings in virtual_address_range to avoid errors.
Link: https://lkml.kernel.org/r/20250107-virtual_address_range-tests-v1-2-3834a2f…
Fixes: e93d2521b27f ("x86/vdso: Split virtual clock pages into dedicated mapping")
Fixes: 010409649885 ("selftests/mm: confirm VA exhaustion without reliance on correctness of mmap()")
Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202412271148.2656e485-lkp@intel.com
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/virtual_address_range.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/tools/testing/selftests/mm/virtual_address_range.c~selftests-mm-virtual_address_range-avoid-reading-vvar-mappings
+++ a/tools/testing/selftests/mm/virtual_address_range.c
@@ -116,10 +116,11 @@ static int validate_complete_va_space(vo
prev_end_addr = 0;
while (fgets(line, sizeof(line), file)) {
+ int path_offset = 0;
unsigned long hop;
- if (sscanf(line, "%lx-%lx %s[rwxp-]",
- &start_addr, &end_addr, prot) != 3)
+ if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n",
+ &start_addr, &end_addr, prot, &path_offset) != 3)
ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
/* end of userspace mappings; ignore vsyscall mapping */
@@ -135,6 +136,10 @@ static int validate_complete_va_space(vo
if (prot[0] != 'r')
continue;
+ /* Only the VDSO can know if a VVAR mapping is really readable */
+ if (path_offset && !strncmp(line + path_offset, "[vvar", 5))
+ continue;
+
/*
* Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
* If write succeeds, no need to check MAP_CHUNK_SIZE - 1
_
Patches currently in -mm which might be from thomas.weissschuh(a)linutronix.de are
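For context on what the new sscanf() format and the "[vvar" check in the
patch above are matching against, here is a small hedged standalone sketch.
The maps line is a made-up example; the field layout follows the usual
/proc/self/maps format, and the parsing mirrors the "%n" trick used by the
patch.

/* Hedged sketch: parse one maps-style line and detect a vvar mapping. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* hypothetical example; real [vvar]/[vvar_vclock] addresses vary */
	const char line[] =
		"7ffff7fbd000-7ffff7fc1000 r--p 00000000 00:00 0    [vvar]\n";
	unsigned long start, end;
	char prot[5];
	int path_offset = 0;

	if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n",
		   &start, &end, prot, &path_offset) != 3)
		return 1;

	if (path_offset && !strncmp(line + path_offset, "[vvar", 5))
		printf("skipping vvar mapping %lx-%lx\n", start, end);

	return 0;
}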
The quilt patch titled
Subject: selftests/mm: virtual_address_range: fix error when CommitLimit < 1GiB
has been removed from the -mm tree. Its filename was
selftests-mm-virtual_address_range-fix-error-when-commitlimit-1gib.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Subject: selftests/mm: virtual_address_range: fix error when CommitLimit < 1GiB
Date: Tue, 07 Jan 2025 16:14:45 +0100
If not enough physical memory is available, the kernel may fail mmap(); see
__vm_enough_memory() and vm_commit_limit(). In that case the logic in
validate_complete_va_space() does not make sense and will even incorrectly
fail. Instead skip the test if no mmap() succeeded.
Link: https://lkml.kernel.org/r/20250107-virtual_address_range-tests-v1-1-3834a2f…
Fixes: 010409649885 ("selftests/mm: confirm VA exhaustion without reliance on correctness of mmap()")
Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: kernel test robot <oliver.sang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/virtual_address_range.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/tools/testing/selftests/mm/virtual_address_range.c~selftests-mm-virtual_address_range-fix-error-when-commitlimit-1gib
+++ a/tools/testing/selftests/mm/virtual_address_range.c
@@ -178,6 +178,12 @@ int main(int argc, char *argv[])
validate_addr(ptr[i], 0);
}
lchunks = i;
+
+ if (!lchunks) {
+ ksft_test_result_skip("Not enough memory for a single chunk\n");
+ ksft_finished();
+ }
+
hptr = (char **) calloc(NR_CHUNKS_HIGH, sizeof(char *));
if (hptr == NULL) {
ksft_test_result_skip("Memory constraint not fulfilled\n");
_
Patches currently in -mm which might be from thomas.weissschuh(a)linutronix.de are
selftests-mm-virtual_address_range-avoid-reading-vvar-mappings.patch
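To see whether a given machine is anywhere near the situation the patch
guards against, one can compare CommitLimit with Committed_AS. The sketch
below is a hedged userspace helper, not part of the selftest; the field
names are the standard /proc/meminfo ones, and note that the limit is only
actually enforced in strict overcommit mode (vm.overcommit_memory=2).

/* Hedged sketch: dump the two /proc/meminfo fields relevant to
 * vm_commit_limit()-style accounting. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "CommitLimit:", 12) ||
		    !strncmp(line, "Committed_AS:", 13))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}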
On Mon, Jan 13, 2025 at 6:04 AM Sasha Levin <sashal(a)kernel.org> wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> mm: zswap: fix race between [de]compression and CPU hotunplug
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> mm-zswap-fix-race-between-de-compression-and-cpu-hot.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this patch from v6.12 (and all stable trees) as it has a
bug, as I pointed out in [1] (was that the correct place to surface
this?).
Andrew's latest PR contains a revert of this patch (and alternative fix) [2].
Thanks!
[1] https://lore.kernel.org/stable/CAJD7tkaiUA6J1nb0=1ELpUD0OgjoD+tft-iPqbPdy…
[2] https://lore.kernel.org/lkml/20250113000543.862792e948c2646032a477e0@linu…
>
>
>
> commit c56e79d453ef5b5fc6ded252bc6ba461f12946ba
> Author: Yosry Ahmed <yosryahmed(a)google.com>
> Date: Thu Dec 19 21:24:37 2024 +0000
>
> mm: zswap: fix race between [de]compression and CPU hotunplug
>
> [ Upstream commit eaebeb93922ca6ab0dd92027b73d0112701706ef ]
>
> In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the
> current CPU at the beginning of the operation is retrieved and used
> throughout. However, since neither preemption nor migration are disabled,
> it is possible that the operation continues on a different CPU.
>
> If the original CPU is hotunplugged while the acomp_ctx is still in use,
> we run into a UAF bug as the resources attached to the acomp_ctx are freed
> during hotunplug in zswap_cpu_comp_dead().
>
> The problem was introduced in commit 1ec3b5fe6eec ("mm/zswap: move to use
> crypto_acomp API for hardware acceleration") when the switch to the
> crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was
> retrieved using get_cpu_ptr() which disables preemption and makes sure the
> CPU cannot go away from under us. Preemption cannot be disabled with the
> crypto_acomp API as a sleepable context is needed.
>
> Commit 8ba2f844f050 ("mm/zswap: change per-cpu mutex and buffer to
> per-acomp_ctx") increased the UAF surface area by making the per-CPU
> buffers dynamic, adding yet another resource that can be freed from under
> zswap compression/decompression by CPU hotunplug.
>
> There are a few ways to fix this:
> (a) Add a refcount for acomp_ctx.
> (b) Disable migration while using the per-CPU acomp_ctx.
> (c) Disable CPU hotunplug while using the per-CPU acomp_ctx by holding
> the CPUs read lock.
>
> Implement (c) since it's simpler than (a), and (b) involves using
> migrate_disable() which is apparently undesired (see huge comment in
> include/linux/preempt.h).
>
> Link: https://lkml.kernel.org/r/20241219212437.2714151-1-yosryahmed@google.com
> Fixes: 1ec3b5fe6eec ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
> Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
> Reported-by: Johannes Weiner <hannes(a)cmpxchg.org>
> Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/
> Reported-by: Sam Sun <samsun1006219(a)gmail.com>
> Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tP…
> Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
> Acked-by: Barry Song <baohua(a)kernel.org>
> Reviewed-by: Nhat Pham <nphamcs(a)gmail.com>
> Cc: Vitaly Wool <vitalywool(a)gmail.com>
> Cc: <stable(a)vger.kernel.org>
> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 0030ce8fecfc..c86d4bcbb447 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -875,6 +875,18 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
> return 0;
> }
>
> +/* Prevent CPU hotplug from freeing up the per-CPU acomp_ctx resources */
> +static struct crypto_acomp_ctx *acomp_ctx_get_cpu(struct crypto_acomp_ctx __percpu *acomp_ctx)
> +{
> + cpus_read_lock();
> + return raw_cpu_ptr(acomp_ctx);
> +}
> +
> +static void acomp_ctx_put_cpu(void)
> +{
> + cpus_read_unlock();
> +}
> +
> static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> {
> struct crypto_acomp_ctx *acomp_ctx;
> @@ -887,8 +899,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> gfp_t gfp;
> u8 *dst;
>
> - acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> -
> + acomp_ctx = acomp_ctx_get_cpu(entry->pool->acomp_ctx);
> mutex_lock(&acomp_ctx->mutex);
>
> dst = acomp_ctx->buffer;
> @@ -944,6 +955,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> zswap_reject_alloc_fail++;
>
> mutex_unlock(&acomp_ctx->mutex);
> + acomp_ctx_put_cpu();
> return comp_ret == 0 && alloc_ret == 0;
> }
>
> @@ -954,7 +966,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> struct crypto_acomp_ctx *acomp_ctx;
> u8 *src;
>
> - acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> + acomp_ctx = acomp_ctx_get_cpu(entry->pool->acomp_ctx);
> mutex_lock(&acomp_ctx->mutex);
>
> src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
> @@ -984,6 +996,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
>
> if (src != acomp_ctx->buffer)
> zpool_unmap_handle(zpool, entry->handle);
> + acomp_ctx_put_cpu();
> }
>
> /*********************************
This partially reverts commit fe4f5d4b6616 ("drm/xe: Clean up VM / exec
queue file lock usage."). While it's desired to have the mutex to
protect only the reference to the exec queue, getting and dropping the
mutex for each exec queue and only later reading the GPU timestamp
doesn't produce a correct result: it introduces multiple opportunities for
the task to be scheduled out, wreaking havoc on the deltas reported to
userspace.
Also, to better correlate the exec queue timestamps with the GPU
timestamp, disable preemption so they can be updated without allowing the task
to be scheduled out. We leave interrupts enabled as that shouldn't be
enough disturbance for the deltas to matter to userspace.
Test scenario:
* IGT'S `xe_drm_fdinfo --r utilization-single-full-load`
* Platform: LNL, where CI occasionally reports failures
* `stress -c $(nproc)` running in parallel to disturb the
system
This brings a first failure from "after ~150 executions" to "never
occurs after 1000 attempts".
v2: Also keep xe_hw_engine_read_timestamp() call inside the
preemption-disabled section (Umesh)
Cc: stable(a)vger.kernel.org # v6.11+
Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3512
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
---
drivers/gpu/drm/xe/xe_drm_client.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 7d55ad846bac5..2220a09bf9751 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -337,20 +337,18 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
return;
}
+ /* Let both the GPU timestamp and exec queue be updated together */
+ preempt_disable();
+ gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
- xa_for_each(&xef->exec_queue.xa, i, q) {
- xe_exec_queue_get(q);
- mutex_unlock(&xef->exec_queue.lock);
+ xa_for_each(&xef->exec_queue.xa, i, q)
xe_exec_queue_update_run_ticks(q);
- mutex_lock(&xef->exec_queue.lock);
- xe_exec_queue_put(q);
- }
mutex_unlock(&xef->exec_queue.lock);
-
- gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+ preempt_enable();
xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref);
xe_pm_runtime_put(xe);
--
2.47.0
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x d1cacd74776895f6435941f86a1130e58f6dd226
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025011253-citable-monstrous-3ae9@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1cacd74776895f6435941f86a1130e58f6dd226 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Mon, 6 Jan 2025 10:01:36 -0800
Subject: [PATCH] netdev: prevent accessing NAPI instances from another
namespace
The NAPI IDs were not fully exposed to user space prior to the netlink
API, so they were never namespaced. The netlink API must ensure that
at the very least the NAPI instance belongs to the same netns as the owner
of the genl sock.
napi_by_id() can become static now, but it needs to move because of
dev_get_by_napi_id().
Cc: stable(a)vger.kernel.org
Fixes: 1287c1ae0fc2 ("netdev-genl: Support setting per-NAPI config values")
Fixes: 27f91aaf49b3 ("netdev-genl: Add netlink framework functions for napi")
Reviewed-by: Sridhar Samudrala <sridhar.samudrala(a)intel.com>
Reviewed-by: Joe Damato <jdamato(a)fastly.com>
Link: https://patch.msgid.link/20250106180137.1861472-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/core/dev.c b/net/core/dev.c
index faa23042df38..a9f62f5aeb84 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -753,6 +753,36 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
}
EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+/* must be called under rcu_read_lock(), as we dont take a reference */
+static struct napi_struct *napi_by_id(unsigned int napi_id)
+{
+ unsigned int hash = napi_id % HASH_SIZE(napi_hash);
+ struct napi_struct *napi;
+
+ hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
+ if (napi->napi_id == napi_id)
+ return napi;
+
+ return NULL;
+}
+
+/* must be called under rcu_read_lock(), as we dont take a reference */
+struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ napi = napi_by_id(napi_id);
+ if (!napi)
+ return NULL;
+
+ if (WARN_ON_ONCE(!napi->dev))
+ return NULL;
+ if (!net_eq(net, dev_net(napi->dev)))
+ return NULL;
+
+ return napi;
+}
+
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
@@ -6293,19 +6323,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
}
EXPORT_SYMBOL(napi_complete_done);
-/* must be called under rcu_read_lock(), as we dont take a reference */
-struct napi_struct *napi_by_id(unsigned int napi_id)
-{
- unsigned int hash = napi_id % HASH_SIZE(napi_hash);
- struct napi_struct *napi;
-
- hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
- if (napi->napi_id == napi_id)
- return napi;
-
- return NULL;
-}
-
static void skb_defer_free_flush(struct softnet_data *sd)
{
struct sk_buff *skb, *next;
diff --git a/net/core/dev.h b/net/core/dev.h
index d043dee25a68..deb5eae5749f 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -22,6 +22,8 @@ struct sd_flow_limit {
extern int netdev_flow_limit_table_len;
+struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id);
+
#ifdef CONFIG_PROC_FS
int __init dev_proc_init(void);
#else
@@ -269,7 +271,6 @@ void xdp_do_check_flushed(struct napi_struct *napi);
static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
-struct napi_struct *napi_by_id(unsigned int napi_id);
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
#define XMIT_RECURSION_LIMIT 8
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 125b660004d3..a3bdaf075b6b 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -167,8 +167,6 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
void *hdr;
pid_t pid;
- if (WARN_ON_ONCE(!napi->dev))
- return -EINVAL;
if (!(napi->dev->flags & IFF_UP))
return 0;
@@ -234,7 +232,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
rcu_read_lock();
- napi = napi_by_id(napi_id);
+ napi = netdev_napi_by_id(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
} else {
@@ -355,7 +353,7 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
rcu_read_lock();
- napi = napi_by_id(napi_id);
+ napi = netdev_napi_by_id(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_set_config(napi, info);
} else {
The patch titled
Subject: mm: zswap: move allocations during CPU init outside the lock
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Yosry Ahmed <yosryahmed(a)google.com>
Subject: mm: zswap: move allocations during CPU init outside the lock
Date: Mon, 13 Jan 2025 21:44:58 +0000
In zswap_cpu_comp_prepare(), allocations are made and assigned to various
members of acomp_ctx under acomp_ctx->mutex. However, allocations may
recurse into zswap through reclaim, trying to acquire the same mutex and
deadlocking.
Move the allocations before the mutex critical section. Only the
initialization of acomp_ctx needs to be done with the mutex held.
Link: https://lkml.kernel.org/r/20250113214458.2123410-1-yosryahmed@google.com
Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Cc: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/zswap.c | 42 ++++++++++++++++++++++++------------------
1 file changed, 24 insertions(+), 18 deletions(-)
--- a/mm/zswap.c~mm-zswap-move-allocations-during-cpu-init-outside-the-lock
+++ a/mm/zswap.c
@@ -820,15 +820,15 @@ static int zswap_cpu_comp_prepare(unsign
{
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct crypto_acomp *acomp;
- struct acomp_req *req;
+ struct crypto_acomp *acomp = NULL;
+ struct acomp_req *req = NULL;
+ u8 *buffer = NULL;
int ret;
- mutex_lock(&acomp_ctx->mutex);
- acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer) {
+ buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+ if (!buffer) {
ret = -ENOMEM;
- goto buffer_fail;
+ goto fail;
}
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
@@ -836,21 +836,25 @@ static int zswap_cpu_comp_prepare(unsign
pr_err("could not alloc crypto acomp %s : %ld\n",
pool->tfm_name, PTR_ERR(acomp));
ret = PTR_ERR(acomp);
- goto acomp_fail;
+ goto fail;
}
- acomp_ctx->acomp = acomp;
- acomp_ctx->is_sleepable = acomp_is_async(acomp);
- req = acomp_request_alloc(acomp_ctx->acomp);
+ req = acomp_request_alloc(acomp);
if (!req) {
pr_err("could not alloc crypto acomp_request %s\n",
pool->tfm_name);
ret = -ENOMEM;
- goto req_fail;
+ goto fail;
}
- acomp_ctx->req = req;
+ /*
+ * Only hold the mutex after completing allocations, otherwise we may
+ * recurse into zswap through reclaim and attempt to hold the mutex
+ * again resulting in a deadlock.
+ */
+ mutex_lock(&acomp_ctx->mutex);
crypto_init_wait(&acomp_ctx->wait);
+
/*
* if the backend of acomp is async zip, crypto_req_done() will wakeup
* crypto_wait_req(); if the backend of acomp is scomp, the callback
@@ -859,15 +863,17 @@ static int zswap_cpu_comp_prepare(unsign
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ acomp_ctx->buffer = buffer;
+ acomp_ctx->acomp = acomp;
+ acomp_ctx->is_sleepable = acomp_is_async(acomp);
+ acomp_ctx->req = req;
mutex_unlock(&acomp_ctx->mutex);
return 0;
-req_fail:
- crypto_free_acomp(acomp_ctx->acomp);
-acomp_fail:
- kfree(acomp_ctx->buffer);
-buffer_fail:
- mutex_unlock(&acomp_ctx->mutex);
+fail:
+ if (acomp)
+ crypto_free_acomp(acomp);
+ kfree(buffer);
return ret;
}
_
Patches currently in -mm which might be from yosryahmed(a)google.com are
mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 65a60a590142c54a3f3be11ff162db2d5b0e1e06
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025011353-untried-juniper-fe92@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65a60a590142c54a3f3be11ff162db2d5b0e1e06 Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol(a)tdk.com>
Date: Wed, 13 Nov 2024 21:25:45 +0100
Subject: [PATCH] iio: imu: inv_icm42600: fix timestamps after suspend if
sensor is on
Currently, suspending while sensors are on will result in timestamping
continuing without a gap at resume. It can work with the monotonic clock but
not with other clocks. Fix that by resetting timestamping.
Fixes: ec74ae9fd37c ("iio: imu: inv_icm42600: add accurate timestamping")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol(a)tdk.com>
Link: https://patch.msgid.link/20241113-inv_icm42600-fix-timestamps-after-suspend…
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
index e43538e536f0..ef9875d3b79d 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
@@ -829,6 +829,8 @@ static int inv_icm42600_suspend(struct device *dev)
static int inv_icm42600_resume(struct device *dev)
{
struct inv_icm42600_state *st = dev_get_drvdata(dev);
+ struct inv_icm42600_sensor_state *gyro_st = iio_priv(st->indio_gyro);
+ struct inv_icm42600_sensor_state *accel_st = iio_priv(st->indio_accel);
int ret;
mutex_lock(&st->lock);
@@ -849,9 +851,12 @@ static int inv_icm42600_resume(struct device *dev)
goto out_unlock;
/* restore FIFO data streaming */
- if (st->fifo.on)
+ if (st->fifo.on) {
+ inv_sensors_timestamp_reset(&gyro_st->ts);
+ inv_sensors_timestamp_reset(&accel_st->ts);
ret = regmap_write(st->map, INV_ICM42600_REG_FIFO_CONFIG,
INV_ICM42600_FIFO_CONFIG_STREAM);
+ }
out_unlock:
mutex_unlock(&st->lock);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 65a60a590142c54a3f3be11ff162db2d5b0e1e06
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025011306-sedan-synthesis-c459@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65a60a590142c54a3f3be11ff162db2d5b0e1e06 Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol(a)tdk.com>
Date: Wed, 13 Nov 2024 21:25:45 +0100
Subject: [PATCH] iio: imu: inv_icm42600: fix timestamps after suspend if
sensor is on
Currently, suspending while sensors are on will result in timestamping
continuing without a gap at resume. It can work with the monotonic clock but
not with other clocks. Fix that by resetting timestamping.
Fixes: ec74ae9fd37c ("iio: imu: inv_icm42600: add accurate timestamping")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol(a)tdk.com>
Link: https://patch.msgid.link/20241113-inv_icm42600-fix-timestamps-after-suspend…
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
index e43538e536f0..ef9875d3b79d 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
@@ -829,6 +829,8 @@ static int inv_icm42600_suspend(struct device *dev)
static int inv_icm42600_resume(struct device *dev)
{
struct inv_icm42600_state *st = dev_get_drvdata(dev);
+ struct inv_icm42600_sensor_state *gyro_st = iio_priv(st->indio_gyro);
+ struct inv_icm42600_sensor_state *accel_st = iio_priv(st->indio_accel);
int ret;
mutex_lock(&st->lock);
@@ -849,9 +851,12 @@ static int inv_icm42600_resume(struct device *dev)
goto out_unlock;
/* restore FIFO data streaming */
- if (st->fifo.on)
+ if (st->fifo.on) {
+ inv_sensors_timestamp_reset(&gyro_st->ts);
+ inv_sensors_timestamp_reset(&accel_st->ts);
ret = regmap_write(st->map, INV_ICM42600_REG_FIFO_CONFIG,
INV_ICM42600_FIFO_CONFIG_STREAM);
+ }
out_unlock:
mutex_unlock(&st->lock);