The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024071559-reptilian-chaffing-a991@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
5596d9e8b553 ("mm/hugetlb: fix potential race in __update_and_free_hugetlb_folio()")
bd225530a4c7 ("mm/hugetlb_vmemmap: fix race with speculative PFN walkers")
51718e25c53f ("mm: convert arch_clear_hugepage_flags to take a folio")
831bc31a5e82 ("mm: hugetlb: improve the handling of hugetlb allocation failure for freed or in-use hugetlb")
ebc20dcac4ce ("mm: hugetlb_vmemmap: convert page to folio")
c5ad3233ead5 ("hugetlb_vmemmap: use folio argument for hugetlb_vmemmap_* functions")
c24f188b2289 ("hugetlb: batch TLB flushes when restoring vmemmap")
f13b83fdd996 ("hugetlb: batch TLB flushes when freeing vmemmap")
f4b7e3efaddb ("hugetlb: batch PMD split for bulk vmemmap dedup")
91f386bf0772 ("hugetlb: batch freeing of vmemmap pages")
cfb8c75099db ("hugetlb: perform vmemmap restoration on a list of pages")
79359d6d24df ("hugetlb: perform vmemmap optimization on a list of pages")
d67e32f26713 ("hugetlb: restructure pool allocations")
d2cf88c27f51 ("hugetlb: optimize update_and_free_pages_bulk to avoid lock cycles")
30a89adf872d ("hugetlb: check for hugetlb folio before vmemmap_restore")
d5b43e9683ec ("hugetlb: convert remove_pool_huge_page() to remove_pool_hugetlb_folio()")
04bbfd844b99 ("hugetlb: remove a few calls to page_folio()")
fde1c4ecf916 ("mm: hugetlb: skip initialization of gigantic tail struct pages if freed by HVO")
3ee0aa9f0675 ("mm: move some shrinker-related function declarations to mm/internal.h")
d8f5f7e445f0 ("hugetlb: set hugetlb page flag before optimizing vmemmap")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe(a)huawei.com>
Date: Mon, 8 Jul 2024 10:51:27 +0800
Subject: [PATCH] mm/hugetlb: fix potential race in
__update_and_free_hugetlb_folio()
There is a potential race between __update_and_free_hugetlb_folio() and
try_memory_failure_hugetlb():
CPU1 CPU2
__update_and_free_hugetlb_folio try_memory_failure_hugetlb
folio_test_hugetlb
-- It's still hugetlb folio.
folio_clear_hugetlb_hwpoison
spin_lock_irq(&hugetlb_lock);
__get_huge_page_for_hwpoison
folio_set_hugetlb_hwpoison
spin_unlock_irq(&hugetlb_lock);
spin_lock_irq(&hugetlb_lock);
__folio_clear_hugetlb(folio);
-- Hugetlb flag is cleared but too late.
spin_unlock_irq(&hugetlb_lock);
When the above race occurs, raw error page info will be leaked. Even
worse, raw error pages won't have the hwpoisoned flag set and will hit
the pcplists/buddy. Fix this issue by deferring
folio_clear_hugetlb_hwpoison() until __folio_clear_hugetlb() is done, so
that all raw error pages will have the hwpoisoned flag set.
Link: https://lkml.kernel.org/r/20240708025127.107713-1-linmiaohe@huawei.com
Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Acked-by: Muchun Song <muchun.song(a)linux.dev>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2afb70171b76..fe44324d6383 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1725,13 +1725,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
return;
}
- /*
- * Move PageHWPoison flag from head page to the raw error pages,
- * which makes any healthy subpages reusable.
- */
- if (unlikely(folio_test_hwpoison(folio)))
- folio_clear_hugetlb_hwpoison(folio);
-
/*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb flag under the hugetlb lock.
@@ -1742,6 +1735,13 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
spin_unlock_irq(&hugetlb_lock);
}
+ /*
+ * Move PageHWPoison flag from head page to the raw error pages,
+ * which makes any healthy subpages reusable.
+ */
+ if (unlikely(folio_test_hwpoison(folio)))
+ folio_clear_hugetlb_hwpoison(folio);
+
folio_ref_unfreeze(folio, 1);
/*
On Wed, Aug 07, 2024 at 06:00:11AM +0300, ahmed Ehab wrote:
> On Sat, Aug 3, 2024 at 3:51 AM Boqun Feng <boqun.feng(a)gmail.com> wrote:
>
> > On Mon, Jul 15, 2024 at 04:26:38PM +0300, botta633 wrote:
> > > From: Ahmed Ehab <bottaawesome633(a)gmail.com>
> > >
> > > Checking if the lockdep_map->name will change when setting the subclass.
> > > It shouldn't change, so that the lock class and subclass will have the
> > > same name.
> > >
> > > Reported-by: <syzbot+7f4a6f7f7051474e40ad(a)syzkaller.appspotmail.com>
> > > Fixes: de8f5e4f2dc1f ("lockdep: Introduce wait-type checks")
> > > Cc: <stable(a)vger.kernel.org>
> >
> > You seem to have missed my comment at v2:
> >
> > https://lore.kernel.org/lkml/ZpRKcHNZfsMuACRG@boqun-archlinux/
> >
> > , i.e. you don't need the Reported-by, Fixes and Cc tag for the patch
> > that adds a test case.
> >
> > > Signed-off-by: Ahmed Ehab <bottaawesome633(a)gmail.com>
> > > ---
> > > v3->v4:
> > > - Fixed subject line truncation.
> > >
> > > lib/locking-selftest.c | 21 +++++++++++++++++++++
> > > 1 file changed, 21 insertions(+)
> > >
> > > diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
> > > index 6f6a5fc85b42..aeed613799ca 100644
> > > --- a/lib/locking-selftest.c
> > > +++ b/lib/locking-selftest.c
> > > @@ -2710,6 +2710,25 @@ static void local_lock_3B(void)
> > >
> > > }
> > >
> > > + /**
> >
> > ^ there is a trailing space here; next time you can detect this by using
> > checkpatch. Also, the "/**" style is specifically for function signature
> > comments; you could just use a "/*" here.
> >
> > > + * after setting the subclass the lockdep_map.name changes
> > > + * if we initialize a new string literal for the subclass
> > > + * we will have a new name pointer
> > > + */
> > > +static void class_subclass_X1_name_test(void)
> > > +{
> > > + printk("--------------------------------------------------------------------------\n");
> > > + printk(" | class and subclass name test|\n");
> > > + printk(" ---------------------\n");
> > > + const char *name_before_setting_subclass = rwsem_X1.dep_map.name;
> > > + const char *name_after_setting_subclass;
> > > +
> > > + WARN_ON(!rwsem_X1.dep_map.name);
> > > + lockdep_set_subclass(&rwsem_X1, 1);
> > > + name_after_setting_subclass = rwsem_X1.dep_map.name;
> > > + WARN_ON(name_before_setting_subclass != name_after_setting_subclass);
> > > +}
> > > +
> > > static void local_lock_tests(void)
> > > {
> > > printk("--------------------------------------------------------------------------\n");
> > > @@ -2916,6 +2935,8 @@ void locking_selftest(void)
> > >
> > > local_lock_tests();
> > >
> > > + class_subclass_X1_name_test();
> > > +
> >
> > I got this in the serial log:
> >
> > [ 0.619454]
> > --------------------------------------------------------------------------
> > [ 0.621463] | local_lock tests |
> > [ 0.622326] ---------------------
> > [ 0.623211] local_lock inversion 2: ok |
> > [ 0.624904] local_lock inversion 3A: ok |
> > [ 0.626740] local_lock inversion 3B: ok |
> > [ 0.628492]
> > --------------------------------------------------------------------------
> > [ 0.630513] | class and subclass name test|
> > [ 0.631614] ---------------------
> > [ 0.632502] hardirq_unsafe_softirq_safe: ok |
> >
> > two problems here:
> >
> > 1) The "class and subclass name test" line interrupts the output of
> > testsuite "local_lock tests".
> >
> > 2) Instead of a WARN_ON(), could you look into using dotest() to
> > print "ok" if the test passes, which is consistent with other
> > tests.
> >
>
> I wrote it this way:
> static void lock_class_subclass_X1(void)
> {
> const char *name_before_setting_subclass = rwsem_X1.dep_map.name;
> const char *name_after_setting_subclass;
>
> lockdep_set_subclass(&rwsem_X1, 1);
> name_after_setting_subclass = rwsem_X1.dep_map.name;
> debug_locks = name_before_setting_subclass == name_after_setting_subclass;
I think you could use:
DEBUG_LOCK_WARN_ON(name_before_setting_subclass != name_after_setting_subclass);
here.
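I.e., roughly (an untested sketch, reusing your function above):
static void lock_class_subclass_X1(void)
{
	const char *name_before_setting_subclass = rwsem_X1.dep_map.name;
	const char *name_after_setting_subclass;

	lockdep_set_subclass(&rwsem_X1, 1);
	name_after_setting_subclass = rwsem_X1.dep_map.name;
	/* Clears debug_locks (so dotest() reports a failure) if the name changed. */
	DEBUG_LOCK_WARN_ON(name_before_setting_subclass !=
			   name_after_setting_subclass);
}
with class_subclass_X1_name_test() still running it via
dotest(lock_class_subclass_X1, SUCCESS, LOCKTYPE_RWSEM).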
Regards,
Boqun
> }
> ...
> static void class_subclass_X1_name_test(void)
> {
> printk("--------------------------------------------------------------------------\n");
> printk(" | class and subclass name test|\n");
> printk(" ---------------------\n");
>
> print_testname("lock class and subclass same name");
> dotest(lock_class_subclass_X1, SUCCESS, LOCKTYPE_RWSEM);
> pr_cont("\n");
> }
> However, assigning a value to debug_locks seems very uncommon. I tried to
> check other test cases; however, they seem to rely on the method they are
> testing. Do you have a suggestion for my scenario if I want to compare the
> names before and after setting the subclass?
> Or do you suggest that I follow a different approach other than comparing the
> names, such as checking debug_locks in lockdep_init_map_type and returning
> when we have multiple instantiations for lock->name?
>
> >
> > Could you please fix all above problems and send another version of this
> > patch (no need to resend the first one)? Thanks!
> >
> > Regards,
> > Boqun
> >
> > > print_testname("hardirq_unsafe_softirq_safe");
> > > dotest(hardirq_deadlock_softirq_not_deadlock, FAILURE,
> > LOCKTYPE_SPECIAL);
> > > pr_cont("\n");
> > > --
> > > 2.45.2
> > >
> >
>
> Regards,
> Ahmed
From: Michal Kubiak <michal.kubiak(a)intel.com>
The initialization of vport interrupt consists of two functions:
1) idpf_vport_intr_init() where a generic configuration is done
2) idpf_vport_intr_req_irq() where the irq for each q_vector is
requested.
The first function used to create a base name for each interrupt using a
"kasprintf()" call. Unfortunately, although that call allocated memory
for a text buffer, that memory was never released.
Fix this by no longer creating the interrupt base name in 1).
Instead, always create the full interrupt name in function 2); there is
no need to create a base name separately, considering that function 2)
is never called outside of the idpf_vport_intr_init() context.
Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport")
Cc: stable(a)vger.kernel.org # 6.7
Signed-off-by: Michal Kubiak <michal.kubiak(a)intel.com>
Reviewed-by: Pavan Kumar Linga <pavan.kumar.linga(a)intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Krishneil Singh <krishneil.k.singh(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
---
drivers/net/ethernet/intel/idpf/idpf_txrx.c | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index af2879f03b8d..a2f9f252694a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3780,13 +3780,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector)
/**
* idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport
* @vport: main vport structure
- * @basename: name for the vector
*/
-static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename)
+static int idpf_vport_intr_req_irq(struct idpf_vport *vport)
{
struct idpf_adapter *adapter = vport->adapter;
+ const char *drv_name, *if_name, *vec_name;
int vector, err, irq_num, vidx;
- const char *vec_name;
+
+ drv_name = dev_driver_string(&adapter->pdev->dev);
+ if_name = netdev_name(vport->netdev);
for (vector = 0; vector < vport->num_q_vectors; vector++) {
struct idpf_q_vector *q_vector = &vport->q_vectors[vector];
@@ -3804,8 +3806,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename)
else
continue;
- name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name,
- vidx);
+ name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name,
+ vec_name, vidx);
err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0,
name, q_vector);
@@ -4326,7 +4328,6 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport)
*/
int idpf_vport_intr_init(struct idpf_vport *vport)
{
- char *int_name;
int err;
err = idpf_vport_intr_init_vec_idx(vport);
@@ -4340,11 +4341,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
if (err)
goto unroll_vectors_alloc;
- int_name = kasprintf(GFP_KERNEL, "%s-%s",
- dev_driver_string(&vport->adapter->pdev->dev),
- vport->netdev->name);
-
- err = idpf_vport_intr_req_irq(vport, int_name);
+ err = idpf_vport_intr_req_irq(vport);
if (err)
goto unroll_vectors_alloc;
--
2.42.0
To prevent dereferencing a potential error return value, it is necessary
to check the return value of btf__type_by_id(). Add a NULL check to fix
the issue.
Cc: stable(a)vger.kernel.org
Fixes: 430025e5dca5 ("libbpf: Add subskeleton scaffolding")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
tools/lib/bpf/libbpf.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a3be6f8fac09..d1eb45d16054 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -13850,6 +13850,9 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
var = btf_var_secinfos(map_type);
for (i = 0; i < len; i++, var++) {
var_type = btf__type_by_id(btf, var->type);
+ if (!var_type)
+ return libbpf_err(-ENOENT);
+
var_name = btf__name_by_offset(btf, var_type->name_off);
if (strcmp(var_name, var_skel->name) == 0) {
*var_skel->addr = map->mmaped + var->offset;
--
2.25.1
The patch titled
Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Waiman Long <longman(a)redhat.com>
Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
Date: Tue, 6 Aug 2024 12:41:07 -0400
The memory_failure_cpu structure is a per-cpu structure. Access to its
content requires the use of get_cpu_var() to lock in the current CPU and
disable preemption. The use of a regular spinlock_t for locking purposes
is fine for a non-RT kernel.
Since the integration of RT spinlock support into the v5.15 kernel, a
spinlock_t in an RT kernel becomes a sleeping lock, and taking a sleeping
lock in a preemption-disabled context is illegal, resulting in the
following kind of warning:
[12135.732244] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
[12135.732248] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 270076, name: kworker/0:0
[12135.732252] preempt_count: 1, expected: 0
[12135.732255] RCU nest depth: 2, expected: 2
:
[12135.732420] Hardware name: Dell Inc. PowerEdge R640/0HG0J8, BIOS 2.10.2 02/24/2021
[12135.732423] Workqueue: kacpi_notify acpi_os_execute_deferred
[12135.732433] Call Trace:
[12135.732436] <TASK>
[12135.732450] dump_stack_lvl+0x57/0x81
[12135.732461] __might_resched.cold+0xf4/0x12f
[12135.732479] rt_spin_lock+0x4c/0x100
[12135.732491] memory_failure_queue+0x40/0xe0
[12135.732503] ghes_do_memory_failure+0x53/0x390
[12135.732516] ghes_do_proc.constprop.0+0x229/0x3e0
[12135.732575] ghes_proc+0xf9/0x1a0
[12135.732591] ghes_notify_hed+0x6a/0x150
[12135.732602] notifier_call_chain+0x43/0xb0
[12135.732626] blocking_notifier_call_chain+0x43/0x60
[12135.732637] acpi_ev_notify_dispatch+0x47/0x70
[12135.732648] acpi_os_execute_deferred+0x13/0x20
[12135.732654] process_one_work+0x41f/0x500
[12135.732695] worker_thread+0x192/0x360
[12135.732715] kthread+0x111/0x140
[12135.732733] ret_from_fork+0x29/0x50
[12135.732779] </TASK>
Fix it by using a raw_spinlock_t for locking instead. Also move the
pr_err() out of the lock critical section to avoid indeterminate latency
of this call.
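The resulting pattern, reduced to a generic sketch (made-up names, not the
actual mm/memory-failure.c code; lock initialization omitted):
struct my_pcpu_queue {
	raw_spinlock_t lock;		/* was spinlock_t: a sleeping lock on RT */
	/* ... per-CPU queue state ... */
};

static DEFINE_PER_CPU(struct my_pcpu_queue, my_pcpu_queue);

static void my_queue_item(void)
{
	struct my_pcpu_queue *q;
	unsigned long flags;
	bool overflow = false;

	q = &get_cpu_var(my_pcpu_queue);	/* disables preemption */
	raw_spin_lock_irqsave(&q->lock, flags);	/* never sleeps, even on RT */
	/* ... enqueue, or set 'overflow' if the fifo is full ... */
	raw_spin_unlock_irqrestore(&q->lock, flags);
	if (overflow)
		pr_err("queue overflow\n");	/* logged outside the lock */
	put_cpu_var(my_pcpu_queue);		/* re-enables preemption */
}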
Link: https://lkml.kernel.org/r/20240806164107.1044956-1-longman@redhat.com
Fixes: ea8f5fb8a71f ("HWPoison: add memory_failure_queue()")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Juri Lelli <juri.lelli(a)redhat.com>
Cc: Len Brown <len.brown(a)intel.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu
+++ a/mm/memory-failure.c
@@ -2417,7 +2417,7 @@ struct memory_failure_entry {
struct memory_failure_cpu {
DECLARE_KFIFO(fifo, struct memory_failure_entry,
MEMORY_FAILURE_FIFO_SIZE);
- spinlock_t lock;
+ raw_spinlock_t lock;
struct work_struct work;
};
@@ -2443,19 +2443,21 @@ void memory_failure_queue(unsigned long
{
struct memory_failure_cpu *mf_cpu;
unsigned long proc_flags;
+ bool buffer_overflow;
struct memory_failure_entry entry = {
.pfn = pfn,
.flags = flags,
};
mf_cpu = &get_cpu_var(memory_failure_cpu);
- spin_lock_irqsave(&mf_cpu->lock, proc_flags);
- if (kfifo_put(&mf_cpu->fifo, entry))
+ raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+ buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
+ if (!buffer_overflow)
schedule_work_on(smp_processor_id(), &mf_cpu->work);
- else
+ raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+ if (buffer_overflow)
pr_err("buffer overflow when queuing memory failure at %#lx\n",
pfn);
- spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
@@ -2469,9 +2471,9 @@ static void memory_failure_work_func(str
mf_cpu = container_of(work, struct memory_failure_cpu, work);
for (;;) {
- spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+ raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
- spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+ raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
if (entry.flags & MF_SOFT_OFFLINE)
@@ -2501,7 +2503,7 @@ static int __init memory_failure_init(vo
for_each_possible_cpu(cpu) {
mf_cpu = &per_cpu(memory_failure_cpu, cpu);
- spin_lock_init(&mf_cpu->lock);
+ raw_spin_lock_init(&mf_cpu->lock);
INIT_KFIFO(mf_cpu->fifo);
INIT_WORK(&mf_cpu->work, memory_failure_work_func);
}
_
Patches currently in -mm which might be from longman(a)redhat.com are
padata-fix-possible-divide-by-0-panic-in-padata_mt_helper.patch
mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch
watchdog-handle-the-enodev-failure-case-of-lockup_detector_delay_init-separately.patch
The patch titled
Subject: padata: Fix possible divide-by-0 panic in padata_mt_helper()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
padata-fix-possible-divide-by-0-panic-in-padata_mt_helper.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Waiman Long <longman(a)redhat.com>
Subject: padata: Fix possible divide-by-0 panic in padata_mt_helper()
Date: Tue, 6 Aug 2024 13:46:47 -0400
We were hit with a not-easily-reproducible divide-by-0 panic in padata.c at
boot time.
[ 10.017908] Oops: divide error: 0000 [#1] PREEMPT SMP NOPTI
[ 10.017908] CPU: 26 PID: 2627 Comm: kworker/u1666:1 Not tainted 6.10.0-15.el10.x86_64 #1
[ 10.017908] Hardware name: Lenovo ThinkSystem SR950 [7X12CTO1WW]/[7X12CTO1WW], BIOS [PSE140J-2.30] 07/20/2021
[ 10.017908] Workqueue: events_unbound padata_mt_helper
[ 10.017908] RIP: 0010:padata_mt_helper+0x39/0xb0
:
[ 10.017963] Call Trace:
[ 10.017968] <TASK>
[ 10.018004] ? padata_mt_helper+0x39/0xb0
[ 10.018084] process_one_work+0x174/0x330
[ 10.018093] worker_thread+0x266/0x3a0
[ 10.018111] kthread+0xcf/0x100
[ 10.018124] ret_from_fork+0x31/0x50
[ 10.018138] ret_from_fork_asm+0x1a/0x30
[ 10.018147] </TASK>
Looking at the padata_mt_helper() function, the only way a divide-by-0
panic can happen is when ps->chunk_size is 0. Given the way chunk_size is
initialized in padata_do_multithreaded(), it can be 0 when the min_chunk
in the passed-in padata_mt_job structure is 0.
Fix this divide-by-0 panic by making sure that chunk_size will be at least
1 no matter what the input parameters are.
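As a rough user-space illustration of the failure mode and the clamp
(invented numbers; not the actual kernel/padata.c code):
#include <stdio.h>

#define max(a, b)      ((a) > (b) ? (a) : (b))
#define roundup(x, y)  ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned long job_size = 3, nworks = 8;		/* tiny job, many workers */
	unsigned long min_chunk = 0, align = 2;
	unsigned long chunk_size = job_size / nworks;	/* rounds down to 0 */

	chunk_size = max(chunk_size, min_chunk);	/* still 0 */
	chunk_size = roundup(chunk_size, align);	/* roundup(0, 2) == 0 */

	if (!chunk_size)				/* the fix: force at least 1 */
		chunk_size = 1UL;

	printf("chunk_size = %lu\n", chunk_size);	/* no divide-by-0 later */
	return 0;
}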
Link: https://lkml.kernel.org/r/20240806174647.1050398-1-longman@redhat.com
Fixes: 004ed42638f4 ("padata: add basic support for multithreaded jobs")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Cc: Steffen Klassert <steffen.klassert(a)secunet.com>
Cc: Waiman Long <longman(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/padata.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/kernel/padata.c~padata-fix-possible-divide-by-0-panic-in-padata_mt_helper
+++ a/kernel/padata.c
@@ -517,6 +517,13 @@ void __init padata_do_multithreaded(stru
ps.chunk_size = max(ps.chunk_size, job->min_chunk);
ps.chunk_size = roundup(ps.chunk_size, job->align);
+ /*
+ * chunk_size can be 0 if the caller sets min_chunk to 0. So force it
+ * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().
+ */
+ if (!ps.chunk_size)
+ ps.chunk_size = 1U;
+
list_for_each_entry(pw, &works, pw_list)
if (job->numa_aware) {
int old_node = atomic_read(&last_used_nid);
_
Patches currently in -mm which might be from longman(a)redhat.com are
padata-fix-possible-divide-by-0-panic-in-padata_mt_helper.patch
watchdog-handle-the-enodev-failure-case-of-lockup_detector_delay_init-separately.patch
The following changes since commit 6d834691da474ed1c648753d3d3a3ef8379fa1c1:
virtio_pci_modern: remove admin queue serialization lock (2024-07-17 05:43:21 -0400)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus
for you to fetch changes up to 0823dc64586ba5ea13a7d200a5d33e4c5fa45950:
vhost-vdpa: switch to use vmf_insert_pfn() in the fault handler (2024-07-26 03:26:02 -0400)
----------------------------------------------------------------
virtio: bugfix
Fixes a single, long-standing issue with kick pass-through vdpa.
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
----------------------------------------------------------------
Jason Wang (1):
vhost-vdpa: switch to use vmf_insert_pfn() in the fault handler
drivers/vhost/vdpa.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
From: Willem de Bruijn <willemb(a)google.com>
[ Upstream commit 89add40066f9ed9abe5f7f886fe5789ff7e0c50e ]
Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb
for GSO packets.
The function already checks that a checksum requested with
VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. But for GSO packets
this might not hold for segs after segmentation.
Syzkaller demonstrated to reach this warning in skb_checksum_help
offset = skb_checksum_start_offset(skb);
ret = -EINVAL;
if (WARN_ON_ONCE(offset >= skb_headlen(skb)))
By injecting a TSO packet:
WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0
ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774
ip_finish_output_gso net/ipv4/ip_output.c:279 [inline]
__ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301
iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4850 [inline]
netdev_start_xmit include/linux/netdevice.h:4864 [inline]
xmit_one net/core/dev.c:3595 [inline]
dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611
__dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261
packet_snd net/packet/af_packet.c:3073 [inline]
The geometry of the bad input packet at tcp_gso_segment:
[ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0
[ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244
[ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0))
[ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536
ip_summed=3 complete_sw=0 valid=0 level=0)
Mitigate with stricter input validation.
csum_offset: for GSO packets, deduce the correct value from gso_type.
This is already done for USO. Extend it to TSO. Let UFO be:
udp[46]_ufo_fragment ignores these fields and always computes the
checksum in software.
csum_start: finding the real offset requires parsing to the transport
header. Do not add a parser, use existing segmentation parsing. Thanks
to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded.
Again test both TSO and USO. Do not test UFO for the above reason, and
do not test UDP tunnel offload.
GSO packet are almost always CHECKSUM_PARTIAL. USO packets may be
CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit
from devices with no checksum offload"), but then still these fields
are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So no
need to test for ip_summed == CHECKSUM_PARTIAL first.
This revises an existing fix mentioned in the Fixes tag, which broke
small packets with GSO offload, as detected by kselftests.
Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871
Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org
Fixes: e269d79c7d35 ("net: missing check virtio")
Cc: stable(a)vger.kernel.org
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gma…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
---
Hi,
This patch fixes network failures on OpenStack VMs running with Kernel
6.6.44 (it was working fine with 6.6.43).
```
[ 237.422038] eth0: bad gso: type: 1, size: 1432
```
This has been tested on Flatcar Linux CI with Kernel 6.6.44.
I think it has to be backported to the Linux branches that have the "net:
missing check virtio" commit.
At this moment, these are the releases I know to be affected:
* 6.1.y (with 6.1.103)
* 6.6.y (with 6.6.44)
* 6.10.y (with 6.10.3)
Thanks,
Mathieu - @tormath1
include/linux/virtio_net.h | 16 +++++-----------
net/ipv4/tcp_offload.c | 3 +++
net/ipv4/udp_offload.c | 4 ++++
3 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index d1d7825318c3..6c395a2600e8 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
unsigned int thlen = 0;
unsigned int p_off = 0;
unsigned int ip_proto;
- u64 ret, remainder, gso_size;
if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
@@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16));
- if (hdr->gso_size) {
- gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
- ret = div64_u64_rem(skb->len, gso_size, &remainder);
- if (!(ret && (hdr->gso_size > needed) &&
- ((remainder > needed) || (remainder == 0)))) {
- return -EINVAL;
- }
- skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG;
- }
-
if (!pskb_may_pull(skb, needed))
return -EINVAL;
@@ -182,6 +171,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
if (gso_type != SKB_GSO_UDP_L4)
return -EINVAL;
break;
+ case SKB_GSO_TCPV4:
+ case SKB_GSO_TCPV6:
+ if (skb->csum_offset != offsetof(struct tcphdr, check))
+ return -EINVAL;
+ break;
}
/* Kernel has a special handling for GSO_BY_FRAGS. */
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8311c38267b5..69e6012ae82f 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -73,6 +73,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (thlen < sizeof(*th))
goto out;
+ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
+ goto out;
+
if (!pskb_may_pull(skb, thlen))
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index e5971890d637..9cb13a50011e 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -278,6 +278,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (unlikely(skb_checksum_start(gso_skb) !=
+ skb_transport_header(gso_skb)))
+ return ERR_PTR(-EINVAL);
+
if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
--
2.44.2
From: Francesco Dolcini <francesco.dolcini(a)toradex.com>
This reverts commit 3935fbc87ddebea5439f3ab6a78b1e83e976bf88.
CTRL_SLEEP_MOCI# is a signal that is defined for all the SoM
implementing the Verdin family specification, this signal is supposed to
control the power enable in the carrier board when the system is in deep
sleep mode. However this is not possible with Texas Instruments AM62
SoC, IOs output buffer is disabled in deep sleep and IOs are in
tri-state mode.
Given that we cannot properly control this pin, force it to be always
high to minimize potential issues.
Fixes: 3935fbc87dde ("arm64: dts: ti: k3-am62-verdin-dahlia: support sleep-moci")
Cc: <stable(a)vger.kernel.org>
Link: https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1…
Signed-off-by: Francesco Dolcini <francesco.dolcini(a)toradex.com>
---
.../boot/dts/ti/k3-am62-verdin-dahlia.dtsi | 22 -------------------
arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi | 6 -----
2 files changed, 28 deletions(-)
diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi
index e8f4d136e5df..9202181fbd65 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi
@@ -43,15 +43,6 @@ simple-audio-card,cpu {
sound-dai = <&mcasp0>;
};
};
-
- reg_usb_hub: regulator-usb-hub {
- compatible = "regulator-fixed";
- enable-active-high;
- /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */
- gpio = <&main_gpio0 31 GPIO_ACTIVE_HIGH>;
- regulator-boot-on;
- regulator-name = "HUB_PWR_EN";
- };
};
/* Verdin ETHs */
@@ -193,11 +184,6 @@ &ospi0 {
status = "okay";
};
-/* Do not force CTRL_SLEEP_MOCI# always enabled */
-®_force_sleep_moci {
- status = "disabled";
-};
-
/* Verdin SD_1 */
&sdhci1 {
status = "okay";
@@ -218,15 +204,7 @@ &usbss1 {
};
&usb1 {
- #address-cells = <1>;
- #size-cells = <0>;
status = "okay";
-
- usb-hub@1 {
- compatible = "usb424,2744";
- reg = <1>;
- vdd-supply = <®_usb_hub>;
- };
};
/* Verdin CTRL_WAKE1_MICO# */
diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
index 359f53f3e019..5bef31b8577b 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
@@ -138,12 +138,6 @@ reg_1v8_eth: regulator-1v8-eth {
vin-supply = <®_1v8>;
};
- /*
- * By default we enable CTRL_SLEEP_MOCI#, this is required to have
- * peripherals on the carrier board powered.
- * If more granularity or power saving is required this can be disabled
- * in the carrier board device tree files.
- */
reg_force_sleep_moci: regulator-force-sleep-moci {
compatible = "regulator-fixed";
enable-active-high;
--
2.39.2
Unaccepted memory is considered unusable free memory, which is not
counted as free on the zone watermark check. This causes
get_page_from_freelist() to accept more memory to hit the high
watermark, but it creates problems in the reclaim path.
The reclaim path encounters a failed zone watermark check and attempts
to reclaim memory. This is usually successful, but if there is little or
no reclaimable memory, it can result in endless reclaim with little to
no progress. This can occur early in the boot process, just after start
of the init process when the only reclaimable memory is the page cache
of the init executable and its libraries.
Treat unaccepted memory as free from the watermark check's point of view.
This way unaccepted memory will never be the trigger of memory reclaim.
Accept more memory in get_page_from_freelist() if needed.
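A toy example of the resulting arithmetic (invented numbers; the other
unusable-free terms such as CMA are ignored here):
#include <stdio.h>

int main(void)
{
	long nr_free = 1000, nr_unaccepted = 900, high_wmark = 200;

	/* Before: unaccepted pages counted as unusable, so the watermark
	 * check sees only 100 usable pages and keeps failing, triggering
	 * reclaim even when nothing is reclaimable. */
	printf("old usable free: %ld (high watermark: %ld)\n",
	       nr_free - nr_unaccepted, high_wmark);

	/* After: unaccepted pages pass the watermark check, and
	 * get_page_from_freelist() accepts just enough to reach it. */
	long to_accept = high_wmark - (nr_free - nr_unaccepted);
	printf("pages to accept: %ld\n", to_accept > 0 ? to_accept : 0);
	return 0;
}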
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Reported-by: Jianxiong Gao <jxgao(a)google.com>
Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory")
Cc: stable(a)vger.kernel.org # v6.5+
---
mm/page_alloc.c | 42 ++++++++++++++++++++----------------------
1 file changed, 20 insertions(+), 22 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 28f80daf5c04..aa9b1eaa638c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes);
static bool page_contains_unaccepted(struct page *page, unsigned int order);
static void accept_page(struct page *page, unsigned int order);
-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
+static bool cond_accept_memory(struct zone *zone, unsigned int order);
static inline bool has_unaccepted_memory(void);
static bool __free_unaccepted(struct page *page);
@@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
if (!(alloc_flags & ALLOC_CMA))
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
-#ifdef CONFIG_UNACCEPTED_MEMORY
- unusable_free += zone_page_state(z, NR_UNACCEPTED);
-#endif
return unusable_free;
}
@@ -3368,6 +3365,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
}
}
+ cond_accept_memory(zone, order);
+
/*
* Detect whether the number of free pages is below high
* watermark. If so, we will decrease pcp->high and free
@@ -3393,10 +3392,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
gfp_mask)) {
int ret;
- if (has_unaccepted_memory()) {
- if (try_to_accept_memory(zone, order))
- goto try_this_zone;
- }
+ if (cond_accept_memory(zone, order))
+ goto try_this_zone;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
@@ -3450,10 +3447,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
return page;
} else {
- if (has_unaccepted_memory()) {
- if (try_to_accept_memory(zone, order))
- goto try_this_zone;
- }
+ if (cond_accept_memory(zone, order))
+ goto try_this_zone;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* Try again if zone has deferred pages */
@@ -6951,9 +6946,6 @@ static bool try_to_accept_memory_one(struct zone *zone)
struct page *page;
bool last;
- if (list_empty(&zone->unaccepted_pages))
- return false;
-
spin_lock_irqsave(&zone->lock, flags);
page = list_first_entry_or_null(&zone->unaccepted_pages,
struct page, lru);
@@ -6979,23 +6971,29 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true;
}
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
long to_accept;
- int ret = false;
+ bool ret = false;
+
+ if (!has_unaccepted_memory())
+ return false;
+
+ if (list_empty(&zone->unaccepted_pages))
+ return false;
/* How much to accept to get to high watermark? */
to_accept = high_wmark_pages(zone) -
(zone_page_state(zone, NR_FREE_PAGES) -
- __zone_watermark_unusable_free(zone, order, 0));
+ __zone_watermark_unusable_free(zone, order, 0) -
+ zone_page_state(zone, NR_UNACCEPTED));
- /* Accept at least one page */
- do {
+ while (to_accept > 0) {
if (!try_to_accept_memory_one(zone))
break;
ret = true;
to_accept -= MAX_ORDER_NR_PAGES;
- } while (to_accept > 0);
+ }
return ret;
}
@@ -7038,7 +7036,7 @@ static void accept_page(struct page *page, unsigned int order)
{
}
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
return false;
}
--
2.43.0
uevent_show() wants to de-reference dev->driver->name. There is no clean
way for a device attribute to de-reference dev->driver unless that
attribute is defined via (struct device_driver).dev_groups. Instead, the
anti-pattern of taking the device_lock() in the attribute handler risks
deadlocks with code paths that remove device attributes while holding
the lock.
This deadlock is typically invisible to lockdep given the device_lock()
is marked lockdep_set_novalidate_class(), but some subsystems allocate a
local lockdep key for @dev->mutex to reveal reports of the form:
======================================================
WARNING: possible circular locking dependency detected
6.10.0-rc7+ #275 Tainted: G OE N
------------------------------------------------------
modprobe/2374 is trying to acquire lock:
ffff8c2270070de0 (kn->active#6){++++}-{0:0}, at: __kernfs_remove+0xde/0x220
but task is already holding lock:
ffff8c22016e88f8 (&cxl_root_key){+.+.}-{3:3}, at: device_release_driver_internal+0x39/0x210
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&cxl_root_key){+.+.}-{3:3}:
__mutex_lock+0x99/0xc30
uevent_show+0xac/0x130
dev_attr_show+0x18/0x40
sysfs_kf_seq_show+0xac/0xf0
seq_read_iter+0x110/0x450
vfs_read+0x25b/0x340
ksys_read+0x67/0xf0
do_syscall_64+0x75/0x190
entry_SYSCALL_64_after_hwframe+0x76/0x7e
-> #0 (kn->active#6){++++}-{0:0}:
__lock_acquire+0x121a/0x1fa0
lock_acquire+0xd6/0x2e0
kernfs_drain+0x1e9/0x200
__kernfs_remove+0xde/0x220
kernfs_remove_by_name_ns+0x5e/0xa0
device_del+0x168/0x410
device_unregister+0x13/0x60
devres_release_all+0xb8/0x110
device_unbind_cleanup+0xe/0x70
device_release_driver_internal+0x1c7/0x210
driver_detach+0x47/0x90
bus_remove_driver+0x6c/0xf0
cxl_acpi_exit+0xc/0x11 [cxl_acpi]
__do_sys_delete_module.isra.0+0x181/0x260
do_syscall_64+0x75/0x190
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The observation though is that driver objects are typically much longer
lived than device objects. It is reasonable to perform lockless
de-reference of a @driver pointer even if it is racing detach from a
device. Given the infrequency of driver unregistration, use
synchronize_rcu() in module_remove_driver() to close any potential
races. It is potentially overkill to suffer a synchronize_rcu() just to
handle the rare case of a module removal racing a uevent_show() event.
Thanks to Tetsuo Handa for the debug analysis of the syzbot report [1].
Fixes: c0a40097f0bc ("drivers: core: synchronize really_probe() and dev_uevent()")
Reported-by: syzbot+4762dd74e32532cda5ff(a)syzkaller.appspotmail.com
Reported-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Closes: http://lore.kernel.org/5aa5558f-90a4-4864-b1b1-5d6784c5607d@I-love.SAKURA.n… [1]
Link: http://lore.kernel.org/669073b8ea479_5fffa294c1@dwillia2-xfh.jf.intel.com.n…
Cc: stable(a)vger.kernel.org
Cc: Ashish Sangwan <a.sangwan(a)samsung.com>
Cc: Namjae Jeon <namjae.jeon(a)samsung.com>
Cc: Dirk Behme <dirk.behme(a)de.bosch.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/base/core.c | 13 ++++++++-----
drivers/base/module.c | 4 ++++
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 2b4c0624b704..b5399262198a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -25,6 +25,7 @@
#include <linux/mutex.h>
#include <linux/pm_runtime.h>
#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/string_helpers.h>
@@ -2640,6 +2641,7 @@ static const char *dev_uevent_name(const struct kobject *kobj)
static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
const struct device *dev = kobj_to_dev(kobj);
+ struct device_driver *driver;
int retval = 0;
/* add device node properties if present */
@@ -2668,8 +2670,12 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
if (dev->type && dev->type->name)
add_uevent_var(env, "DEVTYPE=%s", dev->type->name);
- if (dev->driver)
- add_uevent_var(env, "DRIVER=%s", dev->driver->name);
+ /* Synchronize with module_remove_driver() */
+ rcu_read_lock();
+ driver = READ_ONCE(dev->driver);
+ if (driver)
+ add_uevent_var(env, "DRIVER=%s", driver->name);
+ rcu_read_unlock();
/* Add common DT information about the device */
of_device_uevent(dev, env);
@@ -2739,11 +2745,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr,
if (!env)
return -ENOMEM;
- /* Synchronize with really_probe() */
- device_lock(dev);
/* let the kset specific function add its keys */
retval = kset->uevent_ops->uevent(&dev->kobj, env);
- device_unlock(dev);
if (retval)
goto out;
diff --git a/drivers/base/module.c b/drivers/base/module.c
index a1b55da07127..b0b79b9c189d 100644
--- a/drivers/base/module.c
+++ b/drivers/base/module.c
@@ -7,6 +7,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/rcupdate.h>
#include "base.h"
static char *make_driver_name(struct device_driver *drv)
@@ -97,6 +98,9 @@ void module_remove_driver(struct device_driver *drv)
if (!drv)
return;
+ /* Synchronize with dev_uevent() */
+ synchronize_rcu();
+
sysfs_remove_link(&drv->p->kobj, "module");
if (drv->owner)
In _emif_get_id(), of_get_address() may return NULL, which is later
dereferenced. Fix this bug by adding a NULL check. The same applies to
of_translate_address(), whose result is now checked against OF_BAD_ADDR.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 86a18ee21e5e ("EDAC, ti: Add support for TI keystone and DRA7xx EDAC")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v4:
- added the check of of_translate_address() as suggestions.
Changes in v3:
- added the patch operations omitted in PATCH v2 RESEND compared to PATCH
v2. Sorry for my oversight.
Changes in v2:
- added Cc stable line.
---
drivers/edac/ti_edac.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index 29723c9592f7..f466f12630d3 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -207,14 +207,24 @@ static int _emif_get_id(struct device_node *node)
int my_id = 0;
addrp = of_get_address(node, 0, NULL, NULL);
+ if (!addrp)
+ return -EINVAL;
+
my_addr = (u32)of_translate_address(node, addrp);
+ if (my_addr == OF_BAD_ADDR)
+ return -EINVAL;
for_each_matching_node(np, ti_edac_of_match) {
if (np == node)
continue;
addrp = of_get_address(np, 0, NULL, NULL);
+ if (!addrp)
+ return -EINVAL;
+
addr = (u32)of_translate_address(np, addrp);
+ if (addr == OF_BAD_ADDR)
+ return -EINVAL;
edac_printk(KERN_INFO, EDAC_MOD_NAME,
"addr=%x, my_addr=%x\n",
--
2.25.1
Le 03/08/2024 à 16:55, Sasha Levin a écrit :
> This is a note to let you know that I've just added the patch titled
>
> ipv4: fix source address selection with route leak
>
> to the 5.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> ipv4-fix-source-address-selection-with-route-leak.patch
> and it can be found in the queue-5.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
I'm not sure I fully understand the process, but Greg already sent a mail
because this patch doesn't compile on the 5.15 stable branch.
I sent a backport:
https://lore.kernel.org/stable/20240802085305.2749750-1-nicolas.dichtel@6wi…
Regards,
Nicolas
>
>
>
> commit dfd009372d960dc1ccf694e7369d58e63cd133c4
> Author: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
> Date: Wed Jul 10 10:14:27 2024 +0200
>
> ipv4: fix source address selection with route leak
>
> [ Upstream commit 6807352353561187a718e87204458999dbcbba1b ]
>
> By default, an address assigned to the output interface is selected when
> the source address is not specified. This is problematic when a route,
> configured in a vrf, uses an interface from another vrf (aka route leak).
> The original vrf does not own the selected source address.
>
> Let's add a check against the output interface and call the appropriate
> function to select the source address.
>
> CC: stable(a)vger.kernel.org
> Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF")
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
> Reviewed-by: David Ahern <dsahern(a)kernel.org>
> Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com
> Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index 3d00253afbb8d..4f1236458c214 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -2286,6 +2286,15 @@ void fib_select_path(struct net *net, struct fib_result *res,
> fib_select_default(fl4, res);
>
> check_saddr:
> - if (!fl4->saddr)
> - fl4->saddr = fib_result_prefsrc(net, res);
> + if (!fl4->saddr) {
> + struct net_device *l3mdev;
> +
> + l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);
> +
> + if (!l3mdev ||
> + l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
> + fl4->saddr = fib_result_prefsrc(net, res);
> + else
> + fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
> + }
> }
Hi Greg,
> Andi Shyti (2):
> drm/i915/gem: Adjust vma offset for framebuffer mmap offset
> drm/i915/gem: Fix Virtual Memory mapping boundaries calculation
I have forgotten to Cc the stable mailing list here. These two
patches need to be merged together even if only the second patch
has the "Fixes:" tag.
Is there anything I should still do here?
I could have used the "Requires:" tag, but the commit id would
change in between merges and rebases.
Andi
On 05/08/2024 13:19, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> net: move ethtool-related netdev state into its own struct
>
> to the 6.10-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> net-move-ethtool-related-netdev-state-into-its-own-s.patch
> and it can be found in the queue-6.10 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
This, and the series it's from, are absolutely not -stable material.
The commits do not fix any existing bugs; they are in support of new
features (netlink dumping of RSS contexts), and are a fairly large
and complex set of changes, which have not even stabilised yet — we
have already found issues both within the set and exposed by it in
other code, which are being fixed for 6.11.
> commit e331e73ff4c5c89a7f51a465ae40a7ad9fcd7a28
> Author: Edward Cree <ecree.xilinx(a)gmail.com>
> Date: Thu Jun 27 16:33:46 2024 +0100
>
> net: move ethtool-related netdev state into its own struct
>
> [ Upstream commit 3ebbd9f6de7ec6d538639ebb657246f629ace81e ]
>
> net_dev->ethtool is a pointer to new struct ethtool_netdev_state, which
> currently contains only the wol_enabled field.
>
> Suggested-by: Jakub Kicinski <kuba(a)kernel.org>
> Signed-off-by: Edward Cree <ecree.xilinx(a)gmail.com>
> Reviewed-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
> Link: https://patch.msgid.link/293a562278371de7534ed1eb17531838ca090633.171950223…
> Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
> Stable-dep-of: 7195f0ef7f5b ("ethtool: fix setting key and resetting indir at once")
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
As far as I can tell, 7195f0ef7f5b should backport fairly cleanly
to 6.10 with only simple textual fuzz.
It should not be necessary to backport the "ethtool: track custom
RSS contexts in the core" series to support this.
The above NAK also applies to the backports of:
net-ethtool-attach-an-xarray-of-custom-rss-contexts-.patch
net-ethtool-record-custom-rss-contexts-in-the-xarray.patch
net-ethtool-add-a-mutex-protecting-rss-contexts.patch
which were notified at the same time.
-ed
The result of multiplication between values derived from functions
dir_buckets() and bucket_blocks() *could* technically reach
2^30 * 2^2 = 2^32.
While unlikely to happen, it is prudent to ensure that it will not
lead to integer overflow. Thus, use mul_u32_u32(), which returns the
full 64-bit product, to mitigate the issue.
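A stand-alone illustration of the overflow and the widened multiply (the
mul_u32_u32() below is a user-space stand-in for the kernel helper):
#include <stdio.h>
#include <stdint.h>

static inline uint64_t mul_u32_u32(uint32_t a, uint32_t b)
{
	return (uint64_t)a * b;		/* widen before multiplying */
}

int main(void)
{
	uint32_t buckets = 1U << 30;	/* upper bound from dir_buckets() */
	uint32_t blocks  = 1U << 2;	/* upper bound from bucket_blocks() */

	/* The 32-bit product of 2^30 and 2^2 wraps to 0 ... */
	printf("u32 product: %u\n", buckets * blocks);
	/* ... while the widened multiply keeps the full 2^32 result. */
	printf("u64 product: %llu\n",
	       (unsigned long long)mul_u32_u32(buckets, blocks));
	return 0;
}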
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: 3843154598a0 ("f2fs: introduce large directory support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
fs/f2fs/dir.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index cbd7a5e96a37..14900ca8a9ff 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -166,7 +166,8 @@ static unsigned long dir_block_index(unsigned int level,
unsigned long bidx = 0;
for (i = 0; i < level; i++)
- bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
+ bidx += mul_u32_u32(dir_buckets(i, dir_level),
+ bucket_blocks(i));
bidx += idx * bucket_blocks(level);
return bidx;
}
While calculating the end addresses of the main area and segment 0, u32
may not be enough to hold the result without the danger of integer
overflow.
Just in case, play it safe and cast one of the operands to a
wider type (u64).
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: fd694733d523 ("f2fs: cover large section in sanity check of super")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
fs/f2fs/super.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3959fd137cc9..4d8f38ca6fcd 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3356,9 +3356,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
u32 segment_count = le32_to_cpu(raw_super->segment_count);
u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
u64 main_end_blkaddr = main_blkaddr +
- (segment_count_main << log_blocks_per_seg);
+ ((u64)segment_count_main << log_blocks_per_seg);
u64 seg_end_blkaddr = segment0_blkaddr +
- (segment_count << log_blocks_per_seg);
+ ((u64)segment_count << log_blocks_per_seg);
if (segment0_blkaddr != cp_blkaddr) {
f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
When dealing with large extents and calculating file offsets by
summing up the corresponding extent offsets and lengths, which are of
unsigned int type, one may encounter integer overflow if the values are
big enough.
Prevent this from happening by widening one of the addends to the
(pgoff_t) type.
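The same class of problem, shown stand-alone (invented values; pgoff_t is
used here as a stand-in typedef for the kernel's unsigned long type):
#include <stdio.h>
#include <stdint.h>

typedef unsigned long pgoff_t;	/* stand-in for the kernel type */

int main(void)
{
	uint32_t fofs = 0xffff0000u;	/* invented extent file offset */
	uint32_t len  = 0x00020000u;	/* invented extent length */

	/* 32-bit addition wraps around ... */
	printf("u32 sum:     0x%x\n", fofs + len);
	/* ... widening one addend keeps the real file offset. */
	printf("pgoff_t sum: 0x%lx\n", (pgoff_t)fofs + len);
	return 0;
}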
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
As the patch covers several code fragments, one singular Fixes: tag
is hard to pinpoint. Hopefully, it's not critical at this stage.
fs/f2fs/extent_cache.c | 4 ++--
fs/f2fs/file.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
We should wait for all in-flight DIO write IOs to complete before block
removal; otherwise, a previous direct write IO may overwrite data in a
block which may be reused by another inode.
Cc: stable(a)vger.kernel.org
Signed-off-by: Chao Yu <chao(a)kernel.org>
---
fs/f2fs/file.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 76a6043caf27..f2d0e0de775f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1056,6 +1056,13 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return err;
}
+ /*
+ * wait for inflight dio, blocks should be removed after
+ * IO completion.
+ */
+ if (attr->ia_size < old_size)
+ inode_dio_wait(inode);
+
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
@@ -1892,6 +1899,12 @@ static long f2fs_fallocate(struct file *file, int mode,
if (ret)
goto out;
+ /*
+ * wait for inflight dio, blocks should be removed after IO
+ * completion.
+ */
+ inode_dio_wait(inode);
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
if (offset >= inode->i_size)
goto out;
--
2.40.1
From: Qiuxu Zhuo <qiuxu.zhuo(a)intel.com>
[ Upstream commit 833cd3e9ad8360785b6c23c82dd3856df00732d9 ]
Sometimes the system [1] hangs on x86 I/O machine checks. However, the
expected behavior is to reboot the system, as the machine check handler
ultimately triggers a panic(), initiating a reboot in the last step.
The root cause is that sometimes the panic() is blocked when
drm_fb_helper_damage() invokes schedule_work() to flush the frame buffer.
This occurs during the process of flushing all messages to the frame
buffer driver as shown in the following call trace:
Machine check occurs [2]:
panic()
console_flush_on_panic()
console_flush_all()
console_emit_next_record()
con->write()
vt_console_print()
hide_cursor()
vc->vc_sw->con_cursor()
fbcon_cursor()
ops->cursor()
bit_cursor()
soft_cursor()
info->fbops->fb_imageblit()
drm_fbdev_generic_defio_imageblit()
drm_fb_helper_damage_area()
drm_fb_helper_damage()
schedule_work() // <--- blocked here
...
emergency_restart() // wasn't invoked, so no reboot.
During panic(), except the panic CPU, all the other CPUs are stopped.
In schedule_work(), the panic CPU requires the lock of the worker_pool to
queue the work on that pool, while the lock may have been taken by some
other stopped CPU. So schedule_work() is blocked.
Additionally, during a panic(), since there is no opportunity to execute
any scheduled work, it's safe to fix this issue by skipping schedule_work()
on 'oops_in_progress' in drm_fb_helper_damage().
[1] Enable the kernel option CONFIG_FRAMEBUFFER_CONSOLE,
CONFIG_DRM_FBDEV_EMULATION, and boot with the 'console=tty0'
kernel command line parameter.
[2] Set 'panic_timeout' to a non-zero value before calling panic().
Acked-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Reported-by: Yudong Wang <yudong.wang(a)intel.com>
Tested-by: Yudong Wang <yudong.wang(a)intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240703141737.75378-1-qiuxu.…
Signed-off-by: Maarten Lankhorst,,, <maarten.lankhorst(a)linux.intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/drm_fb_helper.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 117237d3528bd..618b045230336 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -631,6 +631,17 @@ static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u
static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y,
u32 width, u32 height)
{
+ /*
+ * This function may be invoked by panic() to flush the frame
+ * buffer, where all CPUs except the panic CPU are stopped.
+ * During the following schedule_work(), the panic CPU needs
+ * the worker_pool lock, which might be held by a stopped CPU,
+ * causing schedule_work() and panic() to block. Return early on
+ * oops_in_progress to prevent this blocking.
+ */
+ if (oops_in_progress)
+ return;
+
drm_fb_helper_add_damage_clip(helper, x, y, width, height);
schedule_work(&helper->damage_work);
--
2.43.0
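As a side note, a minimal pthread-based sketch of the same guard idea (the
flag and function names are made up, purely illustrative): an emergency
path sets a flag so the flush helper returns before it would block on a
lock that a stopped thread may hold.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static bool in_emergency;

static void queue_flush_work(void)
{
	if (in_emergency)
		return;		/* same idea as the oops_in_progress bail-out */

	pthread_mutex_lock(&pool_lock);
	/* ... enqueue the deferred flush work here ... */
	pthread_mutex_unlock(&pool_lock);
}

int main(void)
{
	in_emergency = true;	/* pretend we are in the panic path */
	queue_flush_work();	/* returns without touching pool_lock */
	puts("emergency flush skipped deferred work");
	return 0;
}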
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 06c03c8edce333b9ad9c6b207d93d3a5ae7c10c0
Gitweb: https://git.kernel.org/tip/06c03c8edce333b9ad9c6b207d93d3a5ae7c10c0
Author: Justin Stitt <justinstitt(a)google.com>
AuthorDate: Fri, 17 May 2024 00:47:10
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Mon, 05 Aug 2024 16:14:14 +02:00
ntp: Safeguard against time_constant overflow
Using syzkaller with the recently reintroduced signed integer overflow
sanitizer produces this UBSAN report:
UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:738:18
9223372036854775806 + 4 cannot be represented in type 'long'
Call Trace:
handle_overflow+0x171/0x1b0
__do_adjtimex+0x1236/0x1440
do_adjtimex+0x2be/0x740
The user supplied time_constant value is incremented by four and then
clamped to the operating range.
Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user
supplied value was sanity checked to be in the operating range. That change
removed the sanity check and relied on clamping after incrementing which
does not work correctly when the user supplied value is in the overflow
zone of the '+ 4' operation.
The operation requires CAP_SYS_TIME and the side effect of the overflow is
NTP getting out of sync.
Similar to the fixups for time_maxerror and time_esterror, clamp the user
space supplied value to the operating range.
[ tglx: Switch to clamping ]
Fixes: eea83d896e31 ("ntp: NTP4 user space bits update")
Signed-off-by: Justin Stitt <justinstitt(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Miroslav Lichvar <mlichvar(a)redhat.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-c-v2-1-f3a80096f36f@google.…
Closes: https://github.com/KSPP/linux/issues/352
---
kernel/time/ntp.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 502e1e5..8d2dd21 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -733,11 +733,10 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT);
if (txc->modes & ADJ_TIMECONST) {
- time_constant = txc->constant;
+ time_constant = clamp(txc->constant, 0, MAXTC);
if (!(time_status & STA_NANO))
time_constant += 4;
- time_constant = min(time_constant, (long)MAXTC);
- time_constant = max(time_constant, 0l);
+ time_constant = clamp(time_constant, 0, MAXTC);
}
if (txc->modes & ADJ_TAI &&
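For illustration, a small userspace sketch of the clamping logic (the
helper and the MAXTC value here are stand-ins, not the kernel's code):
clamping the user-supplied value before the '+ 4' keeps the addition far
away from LONG_MAX, and the second clamp preserves the final operating
range.

#include <limits.h>
#include <stdio.h>

#define MAXTC 10	/* stand-in for the kernel's operating limit */

static long clamp_long(long v, long lo, long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

static long process_time_constant(long user_value, int sta_nano)
{
	long tc = clamp_long(user_value, 0, MAXTC);	/* clamp the input first */

	if (!sta_nano)
		tc += 4;		/* at most MAXTC + 4: no overflow possible */

	return clamp_long(tc, 0, MAXTC);
}

int main(void)
{
	/* the old code added 4 to this value first, overflowing a long */
	printf("%ld\n", process_time_constant(LONG_MAX - 1, 0));
	return 0;
}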
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 5916be8a53de6401871bdd953f6c60237b47d6d3
Gitweb: https://git.kernel.org/tip/5916be8a53de6401871bdd953f6c60237b47d6d3
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Sat, 03 Aug 2024 17:07:51 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Mon, 05 Aug 2024 16:14:14 +02:00
timekeeping: Fix bogus clock_was_set() invocation in do_adjtimex()
The addition of the bases argument to clock_was_set() fixed up all call
sites correctly except for do_adjtimex(). This uses CLOCK_REALTIME
instead of CLOCK_SET_WALL as argument. CLOCK_REALTIME is 0.
As a result the effect of that clock_was_set() notification is incomplete
and might result in timers expiring late because the hrtimer code does
not re-evaluate the affected clock bases.
Use CLOCK_SET_WALL instead of CLOCK_REALTIME to tell the hrtimers code
which clock bases need to be re-evaluated.
Fixes: 17a1b8826b45 ("hrtimer: Add bases argument to clock_was_set()")
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/877ccx7igo.ffs@tglx
---
kernel/time/timekeeping.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2fa87dc..5391e41 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2606,7 +2606,7 @@ int do_adjtimex(struct __kernel_timex *txc)
clock_set |= timekeeping_advance(TK_ADV_FREQ);
if (clock_set)
- clock_was_set(CLOCK_REALTIME);
+ clock_was_set(CLOCK_SET_WALL);
ntp_notify_cmos_timer();
RT tasks do not have any timerslack, as this induces jitter. Because of
that, the timer slack is already ignored in the nanosleep family and in
schedule_hrtimeout_range() (fixed in 0c52310f2600).
The hrtimer_start_range_ns function is indirectly used by glibc-2.33+
for timed waits on condition variables. These are sometimes used in
RT applications for realtime queue processing. At least on the
combination of kernel 5.10 and glibc-2.31, the timed wait on condition
variables in rt tasks was precise (no slack); however, glibc-2.33
changed the internal wait implementation, exposing this oversight.
Make the timer slack consistent across all hrtimer programming code,
by ignoring the timerslack for tasks with rt policies also in the last
remaining location in hrtimer_start_range_ns().
Cc: stable(a)vger.kernel.org
Signed-off-by: Felix Moessbauer <felix.moessbauer(a)siemens.com>
---
kernel/time/hrtimer.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index b8ee320208d4..e8b44e7c281f 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1274,7 +1274,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
* hrtimer_start_range_ns - (re)start an hrtimer
* @timer: the timer to be added
* @tim: expiry time
- * @delta_ns: "slack" range for the timer
+ * @delta_ns: "slack" range for the timer for SCHED_OTHER tasks
* @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
* relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
* softirq based mode is considered for debug purpose only!
@@ -1299,6 +1299,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
base = lock_hrtimer_base(timer, &flags);
+ /* rt-tasks do not have a timer slack for obvious reasons */
+ if (task_is_realtime(current))
+ delta_ns = 0;
+
if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
hrtimer_reprogram(timer, true);
--
2.39.2
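For reference, a tiny userspace example of where the per-task slack comes
from (the 50us value is arbitrary): the slack is configured with prctl(),
and with this fix the hrtimer start path ignores it for tasks under a
realtime policy, just as the nanosleep family already does.

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* PR_GET_TIMERSLACK returns the current slack in nanoseconds */
	long slack = prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0);

	printf("current timer slack: %ld ns\n", slack);

	/* ask for 50us of slack; a realtime task would have it ignored */
	if (prctl(PR_SET_TIMERSLACK, 50000, 0, 0, 0))
		perror("PR_SET_TIMERSLACK");

	return 0;
}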
RT tasks do not have any timerslack, as this induces jitter. Because of
that, the timer slack is already ignored in the nanosleep family and in
schedule_hrtimeout_range() (fixed in 0c52310f2600).
The hrtimer_start_range_ns function is indirectly used by glibc-2.33+
for timed waits on condition variables. These are sometimes used in
RT applications for realtime queue processing. At least on the
combination of kernel 5.10 and glibc-2.31, the timed wait on condition
variables in rt tasks was precise (no slack); however, glibc-2.33
changed the internal wait implementation, exposing the kernel bug.
This patch makes the timer slack consistent across all hrtimer
programming code, by ignoring the timerslack for rt tasks also in the
last remaining location in hrtimer_start_range_ns().
Similar to 0c52310f2600, this fix should be backported as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Felix Moessbauer <felix.moessbauer(a)siemens.com>
---
kernel/time/hrtimer.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 2b1469f61d9c..1b26e095114d 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1274,7 +1274,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
* hrtimer_start_range_ns - (re)start an hrtimer
* @timer: the timer to be added
* @tim: expiry time
- * @delta_ns: "slack" range for the timer
+ * @delta_ns: "slack" range for the timer for SCHED_OTHER tasks
* @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
* relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
* softirq based mode is considered for debug purpose only!
@@ -1299,6 +1299,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
base = lock_hrtimer_base(timer, &flags);
+ /* rt-tasks do not have a timer slack for obvious reasons */
+ if (rt_task(current))
+ delta_ns = 0;
+
if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
hrtimer_reprogram(timer, true);
--
2.39.2
Greg,
Kernel CI is reporting a build failure on v6.6-rt:
https://grafana.kernelci.org/d/build/build?orgId=1&var-datasource=default&v…
It's in arch/riscv/kernel/cpufeature.c where a return statement in
check_unaligned_access() doesn't have a value (and
check_unaligned_access returns int).
Is 6.6 stable supporting RISC-V? If so, then we either have to fix that
return or backport the refactor of arch/riscv/kernel/cpufeature.c
(f413aae96cda0). If it's not, then who should I talk to about turning
off riscv CI builds for v6.6-rt?
Thanks,
Clark
On Mon, Aug 5, 2024 at 2:10 PM Blake Sperling <breakingspell(a)gmail.com> wrote:
>
> Hello, I noticed a regression from v6.6.43 to v6.6.44 caused by this commit.
>
> When using a virtio NIC with a QEMU/KVM Windows guest, network traffic from the VM stalls in the outbound (upload) direction. This affects remote access and file shares most noticeably, and the inbound (download) direction does not have the issue.
>
> iperf3 will show consistent results: 0 bytes/sec when initiating a test from within the guest to a server on the LAN, while the reverse direction runs at full speed. Nothing out of the ordinary in host dmesg or guest Event Viewer while the behavior is being displayed.
>
> Crucially, this only seems to affect Windows guests; an Ubuntu guest with the same NIC configuration tests fine in both directions.
> I wonder if the NetKVM guest drivers may be related; the current latest version of the drivers (v248) did not make a difference, but it is several months old.
>
> Let me know if there are any further tests or info I can provide, thanks!
Does Willem's patch fix the issue?
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=8…
Thanks
This is an automatically generated email to let you know that the following patch was queued:
Subject: media: ipu-bridge: fix ipu6 Kconfig dependencies
Author: Arnd Bergmann <arnd(a)arndb.de>
Date: Fri Jul 19 11:53:50 2024 +0200
Commit 4670c8c3fb04 ("media: ipu-bridge: Fix Kconfig dependencies") changed
how IPU_BRIDGE dependencies are handled for all drivers, but the IPU6
variant was added the old way, which causes build time warnings when I2C is
turned off:
WARNING: unmet direct dependencies detected for IPU_BRIDGE
Depends on [n]: MEDIA_SUPPORT [=m] && PCI [=y] && MEDIA_PCI_SUPPORT [=y] && (ACPI [=y] || COMPILE_TEST [=y]) && I2C [=n]
Selected by [m]:
- VIDEO_INTEL_IPU6 [=m] && MEDIA_SUPPORT [=m] && PCI [=y] && MEDIA_PCI_SUPPORT [=y] && (ACPI [=y] || COMPILE_TEST [=y]) && VIDEO_DEV [=m] && X86 [=y] && X86_64 [=y] && HAS_DMA [=y]
To make it consistent with the other IPU drivers as well as avoid this
warning, change the 'select' into 'depends on'.
Fixes: c70281cc83d6 ("media: intel/ipu6: add Kconfig and Makefile")
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
[Sakari Ailus: Alternatively depend on !IPU_BRIDGE.]
Cc: stable(a)vger.kernel.org # for v6.10
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/pci/intel/ipu6/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
---
diff --git a/drivers/media/pci/intel/ipu6/Kconfig b/drivers/media/pci/intel/ipu6/Kconfig
index 154343080c82..b7ab24b89836 100644
--- a/drivers/media/pci/intel/ipu6/Kconfig
+++ b/drivers/media/pci/intel/ipu6/Kconfig
@@ -3,13 +3,13 @@ config VIDEO_INTEL_IPU6
depends on ACPI || COMPILE_TEST
depends on VIDEO_DEV
depends on X86 && X86_64 && HAS_DMA
+ depends on IPU_BRIDGE || !IPU_BRIDGE
select DMA_OPS
select IOMMU_IOVA
select VIDEO_V4L2_SUBDEV_API
select MEDIA_CONTROLLER
select VIDEOBUF2_DMA_CONTIG
select V4L2_FWNODE
- select IPU_BRIDGE
help
This is the 6th Gen Intel Image Processing Unit, found in Intel SoCs
and used for capturing images and video from camera sensors.
This is an automatically generated email to let you know that the following patch was queued:
Subject: media: uvcvideo: Fix custom control mapping probing
Author: Ricardo Ribalda <ribalda(a)chromium.org>
Date: Mon Jul 22 11:52:26 2024 +0000
Custom control mapping introduced a bug, where the filter function was
applied to every single control.
Fix it so it is only applied to the matching controls.
The following dmesg errors during probe are now fixed:
usb 1-5: Found UVC 1.00 device Integrated_Webcam_HD (0c45:670c)
usb 1-5: Failed to query (GET_CUR) UVC control 2 on unit 2: -75 (exp. 1).
usb 1-5: Failed to query (GET_CUR) UVC control 3 on unit 2: -75 (exp. 1).
usb 1-5: Failed to query (GET_CUR) UVC control 6 on unit 2: -75 (exp. 1).
usb 1-5: Failed to query (GET_CUR) UVC control 7 on unit 2: -75 (exp. 1).
usb 1-5: Failed to query (GET_CUR) UVC control 8 on unit 2: -75 (exp. 1).
usb 1-5: Failed to query (GET_CUR) UVC control 9 on unit 2: -75 (exp. 1).
usb 1-5: Failed to query (GET_CUR) UVC control 10 on unit 2: -75 (exp. 1).
Reported-by: Paul Menzel <pmenzel(a)molgen.mpg.de>
Closes: https://lore.kernel.org/linux-media/518cd6b4-68a8-4895-b8fc-97d4dae1ddc4@mo…
Cc: stable(a)vger.kernel.org
Fixes: 8f4362a8d42b ("media: uvcvideo: Allow custom control mapping")
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
Link: https://lore.kernel.org/r/20240722-fix-filter-mapping-v2-1-7ed5bb6c1185@chr…
Tested-by: Paul Menzel <pmenzel(a)molgen.mpg.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
drivers/media/usb/uvc/uvc_ctrl.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
---
diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c
index 0136df5732ba..4fe26e82e3d1 100644
--- a/drivers/media/usb/uvc/uvc_ctrl.c
+++ b/drivers/media/usb/uvc/uvc_ctrl.c
@@ -2680,6 +2680,10 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain,
for (i = 0; i < ARRAY_SIZE(uvc_ctrl_mappings); ++i) {
const struct uvc_control_mapping *mapping = &uvc_ctrl_mappings[i];
+ if (!uvc_entity_match_guid(ctrl->entity, mapping->entity) ||
+ ctrl->info.selector != mapping->selector)
+ continue;
+
/* Let the device provide a custom mapping. */
if (mapping->filter_mapping) {
mapping = mapping->filter_mapping(chain, ctrl);
@@ -2687,9 +2691,7 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain,
continue;
}
- if (uvc_entity_match_guid(ctrl->entity, mapping->entity) &&
- ctrl->info.selector == mapping->selector)
- __uvc_ctrl_add_mapping(chain, ctrl, mapping);
+ __uvc_ctrl_add_mapping(chain, ctrl, mapping);
}
}
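In the same spirit, a tiny userspace sketch of the loop-ordering fix (the
struct and values are invented for illustration): the entity/selector
match is checked first, so a per-mapping filter only ever runs for
controls it actually applies to.

#include <stdio.h>
#include <string.h>

struct mapping {
	const char *entity;
	int selector;
};

int main(void)
{
	static const struct mapping maps[] = {
		{ "camera", 1 }, { "processing", 2 }, { "processing", 3 },
	};
	const char *entity = "processing";
	int selector = 3;

	for (size_t i = 0; i < sizeof(maps) / sizeof(maps[0]); i++) {
		/* match first, exactly like the fix above */
		if (strcmp(maps[i].entity, entity) != 0 ||
		    maps[i].selector != selector)
			continue;

		/* only now would a custom filter / add-mapping step run */
		printf("applying mapping %zu\n", i);
	}
	return 0;
}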
This is an automatically generated email to let you know that the following patch was queued:
Subject: media: intel/ipu6: select AUXILIARY_BUS in Kconfig
Author: Bingbu Cao <bingbu.cao(a)intel.com>
Date: Wed Jul 17 15:40:50 2024 +0800
The Intel IPU6 PCI driver needs to register its devices on the auxiliary
bus, so it needs to select AUXILIARY_BUS in Kconfig.
Reported-by: kernel test robot <lkp(a)intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202407161833.7BEFXejx-lkp@intel.com/
Fixes: c70281cc83d6 ("media: intel/ipu6: add Kconfig and Makefile")
Signed-off-by: Bingbu Cao <bingbu.cao(a)intel.com>
Cc: stable(a)vger.kernel.org # for v6.10
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/pci/intel/ipu6/Kconfig | 1 +
1 file changed, 1 insertion(+)
---
diff --git a/drivers/media/pci/intel/ipu6/Kconfig b/drivers/media/pci/intel/ipu6/Kconfig
index b7ab24b89836..40e20f0aa5ae 100644
--- a/drivers/media/pci/intel/ipu6/Kconfig
+++ b/drivers/media/pci/intel/ipu6/Kconfig
@@ -4,6 +4,7 @@ config VIDEO_INTEL_IPU6
depends on VIDEO_DEV
depends on X86 && X86_64 && HAS_DMA
depends on IPU_BRIDGE || !IPU_BRIDGE
+ select AUXILIARY_BUS
select DMA_OPS
select IOMMU_IOVA
select VIDEO_V4L2_SUBDEV_API
This is an automatically generated email to let you know that the following patch was queued:
Subject: media: v4l: Fix missing tabular column hint for Y14P format
Author: Jean-Michel Hautbois <jeanmichel.hautbois(a)yoseli.org>
Date: Sat Jun 8 18:41:27 2024 +0200
The original patch added two columns to the flat-table of Luma-Only
Image Formats without updating the hints to latex: above it. This results
in a wrong column count in the output of Sphinx's latex builder.
Fix it.
Reported-by: Akira Yokosawa <akiyks(a)gmail.com>
Closes: https://lore.kernel.org/linux-media/bdbc27ba-5098-49fb-aabf-753c81361cc7@gm…
Fixes: adb1d4655e53 ("media: v4l: Add V4L2-PIX-FMT-Y14P format")
Cc: stable(a)vger.kernel.org # for v6.10
Signed-off-by: Jean-Michel Hautbois <jeanmichel.hautbois(a)yoseli.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
---
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst
index f02e6cf3516a..74df19be91f6 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst
@@ -21,9 +21,9 @@ are often referred to as greyscale formats.
.. raw:: latex
- \scriptsize
+ \tiny
-.. tabularcolumns:: |p{3.6cm}|p{3.0cm}|p{1.3cm}|p{2.6cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|
+.. tabularcolumns:: |p{3.6cm}|p{2.4cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|
.. flat-table:: Luma-Only Image Formats
:header-rows: 1
Hello, I noticed a regression from v6.6.43 to v6.6.44 caused by this commit.
When using a virtio NIC with a QEMU/KVM Windows guest, network traffic
from the VM stalls in the outbound (upload) direction. This affects
remote access and file shares most noticeably, and the inbound
(download) direction does not have the issue.
iperf3 will show consistent results: 0 bytes/sec when initiating a
test from within the guest to a server on the LAN, while the reverse
direction runs at full speed. Nothing out of the ordinary in host dmesg
or guest Event Viewer while the behavior is being displayed.
Crucially, this only seems to affect Windows guests; an Ubuntu guest
with the same NIC configuration works fine in both directions.
I wonder if the NetKVM guest drivers may be related; the current latest
version of the drivers (v248) did not make a difference, but it is
several months old.
Let me know if there are any further tests or info I can provide, thanks!
The patch titled
Subject: mm: only enforce minimum stack gap size if it's sensible
has been added to the -mm mm-unstable branch. Its filename is
mm-only-enforce-minimum-stack-gap-size-if-its-sensible.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Gow <davidgow(a)google.com>
Subject: mm: only enforce minimum stack gap size if it's sensible
Date: Sat, 3 Aug 2024 15:46:41 +0800
The generic mmap_base code tries to leave a gap between the top of the
stack and the mmap base address, but enforces a minimum gap size (MIN_GAP)
of 128MB, which is too large on some setups. In particular, on arm tasks
without ADDR_LIMIT_32BIT, the STACK_TOP value is less than 128MB, so it's
impossible to fit such a gap in.
Only enforce this minimum if MIN_GAP < MAX_GAP, as we'd prefer to honour
MAX_GAP, which is defined proportionally, so scales better and always
leaves us with both _some_ stack space and some room for mmap.
This fixes the usercopy KUnit test suite on 32-bit arm, as it doesn't set
any personality flags so gets the default (in this case 26-bit) task size.
This test can be run with: ./tools/testing/kunit/kunit.py run --arch arm
usercopy --make_options LLVM=1
Link: https://lkml.kernel.org/r/20240803074642.1849623-2-davidgow@google.com
Fixes: dba79c3df4a2 ("arm: use generic mmap top-down layout and brk randomization")
Signed-off-by: David Gow <davidgow(a)google.com>
Cc: Alexandre Ghiti <alex(a)ghiti.fr>
Cc: Kees Cook <kees(a)kernel.org>
Cc: Linus Walleij <linus.walleij(a)linaro.org>
Cc: Luis Chamberlain <mcgrof(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Russell King <linux(a)armlinux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/util.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/util.c~mm-only-enforce-minimum-stack-gap-size-if-its-sensible
+++ a/mm/util.c
@@ -463,7 +463,7 @@ static unsigned long mmap_base(unsigned
if (gap + pad > gap)
gap += pad;
- if (gap < MIN_GAP)
+ if (gap < MIN_GAP && MIN_GAP < MAX_GAP)
gap = MIN_GAP;
else if (gap > MAX_GAP)
gap = MAX_GAP;
_
Patches currently in -mm which might be from davidgow(a)google.com are
mm-only-enforce-minimum-stack-gap-size-if-its-sensible.patch
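To see the effect, a small userspace sketch of the new clamping rule (the
numbers are illustrative, not the kernel's actual constants): when MIN_GAP
cannot fit below MAX_GAP, the proportional MAX_GAP wins, so a small task
size still leaves room for both the stack and mmap.

#include <stdio.h>

static unsigned long clamp_gap(unsigned long gap, unsigned long min_gap,
			       unsigned long max_gap)
{
	if (gap < min_gap && min_gap < max_gap)	/* only if MIN_GAP is sensible */
		gap = min_gap;
	else if (gap > max_gap)
		gap = max_gap;
	return gap;
}

int main(void)
{
	unsigned long min_gap = 128ul << 20;		/* 128MB minimum */
	unsigned long max_gap = (64ul << 20) / 6 * 5;	/* small 64MB task size */

	/* the old code would have forced a 128MB gap that cannot fit */
	printf("gap = %lu bytes\n", clamp_gap(8ul << 20, min_gap, max_gap));
	return 0;
}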
The patch titled
Subject: crash: Fix riscv64 crash memory reserve dead loop
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
crash-fix-riscv64-crash-memory-reserve-dead-loop.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Jinjie Ruan <ruanjinjie(a)huawei.com>
Subject: crash: Fix riscv64 crash memory reserve dead loop
Date: Fri, 2 Aug 2024 17:01:05 +0800
On a RISCV64 QEMU machine with 512MB of memory, the cmdline
"crashkernel=500M,high" will cause a system stall as below:
Zone ranges:
DMA32 [mem 0x0000000080000000-0x000000009fffffff]
Normal empty
Movable zone start for each node
Early memory node ranges
node 0: [mem 0x0000000080000000-0x000000008005ffff]
node 0: [mem 0x0000000080060000-0x000000009fffffff]
Initmem setup node 0 [mem 0x0000000080000000-0x000000009fffffff]
(stall here)
Commit 5d99cadf1568 ("crash: fix x86_32 crash memory reserve dead loop
bug") fixed this on 32-bit architectures. However, the problem is not
completely solved. If `CRASH_ADDR_LOW_MAX = CRASH_ADDR_HIGH_MAX` on
64-bit architecture, for example, when system memory is equal to
CRASH_ADDR_LOW_MAX on RISCV64, the following infinite loop will also
occur:
-> reserve_crashkernel_generic() and high is true
-> alloc at [CRASH_ADDR_LOW_MAX, CRASH_ADDR_HIGH_MAX] fail
-> alloc at [0, CRASH_ADDR_LOW_MAX] fail and repeatedly
(because CRASH_ADDR_LOW_MAX = CRASH_ADDR_HIGH_MAX).
Before the refactor in commit 9c08a2a139fe ("x86: kdump: use generic
interface to simplify crashkernel reservation code"), x86 did not try to
reserve crash memory at low addresses if it failed to allocate above high
4G. However, before the refactor in commit fdc268232dbba ("arm64: kdump:
use generic interface to simplify crashkernel reservation"), arm64 did try
to reserve crash memory at low addresses if it failed above high 4G. For
64-bit systems, this attempt is less beneficial than the opposite; remove
it to fix this bug and align with the native x86 implementation.
After this patch, it prints:
cannot allocate crashkernel (size:0x1f400000)
Link: https://lkml.kernel.org/r/20240802090105.3871929-1-ruanjinjie@huawei.com
Fixes: 39365395046f ("riscv: kdump: use generic interface to simplify crashkernel reservation")
Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
Acked-by: Baoquan He <bhe(a)redhat.com>
Cc: Albert Ou <aou(a)eecs.berkeley.edu>
Cc: Chen Jiahao <chenjiahao16(a)huawei.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Vivek Goyal <vgoyal(a)redhat.com>
Cc: Alexandre Ghiti <alex(a)ghiti.fr>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/crash_reserve.c | 9 ---------
1 file changed, 9 deletions(-)
--- a/kernel/crash_reserve.c~crash-fix-riscv64-crash-memory-reserve-dead-loop
+++ a/kernel/crash_reserve.c
@@ -416,15 +416,6 @@ retry:
goto retry;
}
- /*
- * For crashkernel=size[KMG],high, if the first attempt was
- * for high memory, fall back to low memory.
- */
- if (high && search_end == CRASH_ADDR_HIGH_MAX) {
- search_end = CRASH_ADDR_LOW_MAX;
- search_base = 0;
- goto retry;
- }
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
crash_size);
return;
_
Patches currently in -mm which might be from ruanjinjie(a)huawei.com are
crash-fix-riscv64-crash-memory-reserve-dead-loop.patch
crash-fix-x86_32-crash-memory-reserve-dead-loop-bug.patch
crash-fix-x86_32-crash-memory-reserve-dead-loop.patch
arm-use-generic-interface-to-simplify-crashkernel-reservation.patch
crash-fix-crash-memory-reserve-exceed-system-memory-bug.patch
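A compact userspace sketch of the failure mode (addresses and helper are
invented for illustration): because CRASH_ADDR_LOW_MAX equals
CRASH_ADDR_HIGH_MAX here, the "have we already fallen back?" test never
turns false, so the old fallback retried forever; the demo caps the loop
just to show it.

#include <stdbool.h>
#include <stdio.h>

#define CRASH_ADDR_LOW_MAX	0xa0000000ull
#define CRASH_ADDR_HIGH_MAX	0xa0000000ull	/* equal to LOW on this setup */

static bool try_alloc(unsigned long long base, unsigned long long end)
{
	(void)base;
	(void)end;
	return false;	/* pretend the reservation always fails */
}

int main(void)
{
	unsigned long long search_base = CRASH_ADDR_LOW_MAX;
	unsigned long long search_end = CRASH_ADDR_HIGH_MAX;
	bool high = true;
	int attempts = 0;

retry:
	attempts++;
	if (!try_alloc(search_base, search_end)) {
		/*
		 * Old fallback: "if we were still searching high, drop to
		 * low and retry".  With LOW_MAX == HIGH_MAX this test never
		 * turns false, hence the dead loop; cap it for the demo.
		 */
		if (high && search_end == CRASH_ADDR_HIGH_MAX && attempts < 5) {
			search_end = CRASH_ADDR_LOW_MAX;
			search_base = 0;
			goto retry;
		}
		fprintf(stderr, "cannot allocate crashkernel after %d tries\n",
			attempts);
		return 1;
	}
	return 0;
}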
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: f2655ac2c06a15558e51ed6529de280e1553c86e
Gitweb: https://git.kernel.org/tip/f2655ac2c06a15558e51ed6529de280e1553c86e
Author: Paul E. McKenney <paulmck(a)kernel.org>
AuthorDate: Fri, 02 Aug 2024 08:46:15 -07:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Fri, 02 Aug 2024 18:29:28 +02:00
clocksource: Fix brown-bag boolean thinko in cs_watchdog_read()
The current "nretries > 1 || nretries >= max_retries" check in
cs_watchdog_read() will always evaluate to true, and thus pr_warn(), if
nretries is greater than 1. The intent is instead to never warn on the
first try, but otherwise warn if the successful retry was the last retry.
Therefore, change that "||" to "&&".
Fixes: db3a34e17433 ("clocksource: Retry clock read if long delays detected")
Reported-by: Borislav Petkov <bp(a)alien8.de>
Signed-off-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20240802154618.4149953-2-paulmck@kernel.org
---
kernel/time/clocksource.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index d25ba49..d0538a7 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -246,7 +246,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end);
if (wd_delay <= WATCHDOG_MAX_SKEW) {
- if (nretries > 1 || nretries >= max_retries) {
+ if (nretries > 1 && nretries >= max_retries) {
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
smp_processor_id(), watchdog->name, nretries);
}
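A quick userspace check of the boolean fix (the retry counts are
arbitrary): with "||" every nretries greater than 1 warns, whereas with
"&&" only a success on the final retry does.

#include <stdio.h>

int main(void)
{
	const int max_retries = 3;

	for (int nretries = 0; nretries <= max_retries; nretries++) {
		int old_warns = (nretries > 1 || nretries >= max_retries);
		int new_warns = (nretries > 1 && nretries >= max_retries);

		printf("nretries=%d old_warns=%d new_warns=%d\n",
		       nretries, old_warns, new_warns);
	}
	return 0;
}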
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 305c821c3006c1f201eb85bcfb44a35930f54a71
Gitweb: https://git.kernel.org/tip/305c821c3006c1f201eb85bcfb44a35930f54a71
Author: Paul E. McKenney <paulmck(a)kernel.org>
AuthorDate: Thu, 01 Aug 2024 17:16:36 -07:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Fri, 02 Aug 2024 16:34:26 +02:00
clocksource: Fix brown-bag boolean thinko in cs_watchdog_read()
The current "nretries > 1 || nretries >= max_retries" check in
cs_watchdog_read() will always evaluate to true, and thus pr_warn(), if
nretries is greater than 1.
The intent is instead to never warn on the first try, but otherwise warn if
the successful retry was the last retry.
Therefore, change that "||" to "&&".
Fixes: db3a34e17433 ("clocksource: Retry clock read if long delays detected")
Reported-by: Borislav Petkov <bp(a)alien8.de>
Signed-off-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
---
kernel/time/clocksource.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index d25ba49..d0538a7 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -246,7 +246,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end);
if (wd_delay <= WATCHDOG_MAX_SKEW) {
- if (nretries > 1 || nretries >= max_retries) {
+ if (nretries > 1 && nretries >= max_retries) {
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
smp_processor_id(), watchdog->name, nretries);
}
From: Jason Gerecke <jason.gerecke(a)wacom.com>
The Wacom driver maps the HID_DG_TWIST usage to ABS_Z (rather than ABS_RZ)
for historic reasons. When the code to support twist was introduced in
commit 50066a042da5 ("HID: wacom: generic: Add support for height, tilt,
and twist usages"), we were careful to write it in such a way that it had
HID calculate the resolution of the twist axis assuming ABS_RZ instead
(so that we would get correct angular behavior). This was broken with
the introduction of commit 08a46b4190d3 ("HID: wacom: Set a default
resolution for older tablets"), which moved the resolution calculation
to occur *before* the adjustment from ABS_Z to ABS_RZ occurred.
This commit moves the calculation of resolution to after the point where
we have finished setting things up for its proper use.
Signed-off-by: Jason Gerecke <jason.gerecke(a)wacom.com>
Fixes: 08a46b4190d3 ("HID: wacom: Set a default resolution for older tablets")
Cc: stable(a)vger.kernel.org
---
drivers/hid/wacom_wac.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 1f4564982b958..2541fa2e0fa3b 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1878,12 +1878,14 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage,
int fmax = field->logical_maximum;
unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid);
int resolution_code = code;
- int resolution = hidinput_calc_abs_res(field, resolution_code);
+ int resolution;
if (equivalent_usage == HID_DG_TWIST) {
resolution_code = ABS_RZ;
}
+ resolution = hidinput_calc_abs_res(field, resolution_code);
+
if (equivalent_usage == HID_GD_X) {
fmin += features->offset_left;
fmax -= features->offset_right;
--
2.45.2
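A tiny userspace illustration of the ordering bug class being fixed (the
axis codes and resolution values are invented): the derived resolution
must be computed only after the axis code has been remapped, otherwise it
silently reflects the wrong axis.

#include <stdio.h>

#define AXIS_Z	0
#define AXIS_RZ	1

static int calc_abs_res(int axis_code)
{
	/* pretend: angular axes report degrees, linear ones millimetres */
	return axis_code == AXIS_RZ ? 3600 : 100;
}

int main(void)
{
	int code = AXIS_Z;
	int resolution_code = code;
	int resolution;

	/*
	 * The buggy order would be: resolution = calc_abs_res(resolution_code);
	 * before the remap below, yielding the linear value 100.
	 */
	resolution_code = AXIS_RZ;			/* twist is angular */
	resolution = calc_abs_res(resolution_code);	/* computed last: 3600 */

	printf("resolution=%d\n", resolution);
	return 0;
}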
We recently made GUP's common page table walking code also walk hugetlb
VMAs without most hugetlb special-casing, preparing for the future of
having less hugetlb-specific page table walking code in the codebase.
Turns out that we missed one page table locking detail: page table locking
for hugetlb folios that are not mapped using a single PMD/PUD.
Assume we have hugetlb folio that spans multiple PTEs (e.g., 64 KiB
hugetlb folios on arm64 with 4 KiB base page size). GUP, as it walks the
page tables, will perform a pte_offset_map_lock() to grab the PTE table
lock.
However, hugetlb that concurrently modifies these page tables would
actually grab the mm->page_table_lock: with USE_SPLIT_PTE_PTLOCKS, the
locks would differ. Something similar can happen right now with hugetlb
folios that span multiple PMDs when USE_SPLIT_PMD_PTLOCKS.
This issue can be reproduced [1], for example triggering:
[ 3105.936100] ------------[ cut here ]------------
[ 3105.939323] WARNING: CPU: 31 PID: 2732 at mm/gup.c:142 try_grab_folio+0x11c/0x188
[ 3105.944634] Modules linked in: [...]
[ 3105.974841] CPU: 31 PID: 2732 Comm: reproducer Not tainted 6.10.0-64.eln141.aarch64 #1
[ 3105.980406] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-4.fc40 05/24/2024
[ 3105.986185] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 3105.991108] pc : try_grab_folio+0x11c/0x188
[ 3105.994013] lr : follow_page_pte+0xd8/0x430
[ 3105.996986] sp : ffff80008eafb8f0
[ 3105.999346] x29: ffff80008eafb900 x28: ffffffe8d481f380 x27: 00f80001207cff43
[ 3106.004414] x26: 0000000000000001 x25: 0000000000000000 x24: ffff80008eafba48
[ 3106.009520] x23: 0000ffff9372f000 x22: ffff7a54459e2000 x21: ffff7a546c1aa978
[ 3106.014529] x20: ffffffe8d481f3c0 x19: 0000000000610041 x18: 0000000000000001
[ 3106.019506] x17: 0000000000000001 x16: ffffffffffffffff x15: 0000000000000000
[ 3106.024494] x14: ffffb85477fdfe08 x13: 0000ffff9372ffff x12: 0000000000000000
[ 3106.029469] x11: 1fffef4a88a96be1 x10: ffff7a54454b5f0c x9 : ffffb854771b12f0
[ 3106.034324] x8 : 0008000000000000 x7 : ffff7a546c1aa980 x6 : 0008000000000080
[ 3106.038902] x5 : 00000000001207cf x4 : 0000ffff9372f000 x3 : ffffffe8d481f000
[ 3106.043420] x2 : 0000000000610041 x1 : 0000000000000001 x0 : 0000000000000000
[ 3106.047957] Call trace:
[ 3106.049522] try_grab_folio+0x11c/0x188
[ 3106.051996] follow_pmd_mask.constprop.0.isra.0+0x150/0x2e0
[ 3106.055527] follow_page_mask+0x1a0/0x2b8
[ 3106.058118] __get_user_pages+0xf0/0x348
[ 3106.060647] faultin_page_range+0xb0/0x360
[ 3106.063651] do_madvise+0x340/0x598
Let's make huge_pte_lockptr() effectively use the same PT locks as any
core-mm page table walker would. Add ptep_lockptr() to obtain the PTE
page table lock using a pte pointer -- unfortunately we cannot convert
pte_lockptr() because virt_to_page() doesn't work with kmap'ed page
tables we can have with CONFIG_HIGHPTE.
Handle CONFIG_PGTABLE_LEVELS correctly by checking in reverse order,
such that when e.g., CONFIG_PGTABLE_LEVELS==2 with
PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE will work as expected.
Document why that works.
There is one ugly case: powerpc 8xx, whereby we have an 8 MiB hugetlb
folio being mapped using two PTE page tables. While hugetlb wants to take
the PMD table lock, core-mm would grab the PTE table lock of one of both
PTE page tables. In such corner cases, we have to make sure that both
locks match, which is (fortunately!) currently guaranteed for 8xx as it
does not support SMP and consequently doesn't use split PT locks.
[1] https://lore.kernel.org/all/1bbfcc7f-f222-45a5-ac44-c5a1381c596d@redhat.com/
Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code")
Acked-by: Peter Xu <peterx(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
@James, I dropped your RB.
Retested on arm64 and x86-64. Cross-compiled on a bunch of others.
v3 -> v4:
* Replace PTE pointer alignment by BUILD_BUG_ON()
* Simplify lock lookup by looking up in reverse
* Adjust comment and patch description
v2 -> v3:
* Handle CONFIG_PGTABLE_LEVELS oddities as good as possible. It's a mess.
Remove the size >= P4D_SIZE check and simply default to the
&mm->page_table_lock.
* Align the PTE pointer to the start of the page table to handle PTE page
tables bigger than a single page (unclear if this could currently trigger).
* Extend patch description
v1 -> 2:
* Extend patch description
* Drop "mm: let pte_lockptr() consume a pte_t pointer"
* Introduce ptep_lockptr() in this patch
---
include/linux/hugetlb.h | 33 ++++++++++++++++++++++++++++++---
include/linux/mm.h | 11 +++++++++++
2 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8e462205400d..ac3ea8596f93 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -938,10 +938,37 @@ static inline bool htlb_allow_alloc_fallback(int reason)
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
- if (huge_page_size(h) == PMD_SIZE)
+ const unsigned long size = huge_page_size(h);
+
+ VM_WARN_ON(size == PAGE_SIZE);
+
+ /*
+ * hugetlb must use the exact same PT locks as core-mm page table
+ * walkers would. When modifying a PTE table, hugetlb must take the
+ * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
+ * PT lock etc.
+ *
+ * The expectation is that any hugetlb folio smaller than a PMD is
+ * always mapped into a single PTE table and that any hugetlb folio
+ * smaller than a PUD (but at least as big as a PMD) is always mapped
+ * into a single PMD table.
+ *
+ * If that does not hold for an architecture, then that architecture
+ * must disable split PT locks such that all *_lockptr() functions
+ * will give us the same result: the per-MM PT lock.
+ *
+ * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
+ * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
+ * and core-mm would use pmd_lockptr(). However, in such configurations
+ * split PMD locks are disabled -- they don't make sense on a single
+ * PGDIR page table -- and the end result is the same.
+ */
+ if (size >= PUD_SIZE)
+ return pud_lockptr(mm, (pud_t *) pte);
+ else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
return pmd_lockptr(mm, (pmd_t *) pte);
- VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
- return &mm->page_table_lock;
+ /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
+ return ptep_lockptr(mm, pte);
}
#ifndef hugepages_supported
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a890a1731c14..bd219ac9c026 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2869,6 +2869,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
}
+static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
+{
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
+ BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
+ return ptlock_ptr(virt_to_ptdesc(pte));
+}
+
static inline bool ptlock_init(struct ptdesc *ptdesc)
{
/*
@@ -2893,6 +2900,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
return &mm->page_table_lock;
}
+static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
+{
+ return &mm->page_table_lock;
+}
static inline void ptlock_cache_init(void) {}
static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
static inline void ptlock_free(struct ptdesc *ptdesc) {}
--
2.45.2
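As a loose userspace illustration of the descending-order check described
in the comment above (the sizes and names are made up, not the kernel's):
when levels are folded so the PUD and PMD sizes are equal, testing the
larger threshold first still yields one consistent answer.

#include <stdio.h>

#define DEMO_PMD_SIZE	(2ul << 20)
#define DEMO_PUD_SIZE	(2ul << 20)	/* folded levels: same as PMD here */

static const char *lock_for_size(unsigned long size)
{
	if (size >= DEMO_PUD_SIZE)	/* largest level first */
		return "pud-level lock";
	else if (size >= DEMO_PMD_SIZE)
		return "pmd-level lock";
	return "pte-level lock";
}

int main(void)
{
	printf("2M folio  -> %s\n", lock_for_size(2ul << 20));
	printf("64K folio -> %s\n", lock_for_size(64ul << 10));
	return 0;
}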
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6807352353561187a718e87204458999dbcbba1b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072916-pastrami-suction-5192@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
680735235356 ("ipv4: fix source address selection with route leak")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6807352353561187a718e87204458999dbcbba1b Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
Date: Wed, 10 Jul 2024 10:14:27 +0200
Subject: [PATCH] ipv4: fix source address selection with route leak
By default, an address assigned to the output interface is selected when
the source address is not specified. This is problematic when a route,
configured in a vrf, uses an interface from another vrf (aka route leak).
The original vrf does not own the selected source address.
Let's add a check against the output interface and call the appropriate
function to select the source address.
CC: stable(a)vger.kernel.org
Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
Reviewed-by: David Ahern <dsahern(a)kernel.org>
Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f669da98d11d..8956026bc0a2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2270,6 +2270,15 @@ void fib_select_path(struct net *net, struct fib_result *res,
fib_select_default(fl4, res);
check_saddr:
- if (!fl4->saddr)
- fl4->saddr = fib_result_prefsrc(net, res);
+ if (!fl4->saddr) {
+ struct net_device *l3mdev;
+
+ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);
+
+ if (!l3mdev ||
+ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
+ fl4->saddr = fib_result_prefsrc(net, res);
+ else
+ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
+ }
}
As we discussed before[1], soft recovery should be
forwarded to userspace, or we can get into a really
bad state where apps keep submitting hanging
command buffers, cascading into a hard reset.
1: https://lore.kernel.org/all/bf23d5ed-9a6b-43e7-84ee-8cbfd0d60f18@froggi.es/
Signed-off-by: Joshua Ashton <joshua(a)froggi.es>
Cc: Friedrich Vock <friedrich.vock(a)gmx.de>
Cc: Bas Nieuwenhuizen <bas(a)basnieuwenhuizen.nl>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: André Almeida <andrealmeid(a)igalia.com>
Cc: stable(a)vger.kernel.org
---
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4b3000c21ef2..aebf59855e9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -262,9 +262,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
struct dma_fence *fence = NULL;
int r;
- /* Ignore soft recovered fences here */
r = drm_sched_entity_error(s_entity);
- if (r && r != -ENODATA)
+ if (r)
goto error;
if (!fence && job->gang_submit)
--
2.44.0
In drm_client_modeset_probe(), the return value of drm_mode_duplicate() is
assigned to modeset->mode, which can lead to a NULL pointer dereference
if drm_mode_duplicate() fails. Add a check to avoid the NULL pointer
dereference.
Cc: stable(a)vger.kernel.org
Fixes: cf13909aee05 ("drm/fb-helper: Move out modeset config code")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v4:
- modified the patch: set ret and break to handle the error correctly.
Changes in v3:
- modified the patch as suggested: return the error directly when failing
  to get modeset->mode.
Changes in v2:
- added the recipient's email address, due to the prolonged absence of a
response from the recipients.
- added Cc stable.
---
drivers/gpu/drm/drm_client_modeset.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 31af5cf37a09..cee5eafbfb81 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -880,6 +880,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
kfree(modeset->mode);
modeset->mode = drm_mode_duplicate(dev, mode);
+ if (!modeset->mode) {
+ ret = -ENOMEM;
+ break;
+ }
+
drm_connector_get(connector);
modeset->connectors[modeset->num_connectors++] = connector;
modeset->x = offset->x;
--
2.25.1
Zero and negative numbers are not valid IRQs for in-kernel code, and the
irq_of_parse_and_map() function returns zero on error. So this check for
valid IRQs should only accept values > 0.
Cc: stable(a)vger.kernel.org
Fixes: 2d9e31b9412c ("dmaengine: moxart: remove NO_IRQ")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- added Cc stable line;
- added Fixes line.
---
drivers/dma/moxart-dma.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma/moxart-dma.c b/drivers/dma/moxart-dma.c
index 66dc6d31b603..16dd3c5aba4d 100644
--- a/drivers/dma/moxart-dma.c
+++ b/drivers/dma/moxart-dma.c
@@ -568,7 +568,7 @@ static int moxart_probe(struct platform_device *pdev)
return -ENOMEM;
irq = irq_of_parse_and_map(node, 0);
- if (!irq) {
+ if (irq <= 0) {
dev_err(dev, "no IRQ resource\n");
return -EINVAL;
}
--
2.25.1
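A trivial userspace illustration of the check (the function and numbers
are invented): once the value is held in a signed int, "irq <= 0" rejects
both the zero error return and any stray negative value, while "!irq"
only catches zero.

#include <stdio.h>

static int claim_irq(int irq)
{
	if (irq <= 0)		/* rejects 0 and negative values alike */
		return -22;	/* -EINVAL */
	return 0;
}

int main(void)
{
	printf("irq=0  -> %d\n", claim_irq(0));
	printf("irq=-5 -> %d\n", claim_irq(-5));	/* "!irq" would accept this */
	printf("irq=35 -> %d\n", claim_irq(35));
	return 0;
}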
When looking at improving the user experience around the MPTCP endpoints
setup, I noticed that setting an endpoint with both the 'signal' and the
'subflow' flags -- as it has been done in the past by users according to
bug reports we got -- was resulting in only announcing the endpoint, but
not using it to create subflows: the 'subflow' flag was then ignored.
My initial thought was to modify IPRoute2 to warn the user when the two
flags were set, but it doesn't sound normal to ignore one of them. I
then looked at modifying the kernel not to allow having the two flags
set, but when discussing that with Mat, we thought it was maybe not
ideal to do that, as there might be use-cases and we might break some
configs. Then I saw it was working before v5.17. So instead, I fixed the
support on the kernel side (patch 5) using Paolo's suggestion. This also
includes a fix on the options side (patch 1: for v5.11+), an explicit
deny of some options combinations (patch 2: for v5.18+), and some
refactoring (patches 3 and 4) to ease the inclusion of the patch 5.
While at it, I added a new selftest (patch 7) to validate this case --
including a modification of the chk_add_nr helper to invert the sides
where the counters are checked (patch 6) -- and allowed ADD_ADDR echo
just after the MP_JOIN 3WHS.
The selftests modifications have the same Fixes tag as the previous
commit, but no 'Cc: Stable': if the backport can work, that's good --
but it still needs to be verified by running the selftests -- if not, no
need to worry, many CIs will use the selftests from the last stable
version to validate previous stable releases.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Matthieu Baerts (NGI0) (7):
mptcp: fully established after ADD_ADDR echo on MPJ
mptcp: pm: deny endp with signal + subflow + port
mptcp: pm: reduce indentation blocks
mptcp: pm: don't try to create sf if alloc failed
mptcp: pm: do not ignore 'subflow' if 'signal' flag is also set
selftests: mptcp: join: ability to invert ADD_ADDR check
selftests: mptcp: join: test both signal & subflow
net/mptcp/options.c | 3 +-
net/mptcp/pm_netlink.c | 47 +++++++++++++--------
tools/testing/selftests/net/mptcp/mptcp_join.sh | 55 ++++++++++++++++++-------
3 files changed, 73 insertions(+), 32 deletions(-)
---
base-commit: 0bf50cead4c4710d9f704778c32ab8af47ddf070
change-id: 20240731-upstream-net-20240731-mptcp-endp-subflow-signal-181d640cf5e8
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
From: Michal Kubiak <michal.kubiak(a)intel.com>
The initialization of vport interrupt consists of two functions:
1) idpf_vport_intr_init() where a generic configuration is done
2) idpf_vport_intr_req_irq() where the irq for each q_vector is
requested.
The first function used to create a base name for each interrupt using
a "kasprintf()" call. Unfortunately, although that call allocated memory
for a text buffer, that memory was never released.
Fix this by no longer creating the interrupt base name in 1).
Instead, always create the full interrupt name in function 2); there is
no need to create a base name separately, considering that function 2)
is never called outside of the idpf_vport_intr_init() context.
Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport")
Cc: stable(a)vger.kernel.org # 6.7
Signed-off-by: Michal Kubiak <michal.kubiak(a)intel.com>
Reviewed-by: Pavan Kumar Linga <pavan.kumar.linga(a)intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin(a)intel.com>
---
drivers/net/ethernet/intel/idpf/idpf_txrx.c | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index af2879f03b8d..a2f9f252694a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3780,13 +3780,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector)
/**
* idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport
* @vport: main vport structure
- * @basename: name for the vector
*/
-static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename)
+static int idpf_vport_intr_req_irq(struct idpf_vport *vport)
{
struct idpf_adapter *adapter = vport->adapter;
+ const char *drv_name, *if_name, *vec_name;
int vector, err, irq_num, vidx;
- const char *vec_name;
+
+ drv_name = dev_driver_string(&adapter->pdev->dev);
+ if_name = netdev_name(vport->netdev);
for (vector = 0; vector < vport->num_q_vectors; vector++) {
struct idpf_q_vector *q_vector = &vport->q_vectors[vector];
@@ -3804,8 +3806,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename)
else
continue;
- name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name,
- vidx);
+ name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name,
+ vec_name, vidx);
err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0,
name, q_vector);
@@ -4326,7 +4328,6 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport)
*/
int idpf_vport_intr_init(struct idpf_vport *vport)
{
- char *int_name;
int err;
err = idpf_vport_intr_init_vec_idx(vport);
@@ -4340,11 +4341,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
if (err)
goto unroll_vectors_alloc;
- int_name = kasprintf(GFP_KERNEL, "%s-%s",
- dev_driver_string(&vport->adapter->pdev->dev),
- vport->netdev->name);
-
- err = idpf_vport_intr_req_irq(vport, int_name);
+ err = idpf_vport_intr_req_irq(vport);
if (err)
goto unroll_vectors_alloc;
--
2.45.2
The patch titled
Subject: mm-list_lru-fix-uaf-for-memory-cgroup-v2
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-list_lru-fix-uaf-for-memory-cgroup-v2.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Muchun Song <songmuchun(a)bytedance.com>
Subject: mm-list_lru-fix-uaf-for-memory-cgroup-v2
Date: Thu, 1 Aug 2024 10:46:03 +0800
only grab rcu lock when necessary, per Vlastimil
Link: https://lkml.kernel.org/r/20240801024603.1865-1-songmuchun@bytedance.com
Fixes: 0a97c01cd20b ("list_lru: allow explicit memcg and NUMA node selection")
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Acked-by: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/list_lru.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
--- a/mm/list_lru.c~mm-list_lru-fix-uaf-for-memory-cgroup-v2
+++ a/mm/list_lru.c
@@ -112,12 +112,14 @@ bool list_lru_add_obj(struct list_lru *l
{
bool ret;
int nid = page_to_nid(virt_to_page(item));
- struct mem_cgroup *memcg;
- rcu_read_lock();
- memcg = list_lru_memcg_aware(lru) ? mem_cgroup_from_slab_obj(item) : NULL;
- ret = list_lru_add(lru, item, nid, memcg);
- rcu_read_unlock();
+ if (list_lru_memcg_aware(lru)) {
+ rcu_read_lock();
+ ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
+ rcu_read_unlock();
+ } else {
+ ret = list_lru_add(lru, item, nid, NULL);
+ }
return ret;
}
@@ -148,12 +150,14 @@ bool list_lru_del_obj(struct list_lru *l
{
bool ret;
int nid = page_to_nid(virt_to_page(item));
- struct mem_cgroup *memcg;
- rcu_read_lock();
- memcg = list_lru_memcg_aware(lru) ? mem_cgroup_from_slab_obj(item) : NULL;
- ret = list_lru_del(lru, item, nid, memcg);
- rcu_read_unlock();
+ if (list_lru_memcg_aware(lru)) {
+ rcu_read_lock();
+ ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
+ rcu_read_unlock();
+ } else {
+ ret = list_lru_del(lru, item, nid, NULL);
+ }
return ret;
}
_
Patches currently in -mm which might be from songmuchun(a)bytedance.com are
mm-list_lru-fix-uaf-for-memory-cgroup.patch
mm-list_lru-fix-uaf-for-memory-cgroup-v2.patch
mm-kmem-remove-mem_cgroup_from_obj.patch
This patch series makes it possible to use Rust together with the shadow
call stack sanitizer. The first patch is intended to be backported to
ensure that people don't try to use SCS with Rust on older kernel
versions. The second patch makes it possible to use Rust with the shadow
call stack sanitizer.
The second patch in this series depends on the next version of [1],
which Miguel will send soon.
Link: https://lore.kernel.org/rust-for-linux/20240709160615.998336-12-ojeda@kerne… [1]
Signed-off-by: Alice Ryhl <aliceryhl(a)google.com>
---
Changes in v4:
- Move `depends on` to CONFIG_RUST.
- Rewrite commit messages to include more context.
- Link to v3: https://lore.kernel.org/r/20240704-shadow-call-stack-v3-0-d11c7a6ebe30@goog…
Changes in v3:
- Use -Zfixed-x18.
- Add logic to reject unsupported rustc versions.
- Also include a fix to be backported.
- Link to v2: https://lore.kernel.org/rust-for-linux/20240305-shadow-call-stack-v2-1-c7b4…
Changes in v2:
- Add -Cforce-unwind-tables flag.
- Link to v1: https://lore.kernel.org/rust-for-linux/20240304-shadow-call-stack-v1-1-f055…
---
Alice Ryhl (2):
rust: SHADOW_CALL_STACK is incompatible with Rust
rust: support for shadow call stack sanitizer
Makefile | 1 +
arch/arm64/Makefile | 3 +++
init/Kconfig | 1 +
3 files changed, 5 insertions(+)
---
base-commit: 9cde54ad2f7ac3cf84f65df605570c5a00afc82f
change-id: 20240304-shadow-call-stack-9c197a4361d9
Best regards,
--
Alice Ryhl <aliceryhl(a)google.com>
These couple of patches intends to fix the reset-gpio handling
for imx335 driver.
Patch 1/2 mentions reset-gpio polarity in DT binding example.
It is ACTIVE_LOW according to the data sheet.
Patch 2/2 fixes the logical value of reset-gpio during
power-on/power-off sequence.
--
Changes in v2:
- Also include reset-gpio polarity, mention in DT binding
- Add Fixes tag in 2/2
- Set the reset line to high during init time in 2/2
Link to v1:
https://lore.kernel.org/linux-media/tyo5etjwsfznuk6vzwqmcphbu4pz4lskrg3fjie…
Umang Jain (2):
dt-bindings: imx335: Mention reset-gpio polarity
media: imx335: Fix reset-gpio handling
.../devicetree/bindings/media/i2c/sony,imx335.yaml | 2 ++
drivers/media/i2c/imx335.c | 8 ++++----
2 files changed, 6 insertions(+), 4 deletions(-)
--
2.45.0
Add camcc support and Regera PLL ops. Also, fix the pll post div mask.
Changes in V3:
- Split the fixes into separate patches, remove RETAIN_FF flag for
gdscs and document the BIT(15) of pll alpha value.
- Link to v2: https://lore.kernel.org/all/20240702-camcc-support-sm8150-v2-1-4baf54ec7333…
Changes in v2:
- As per Konrad's comments, re-use the zonda pll code for regera, as
both are mostly the same.
- Fix the zonda_set_rate API and also the pll_post_div shift used in
trion pll post div set rate API
- Link to v1: https://lore.kernel.org/r/20240229-camcc-support-sm8150-v1-0-8c28c6c87990@q…
Satya Priya Kakitapalli (7):
clk: qcom: clk-alpha-pll: Fix the pll post div mask
clk: qcom: clk-alpha-pll: Fix the trion pll postdiv set rate API
clk: qcom: clk-alpha-pll: Fix zonda set_rate failure when PLL is
disabled
clk: qcom: clk-alpha-pll: Update set_rate for Zonda PLL
dt-bindings: clock: qcom: Add SM8150 camera clock controller
clk: qcom: Add camera clock controller driver for SM8150
arm64: dts: qcom: Add camera clock controller for sm8150
Taniya Das (1):
clk: qcom: clk-alpha-pll: Add support for Regera PLL ops
.../bindings/clock/qcom,sm8150-camcc.yaml | 77 +
arch/arm64/boot/dts/qcom/sa8155p.dtsi | 4 +
arch/arm64/boot/dts/qcom/sm8150.dtsi | 13 +
drivers/clk/qcom/Kconfig | 9 +
drivers/clk/qcom/Makefile | 1 +
drivers/clk/qcom/camcc-sm8150.c | 2159 +++++++++++++++++
drivers/clk/qcom/clk-alpha-pll.c | 57 +-
drivers/clk/qcom/clk-alpha-pll.h | 5 +
include/dt-bindings/clock/qcom,sm8150-camcc.h | 135 ++
9 files changed, 2456 insertions(+), 4 deletions(-)
create mode 100644 Documentation/devicetree/bindings/clock/qcom,sm8150-camcc.yaml
create mode 100644 drivers/clk/qcom/camcc-sm8150.c
create mode 100644 include/dt-bindings/clock/qcom,sm8150-camcc.h
--
2.25.1
In some cases, the subflow-level copied_seq counter was incorrectly
increased, leading to an unexpected subflow reset.
Patch 1/2 fixes the RCVPRUNED MIB counter that was attached to the wrong
event since its introduction in v5.14, backported to v5.11.
Patch 2/2 fixes the copied_seq counter issues, which have been present since v5.10.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Paolo Abeni (2):
mptcp: fix bad RCVPRUNED mib accounting
mptcp: fix duplicate data handling
net/mptcp/protocol.c | 8 ++++----
net/mptcp/subflow.c | 16 ++++++++++++----
2 files changed, 16 insertions(+), 8 deletions(-)
---
base-commit: 0bf50cead4c4710d9f704778c32ab8af47ddf070
change-id: 20240731-upstream-net-20240731-mptcp-dup-data-f922353130af
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
These igc bug fixes are sent as a patch series because:
1. The two patches below remove the reliance on using the qbv_count field.
"igc: Fix qbv_config_change_errors logics"
"igc: Fix reset adapter logics when tx mode change"
qbv_count field will be removed in future patch via iwl-next.
2. The patch "igc: Fix qbv tx latency by setting gtxoffset" reuses the
function igc_tsn_will_tx_mode_change() which was created in the patch:
"igc: Fix reset adapter logics when tx mode change"
v1: https://patchwork.kernel.org/project/netdevbpf/cover/20240702040926.3327530…
Changelog:
v1 -> v2
- Instead of casting to bool, use !! (Simon)
- Simplify new functions created. Instead of if.. return true, else return false,
use single return. (Simon)
- Remove patch "igc: Remove unused qbv_coun" from this series which is targeting
to iwl-net. This patch will be sent to iwl-next. (Simon)
Faizal Rahim (3):
igc: Fix qbv_config_change_errors logics
igc: Fix reset adapter logics when tx mode change
igc: Fix qbv tx latency by setting gtxoffset
drivers/net/ethernet/intel/igc/igc_main.c | 8 +++--
drivers/net/ethernet/intel/igc/igc_tsn.c | 41 ++++++++++++++++-------
drivers/net/ethernet/intel/igc/igc_tsn.h | 1 +
3 files changed, 36 insertions(+), 14 deletions(-)
--
2.25.1
Peter Zijlstra <peterz(a)infradead.org> writes:
> On Tue, Jul 30, 2024 at 08:36:13PM -0400, matoro wrote:
>> On 2024-07-30 09:50, John David Anglin wrote:
>> > On 2024-07-30 9:41 a.m., John David Anglin wrote:
>> > > On 2024-07-29 7:11 p.m., matoro wrote:
>> > > > Hi all, just bumped to the newest mainline starting with 6.10.2
>> > > > and immediately ran into a crash on boot. Fully reproducible,
>> > > > reverting back to last known good (6.9.8) resolves the issue.
>> > > > Any clue what's going on here?
>> > > > I can provide full boot logs, start bisecting, etc if needed...
>> > > 6.10.2 built and booted okay on my c8000 with the attached config.
>> > > You could start
>> > > with it and incrementally add features to try to identify the one
>> > > that causes boot failure.
>> > Oh, I have an experimental clocksource patch installed. You will need
>> > to regenerate config
>> > with "make oldconfig" to use the current timer code. Probably, this
>> > would happen automatically.
>> > >
>> > > Your config would be needed to duplicate. Full boot log would also help.
>> >
>> > Dave
>>
>> Hi Dave, bisecting quickly revealed the cause here.
>
> https://lkml.kernel.org/r/20240731105557.GY33588@noisy.programming.kicks-as…
Greg, I see tglx's jump_label fix is queued for 6.10.3 but this one
isn't as it came too late. Is there any chance of chucking it in? It's
pretty nasty.
thanks,
sam
From: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
commit 89c7f5078935872cf47a713a645affb5037be694 upstream
This does not matter in the least, but there is no other .[ch] file in the
repo that is executable, so clean this up.
Fixes: 29b83a64df3b ("MIPS: Octeon: Add PCIe link status check")
Signed-off-by: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend(a)alpha.franken.de>
Signed-off-by: WangYuli <wangyuli(a)uniontech.com>
---
arch/mips/pci/pcie-octeon.c | 0
1 file changed, 0 insertions(+), 0 deletions(-)
mode change 100755 => 100644 arch/mips/pci/pcie-octeon.c
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
old mode 100755
new mode 100644
--
2.43.4
In commit 91419ae0420f ("arm64: dts: rockchip: use BCLK to GPIO switch
on rk3399"), an additional pinctrl state was added whose default pinmux
is for 8ch i2s0. However, Puma only has 2ch i2s0. It's been overriding
the pinctrl-0 property but the second property override was missed in
the aforementioned commit.
On Puma, a hardware slider called "BIOS Disable/Normal Boot" can disable
eMMC and SPI to force booting from SD card. Another software-controlled
GPIO is then configured to override this behavior to make eMMC and SPI
available without human intervention. This is currently done in U-Boot
and it was enough until the aforementioned commit.
Indeed, because of this additional not-yet-overridden property, this
software-controlled GPIO is now muxed in a state that does not override
this hardware slider anymore, rendering SPI and eMMC flashes unusable.
Let's override the property with the 2ch pinmux to fix this.
While at it, add a GPIO hog for this software-controlled GPIO to make it
explicit and also make it reserve the pin through the pinctrl subsystem
to make sure nobody can mistakenly request it for something else: better
have a non-working feature than eMMC/SPI being corrupted "randomly"!
Signed-off-by: Quentin Schulz <quentin.schulz(a)cherry.de>
---
Quentin Schulz (2):
arm64: dts: rockchip: fix eMMC/SPI corruption when audio has been used on RK3399 Puma
arm64: dts: rockchip: override BIOS_DISABLE signal via GPIO hog on RK3399 Puma
arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 36 ++++++++++++++++++++++++---
1 file changed, 33 insertions(+), 3 deletions(-)
---
base-commit: e4fc196f5ba36eb7b9758cf2c73df49a44199895
change-id: 20240731-puma-emmc-6-c141e891220b
Best regards,
--
Quentin Schulz <quentin.schulz(a)cherry.de>
In _set_opp() we are normally bailing out when trying to set an OPP that is
the current one. This makes perfect sense, but becomes a problem when
_set_required_opps() calls it recursively.
More precisely, when a required OPP is being shared by multiple PM domains,
we end up skipping the request of the corresponding performance state for
all of the PM domains but the first one. Let's fix the problem by calling
_set_opp_level() from _set_required_opps() instead.
Fixes: e37440e7e2c2 ("OPP: Call dev_pm_opp_set_opp() for required OPPs")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
---
drivers/opp/core.c | 47 +++++++++++++++++++++++-----------------------
1 file changed, 24 insertions(+), 23 deletions(-)
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index cb4611fe1b5b..45eca65f27f9 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1061,6 +1061,28 @@ static int _set_opp_bw(const struct opp_table *opp_table,
return 0;
}
+static int _set_opp_level(struct device *dev, struct opp_table *opp_table,
+ struct dev_pm_opp *opp)
+{
+ unsigned int level = 0;
+ int ret = 0;
+
+ if (opp) {
+ if (opp->level == OPP_LEVEL_UNSET)
+ return 0;
+
+ level = opp->level;
+ }
+
+ /* Request a new performance state through the device's PM domain. */
+ ret = dev_pm_domain_set_performance_state(dev, level);
+ if (ret)
+ dev_err(dev, "Failed to set performance state %u (%d)\n", level,
+ ret);
+
+ return ret;
+}
+
/* This is only called for PM domain for now */
static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
struct dev_pm_opp *opp, bool up)
@@ -1091,7 +1113,8 @@ static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
if (devs[index]) {
required_opp = opp ? opp->required_opps[index] : NULL;
- ret = dev_pm_opp_set_opp(devs[index], required_opp);
+ ret = _set_opp_level(devs[index], opp_table,
+ required_opp);
if (ret)
return ret;
}
@@ -1102,28 +1125,6 @@ static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
return 0;
}
-static int _set_opp_level(struct device *dev, struct opp_table *opp_table,
- struct dev_pm_opp *opp)
-{
- unsigned int level = 0;
- int ret = 0;
-
- if (opp) {
- if (opp->level == OPP_LEVEL_UNSET)
- return 0;
-
- level = opp->level;
- }
-
- /* Request a new performance state through the device's PM domain. */
- ret = dev_pm_domain_set_performance_state(dev, level);
- if (ret)
- dev_err(dev, "Failed to set performance state %u (%d)\n", level,
- ret);
-
- return ret;
-}
-
static void _find_current_opp(struct device *dev, struct opp_table *opp_table)
{
struct dev_pm_opp *opp = ERR_PTR(-ENODEV);
--
2.34.1
This patchset serves to prevent an AB/BA deadlock:
thread 0:
* (lock A) acquire substream lock by
snd_pcm_stream_lock_irq() in
snd_pcm_status64()
* (lock B) wait for tasklet to finish by calling
tasklet_unlock_spin_wait() in
tasklet_disable_in_atomic() in
ohci_flush_iso_completions() of ohci.c
thread 1:
* (lock B) enter tasklet
* (lock A) attempt to acquire substream lock,
waiting for it to be released:
snd_pcm_stream_lock_irqsave() in
snd_pcm_period_elapsed() in
update_pcm_pointers() in
process_ctx_payloads() in
process_rx_packets() of amdtp-stream.c
? tasklet_unlock_spin_wait
</NMI>
<TASK>
ohci_flush_iso_completions firewire_ohci
amdtp_domain_stream_pcm_pointer snd_firewire_lib
snd_pcm_update_hw_ptr0 snd_pcm
snd_pcm_status64 snd_pcm
? native_queued_spin_lock_slowpath
</NMI>
<IRQ>
_raw_spin_lock_irqsave
snd_pcm_period_elapsed snd_pcm
process_rx_packets snd_firewire_lib
irq_target_callback snd_firewire_lib
handle_it_packet firewire_ohci
context_tasklet firewire_ohci
The issue has been reported as a regression of kernel 5.14:
Link: https://lore.kernel.org/regressions/kwryofzdmjvzkuw6j3clftsxmoolynljztxqwg7…
("[REGRESSION] ALSA: firewire-lib: snd_pcm_period_elapsed deadlock
with Fireface 800")
Commit 7ba5ca32fe6e ("ALSA: firewire-lib: operate for period elapse event
in process context") removed the process context workqueue from
amdtp_domain_stream_pcm_pointer() and update_pcm_pointers() to remove
its overhead.
Commit b5b519965c4c ("ALSA: firewire-lib: obsolete workqueue for period
update") belongs to the same patch series and removed
the now-unused workqueue entirely.
Though observed on an RME Fireface 800, this issue would affect all
FireWire audio interfaces using OHCI amdtp + PCM streaming.
ALSA streaming, especially under intensive CPU load, will reveal this issue
the soonest due to issuing more hardIRQs, with time to occurrence ranging
from 2 seconds to 30 minutes after starting playback.
To reproduce the issue:
direct ALSA playback to the device:
mpv --audio-device=alsa/sysdefault:CARD=Fireface800 Spor-Ignition.flac
Time to occurrence: 2s to 30m
Likelihood increased by:
- high CPU load
stress --cpu $(nproc)
- switching between applications via workspaces
tested with i915 in Xfce
PulseAudio / PipeWire conceal the issue as they run the PCM substream
without period wakeup mode, issuing fewer hardIRQs.
Cc: stable(a)vger.kernel.org
Backport note:
Also applies to, and fixes the issue on (tested):
6.10.2, 6.9.12, 6.6.43, 6.1.102, 5.15.164
Edmund Raile (2):
Revert "ALSA: firewire-lib: obsolete workqueue for period update"
Revert "ALSA: firewire-lib: operate for period elapse event in process
context"
sound/firewire/amdtp-stream.c | 38 ++++++++++++++++++++++-------------
sound/firewire/amdtp-stream.h | 1 +
2 files changed, 25 insertions(+), 14 deletions(-)
--
2.45.2
Good afternoon,
I am trying to allocate the 0-th page with the mmap function in my code.
I always get this error with error code: mmap error ffffffff
I then searched the internet for this topic and found the same question
on the Stack Overflow web pages.
Here is the link:
https://stackoverflow.com/questions/63790813/allocating-address-zero-on-lin…
I set the value of /proc/sys/vm/mmap_min_addr to zero and used root
privileges, as described in the link.
And I am still having the same problem.
Can you help, please.
Thank you for the reply.
Michal Hrachovec
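For reference, the kind of mapping being attempted looks roughly like the
sketch below (illustrative only; it assumes vm.mmap_min_addr has already
been set to 0, and an LSM such as SELinux may still deny a mapping at
address zero):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* Request a fixed, anonymous mapping at virtual address 0. */
	void *p = mmap((void *)0, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");	/* EPERM usually means the mapping was denied */
		return 1;
	}

	printf("page zero mapped at %p\n", p);
	munmap(p, 4096);
	return 0;
}

Note that MAP_FAILED is (void *)-1, which matches the "ffffffff" value
reported above; errno then indicates why the kernel refused the mapping.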
These couple of patches intends to fix the reset-gpio handling
for imx335 driver.
Patch 1/2 mentions reset-gpio polarity in DT binding example.
Patch 2/2 fixes the logical value of reset-gpio during
power-on/power-off sequence.
--
Changes in v3:
- Rework 1/2 commit message
- Fix gpio include in DT example in 1/2
- Remove not-so-informative XCLR comment in 2/2
Changes in v2:
- Also include reset-gpio polarity, mention in DT binding
- Add Fixes tag in 2/2
- Set the reset line to high during init time in 2/2
Link to v2:
https://lore.kernel.org/linux-media/20240729110437.199428-1-umang.jain@idea…
Link to v1:
https://lore.kernel.org/linux-media/tyo5etjwsfznuk6vzwqmcphbu4pz4lskrg3fjie…
Signed-off-by: Umang Jain <umang.jain(a)ideasonboard.com>
---
Umang Jain (2):
dt-bindings: media: imx335: Add reset-gpios to the DT example
media: imx335: Fix reset-gpio handling
Documentation/devicetree/bindings/media/i2c/sony,imx335.yaml | 4 ++++
drivers/media/i2c/imx335.c | 9 ++++-----
2 files changed, 8 insertions(+), 5 deletions(-)
---
base-commit: f3d2b941adafcdfba9ef63d9ca5bb2d9b263e2af
change-id: 20240731-imx335-gpio-818d736f9295
Best regards,
--
Umang Jain <umang.jain(a)ideasonboard.com>
commit ab091ec536cb7b271983c0c063b17f62f3591583 upstream
There is a hardware power-saving problem with the Lenovo N60z
board. When it is turned on and left idle for 10 hours, there is a
20% chance that an NVMe disk will not wake up until reboot.
Link: https://lore.kernel.org/all/2B5581C46AC6E335+9c7a81f1-05fb-4fd0-9fbb-108757…
Signed-off-by: hmy <huanglin(a)uniontech.com>
Signed-off-by: Wentao Guan <guanwentao(a)uniontech.com>
Signed-off-by: WangYuli <wangyuli(a)uniontech.com>
Signed-off-by: Keith Busch <kbusch(a)kernel.org>
---
drivers/nvme/host/pci.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 486e44d20b43..e4776cff4208 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2821,6 +2821,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return NVME_QUIRK_SIMPLE_SUSPEND;
}
+ /*
+ * NVMe SSD drops off the PCIe bus after system idle
+ * for 10 hours on a Lenovo N60z board.
+ */
+ if (dmi_match(DMI_BOARD_NAME, "LXKT-ZXEG-N6"))
+ return NVME_QUIRK_NO_APST;
+
return 0;
}
--
2.43.4
commit ab091ec536cb7b271983c0c063b17f62f3591583 upstream
There is a hardware power-saving problem with the Lenovo N60z
board. When it is turned on and left idle for 10 hours, there is a
20% chance that an NVMe disk will not wake up until reboot.
Link: https://lore.kernel.org/all/2B5581C46AC6E335+9c7a81f1-05fb-4fd0-9fbb-108757…
Signed-off-by: hmy <huanglin(a)uniontech.com>
Signed-off-by: Wentao Guan <guanwentao(a)uniontech.com>
Signed-off-by: WangYuli <wangyuli(a)uniontech.com>
Signed-off-by: Keith Busch <kbusch(a)kernel.org>
---
drivers/nvme/host/pci.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 163497ef48fd..a243c066d923 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2481,6 +2481,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return NVME_QUIRK_NO_APST;
}
+ /*
+ * NVMe SSD drops off the PCIe bus after system idle
+ * for 10 hours on a Lenovo N60z board.
+ */
+ if (dmi_match(DMI_BOARD_NAME, "LXKT-ZXEG-N6"))
+ return NVME_QUIRK_NO_APST;
+
return 0;
}
--
2.43.4
USB 3.0 on xhci1 is not used, as the controller shares the same PHY as
pcie1. The latter is enabled to support the M.2 PCIe WLAN card on this
design.
Mark USB 3.0 as disabled on this controller using the
"mediatek,u3p-dis-msk" property.
Fixes: 96564b1e2ea4 ("arm64: dts: mediatek: Introduce the MT8395 Radxa NIO 12L board")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chen-Yu Tsai <wenst(a)chromium.org>
---
arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts b/arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts
index 4b5f6cf16f70..096fa999aa59 100644
--- a/arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts
@@ -898,6 +898,7 @@ &xhci1 {
usb2-lpm-disable;
vusb33-supply = <&mt6359_vusb_ldo_reg>;
vbus-supply = <&vsys>;
+ mediatek,u3p-dis-msk = <1>;
status = "okay";
};
--
2.46.0.rc1.232.g9752f9e123-goog
USB 3.0 on xhci1 is not used, as the controller shares the same PHY as
pcie1. The latter is enabled to support the M.2 PCIe WLAN card on this
design.
Mark USB 3.0 as disabled on this controller using the
"mediatek,u3p-dis-msk" property.
Reported-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com> #KernelCI
Closes: https://lore.kernel.org/all/9fce9838-ef87-4d1b-b3df-63e1ddb0ec51@notapiano/
Fixes: b6267a396e1c ("arm64: dts: mediatek: cherry: Enable T-PHYs and USB XHCI controllers")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chen-Yu Tsai <wenst(a)chromium.org>
---
arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
index fe5400e17b0f..d3a52acbe48a 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
@@ -1404,6 +1404,7 @@ &xhci1 {
rx-fifo-depth = <3072>;
vusb33-supply = <&mt6359_vusb_ldo_reg>;
vbus-supply = <&usb_vbus>;
+ mediatek,u3p-dis-msk = <1>;
};
&xhci2 {
--
2.46.0.rc1.232.g9752f9e123-goog
The patch titled
Subject: kcov: properly check for softirq context
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kcov-properly-check-for-softirq-context.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Andrey Konovalov <andreyknvl(a)gmail.com>
Subject: kcov: properly check for softirq context
Date: Mon, 29 Jul 2024 04:21:58 +0200
When collecting coverage from softirqs, KCOV uses in_serving_softirq() to
check whether the code is running in the softirq context. Unfortunately,
in_serving_softirq() is > 0 even when the code is running in the hardirq
or NMI context for hardirqs and NMIs that happened during a softirq.
As a result, if a softirq handler contains a remote coverage collection
section and a hardirq with another remote coverage collection section
happens while handling the softirq, KCOV incorrectly detects a nested
softirq coverage collection section and prints a WARNING, as reported by
syzbot.
This issue was exposed by commit a7f3813e589f ("usb: gadget: dummy_hcd:
Switch to hrtimer transfer scheduler"), which switched dummy_hcd to using
hrtimer and made the timer's callback be executed in the hardirq context.
Change the related checks in KCOV to account for this behavior of
in_serving_softirq() and make KCOV ignore remote coverage collection
sections in the hardirq and NMI contexts.
This prevents the WARNING printed by syzbot but does not fix the inability
of KCOV to collect coverage from the __usb_hcd_giveback_urb when dummy_hcd
is in use (caused by a7f3813e589f); a separate patch is required for that.
Link: https://lkml.kernel.org/r/20240729022158.92059-1-andrey.konovalov@linux.dev
Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts")
Signed-off-by: Andrey Konovalov <andreyknvl(a)gmail.com>
Reported-by: syzbot+2388cdaeb6b10f0c13ac(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=2388cdaeb6b10f0c13ac
Acked-by: Marco Elver <elver(a)google.com>
Cc: Alan Stern <stern(a)rowland.harvard.edu>
Cc: Aleksandr Nogikh <nogikh(a)google.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Marcello Sylvester Bauer <sylv(a)sylv.io>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kcov.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
--- a/kernel/kcov.c~kcov-properly-check-for-softirq-context
+++ a/kernel/kcov.c
@@ -161,6 +161,15 @@ static void kcov_remote_area_put(struct
kmsan_unpoison_memory(&area->list, sizeof(area->list));
}
+/*
+ * Unlike in_serving_softirq(), this function returns false when called during
+ * a hardirq or an NMI that happened in the softirq context.
+ */
+static inline bool in_softirq_really(void)
+{
+ return in_serving_softirq() && !in_hardirq() && !in_nmi();
+}
+
static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
{
unsigned int mode;
@@ -170,7 +179,7 @@ static notrace bool check_kcov_mode(enum
* so we ignore code executed in interrupts, unless we are in a remote
* coverage collection section in a softirq.
*/
- if (!in_task() && !(in_serving_softirq() && t->kcov_softirq))
+ if (!in_task() && !(in_softirq_really() && t->kcov_softirq))
return false;
mode = READ_ONCE(t->kcov_mode);
/*
@@ -849,7 +858,7 @@ void kcov_remote_start(u64 handle)
if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
return;
- if (!in_task() && !in_serving_softirq())
+ if (!in_task() && !in_softirq_really())
return;
local_lock_irqsave(&kcov_percpu_data.lock, flags);
@@ -991,7 +1000,7 @@ void kcov_remote_stop(void)
int sequence;
unsigned long flags;
- if (!in_task() && !in_serving_softirq())
+ if (!in_task() && !in_softirq_really())
return;
local_lock_irqsave(&kcov_percpu_data.lock, flags);
_
Patches currently in -mm which might be from andreyknvl(a)gmail.com are
kcov-properly-check-for-softirq-context.patch
kcov-dont-instrument-lib-find_bitc.patch
We recently made GUP's common page table walking code to also walk hugetlb
VMAs without most hugetlb special-casing, preparing for the future of
having less hugetlb-specific page table walking code in the codebase.
Turns out that we missed one page table locking detail: page table locking
for hugetlb folios that are not mapped using a single PMD/PUD.
Assume we have hugetlb folio that spans multiple PTEs (e.g., 64 KiB
hugetlb folios on arm64 with 4 KiB base page size). GUP, as it walks the
page tables, will perform a pte_offset_map_lock() to grab the PTE table
lock.
However, hugetlb that concurrently modifies these page tables would
actually grab the mm->page_table_lock: with USE_SPLIT_PTE_PTLOCKS, the
locks would differ. Something similar can happen right now with hugetlb
folios that span multiple PMDs when USE_SPLIT_PMD_PTLOCKS.
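As a sketch of the mismatch described above (illustrative code, not part of
this patch; the example_* function names are made up): for a hugetlb folio
mapped by a PTE table, the two paths used to resolve to different locks
before this fix:

#include <linux/mm.h>
#include <linux/hugetlb.h>

/* Sketch only: core-mm walkers such as GUP take the split PTE page
 * table lock for the PTE table they traverse.
 */
static void example_gup_like_walk(struct mm_struct *mm, pmd_t *pmd,
				  unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (pte)
		pte_unmap_unlock(pte, ptl);
}

/* Sketch only: hugetlb modifications go through huge_pte_lockptr(),
 * which used to return &mm->page_table_lock for any hugetlb size other
 * than PMD_SIZE -- a different lock than the one taken above.
 */
static void example_hugetlb_modify(struct hstate *h, struct mm_struct *mm,
				   pte_t *ptep)
{
	spinlock_t *ptl = huge_pte_lockptr(h, mm, ptep);

	spin_lock(ptl);
	/* ... modify the hugetlb PTEs ... */
	spin_unlock(ptl);
}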
This issue can be reproduced [1], for example triggering:
[ 3105.936100] ------------[ cut here ]------------
[ 3105.939323] WARNING: CPU: 31 PID: 2732 at mm/gup.c:142 try_grab_folio+0x11c/0x188
[ 3105.944634] Modules linked in: [...]
[ 3105.974841] CPU: 31 PID: 2732 Comm: reproducer Not tainted 6.10.0-64.eln141.aarch64 #1
[ 3105.980406] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-4.fc40 05/24/2024
[ 3105.986185] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 3105.991108] pc : try_grab_folio+0x11c/0x188
[ 3105.994013] lr : follow_page_pte+0xd8/0x430
[ 3105.996986] sp : ffff80008eafb8f0
[ 3105.999346] x29: ffff80008eafb900 x28: ffffffe8d481f380 x27: 00f80001207cff43
[ 3106.004414] x26: 0000000000000001 x25: 0000000000000000 x24: ffff80008eafba48
[ 3106.009520] x23: 0000ffff9372f000 x22: ffff7a54459e2000 x21: ffff7a546c1aa978
[ 3106.014529] x20: ffffffe8d481f3c0 x19: 0000000000610041 x18: 0000000000000001
[ 3106.019506] x17: 0000000000000001 x16: ffffffffffffffff x15: 0000000000000000
[ 3106.024494] x14: ffffb85477fdfe08 x13: 0000ffff9372ffff x12: 0000000000000000
[ 3106.029469] x11: 1fffef4a88a96be1 x10: ffff7a54454b5f0c x9 : ffffb854771b12f0
[ 3106.034324] x8 : 0008000000000000 x7 : ffff7a546c1aa980 x6 : 0008000000000080
[ 3106.038902] x5 : 00000000001207cf x4 : 0000ffff9372f000 x3 : ffffffe8d481f000
[ 3106.043420] x2 : 0000000000610041 x1 : 0000000000000001 x0 : 0000000000000000
[ 3106.047957] Call trace:
[ 3106.049522] try_grab_folio+0x11c/0x188
[ 3106.051996] follow_pmd_mask.constprop.0.isra.0+0x150/0x2e0
[ 3106.055527] follow_page_mask+0x1a0/0x2b8
[ 3106.058118] __get_user_pages+0xf0/0x348
[ 3106.060647] faultin_page_range+0xb0/0x360
[ 3106.063651] do_madvise+0x340/0x598
Let's make huge_pte_lockptr() effectively use the same PT locks as any
core-mm page table walker would. Add ptep_lockptr() to obtain the PTE
page table lock using a pte pointer -- unfortunately we cannot convert
pte_lockptr() because virt_to_page() doesn't work with kmap'ed page
tables we can have with CONFIG_HIGHPTE.
There is one ugly case: powerpc 8xx, whereby we have an 8 MiB hugetlb
folio being mapped using two PTE page tables. While hugetlb wants to take
the PMD table lock, core-mm would grab the PTE table lock of one of both
PTE page tables. In such corner cases, we have to make sure that both
locks match, which is (fortunately!) currently guaranteed for 8xx as it
does not support SMP and consequently doesn't use split PT locks.
[1] https://lore.kernel.org/all/1bbfcc7f-f222-45a5-ac44-c5a1381c596d@redhat.com/
Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code")
Cc: <stable(a)vger.kernel.org>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
Still busy runtime-testing of this version -- have to set up my ARM
environment again. Dropped the RB's/ACKs because there was significant
change in the pte_lockptr() handling.
v1 -> 2:
* Extend patch description
* Drop "mm: let pte_lockptr() consume a pte_t pointer"
* Introduce ptep_lockptr() in this patch
I wish there was a nicer way to avoid messing with CONFIG_HIGHPTE ...
---
include/linux/hugetlb.h | 26 +++++++++++++++++++++++---
include/linux/mm.h | 10 ++++++++++
2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c9bf68c239a01..dd6d4ee5ee59c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -944,10 +944,30 @@ static inline bool htlb_allow_alloc_fallback(int reason)
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
- if (huge_page_size(h) == PMD_SIZE)
+ VM_WARN_ON(huge_page_size(h) == PAGE_SIZE);
+ VM_WARN_ON(huge_page_size(h) >= P4D_SIZE);
+
+ /*
+ * hugetlb must use the exact same PT locks as core-mm page table
+ * walkers would. When modifying a PTE table, hugetlb must take the
+ * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
+ * PT lock etc.
+ *
+ * The expectation is that any hugetlb folio smaller than a PMD is
+ * always mapped into a single PTE table and that any hugetlb folio
+ * smaller than a PUD (but at least as big as a PMD) is always mapped
+ * into a single PMD table.
+ *
+ * If that does not hold for an architecture, then that architecture
+ * must disable split PT locks such that all *_lockptr() functions
+ * will give us the same result: the per-MM PT lock.
+ */
+ if (huge_page_size(h) < PMD_SIZE && !IS_ENABLED(CONFIG_HIGHPTE))
+ /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
+ return ptep_lockptr(mm, pte);
+ else if (huge_page_size(h) < PUD_SIZE)
return pmd_lockptr(mm, (pmd_t *) pte);
- VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
- return &mm->page_table_lock;
+ return pud_lockptr(mm, (pud_t *) pte);
}
#ifndef hugepages_supported
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b100df8cb5857..1b1f40ff00b7d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2926,6 +2926,12 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
}
+static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
+{
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
+ return ptlock_ptr(virt_to_ptdesc(pte));
+}
+
static inline bool ptlock_init(struct ptdesc *ptdesc)
{
/*
@@ -2950,6 +2956,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
return &mm->page_table_lock;
}
+static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
+{
+ return &mm->page_table_lock;
+}
static inline void ptlock_cache_init(void) {}
static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
static inline void ptlock_free(struct ptdesc *ptdesc) {}
--
2.45.2
The quilt patch titled
Subject: mm: let pte_lockptr() consume a pte_t pointer
has been removed from the -mm tree. Its filename was
mm-let-pte_lockptr-consume-a-pte_t-pointer.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm: let pte_lockptr() consume a pte_t pointer
Date: Thu, 25 Jul 2024 20:39:54 +0200
Patch series "mm/hugetlb: fix hugetlb vs. core-mm PT locking".
Working on another generic page table walker that tries to avoid
special-casing hugetlb, I found a page table locking issue with hugetlb
folios that are not mapped using a single PMD/PUD.
For some hugetlb folio sizes, GUP will take different page table locks
when walking the page tables than hugetlb when modifying the page tables.
I did not actually try reproducing an issue, but looking at
follow_pmd_mask() where we might be rereading a PMD value multiple times
it's rather clear that concurrent modifications are rather unpleasant.
In follow_page_pte() we might be better off in that regard -- ptep_get() does
a READ_ONCE() -- but who knows what else could happen concurrently in some
weird corner cases (e.g., hugetlb folio getting unmapped and freed).
This patch (of 2):
pte_lockptr() is the only *_lockptr() function that doesn't consume what
would be expected: it consumes a pmd_t pointer instead of a pte_t pointer.
Let's change that. The two callers in pgtable-generic.c are easily
adjusted. Adjust khugepaged.c:retract_page_tables() to simply do a
pte_offset_map_nolock() to obtain the lock, even though we won't actually
be traversing the page table.
This makes the code more similar to the other variants and avoids other
hacks to make the new pte_lockptr() version happy. pte_lockptr() users
reside now only in pgtable-generic.c.
Maybe, using pte_offset_map_nolock() is the right thing to do because the
PTE table could have been removed in the meantime? At least it sounds
more future proof if we ever have other means of page table reclaim.
It's not quite clear if holding the PTE table lock is really required:
what if someone else obtains the lock just after we unlock it? But we'll
leave that as is for now, maybe there are good reasons.
This is a preparation for adapting hugetlb page table locking logic to
take the same locks as core-mm page table walkers would.
Link: https://lkml.kernel.org/r/20240725183955.2268884-1-david@redhat.com
Link: https://lkml.kernel.org/r/20240725183955.2268884-2-david@redhat.com
Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/mm.h | 7 ++++---
mm/khugepaged.c | 21 +++++++++++++++------
mm/pgtable-generic.c | 4 ++--
3 files changed, 21 insertions(+), 11 deletions(-)
--- a/include/linux/mm.h~mm-let-pte_lockptr-consume-a-pte_t-pointer
+++ a/include/linux/mm.h
@@ -2915,9 +2915,10 @@ static inline spinlock_t *ptlock_ptr(str
}
#endif /* ALLOC_SPLIT_PTLOCKS */
-static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
+static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pte_t *pte)
{
- return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
+ /* PTE page tables don't currently exceed a single page. */
+ return ptlock_ptr(virt_to_ptdesc(pte));
}
static inline bool ptlock_init(struct ptdesc *ptdesc)
@@ -2940,7 +2941,7 @@ static inline bool ptlock_init(struct pt
/*
* We use mm->page_table_lock to guard all pagetable pages of the mm.
*/
-static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
+static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pte_t *pte)
{
return &mm->page_table_lock;
}
--- a/mm/khugepaged.c~mm-let-pte_lockptr-consume-a-pte_t-pointer
+++ a/mm/khugepaged.c
@@ -1697,12 +1697,13 @@ static void retract_page_tables(struct a
i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
struct mmu_notifier_range range;
+ bool retracted = false;
struct mm_struct *mm;
unsigned long addr;
pmd_t *pmd, pgt_pmd;
spinlock_t *pml;
spinlock_t *ptl;
- bool skipped_uffd = false;
+ pte_t *pte;
/*
* Check vma->anon_vma to exclude MAP_PRIVATE mappings that
@@ -1739,9 +1740,17 @@ static void retract_page_tables(struct a
mmu_notifier_invalidate_range_start(&range);
pml = pmd_lock(mm, pmd);
- ptl = pte_lockptr(mm, pmd);
+
+ /*
+ * No need to check the PTE table content, but we'll grab the
+ * PTE table lock while we zap it.
+ */
+ pte = pte_offset_map_nolock(mm, pmd, addr, &ptl);
+ if (!pte)
+ goto unlock_pmd;
if (ptl != pml)
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+ pte_unmap(pte);
/*
* Huge page lock is still held, so normally the page table
@@ -1752,20 +1761,20 @@ static void retract_page_tables(struct a
* repeating the anon_vma check protects from one category,
* and repeating the userfaultfd_wp() check from another.
*/
- if (unlikely(vma->anon_vma || userfaultfd_wp(vma))) {
- skipped_uffd = true;
- } else {
+ if (likely(!vma->anon_vma && !userfaultfd_wp(vma))) {
pgt_pmd = pmdp_collapse_flush(vma, addr, pmd);
pmdp_get_lockless_sync();
+ retracted = true;
}
if (ptl != pml)
spin_unlock(ptl);
+unlock_pmd:
spin_unlock(pml);
mmu_notifier_invalidate_range_end(&range);
- if (!skipped_uffd) {
+ if (retracted) {
mm_dec_nr_ptes(mm);
page_table_check_pte_clear_range(mm, addr, pgt_pmd);
pte_free_defer(mm, pmd_pgtable(pgt_pmd));
--- a/mm/pgtable-generic.c~mm-let-pte_lockptr-consume-a-pte_t-pointer
+++ a/mm/pgtable-generic.c
@@ -313,7 +313,7 @@ pte_t *pte_offset_map_nolock(struct mm_s
pte = __pte_offset_map(pmd, addr, &pmdval);
if (likely(pte))
- *ptlp = pte_lockptr(mm, &pmdval);
+ *ptlp = pte_lockptr(mm, pte);
return pte;
}
@@ -371,7 +371,7 @@ again:
pte = __pte_offset_map(pmd, addr, &pmdval);
if (unlikely(!pte))
return pte;
- ptl = pte_lockptr(mm, &pmdval);
+ ptl = pte_lockptr(mm, pte);
spin_lock(ptl);
if (likely(pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
*ptlp = ptl;
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-let-pte_lockptr-consume-a-pte_t-pointer-fix.patch
mm-hugetlb-fix-hugetlb-vs-core-mm-pt-locking.patch
mm-turn-use_split_pte_ptlocks-use_split_pte_ptlocks-into-kconfig-options.patch
mm-hugetlb-enforce-that-pmd-pt-sharing-has-split-pmd-pt-locks.patch
powerpc-8xx-document-and-enforce-that-split-pt-locks-are-not-used.patch
mm-simplify-arch_make_folio_accessible.patch
mm-gup-convert-to-arch_make_folio_accessible.patch
s390-uv-drop-arch_make_page_accessible.patch
Commit 7ba5ca32fe6e ("ALSA: firewire-lib: operate for period elapse event
in process context") removed the process context workqueue from
amdtp_domain_stream_pcm_pointer() and update_pcm_pointers() to remove
its overhead.
With the RME Fireface 800, this led to a regression since kernel 5.14.0,
causing an AB/BA deadlock competition for the substream lock with an
eventual system freeze under ALSA operation:
thread 0:
* (lock A) acquire substream lock by
snd_pcm_stream_lock_irq() in
snd_pcm_status64()
* (lock B) wait for tasklet to finish by calling
tasklet_unlock_spin_wait() in
tasklet_disable_in_atomic() in
ohci_flush_iso_completions() of ohci.c
thread 1:
* (lock B) enter tasklet
* (lock A) attempt to acquire substream lock,
waiting for it to be released:
snd_pcm_stream_lock_irqsave() in
snd_pcm_period_elapsed() in
update_pcm_pointers() in
process_ctx_payloads() in
process_rx_packets() of amdtp-stream.c
? tasklet_unlock_spin_wait
</NMI>
<TASK>
ohci_flush_iso_completions firewire_ohci
amdtp_domain_stream_pcm_pointer snd_firewire_lib
snd_pcm_update_hw_ptr0 snd_pcm
snd_pcm_status64 snd_pcm
? native_queued_spin_lock_slowpath
</NMI>
<IRQ>
_raw_spin_lock_irqsave
snd_pcm_period_elapsed snd_pcm
process_rx_packets snd_firewire_lib
irq_target_callback snd_firewire_lib
handle_it_packet firewire_ohci
context_tasklet firewire_ohci
Restore the process context workqueue to prevent the AB/BA deadlock
competition for the ALSA substream lock taken by
snd_pcm_stream_lock_irq() in snd_pcm_status64()
and snd_pcm_stream_lock_irqsave() in snd_pcm_period_elapsed().
Revert commit 7ba5ca32fe6e ("ALSA: firewire-lib: operate for period
elapse event in process context") and replace the inline description to
prevent a future reintroduction of the deadlock.
Cc: stable(a)vger.kernel.org
Fixes: 7ba5ca32fe6e ("ALSA: firewire-lib: operate for period elapse event in process context")
Reported-by: edmund.raile <edmund.raile(a)proton.me>
Closes: https://lore.kernel.org/r/kwryofzdmjvzkuw6j3clftsxmoolynljztxqwg76hzeo4simn…
Signed-off-by: Edmund Raile <edmund.raile(a)protonmail.com>
---
sound/firewire/amdtp-stream.c | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 31201d506a21..7438999e0510 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -615,16 +615,8 @@ static void update_pcm_pointers(struct amdtp_stream *s,
// The program in user process should periodically check the status of intermediate
// buffer associated to PCM substream to process PCM frames in the buffer, instead
// of receiving notification of period elapsed by poll wait.
- if (!pcm->runtime->no_period_wakeup) {
- if (in_softirq()) {
- // In software IRQ context for 1394 OHCI.
- snd_pcm_period_elapsed(pcm);
- } else {
- // In process context of ALSA PCM application under acquired lock of
- // PCM substream.
- snd_pcm_period_elapsed_under_stream_lock(pcm);
- }
- }
+ if (!pcm->runtime->no_period_wakeup)
+ queue_work(system_highpri_wq, &s->period_work);
}
}
@@ -1864,11 +1856,14 @@ unsigned long amdtp_domain_stream_pcm_pointer(struct amdtp_domain *d,
{
struct amdtp_stream *irq_target = d->irq_target;
- // Process isochronous packets queued till recent isochronous cycle to handle PCM frames.
if (irq_target && amdtp_stream_running(irq_target)) {
- // In software IRQ context, the call causes dead-lock to disable the tasklet
- // synchronously.
- if (!in_softirq())
+ // use wq to prevent AB/BA deadlock competition for
+ // substream lock:
+ // fw_iso_context_flush_completions() acquires
+ // lock by ohci_flush_iso_completions(),
+ // amdtp-stream process_rx_packets() attempts to
+ // acquire same lock by snd_pcm_elapsed()
+ if (current_work() != &s->period_work)
fw_iso_context_flush_completions(irq_target->context);
}
--
2.45.2
This piece was missing in commit ae678317b95e ("netfs: Remove
deprecated use of PG_private_2 as a second writeback flag").
There is one remaining use of PG_private_2: the function
__fscache_clear_page_bits(), whose only purpose is to clear
PG_private_2. This is done via folio_end_private_2() which also
releases the folio reference which was supposed to be taken by
folio_start_private_2() (via ceph_set_page_fscache()).
__fscache_clear_page_bits() is called by __fscache_write_to_cache(),
but only if the parameter using_pgpriv2 is true; the only caller of
that function is ceph_fscache_write_to_cache() which still passes
true.
By calling folio_end_private_2() without folio_start_private_2(), the
folio refcount becomes unbalanced, causing trouble like RCU stalls and
general protection faults.
Cc: stable(a)vger.kernel.org
Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag")
Link: https://lore.kernel.org/ceph-devel/CAKPOu+_DA8XiMAA2ApMj7Pyshve_YWknw8Hdt1=…
Signed-off-by: Max Kellermann <max.kellermann(a)ionos.com>
---
fs/ceph/addr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c16bc5250ef..aacea3e8fd6d 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -512,7 +512,7 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
struct fscache_cookie *cookie = ceph_fscache_cookie(ci);
fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode),
- ceph_fscache_write_terminated, inode, true, caching);
+ ceph_fscache_write_terminated, inode, false, caching);
}
#else
static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 943ad0b62e3c21f324c4884caa6cb4a871bca05c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072940-parish-shirt-3e49@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
943ad0b62e3c ("kernel: rerun task_work while freezing in get_signal()")
f5d39b020809 ("freezer,sched: Rewrite core freezer logic")
9963e444f71e ("sched: Widen TAKS_state literals")
f9fc8cad9728 ("sched: Add TASK_ANY for wait_task_inactive()")
9204a97f7ae8 ("sched: Change wait_task_inactive()s match_state")
1fbcaa923ce2 ("freezer,umh: Clean up freezer/initrd interaction")
5950e5d574c6 ("freezer: Have {,un}lock_system_sleep() save/restore flags")
0b9d46fc5ef7 ("sched: Rename task_running() to task_on_cpu()")
8386c414e27c ("PM: hibernate: defer device probing when resuming from hibernation")
57b6de08b5f6 ("ptrace: Admit ptrace_stop can generate spuriuos SIGTRAPs")
7b0fe1367ef2 ("ptrace: Document that wait_task_inactive can't fail")
1930a6e739c4 ("Merge tag 'ptrace-cleanups-for-v5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 943ad0b62e3c21f324c4884caa6cb4a871bca05c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 10 Jul 2024 18:58:18 +0100
Subject: [PATCH] kernel: rerun task_work while freezing in get_signal()
io_uring can asynchronously add a task_work while the task is getting
frozen. TIF_NOTIFY_SIGNAL will prevent the task from sleeping in
do_freezer_trap(), and since the get_signal()'s relock loop doesn't
retry task_work, the task will spin there not being able to sleep
until the freezing is cancelled / the task is killed / etc.
Run task_works in the freezer path. Keep the patch small and simple
so it can be easily back ported, but we might need to do some cleaning
after and look if there are other places with similar problems.
Cc: stable(a)vger.kernel.org
Link: https://github.com/systemd/systemd/issues/33626
Fixes: 12db8b690010c ("entry: Add support for TIF_NOTIFY_SIGNAL")
Reported-by: Julian Orth <ju.orth(a)gmail.com>
Acked-by: Oleg Nesterov <oleg(a)redhat.com>
Acked-by: Tejun Heo <tj(a)kernel.org>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/89ed3a52933370deaaf61a0a620a6ac91f1e754d.17206341…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/kernel/signal.c b/kernel/signal.c
index 1f9dd41c04be..60c737e423a1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2600,6 +2600,14 @@ static void do_freezer_trap(void)
spin_unlock_irq(¤t->sighand->siglock);
cgroup_enter_frozen();
schedule();
+
+ /*
+ * We could've been woken by task_work, run it to clear
+ * TIF_NOTIFY_SIGNAL. The caller will retry if necessary.
+ */
+ clear_notify_signal();
+ if (unlikely(task_work_pending(current)))
+ task_work_run();
}
static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0453aad676ff99787124b9b3af4a5f59fbe808e2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072924-robin-manger-e92b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
0453aad676ff ("io_uring/io-wq: limit retrying worker initialisation")
8a565304927f ("io_uring/io-wq: Use set_bit() and test_bit() at worker->flags")
eb47943f2238 ("io-wq: Drop struct io_wqe")
dfd63baf892c ("io-wq: Move wq accounting to io_wq")
da64d6db3bd3 ("io_uring: One wqe per wq")
01e68ce08a30 ("io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers")
996d3efeb091 ("io-wq: Fix memory leak in worker creation")
024f15e033a5 ("io_uring: dedup io_run_task_work")
a6b21fbb4ce3 ("io_uring: move list helpers to a separate file")
ab1c84d855cf ("io_uring: make io_uring_types.h public")
48c13d898084 ("io_uring: explain io_wq_work::cancel_seq placement")
e418bbc97bff ("io_uring: move our reference counting into a header")
cd40cae29ef8 ("io_uring: split out open/close operations")
453b329be5ea ("io_uring: separate out file table handling code")
f4c163dd7d4b ("io_uring: split out fadvise/madvise operations")
0d5847274037 ("io_uring: split out fs related sync/fallocate functions")
531113bbd5bf ("io_uring: split out splice related operations")
11aeb71406dd ("io_uring: split out filesystem related operations")
e28683bdfc2f ("io_uring: move nop into its own file")
5e2a18d93fec ("io_uring: move xattr related opcodes to its own file")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0453aad676ff99787124b9b3af4a5f59fbe808e2 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 10 Jul 2024 18:58:17 +0100
Subject: [PATCH] io_uring/io-wq: limit retrying worker initialisation
If io-wq worker creation fails, we retry it by queueing up a task_work.
task_work is needed because it should be done from the user process
context. The problem is that retries are not limited, and if queueing a
task_work is the reason for the failure, we might get into an infinite
loop.
It doesn't seem to happen now but it would with the following patch
executing task_work in the freezer's loop. For now, arbitrarily limit the
number of attempts to create a worker.
Cc: stable(a)vger.kernel.org
Fixes: 3146cba99aa28 ("io-wq: make worker creation resilient against signals")
Reported-by: Julian Orth <ju.orth(a)gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/8280436925db88448c7c85c6656edee1a43029ea.17206341…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 913c92249522..f1e7c670add8 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -23,6 +23,7 @@
#include "io_uring.h"
#define WORKER_IDLE_TIMEOUT (5 * HZ)
+#define WORKER_INIT_LIMIT 3
enum {
IO_WORKER_F_UP = 0, /* up and active */
@@ -58,6 +59,7 @@ struct io_worker {
unsigned long create_state;
struct callback_head create_work;
+ int init_retries;
union {
struct rcu_head rcu;
@@ -745,7 +747,7 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
return true;
}
-static inline bool io_should_retry_thread(long err)
+static inline bool io_should_retry_thread(struct io_worker *worker, long err)
{
/*
* Prevent perpetual task_work retry, if the task (or its group) is
@@ -753,6 +755,8 @@ static inline bool io_should_retry_thread(long err)
*/
if (fatal_signal_pending(current))
return false;
+ if (worker->init_retries++ >= WORKER_INIT_LIMIT)
+ return false;
switch (err) {
case -EAGAIN:
@@ -779,7 +783,7 @@ static void create_worker_cont(struct callback_head *cb)
io_init_new_worker(wq, worker, tsk);
io_worker_release(worker);
return;
- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+ } else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
struct io_wq_acct *acct = io_wq_get_acct(worker);
atomic_dec(&acct->nr_running);
@@ -846,7 +850,7 @@ static bool create_io_worker(struct io_wq *wq, int index)
tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
if (!IS_ERR(tsk)) {
io_init_new_worker(wq, worker, tsk);
- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+ } else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
kfree(worker);
goto fail;
} else {
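For anyone redoing this as a backport, the heart of the change is a plain
bounded-retry counter: init_retries is bumped each time a retry is considered
and a hard limit ends the loop. A minimal user-space sketch of that idea
(worker/init_retries/WORKER_INIT_LIMIT mirror the patch, while
try_create_thread() is only a stand-in for create_io_thread()):

#include <stdbool.h>
#include <stdio.h>
#include <errno.h>

#define WORKER_INIT_LIMIT 3

struct worker {
    int init_retries;
};

/* Stand-in for create_io_thread(); always fails with -EAGAIN here. */
static long try_create_thread(void)
{
    return -EAGAIN;
}

/* Mirrors io_should_retry_thread(): give up once the limit is hit. */
static bool should_retry(struct worker *w, long err)
{
    if (w->init_retries++ >= WORKER_INIT_LIMIT)
        return false;
    return err == -EAGAIN;
}

int main(void)
{
    struct worker w = { .init_retries = 0 };
    long err;
    int attempts = 0;

    while ((err = try_create_thread()) < 0) {
        attempts++;
        if (!should_retry(&w, err)) {
            printf("gave up after %d attempts\n", attempts);
            return 1;
        }
    }
    return 0;
}

With the limit in place the loop gives up after WORKER_INIT_LIMIT + 1 attempts
instead of spinning forever when the transient error never clears.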
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0453aad676ff99787124b9b3af4a5f59fbe808e2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072923-bodacious-claw-442b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
0453aad676ff ("io_uring/io-wq: limit retrying worker initialisation")
8a565304927f ("io_uring/io-wq: Use set_bit() and test_bit() at worker->flags")
eb47943f2238 ("io-wq: Drop struct io_wqe")
dfd63baf892c ("io-wq: Move wq accounting to io_wq")
da64d6db3bd3 ("io_uring: One wqe per wq")
01e68ce08a30 ("io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0453aad676ff99787124b9b3af4a5f59fbe808e2 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 10 Jul 2024 18:58:17 +0100
Subject: [PATCH] io_uring/io-wq: limit retrying worker initialisation
If io-wq worker creation fails, we retry it by queueing up a task_work.
A task_work is needed because the retry should be done from the user process
context. The problem is that retries are not limited, and if queueing a
task_work is the reason for the failure, we might get into an infinite
loop.
It doesn't seem to happen now but it would with the following patch
executing task_work in the freezer's loop. For now, arbitrarily limit the
number of attempts to create a worker.
Cc: stable(a)vger.kernel.org
Fixes: 3146cba99aa28 ("io-wq: make worker creation resilient against signals")
Reported-by: Julian Orth <ju.orth(a)gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/8280436925db88448c7c85c6656edee1a43029ea.17206341…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 913c92249522..f1e7c670add8 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -23,6 +23,7 @@
#include "io_uring.h"
#define WORKER_IDLE_TIMEOUT (5 * HZ)
+#define WORKER_INIT_LIMIT 3
enum {
IO_WORKER_F_UP = 0, /* up and active */
@@ -58,6 +59,7 @@ struct io_worker {
unsigned long create_state;
struct callback_head create_work;
+ int init_retries;
union {
struct rcu_head rcu;
@@ -745,7 +747,7 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
return true;
}
-static inline bool io_should_retry_thread(long err)
+static inline bool io_should_retry_thread(struct io_worker *worker, long err)
{
/*
* Prevent perpetual task_work retry, if the task (or its group) is
@@ -753,6 +755,8 @@ static inline bool io_should_retry_thread(long err)
*/
if (fatal_signal_pending(current))
return false;
+ if (worker->init_retries++ >= WORKER_INIT_LIMIT)
+ return false;
switch (err) {
case -EAGAIN:
@@ -779,7 +783,7 @@ static void create_worker_cont(struct callback_head *cb)
io_init_new_worker(wq, worker, tsk);
io_worker_release(worker);
return;
- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+ } else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
struct io_wq_acct *acct = io_wq_get_acct(worker);
atomic_dec(&acct->nr_running);
@@ -846,7 +850,7 @@ static bool create_io_worker(struct io_wq *wq, int index)
tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
if (!IS_ERR(tsk)) {
io_init_new_worker(wq, worker, tsk);
- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+ } else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
kfree(worker);
goto fail;
} else {
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 30d77b7eef019fa4422980806e8b7cdc8674493e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072912-during-vitalize-fe0c@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
30d77b7eef01 ("mm/mglru: fix ineffective protection calculation")
3f74e6bd3b84 ("mm/mglru: fix overshooting shrinker memory")
4acef5694e01 ("mm/mglru: improve swappiness handling")
745b13e647cd ("mm/mglru: remove CONFIG_MEMCG")
4376807bf2d5 ("mm/mglru: reclaim offlined memcgs harder")
8aa420617918 ("mm/mglru: respect min_ttl_ms with memcgs")
5095a2b23987 ("mm/mglru: try to stop at high watermarks")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 30d77b7eef019fa4422980806e8b7cdc8674493e Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao(a)google.com>
Date: Fri, 12 Jul 2024 17:29:56 -0600
Subject: [PATCH] mm/mglru: fix ineffective protection calculation
mem_cgroup_calculate_protection() is not stateless and should only be used
as part of a top-down tree traversal. shrink_one() traverses the per-node
memcg LRU instead of the root_mem_cgroup tree, and therefore it should not
call mem_cgroup_calculate_protection().
The existing misuse in shrink_one() can cause ineffective protection of
sub-trees that are grandchildren of root_mem_cgroup. Fix it by reusing
lru_gen_age_node(), which already traverses the root_mem_cgroup tree, to
calculate the protection.
Previously lru_gen_age_node() opportunistically skips the first pass,
i.e., when scan_control->priority is DEF_PRIORITY. On the second pass,
lruvec_is_sizable() uses appropriate scan_control->priority, set by
set_initial_priority() from lru_gen_shrink_node(), to decide whether a
memcg is too small to reclaim from.
Now lru_gen_age_node() unconditionally traverses the root_mem_cgroup tree.
So it should call set_initial_priority() upfront, to make sure
lruvec_is_sizable() uses appropriate scan_control->priority on the first
pass. Otherwise, lruvec_is_reclaimable() can return false negatives and
result in premature OOM kills when min_ttl_ms is used.
Link: https://lkml.kernel.org/r/20240712232956.1427127-1-yuzhao@google.com
Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists")
Signed-off-by: Yu Zhao <yuzhao(a)google.com>
Reported-by: T.J. Mercier <tjmercier(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6216d79edb7f..525d3ffa8451 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3915,6 +3915,32 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq,
* working set protection
******************************************************************************/
+static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
+{
+ int priority;
+ unsigned long reclaimable;
+
+ if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
+ return;
+ /*
+ * Determine the initial priority based on
+ * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim,
+ * where reclaimed_to_scanned_ratio = inactive / total.
+ */
+ reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
+ if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
+ reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+ /* round down reclaimable and round up sc->nr_to_reclaim */
+ priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
+
+ /*
+ * The estimation is based on LRU pages only, so cap it to prevent
+ * overshoots of shrinker objects by large margins.
+ */
+ sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY);
+}
+
static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
{
int gen, type, zone;
@@ -3948,19 +3974,17 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec);
- /* see the comment on lru_gen_folio */
- gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
- birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
-
- if (time_is_after_jiffies(birth + min_ttl))
+ if (mem_cgroup_below_min(NULL, memcg))
return false;
if (!lruvec_is_sizable(lruvec, sc))
return false;
- mem_cgroup_calculate_protection(NULL, memcg);
+ /* see the comment on lru_gen_folio */
+ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
- return !mem_cgroup_below_min(NULL, memcg);
+ return time_is_before_jiffies(birth + min_ttl);
}
/* to protect the working set of the last N jiffies */
@@ -3970,23 +3994,20 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
{
struct mem_cgroup *memcg;
unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
+ bool reclaimable = !min_ttl;
VM_WARN_ON_ONCE(!current_is_kswapd());
- /* check the order to exclude compaction-induced reclaim */
- if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
- return;
+ set_initial_priority(pgdat, sc);
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
- if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
- mem_cgroup_iter_break(NULL, memcg);
- return;
- }
+ mem_cgroup_calculate_protection(NULL, memcg);
- cond_resched();
+ if (!reclaimable)
+ reclaimable = lruvec_is_reclaimable(lruvec, sc, min_ttl);
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
/*
@@ -3994,7 +4015,7 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
* younger than min_ttl. However, another possibility is all memcgs are
* either too small or below min.
*/
- if (mutex_trylock(&oom_lock)) {
+ if (!reclaimable && mutex_trylock(&oom_lock)) {
struct oom_control oc = {
.gfp_mask = sc->gfp_mask,
};
@@ -4786,8 +4807,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- mem_cgroup_calculate_protection(NULL, memcg);
-
+ /* lru_gen_age_node() called mem_cgroup_calculate_protection() */
if (mem_cgroup_below_min(NULL, memcg))
return MEMCG_LRU_YOUNG;
@@ -4911,32 +4931,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
blk_finish_plug(&plug);
}
-static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
-{
- int priority;
- unsigned long reclaimable;
-
- if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
- return;
- /*
- * Determine the initial priority based on
- * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim,
- * where reclaimed_to_scanned_ratio = inactive / total.
- */
- reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
- if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
- reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
-
- /* round down reclaimable and round up sc->nr_to_reclaim */
- priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
-
- /*
- * The estimation is based on LRU pages only, so cap it to prevent
- * overshoots of shrinker objects by large margins.
- */
- sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY);
-}
-
static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
{
struct blk_plug plug;
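The set_initial_priority() formula that this patch moves and reuses is plain
bit arithmetic, so a backport can be sanity-checked against a worked example.
A standalone sketch of the calculation (fls_long() is re-implemented with
__builtin_clzl() and assumes a 64-bit long; DEF_PRIORITY is 12 in current
kernels):

#include <stdio.h>

#define DEF_PRIORITY 12

/* fls_long(): 1-based index of the most significant set bit; 0 for 0. */
static int fls_long(unsigned long x)
{
    return x ? 64 - __builtin_clzl(x) : 0;
}

static int clamp_int(int v, int lo, int hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}

/* Mirrors set_initial_priority(): round reclaimable down and
 * nr_to_reclaim up to powers of two, then cap the result. */
static int initial_priority(unsigned long reclaimable, unsigned long nr_to_reclaim)
{
    int priority = fls_long(reclaimable) - 1 - fls_long(nr_to_reclaim - 1);

    return clamp_int(priority, DEF_PRIORITY / 2, DEF_PRIORITY);
}

int main(void)
{
    /* 4096 inactive pages, reclaim target 32: (4096 >> 7) == 32, so priority 7. */
    printf("%d\n", initial_priority(4096, 32));
    /* A huge node is capped at DEF_PRIORITY, a tiny one at DEF_PRIORITY / 2. */
    printf("%d %d\n", initial_priority(1UL << 30, 32), initial_priority(64, 32));
    return 0;
}

For 4096 reclaimable pages and a target of 32 the result is priority 7, i.e.
(4096 >> 7) == 32 pages scanned per pass, which matches the comment's relation
when the reclaimed-to-scanned ratio is 1.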
commit 4811f7af6090e8f5a398fbdd766f903ef6c0d787 upstream.
Syzbot reported that a buffer state inconsistency was detected in
nilfs_btnode_create_block(), triggering a kernel bug.
It is not appropriate to treat this inconsistency as a bug; it can occur
if the argument block address (the buffer index of the newly created
block) is a virtual block number and has been reallocated due to
corruption of the bitmap used to manage its allocation state.
So, modify nilfs_btnode_create_block() and its callers to treat it as a
possible filesystem error, rather than triggering a kernel bug.
Link: https://lkml.kernel.org/r/20240725052007.4562-1-konishi.ryusuke@gmail.com
Fixes: a60be987d45d ("nilfs2: B-tree node cache")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+89cc4f2324ed37988b60(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=89cc4f2324ed37988b60
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
Please apply this patch to the stable trees indicated by the subject
prefix instead of the failed patches or the one I asked you to drop.
This patch is tailored to take page/folio conversion into account and
can be applied from v4.11 to v6.7.
Also, all the builds and tests I did on each stable tree passed.
Thanks,
Ryusuke Konishi
fs/nilfs2/btnode.c | 25 ++++++++++++++++++++-----
fs/nilfs2/btree.c | 4 ++--
2 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 5710833ac1cc..8fe348bceabe 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -51,12 +51,21 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
if (unlikely(!bh))
- return NULL;
+ return ERR_PTR(-ENOMEM);
if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
buffer_dirty(bh))) {
- brelse(bh);
- BUG();
+ /*
+ * The block buffer at the specified new address was already
+ * in use. This can happen if it is a virtual block number
+ * and has been reallocated due to corruption of the bitmap
+ * used to manage its allocation state (if not, the buffer
+ * clearing of an abandoned b-tree node is missing somewhere).
+ */
+ nilfs_error(inode->i_sb,
+ "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)",
+ (unsigned long long)blocknr, inode->i_ino);
+ goto failed;
}
memset(bh->b_data, 0, i_blocksize(inode));
bh->b_bdev = inode->i_sb->s_bdev;
@@ -67,6 +76,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
unlock_page(bh->b_page);
put_page(bh->b_page);
return bh;
+
+failed:
+ unlock_page(bh->b_page);
+ put_page(bh->b_page);
+ brelse(bh);
+ return ERR_PTR(-EIO);
}
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
@@ -217,8 +232,8 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
}
nbh = nilfs_btnode_create_block(btnc, newkey);
- if (!nbh)
- return -ENOMEM;
+ if (IS_ERR(nbh))
+ return PTR_ERR(nbh);
BUG_ON(nbh == obh);
ctxt->newbh = nbh;
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 65659fa0372e..598f05867059 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -63,8 +63,8 @@ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
struct buffer_head *bh;
bh = nilfs_btnode_create_block(btnc, ptr);
- if (!bh)
- return -ENOMEM;
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
set_buffer_nilfs_volatile(bh);
*bhp = bh;
--
2.43.5
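The conversion above is the standard ERR_PTR() error-in-pointer convention:
instead of returning NULL (which callers could only read as -ENOMEM), the
function encodes a specific errno in the pointer and callers test it with
IS_ERR()/PTR_ERR(). A self-contained user-space sketch of the convention (the
kernel gets these helpers from <linux/err.h>; they are re-implemented here
only so the example compiles on its own):

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <errno.h>

/* Simplified versions of the <linux/err.h> helpers. */
#define MAX_ERRNO 4095
static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static bool IS_ERR(const void *ptr)
{
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct buffer_head { int dummy; };

/* Stand-in for nilfs_btnode_create_block(): fail with a specific code. */
static struct buffer_head *create_block(int blocknr)
{
    if (blocknr < 0)
        return ERR_PTR(-EIO);       /* inconsistency detected */

    struct buffer_head *bh = malloc(sizeof(*bh));
    return bh ? bh : ERR_PTR(-ENOMEM);
}

int main(void)
{
    struct buffer_head *bh = create_block(-1);

    if (IS_ERR(bh)) {
        /* Callers now propagate PTR_ERR(bh) instead of assuming -ENOMEM. */
        printf("create_block failed: %ld\n", PTR_ERR(bh));
        return 1;
    }
    free(bh);
    return 0;
}

The two callers changed in the diff, nilfs_btnode_prepare_change_key() and
nilfs_btree_get_new_block(), follow exactly this pattern: test IS_ERR() and
propagate PTR_ERR() instead of hard-coding -ENOMEM.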
Upstream commit aba3a8d01d62 ("usb: gadget: u_serial: add suspend
resume callbacks") added started_delayed flag, so that new ports
which are opened after USB suspend can start IO while resuming.
But if the port was already opened, and gadget suspend kicks in
afterwards, start_delayed will never be set. This causes resume
to bail out before calling gs_start_io(). Fix this by setting
start_delayed during suspend.
Fixes: aba3a8d01d62 ("usb: gadget: u_serial: add suspend resume callbacks")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Prashanth K <quic_prashk(a)quicinc.com>
---
drivers/usb/gadget/function/u_serial.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index eec7f7a2e40f..b394105e55d6 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -1441,6 +1441,7 @@ void gserial_suspend(struct gserial *gser)
spin_lock(&port->port_lock);
spin_unlock(&serial_port_lock);
port->suspended = true;
+ port->start_delayed = true;
spin_unlock_irqrestore(&port->port_lock, flags);
}
EXPORT_SYMBOL_GPL(gserial_suspend);
--
2.25.1
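The one-line fix is easiest to see as a tiny state machine: suspend records
that I/O start has been deferred, and resume only restarts I/O when that flag
is set. A hedged sketch of the pattern (the port struct and gs_start_io()
here are simplified stand-ins, not the real driver code):

#include <stdbool.h>
#include <stdio.h>

struct port {
    bool opened;
    bool suspended;
    bool start_delayed;
};

static void gs_start_io(struct port *p)
{
    (void)p;
    printf("I/O (re)started\n");
}

static void gserial_suspend(struct port *p)
{
    p->suspended = true;
    /* The fix: defer I/O start even for ports that were already open. */
    p->start_delayed = true;
}

static void gserial_resume(struct port *p)
{
    p->suspended = false;
    if (!p->start_delayed)
        return;     /* without the fix, resume bailed out here */
    if (p->opened)
        gs_start_io(p);
    p->start_delayed = false;
}

int main(void)
{
    struct port p = { .opened = true };

    gserial_suspend(&p);
    gserial_resume(&p); /* with the fix this prints "I/O (re)started" */
    return 0;
}

Before the fix, an already-open port went through suspend without
start_delayed being set, so the resume path returned early and never
restarted I/O.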
From: Edward Adam Davis <eadavis(a)qq.com>
[ Upstream commit ce6dede912f064a855acf6f04a04cbb2c25b8c8c ]
[syzbot reported]
general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
CPU: 0 PID: 5061 Comm: syz-executor404 Not tainted 6.8.0-syzkaller-08951-gfe46a7dd189e #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
RIP: 0010:dtInsertEntry+0xd0c/0x1780 fs/jfs/jfs_dtree.c:3713
...
[Analyze]
In dtInsertEntry(), when the pointer h has the same value as p, writing the
name via UniStrncpy_to_le() clears p->header.flag. The check
"p->header.flag & BT_LEAF", which was true earlier, becomes false after the
name is written, so the second evaluation of the condition takes the wrong
branch and accesses the uninitialized object ih.
[Fix]
After getting the page, check the freelist first; if freelist == 0, exit
dtInsert() and return -EINVAL.
Reported-by: syzbot+bba84aef3a26fb93deb9(a)syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis(a)qq.com>
Signed-off-by: Dave Kleikamp <dave.kleikamp(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/jfs/jfs_dtree.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index ea2c8f0fe832c..0c16f7f8eaa2b 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -847,6 +847,8 @@ int dtInsert(tid_t tid, struct inode *ip,
* the full page.
*/
DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
+ if (p->header.freelist == 0)
+ return -EINVAL;
/*
* insert entry for new key
--
2.43.0
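The underlying hazard is ordinary pointer aliasing: a flag checked through one
pointer is invalidated by a write made through another pointer to the same
object, so the earlier conclusion can no longer be trusted. A small
self-contained illustration (the struct and field names are invented for the
example, not jfs code):

#include <stdio.h>
#include <string.h>

#define BT_LEAF 0x01

struct entry_hdr {
    unsigned char flag;
    char name[8];
};

int main(void)
{
    struct entry_hdr slot = { .flag = BT_LEAF };
    struct entry_hdr *p = &slot;
    struct entry_hdr *h = &slot;    /* h aliases p, as in the syzbot case */

    int was_leaf = (p->flag & BT_LEAF) != 0;

    /* Writing the new entry through the alias wipes the header, so the
     * flag that was checked above is gone by the time it is re-tested. */
    memset(h, 0, sizeof(*h));
    strcpy(h->name, "newkey");

    printf("leaf before write: %d, leaf after write: %d\n",
           was_leaf, (p->flag & BT_LEAF) != 0);
    return 0;
}

The patch sidesteps that state up front: when the directory page reports no
free entries (p->header.freelist == 0), dtInsert() bails out with -EINVAL
before anything is written.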
From: Edward Adam Davis <eadavis(a)qq.com>
[ Upstream commit ce6dede912f064a855acf6f04a04cbb2c25b8c8c ]
[syzbot reported]
general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
CPU: 0 PID: 5061 Comm: syz-executor404 Not tainted 6.8.0-syzkaller-08951-gfe46a7dd189e #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
RIP: 0010:dtInsertEntry+0xd0c/0x1780 fs/jfs/jfs_dtree.c:3713
...
[Analyze]
In dtInsertEntry(), when the pointer h has the same value as p, writing the
name via UniStrncpy_to_le() clears p->header.flag. The check
"p->header.flag & BT_LEAF", which was true earlier, becomes false after the
name is written, so the second evaluation of the condition takes the wrong
branch and accesses the uninitialized object ih.
[Fix]
After getting the page, check the freelist first; if freelist == 0, exit
dtInsert() and return -EINVAL.
Reported-by: syzbot+bba84aef3a26fb93deb9(a)syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis(a)qq.com>
Signed-off-by: Dave Kleikamp <dave.kleikamp(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/jfs/jfs_dtree.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 077a87e530205..3bcfb37a9c1f6 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -834,6 +834,8 @@ int dtInsert(tid_t tid, struct inode *ip,
* the full page.
*/
DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
+ if (p->header.freelist == 0)
+ return -EINVAL;
/*
* insert entry for new key
--
2.43.0
From: Christoph Hellwig <hch(a)lst.de>
[ Upstream commit bf4c89fc8797f5c0964a0c3d561fbe7e8483b62f ]
Commit b222dd2fdd53 ("block: call bio_uninit in bio_endio") added a call
to bio_uninit in bio_endio to work around callers that use bio_init but
fail to call bio_uninit after they are done to release the resources.
While this is an abuse of the bio_init API we still have quite a few of
those left. But this early uninit causes a problem for integrity data,
as at least some users need the bio_integrity_payload. Right now the
only one is the NVMe passthrough, which achieves this by adding a special
case to skip the freeing if the BIP_INTEGRITY_USER flag is set.
Sort this out by only putting bi_blkg in bio_endio as that is the cause
of the actual leaks - the few users of the crypto context and integrity
data all properly call bio_uninit, usually through bio_put for
dynamically allocated bios.
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Link: https://lore.kernel.org/r/20240702151047.1746127-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
block/bio.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index e9e809a63c597..c7a4bc05c43e7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1630,8 +1630,18 @@ void bio_endio(struct bio *bio)
goto again;
}
- /* release cgroup info */
- bio_uninit(bio);
+#ifdef CONFIG_BLK_CGROUP
+ /*
+ * Release cgroup info. We shouldn't have to do this here, but quite
+ * a few callers of bio_init fail to call bio_uninit, so we cover up
+ * for that here at least for now.
+ */
+ if (bio->bi_blkg) {
+ blkg_put(bio->bi_blkg);
+ bio->bi_blkg = NULL;
+ }
+#endif
+
if (bio->bi_end_io)
bio->bi_end_io(bio);
}
--
2.43.0
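The shape of the fix is the idempotent-release idiom: put exactly the
reference that was leaking and clear the pointer, so a later
bio_uninit()-style teardown that also tries to release it becomes a no-op. A
minimal sketch of the idiom (the blkg struct and blkg_put() here are
simplified stand-ins, not the block layer's types):

#include <stdio.h>
#include <stdlib.h>

struct blkg {
    int refcount;
};

/* Drop one reference; free on last put. */
static void blkg_put(struct blkg *g)
{
    if (--g->refcount == 0)
        free(g);
}

struct bio {
    struct blkg *bi_blkg;
};

/* Mirrors the hunk above: release only the cgroup reference, and clear
 * the pointer so a second release attempt is harmless. */
static void bio_endio_release_blkg(struct bio *bio)
{
    if (bio->bi_blkg) {
        blkg_put(bio->bi_blkg);
        bio->bi_blkg = NULL;
    }
}

int main(void)
{
    struct blkg *g = malloc(sizeof(*g));

    if (!g)
        return 1;
    g->refcount = 1;

    struct bio bio = { .bi_blkg = g };

    bio_endio_release_blkg(&bio);
    bio_endio_release_blkg(&bio);   /* safe: pointer already cleared */
    return 0;
}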
On the off chance that the clock value ends up being too high (for
instance, if skl_ddi_calculate_wrpll() is called with a large enough
value of crtc_state->port_clock * 1000), one possible consequence is
that the result will not fit into a signed int.
Fix this issue by moving the conversion of the clock parameter from kHz
to Hz into the body of skl_ddi_calculate_wrpll(), and by casting the
same parameter to u64 while calculating the value of the AFE clock.
This both mitigates the overflow problem and avoids possible erroneous
integer promotion mishaps.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: fe70b262e781 ("drm/i915: Move a bunch of stuff into rodata from the stack")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
v2: instead of double casting of 'clock' with (u64)(u32), convert
'clock' to Hz inside skl_ddi_calculate_wrpll() and cast it only
to u64 to mitigate the issue. Per Jani's <jani.nikula(a)linux.intel.com>
helpful suggestion made here:
https://lore.kernel.org/all/87ed7gzhin.fsf@intel.com/
Also, change commit description accordingly.
v1: https://lore.kernel.org/all/20240724184911.12250-1-n.zhandarovich@fintech.r…
drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 90998b037349..292d163036b1 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -1658,7 +1658,7 @@ static void skl_wrpll_params_populate(struct skl_wrpll_params *params,
}
static int
-skl_ddi_calculate_wrpll(int clock /* in Hz */,
+skl_ddi_calculate_wrpll(int clock,
int ref_clock,
struct skl_wrpll_params *wrpll_params)
{
@@ -1683,7 +1683,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */,
};
unsigned int dco, d, i;
unsigned int p0, p1, p2;
- u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */
+ u64 afe_clock = (u64)clock * 1000 * 5; /* AFE Clock is 5x Pixel clock, in Hz */
for (d = 0; d < ARRAY_SIZE(dividers); d++) {
for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
@@ -1808,7 +1808,7 @@ static int skl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state)
struct skl_wrpll_params wrpll_params = {};
int ret;
- ret = skl_ddi_calculate_wrpll(crtc_state->port_clock * 1000,
+ ret = skl_ddi_calculate_wrpll(crtc_state->port_clock,
i915->display.dpll.ref_clks.nssc, &wrpll_params);
if (ret)
return ret;
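The arithmetic behind the fix is easy to check in isolation: the AFE clock is
5x the pixel clock in Hz, and once the pixel clock exceeds INT_MAX / 5 Hz
(about 429 MHz) that product no longer fits in a signed 32-bit int, so the
multiplication has to be done in a 64-bit type as the patch does. A
standalone check (standard fixed-width types stand in for the kernel's u64;
the 594 MHz clock is just an example value):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
    /* An example 594 MHz pixel clock, expressed in kHz like port_clock. */
    int port_clock_khz = 594000;

    /* Patched shape: widen to 64 bits before multiplying, then scale to Hz. */
    uint64_t afe_clock = (uint64_t)port_clock_khz * 1000 * 5;

    printf("AFE clock     : %" PRIu64 " Hz\n", afe_clock);      /* 2970000000 */
    printf("fits in int32 : %s\n",
           afe_clock <= INT32_MAX ? "yes" : "no");              /* no */
    printf("int32 would overflow above %d Hz pixel clock\n", INT32_MAX / 5);
    return 0;
}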
commit 11a1f4bc47362700fcbde717292158873fb847ed upstream.
Keith reports a use-after-free when a DPC event occurs concurrently with
hot-removal of the same portion of the hierarchy:
The dpc_handler() awaits readiness of the secondary bus below the
Downstream Port where the DPC event occurred. To do so, it polls the
config space of the first child device on the secondary bus. If that
child device is concurrently removed, accesses to its struct pci_dev
cause the kernel to oops.
That's because pci_bridge_wait_for_secondary_bus() neglects to hold a
reference on the child device. Before v6.3, the function was only
called on resume from system sleep or on runtime resume. Holding a
reference wasn't necessary back then because the pciehp IRQ thread
could never run concurrently. (On resume from system sleep, IRQs are
not enabled until after the resume_noirq phase. And runtime resume is
always awaited before a PCI device is removed.)
However starting with v6.3, pci_bridge_wait_for_secondary_bus() is also
called on a DPC event. Commit 53b54ad074de ("PCI/DPC: Await readiness
of secondary bus after reset"), which introduced that, failed to
appreciate that pci_bridge_wait_for_secondary_bus() now needs to hold a
reference on the child device because dpc_handler() and pciehp may
indeed run concurrently. The commit was backported to v5.10+ stable
kernels, so that's the oldest one affected.
Add the missing reference acquisition.
Abridged stack trace:
BUG: unable to handle page fault for address: 00000000091400c0
CPU: 15 PID: 2464 Comm: irq/53-pcie-dpc 6.9.0
RIP: pci_bus_read_config_dword+0x17/0x50
pci_dev_wait()
pci_bridge_wait_for_secondary_bus()
dpc_reset_link()
pcie_do_recovery()
dpc_handler()
Fixes: 53b54ad074de ("PCI/DPC: Await readiness of secondary bus after reset")
Closes: https://lore.kernel.org/r/20240612181625.3604512-3-kbusch@meta.com/
Link: https://lore.kernel.org/linux-pci/8e4bcd4116fd94f592f2bf2749f168099c480ddf.…
Reported-by: Keith Busch <kbusch(a)kernel.org>
Tested-by: Keith Busch <kbusch(a)kernel.org>
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Signed-off-by: Krzysztof Wilczyński <kwilczynski(a)kernel.org>
Reviewed-by: Keith Busch <kbusch(a)kernel.org>
Reviewed-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v5.10+
---
drivers/pci/pci.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 67216f4ea215..a88909f2ae65 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4916,7 +4916,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
int timeout)
{
struct pci_dev *child;
- int delay;
+ int delay, ret = 0;
if (pci_dev_is_disconnected(dev))
return 0;
@@ -4944,8 +4944,8 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
return 0;
}
- child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
- bus_list);
+ child = pci_dev_get(list_first_entry(&dev->subordinate->devices,
+ struct pci_dev, bus_list));
up_read(&pci_bus_sem);
/*
@@ -4955,7 +4955,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
if (!pci_is_pcie(dev)) {
pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
msleep(1000 + delay);
- return 0;
+ goto put_child;
}
/*
@@ -4976,7 +4976,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
* until the timeout expires.
*/
if (!pcie_downstream_port(dev))
- return 0;
+ goto put_child;
if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
@@ -4987,11 +4987,16 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
if (!pcie_wait_for_link_delay(dev, true, delay)) {
/* Did not train, no need to wait any further */
pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n");
- return -ENOTTY;
+ ret = -ENOTTY;
+ goto put_child;
}
}
- return pci_dev_wait(child, reset_type, timeout - delay);
+ ret = pci_dev_wait(child, reset_type, timeout - delay);
+
+put_child:
+ pci_dev_put(child);
+ return ret;
}
void pci_reset_secondary_bus(struct pci_dev *dev)
--
2.43.0
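The fix follows the usual pattern for touching a device that can be
hot-removed underneath you: take a reference while the pointer is still known
to be valid, do the slow work, and drop the reference on every exit path
through a single cleanup label. A condensed user-space sketch of that
structure (the refcounting here stands in for pci_dev_get()/pci_dev_put(),
and dev_wait() for pci_dev_wait()):

#include <stdio.h>
#include <stdlib.h>

struct child_dev {
    int refcount;
};

static struct child_dev *dev_get(struct child_dev *d)
{
    if (d)
        d->refcount++;
    return d;
}

static void dev_put(struct child_dev *d)
{
    if (d && --d->refcount == 0) {
        printf("child freed\n");
        free(d);
    }
}

/* Stand-in for pci_dev_wait(): may sleep; the child can be unplugged meanwhile. */
static int dev_wait(struct child_dev *d)
{
    (void)d;
    return 0;
}

static int wait_for_secondary_bus(struct child_dev *first_child, int is_pcie)
{
    int ret = 0;
    /* Take a reference while the pointer is still known to be valid. */
    struct child_dev *child = dev_get(first_child);

    if (!child)
        return 0;

    if (!is_pcie)
        goto put_child;     /* early exits also release the reference */

    ret = dev_wait(child);

put_child:
    dev_put(child);
    return ret;
}

int main(void)
{
    struct child_dev *child = calloc(1, sizeof(*child));

    if (!child)
        return 1;
    child->refcount = 1;            /* reference held by the bus list */

    int ret = wait_for_secondary_bus(child, 1);

    dev_put(child);                 /* hot-removal drops the list reference */
    printf("wait returned %d\n", ret);
    return 0;
}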
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073021-strut-specimen-8aad@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
2237ceb71f89 ("rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Tue, 23 Jul 2024 18:07:59 +0200
Subject: [PATCH] rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive
mappings
Every time a watch is reestablished after getting lost, we need to
update the cookie which involves quiescing exclusive lock. For this,
we transition from RBD_LOCK_STATE_LOCKED to RBD_LOCK_STATE_QUIESCING
roughly for the duration of rbd_reacquire_lock() call. If the mapping
is exclusive and I/O happens to arrive in this time window, it's failed
with EROFS (later translated to EIO) based on the wrong assumption in
rbd_img_exclusive_lock() -- "lock got released?" check there stopped
making sense with commit a2b1da09793d ("rbd: lock should be quiesced on
reacquire").
To make it worse, any such I/O is added to the acquiring list before
EROFS is returned and this sets up for violating rbd_lock_del_request()
precondition that the request is either on the running list or not on
any list at all -- see commit ded080c86b3f ("rbd: don't move requests
to the running list on errors"). rbd_lock_del_request() ends up
processing these requests as if they were on the running list which
screws up quiescing_wait completion counter and ultimately leads to
rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
being triggered on the next watch error.
Cc: stable(a)vger.kernel.org # 06ef84c4e9c4: rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
Cc: stable(a)vger.kernel.org
Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c30d227753d7..ea6c592e015c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3457,6 +3457,7 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) {
+ rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item);
need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list));
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req))
return 1;
- if (rbd_dev->opts->exclusive) {
- WARN_ON(1); /* lock got released? */
- return -EROFS;
- }
-
/*
* Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters().
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
+ if (rbd_dev->opts->exclusive)
+ rbd_warn(rbd_dev,
+ "temporarily releasing lock on exclusive mapping");
+
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x c3281abea67c9c0dc6219bbc41d1feae05a16da3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073024-ruined-frightful-2dc2@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
c3281abea67c ("remoteproc: stm32_rproc: Fix mailbox interrupts queuing")
35bdafda40cc ("remoteproc: stm32_rproc: Add mutex protection for workqueue")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c3281abea67c9c0dc6219bbc41d1feae05a16da3 Mon Sep 17 00:00:00 2001
From: Gwenael Treuveur <gwenael.treuveur(a)foss.st.com>
Date: Tue, 21 May 2024 18:23:16 +0200
Subject: [PATCH] remoteproc: stm32_rproc: Fix mailbox interrupts queuing
Manage interrupt coming from coprocessor also when state is
ATTACHED.
Fixes: 35bdafda40cc ("remoteproc: stm32_rproc: Add mutex protection for workqueue")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gwenael Treuveur <gwenael.treuveur(a)foss.st.com>
Acked-by: Arnaud Pouliquen <arnaud.pouliquen(a)foss.st.com>
Link: https://lore.kernel.org/r/20240521162316.156259-1-gwenael.treuveur@foss.st.…
Signed-off-by: Mathieu Poirier <mathieu.poirier(a)linaro.org>
diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c
index 88623df7d0c3..8c7f7950b80e 100644
--- a/drivers/remoteproc/stm32_rproc.c
+++ b/drivers/remoteproc/stm32_rproc.c
@@ -294,7 +294,7 @@ static void stm32_rproc_mb_vq_work(struct work_struct *work)
mutex_lock(&rproc->lock);
- if (rproc->state != RPROC_RUNNING)
+ if (rproc->state != RPROC_RUNNING && rproc->state != RPROC_ATTACHED)
goto unlock_mutex;
if (rproc_vq_interrupt(rproc, mb->vq_id) == IRQ_NONE)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x f70fd92df7529e7283e02a6c3a2510075f13ba30
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073000-yelp-remnant-0e02@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
f70fd92df752 ("MIPS: dts: loongson: Fix ls2k1000-rtc interrupt")
dbb69b9d6234 ("MIPS: dts: loongson: Fix liointc IRQ polarity")
d89a415ff8d5 ("MIPS: Loongson64: DTS: Fix PCIe port nodes for ls7a")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f70fd92df7529e7283e02a6c3a2510075f13ba30 Mon Sep 17 00:00:00 2001
From: Jiaxun Yang <jiaxun.yang(a)flygoat.com>
Date: Fri, 14 Jun 2024 16:40:11 +0100
Subject: [PATCH] MIPS: dts: loongson: Fix ls2k1000-rtc interrupt
The correct interrupt line for RTC is line 8 on liointc1.
Fixes: e47084e116fc ("MIPS: Loongson64: DTS: Add RTC support to Loongson-2K1000")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jiaxun Yang <jiaxun.yang(a)flygoat.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend(a)alpha.franken.de>
diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
index 3f5255584c30..c3a57a0befa7 100644
--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
+++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
@@ -92,8 +92,8 @@ liointc1: interrupt-controller@1fe11440 {
rtc0: rtc@1fe07800 {
compatible = "loongson,ls2k1000-rtc";
reg = <0 0x1fe07800 0 0x78>;
- interrupt-parent = <&liointc0>;
- interrupts = <60 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
};
uart0: serial@1fe00000 {
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x f70fd92df7529e7283e02a6c3a2510075f13ba30
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073000-directory-swimmable-5878@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
f70fd92df752 ("MIPS: dts: loongson: Fix ls2k1000-rtc interrupt")
dbb69b9d6234 ("MIPS: dts: loongson: Fix liointc IRQ polarity")
d89a415ff8d5 ("MIPS: Loongson64: DTS: Fix PCIe port nodes for ls7a")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f70fd92df7529e7283e02a6c3a2510075f13ba30 Mon Sep 17 00:00:00 2001
From: Jiaxun Yang <jiaxun.yang(a)flygoat.com>
Date: Fri, 14 Jun 2024 16:40:11 +0100
Subject: [PATCH] MIPS: dts: loongson: Fix ls2k1000-rtc interrupt
The correct interrupt line for RTC is line 8 on liointc1.
Fixes: e47084e116fc ("MIPS: Loongson64: DTS: Add RTC support to Loongson-2K1000")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jiaxun Yang <jiaxun.yang(a)flygoat.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend(a)alpha.franken.de>
diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
index 3f5255584c30..c3a57a0befa7 100644
--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
+++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
@@ -92,8 +92,8 @@ liointc1: interrupt-controller@1fe11440 {
rtc0: rtc@1fe07800 {
compatible = "loongson,ls2k1000-rtc";
reg = <0 0x1fe07800 0 0x78>;
- interrupt-parent = <&liointc0>;
- interrupts = <60 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
};
uart0: serial@1fe00000 {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x dce68a49be26abf52712e0ee452a45fa01ab4624
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073049-brutishly-astride-ce55@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
dce68a49be26 ("remoteproc: imx_rproc: Fix refcount mistake in imx_rproc_addr_init")
61afafe8b938 ("remoteproc: imx_rproc: Fix refcount leak in imx_rproc_addr_init")
afe670e23af9 ("remoteproc: imx_rproc: Fix ignoring mapping vdev regions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dce68a49be26abf52712e0ee452a45fa01ab4624 Mon Sep 17 00:00:00 2001
From: Aleksandr Mishin <amishin(a)t-argos.ru>
Date: Wed, 12 Jun 2024 16:17:14 +0300
Subject: [PATCH] remoteproc: imx_rproc: Fix refcount mistake in
imx_rproc_addr_init
In imx_rproc_addr_init(), strcmp() is performed on the node's name after
of_node_put() has already been called on that node.
Fix this error by moving the of_node_put() calls.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 5e4c1243071d ("remoteproc: imx_rproc: support remote cores booted before Linux Kernel")
Cc: stable(a)vger.kernel.org
Signed-off-by: Aleksandr Mishin <amishin(a)t-argos.ru>
Link: https://lore.kernel.org/r/20240612131714.12907-1-amishin@t-argos.ru
Signed-off-by: Mathieu Poirier <mathieu.poirier(a)linaro.org>
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index 39eacd90af14..144c8e9a642e 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -734,25 +734,29 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
continue;
}
err = of_address_to_resource(node, 0, &res);
- of_node_put(node);
if (err) {
dev_err(dev, "unable to resolve memory region\n");
+ of_node_put(node);
return err;
}
- if (b >= IMX_RPROC_MEM_MAX)
+ if (b >= IMX_RPROC_MEM_MAX) {
+ of_node_put(node);
break;
+ }
/* Not use resource version, because we might share region */
priv->mem[b].cpu_addr = devm_ioremap_wc(&pdev->dev, res.start, resource_size(&res));
if (!priv->mem[b].cpu_addr) {
dev_err(dev, "failed to remap %pr\n", &res);
+ of_node_put(node);
return -ENOMEM;
}
priv->mem[b].sys_addr = res.start;
priv->mem[b].size = resource_size(&res);
if (!strcmp(node->name, "rsc-table"))
priv->rsc_table = priv->mem[b].cpu_addr;
+ of_node_put(node);
b++;
}
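The rule the fix restores is that a counted reference (here the device_node
returned by of_parse_phandle()) must outlive every access made through it,
including the strcmp() on node->name, so the put has to move after the last
use and also has to be added on the early-exit paths. A small sketch of that
lifetime rule with a toy refcounted node (of_parse_phandle()/of_node_put()
are modelled, not called):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
    int refcount;
    char name[16];
};

static struct node *node_get(const char *name)
{
    struct node *n = calloc(1, sizeof(*n));

    if (!n)
        exit(1);
    n->refcount = 1;
    snprintf(n->name, sizeof(n->name), "%s", name);
    return n;
}

static void node_put(struct node *n)
{
    if (--n->refcount == 0) {
        /* Poison so a use-after-put would be visible in the sketch. */
        memset(n->name, 0, sizeof(n->name));
        free(n);
    }
}

int main(void)
{
    struct node *n = node_get("rsc-table");
    int is_rsc_table;

    /* Wrong order (the old code): put first, then strcmp(node->name, ...),
     * which reads freed memory once the last reference is gone. */

    /* Right order (the patch): use the name first, put last. */
    is_rsc_table = strcmp(n->name, "rsc-table") == 0;
    node_put(n);

    printf("rsc-table: %s\n", is_rsc_table ? "yes" : "no");
    return 0;
}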
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 2fa26ca8b786888673689ccc9da6094150939982
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073036-capable-slander-fe44@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
2fa26ca8b786 ("remoteproc: imx_rproc: Skip over memory region when node value is NULL")
afe670e23af9 ("remoteproc: imx_rproc: Fix ignoring mapping vdev regions")
8f2d8961640f ("remoteproc: imx_rproc: ignore mapping vdev regions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fa26ca8b786888673689ccc9da6094150939982 Mon Sep 17 00:00:00 2001
From: Aleksandr Mishin <amishin(a)t-argos.ru>
Date: Thu, 6 Jun 2024 10:52:04 +0300
Subject: [PATCH] remoteproc: imx_rproc: Skip over memory region when node
value is NULL
In imx_rproc_addr_init() "nph = of_count_phandle_with_args()" just counts
number of phandles. But phandles may be empty. So of_parse_phandle() in
the parsing loop (0 < a < nph) may return NULL which is later dereferenced.
Adjust this issue by adding NULL-return check.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: a0ff4aa6f010 ("remoteproc: imx_rproc: add a NXP/Freescale imx_rproc driver")
Signed-off-by: Aleksandr Mishin <amishin(a)t-argos.ru>
Reviewed-by: Peng Fan <peng.fan(a)nxp.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240606075204.12354-1-amishin@t-argos.ru
[Fixed title to fit within the prescribed 70-75 characters]
Signed-off-by: Mathieu Poirier <mathieu.poirier(a)linaro.org>
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index 5a3fb902acc9..39eacd90af14 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -726,6 +726,8 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
struct resource res;
node = of_parse_phandle(np, "memory-region", a);
+ if (!node)
+ continue;
/* Not map vdevbuffer, vdevring region */
if (!strncmp(node->name, "vdev", strlen("vdev"))) {
of_node_put(node);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 2fa26ca8b786888673689ccc9da6094150939982
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073033-santa-unscrew-ac70@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
2fa26ca8b786 ("remoteproc: imx_rproc: Skip over memory region when node value is NULL")
afe670e23af9 ("remoteproc: imx_rproc: Fix ignoring mapping vdev regions")
8f2d8961640f ("remoteproc: imx_rproc: ignore mapping vdev regions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fa26ca8b786888673689ccc9da6094150939982 Mon Sep 17 00:00:00 2001
From: Aleksandr Mishin <amishin(a)t-argos.ru>
Date: Thu, 6 Jun 2024 10:52:04 +0300
Subject: [PATCH] remoteproc: imx_rproc: Skip over memory region when node
value is NULL
In imx_rproc_addr_init() "nph = of_count_phandle_with_args()" just counts
number of phandles. But phandles may be empty. So of_parse_phandle() in
the parsing loop (0 < a < nph) may return NULL which is later dereferenced.
Adjust this issue by adding NULL-return check.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: a0ff4aa6f010 ("remoteproc: imx_rproc: add a NXP/Freescale imx_rproc driver")
Signed-off-by: Aleksandr Mishin <amishin(a)t-argos.ru>
Reviewed-by: Peng Fan <peng.fan(a)nxp.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240606075204.12354-1-amishin@t-argos.ru
[Fixed title to fit within the prescribed 70-75 characters]
Signed-off-by: Mathieu Poirier <mathieu.poirier(a)linaro.org>
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index 5a3fb902acc9..39eacd90af14 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -726,6 +726,8 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
struct resource res;
node = of_parse_phandle(np, "memory-region", a);
+ if (!node)
+ continue;
/* Not map vdevbuffer, vdevring region */
if (!strncmp(node->name, "vdev", strlen("vdev"))) {
of_node_put(node);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 2fa26ca8b786888673689ccc9da6094150939982
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073032-attentive-tamale-4489@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
2fa26ca8b786 ("remoteproc: imx_rproc: Skip over memory region when node value is NULL")
afe670e23af9 ("remoteproc: imx_rproc: Fix ignoring mapping vdev regions")
8f2d8961640f ("remoteproc: imx_rproc: ignore mapping vdev regions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fa26ca8b786888673689ccc9da6094150939982 Mon Sep 17 00:00:00 2001
From: Aleksandr Mishin <amishin(a)t-argos.ru>
Date: Thu, 6 Jun 2024 10:52:04 +0300
Subject: [PATCH] remoteproc: imx_rproc: Skip over memory region when node
value is NULL
In imx_rproc_addr_init() "nph = of_count_phandle_with_args()" just counts
number of phandles. But phandles may be empty. So of_parse_phandle() in
the parsing loop (0 < a < nph) may return NULL which is later dereferenced.
Adjust this issue by adding NULL-return check.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: a0ff4aa6f010 ("remoteproc: imx_rproc: add a NXP/Freescale imx_rproc driver")
Signed-off-by: Aleksandr Mishin <amishin(a)t-argos.ru>
Reviewed-by: Peng Fan <peng.fan(a)nxp.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240606075204.12354-1-amishin@t-argos.ru
[Fixed title to fit within the prescribed 70-75 characters]
Signed-off-by: Mathieu Poirier <mathieu.poirier(a)linaro.org>
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index 5a3fb902acc9..39eacd90af14 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -726,6 +726,8 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
struct resource res;
node = of_parse_phandle(np, "memory-region", a);
+ if (!node)
+ continue;
/* Not map vdevbuffer, vdevring region */
if (!strncmp(node->name, "vdev", strlen("vdev"))) {
of_node_put(node);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073029-defog-revolt-64f0@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
2237ceb71f89 ("rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings")
ded080c86b3f ("rbd: don't move requests to the running list on errors")
637cd060537d ("rbd: new exclusive lock wait/wake code")
e1fddc8fdd22 ("rbd: quiescing lock should wait for image requests")
a2b1da09793d ("rbd: lock should be quiesced on reacquire")
0192ce2ee68b ("rbd: introduce image request state machine")
85b5e6d11898 ("rbd: move OSD request submission into object request state machines")
0ad5d953548f ("rbd: get rid of RBD_OBJ_WRITE_{FLAT,GUARD}")
a9b67e69949d ("rbd: replace obj_req->tried_parent with obj_req->read_state")
54ab3b24c536 ("rbd: get rid of obj_req->xferred, obj_req->result and img_req->xferred")
9b17eb2ce102 ("rbd: whole-object write and zeroout should copyup when snapshots exist")
89a59c1ca73b ("rbd: copyup with an empty snapshot context (aka deep-copyup)")
3a482501cf70 ("rbd: introduce rbd_obj_issue_copyup_ops()")
13488d53775b ("rbd: stop copying num_osd_ops in rbd_obj_issue_copyup()")
356889c49d84 ("rbd: clear ->xferred on error from rbd_obj_issue_copyup()")
0c93e1b7a26b ("rbd: round off and ignore discards that are too small")
6484cbe987e0 ("rbd: handle DISCARD and WRITE_ZEROES separately")
fd7e3f0d8f25 ("rbd: get rid of obj_req->obj_request_count")
26f887e0a3c4 ("libceph, rbd, ceph: move ceph_osdc_alloc_messages() calls")
39e58c3425b1 ("libceph: introduce alloc_watch_request()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Tue, 23 Jul 2024 18:07:59 +0200
Subject: [PATCH] rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive
mappings
Every time a watch is reestablished after getting lost, we need to
update the cookie which involves quiescing exclusive lock. For this,
we transition from RBD_LOCK_STATE_LOCKED to RBD_LOCK_STATE_QUIESCING
roughly for the duration of rbd_reacquire_lock() call. If the mapping
is exclusive and I/O happens to arrive in this time window, it's failed
with EROFS (later translated to EIO) based on the wrong assumption in
rbd_img_exclusive_lock() -- "lock got released?" check there stopped
making sense with commit a2b1da09793d ("rbd: lock should be quiesced on
reacquire").
To make it worse, any such I/O is added to the acquiring list before
EROFS is returned and this sets up for violating rbd_lock_del_request()
precondition that the request is either on the running list or not on
any list at all -- see commit ded080c86b3f ("rbd: don't move requests
to the running list on errors"). rbd_lock_del_request() ends up
processing these requests as if they were on the running list which
screws up quiescing_wait completion counter and ultimately leads to
rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
being triggered on the next watch error.
Cc: stable(a)vger.kernel.org # 06ef84c4e9c4: rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
Cc: stable(a)vger.kernel.org
Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c30d227753d7..ea6c592e015c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3457,6 +3457,7 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) {
+ rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item);
need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list));
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req))
return 1;
- if (rbd_dev->opts->exclusive) {
- WARN_ON(1); /* lock got released? */
- return -EROFS;
- }
-
/*
* Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters().
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
+ if (rbd_dev->opts->exclusive)
+ rbd_warn(rbd_dev,
+ "temporarily releasing lock on exclusive mapping");
+
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073028-crepe-gently-80e3@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
2237ceb71f89 ("rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings")
ded080c86b3f ("rbd: don't move requests to the running list on errors")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Tue, 23 Jul 2024 18:07:59 +0200
Subject: [PATCH] rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive
mappings
Every time a watch is reestablished after getting lost, we need to
update the cookie which involves quiescing exclusive lock. For this,
we transition from RBD_LOCK_STATE_LOCKED to RBD_LOCK_STATE_QUIESCING
roughly for the duration of rbd_reacquire_lock() call. If the mapping
is exclusive and I/O happens to arrive in this time window, it's failed
with EROFS (later translated to EIO) based on the wrong assumption in
rbd_img_exclusive_lock() -- "lock got released?" check there stopped
making sense with commit a2b1da09793d ("rbd: lock should be quiesced on
reacquire").
To make it worse, any such I/O is added to the acquiring list before
EROFS is returned and this sets up for violating rbd_lock_del_request()
precondition that the request is either on the running list or not on
any list at all -- see commit ded080c86b3f ("rbd: don't move requests
to the running list on errors"). rbd_lock_del_request() ends up
processing these requests as if they were on the running list which
screws up quiescing_wait completion counter and ultimately leads to
rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
being triggered on the next watch error.
Cc: stable(a)vger.kernel.org # 06ef84c4e9c4: rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
Cc: stable(a)vger.kernel.org
Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c30d227753d7..ea6c592e015c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3457,6 +3457,7 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) {
+ rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item);
need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list));
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req))
return 1;
- if (rbd_dev->opts->exclusive) {
- WARN_ON(1); /* lock got released? */
- return -EROFS;
- }
-
/*
* Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters().
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
+ if (rbd_dev->opts->exclusive)
+ rbd_warn(rbd_dev,
+ "temporarily releasing lock on exclusive mapping");
+
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
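As an aside for readers following the list-handling argument in the patch above: the added rbd_assert() encodes the invariant that a request still linked on a lock list must be on the running list, because early-failure requests are no longer left on any list. A minimal user-space model of that invariant (illustrative names only, not driver code) is:
```c
/*
 * User-space model of the invariant behind the added rbd_assert().
 * on_lock_list models !list_empty(&img_req->lock_item); on_running_list
 * models membership on the running list.  Names are illustrative only.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_request {
	bool on_lock_list;
	bool on_running_list;
};

/* Request accepted while the lock is held: moved to the running list. */
static void add_running(struct fake_request *req)
{
	req->on_lock_list = true;
	req->on_running_list = true;
}

/* Request that failed early must not stay on any list (ded080c86b3f). */
static void fail_early(struct fake_request *req)
{
	req->on_lock_list = false;
	req->on_running_list = false;
}

/* Models rbd_lock_del_request(): only running requests are deleted here. */
static void del_request(struct fake_request *req)
{
	if (req->on_lock_list) {
		/*
		 * The new rbd_assert().  The pre-fix bug left a request
		 * parked on the acquiring list (on_lock_list true,
		 * on_running_list false) after returning EROFS, which
		 * would trip this check.
		 */
		assert(req->on_running_list);
		req->on_lock_list = false;
		req->on_running_list = false;
	}
}

int main(void)
{
	struct fake_request ok = { 0 }, bad = { 0 };

	add_running(&ok);
	del_request(&ok);	/* fine: was on the running list */

	fail_early(&bad);
	del_request(&bad);	/* fine: not on any list, nothing to do */

	printf("invariant holds for both paths\n");
	return 0;
}
```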
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073025-raffle-sadness-6e42@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
2237ceb71f89 ("rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings")
ded080c86b3f ("rbd: don't move requests to the running list on errors")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Tue, 23 Jul 2024 18:07:59 +0200
Subject: [PATCH] rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive
mappings
Every time a watch is reestablished after getting lost, we need to
update the cookie which involves quiescing exclusive lock. For this,
we transition from RBD_LOCK_STATE_LOCKED to RBD_LOCK_STATE_QUIESCING
roughly for the duration of rbd_reacquire_lock() call. If the mapping
is exclusive and I/O happens to arrive in this time window, it's failed
with EROFS (later translated to EIO) based on the wrong assumption in
rbd_img_exclusive_lock() -- "lock got released?" check there stopped
making sense with commit a2b1da09793d ("rbd: lock should be quiesced on
reacquire").
To make it worse, any such I/O is added to the acquiring list before
EROFS is returned and this sets up for violating rbd_lock_del_request()
precondition that the request is either on the running list or not on
any list at all -- see commit ded080c86b3f ("rbd: don't move requests
to the running list on errors"). rbd_lock_del_request() ends up
processing these requests as if they were on the running list which
screws up quiescing_wait completion counter and ultimately leads to
rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
being triggered on the next watch error.
Cc: stable(a)vger.kernel.org # 06ef84c4e9c4: rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
Cc: stable(a)vger.kernel.org
Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c30d227753d7..ea6c592e015c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3457,6 +3457,7 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) {
+ rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item);
need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list));
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req))
return 1;
- if (rbd_dev->opts->exclusive) {
- WARN_ON(1); /* lock got released? */
- return -EROFS;
- }
-
/*
* Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters().
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
+ if (rbd_dev->opts->exclusive)
+ rbd_warn(rbd_dev,
+ "temporarily releasing lock on exclusive mapping");
+
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073024-usable-mullets-a00c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
2237ceb71f89 ("rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings")
ded080c86b3f ("rbd: don't move requests to the running list on errors")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Tue, 23 Jul 2024 18:07:59 +0200
Subject: [PATCH] rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive
mappings
Every time a watch is reestablished after getting lost, we need to
update the cookie which involves quiescing exclusive lock. For this,
we transition from RBD_LOCK_STATE_LOCKED to RBD_LOCK_STATE_QUIESCING
roughly for the duration of rbd_reacquire_lock() call. If the mapping
is exclusive and I/O happens to arrive in this time window, it's failed
with EROFS (later translated to EIO) based on the wrong assumption in
rbd_img_exclusive_lock() -- "lock got released?" check there stopped
making sense with commit a2b1da09793d ("rbd: lock should be quiesced on
reacquire").
To make it worse, any such I/O is added to the acquiring list before
EROFS is returned and this sets up for violating rbd_lock_del_request()
precondition that the request is either on the running list or not on
any list at all -- see commit ded080c86b3f ("rbd: don't move requests
to the running list on errors"). rbd_lock_del_request() ends up
processing these requests as if they were on the running list which
screws up quiescing_wait completion counter and ultimately leads to
rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
being triggered on the next watch error.
Cc: stable(a)vger.kernel.org # 06ef84c4e9c4: rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
Cc: stable(a)vger.kernel.org
Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c30d227753d7..ea6c592e015c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3457,6 +3457,7 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) {
+ rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item);
need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list));
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req))
return 1;
- if (rbd_dev->opts->exclusive) {
- WARN_ON(1); /* lock got released? */
- return -EROFS;
- }
-
/*
* Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters().
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
+ if (rbd_dev->opts->exclusive)
+ rbd_warn(rbd_dev,
+ "temporarily releasing lock on exclusive mapping");
+
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073023-payback-dawdler-5c2b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
2237ceb71f89 ("rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings")
ded080c86b3f ("rbd: don't move requests to the running list on errors")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Tue, 23 Jul 2024 18:07:59 +0200
Subject: [PATCH] rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive
mappings
Every time a watch is reestablished after getting lost, we need to
update the cookie which involves quiescing exclusive lock. For this,
we transition from RBD_LOCK_STATE_LOCKED to RBD_LOCK_STATE_QUIESCING
roughly for the duration of rbd_reacquire_lock() call. If the mapping
is exclusive and I/O happens to arrive in this time window, it's failed
with EROFS (later translated to EIO) based on the wrong assumption in
rbd_img_exclusive_lock() -- "lock got released?" check there stopped
making sense with commit a2b1da09793d ("rbd: lock should be quiesced on
reacquire").
To make it worse, any such I/O is added to the acquiring list before
EROFS is returned and this sets up for violating rbd_lock_del_request()
precondition that the request is either on the running list or not on
any list at all -- see commit ded080c86b3f ("rbd: don't move requests
to the running list on errors"). rbd_lock_del_request() ends up
processing these requests as if they were on the running list which
screws up quiescing_wait completion counter and ultimately leads to
rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
being triggered on the next watch error.
Cc: stable(a)vger.kernel.org # 06ef84c4e9c4: rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
Cc: stable(a)vger.kernel.org
Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c30d227753d7..ea6c592e015c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3457,6 +3457,7 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) {
+ rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item);
need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list));
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req))
return 1;
- if (rbd_dev->opts->exclusive) {
- WARN_ON(1); /* lock got released? */
- return -EROFS;
- }
-
/*
* Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters().
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
+ if (rbd_dev->opts->exclusive)
+ rbd_warn(rbd_dev,
+ "temporarily releasing lock on exclusive mapping");
+
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073022-profound-unsigned-6302@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
2237ceb71f89 ("rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings")
ded080c86b3f ("rbd: don't move requests to the running list on errors")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2237ceb71f89837ac47c5dce2aaa2c2b3a337a3c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Tue, 23 Jul 2024 18:07:59 +0200
Subject: [PATCH] rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive
mappings
Every time a watch is reestablished after getting lost, we need to
update the cookie which involves quiescing exclusive lock. For this,
we transition from RBD_LOCK_STATE_LOCKED to RBD_LOCK_STATE_QUIESCING
roughly for the duration of rbd_reacquire_lock() call. If the mapping
is exclusive and I/O happens to arrive in this time window, it's failed
with EROFS (later translated to EIO) based on the wrong assumption in
rbd_img_exclusive_lock() -- "lock got released?" check there stopped
making sense with commit a2b1da09793d ("rbd: lock should be quiesced on
reacquire").
To make it worse, any such I/O is added to the acquiring list before
EROFS is returned and this sets up for violating rbd_lock_del_request()
precondition that the request is either on the running list or not on
any list at all -- see commit ded080c86b3f ("rbd: don't move requests
to the running list on errors"). rbd_lock_del_request() ends up
processing these requests as if they were on the running list which
screws up quiescing_wait completion counter and ultimately leads to
rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
being triggered on the next watch error.
Cc: stable(a)vger.kernel.org # 06ef84c4e9c4: rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
Cc: stable(a)vger.kernel.org
Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c30d227753d7..ea6c592e015c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3457,6 +3457,7 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) {
+ rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item);
need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list));
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req))
return 1;
- if (rbd_dev->opts->exclusive) {
- WARN_ON(1); /* lock got released? */
- return -EROFS;
- }
-
/*
* Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters().
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
+ if (rbd_dev->opts->exclusive)
+ rbd_warn(rbd_dev,
+ "temporarily releasing lock on exclusive mapping");
+
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x a503f91a3645651a39baf97f1aed90d5d9f9bda9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073001-kindling-liqueur-1bf1@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
a503f91a3645 ("mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks")
478211867460 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a503f91a3645651a39baf97f1aed90d5d9f9bda9 Mon Sep 17 00:00:00 2001
From: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Date: Fri, 5 Jul 2024 13:51:35 +0200
Subject: [PATCH] mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks
The dma_request_chan() returns error pointer in case of error, while
dma_request_channel() returns NULL in case of error therefore different
error checks are needed for the two.
Fixes: 7326d3fb1ee3 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
Signed-off-by: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20240705115139.126522-1-piotr.wojtaszczyk…
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 92cebe871bb4..b9c3adc54c01 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -575,7 +575,7 @@ static int lpc32xx_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 3b7e3d259785..ade971e4cc3b 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -722,7 +722,7 @@ static int lpc32xx_nand_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
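The whole fix above hinges on a convention difference: dma_request_chan() signals failure with an ERR_PTR()-encoded pointer, never NULL, so a `!ptr` test silently passes error pointers through. A stand-alone sketch of the two conventions, with ERR_PTR()/IS_ERR() re-implemented locally for illustration (in the kernel they come from <linux/err.h>), is:
```c
/*
 * Stand-alone illustration of why "!ptr" is the wrong check for an API
 * that returns error pointers.  ERR_PTR()/IS_ERR() are re-implemented
 * here for the example only.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

/* Models dma_request_chan(): never returns NULL, returns ERR_PTR() on error. */
static void *request_chan_by_name(int fail)
{
	static int channel;

	return fail ? ERR_PTR(-ENODEV) : (void *)&channel;
}

int main(void)
{
	void *chan = request_chan_by_name(1);

	if (!chan)
		puts("NULL check: error NOT detected");	/* never taken */

	if (IS_ERR(chan))
		puts("IS_ERR check: error detected");	/* the correct check */

	return 0;
}
```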
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x a503f91a3645651a39baf97f1aed90d5d9f9bda9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073000-cedar-nutrient-4b9a@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
a503f91a3645 ("mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks")
478211867460 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a503f91a3645651a39baf97f1aed90d5d9f9bda9 Mon Sep 17 00:00:00 2001
From: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Date: Fri, 5 Jul 2024 13:51:35 +0200
Subject: [PATCH] mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks
The dma_request_chan() returns error pointer in case of error, while
dma_request_channel() returns NULL in case of error therefore different
error checks are needed for the two.
Fixes: 7326d3fb1ee3 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
Signed-off-by: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20240705115139.126522-1-piotr.wojtaszczyk…
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 92cebe871bb4..b9c3adc54c01 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -575,7 +575,7 @@ static int lpc32xx_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 3b7e3d259785..ade971e4cc3b 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -722,7 +722,7 @@ static int lpc32xx_nand_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x a503f91a3645651a39baf97f1aed90d5d9f9bda9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073059-trodden-challenge-685c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
a503f91a3645 ("mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks")
478211867460 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a503f91a3645651a39baf97f1aed90d5d9f9bda9 Mon Sep 17 00:00:00 2001
From: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Date: Fri, 5 Jul 2024 13:51:35 +0200
Subject: [PATCH] mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks
The dma_request_chan() returns error pointer in case of error, while
dma_request_channel() returns NULL in case of error therefore different
error checks are needed for the two.
Fixes: 7326d3fb1ee3 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
Signed-off-by: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20240705115139.126522-1-piotr.wojtaszczyk…
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 92cebe871bb4..b9c3adc54c01 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -575,7 +575,7 @@ static int lpc32xx_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 3b7e3d259785..ade971e4cc3b 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -722,7 +722,7 @@ static int lpc32xx_nand_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x a503f91a3645651a39baf97f1aed90d5d9f9bda9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073059-maker-presume-f696@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
a503f91a3645 ("mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks")
478211867460 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a503f91a3645651a39baf97f1aed90d5d9f9bda9 Mon Sep 17 00:00:00 2001
From: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Date: Fri, 5 Jul 2024 13:51:35 +0200
Subject: [PATCH] mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks
The dma_request_chan() returns error pointer in case of error, while
dma_request_channel() returns NULL in case of error therefore different
error checks are needed for the two.
Fixes: 7326d3fb1ee3 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
Signed-off-by: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20240705115139.126522-1-piotr.wojtaszczyk…
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 92cebe871bb4..b9c3adc54c01 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -575,7 +575,7 @@ static int lpc32xx_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 3b7e3d259785..ade971e4cc3b 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -722,7 +722,7 @@ static int lpc32xx_nand_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a503f91a3645651a39baf97f1aed90d5d9f9bda9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073058-suing-candied-447f@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
a503f91a3645 ("mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks")
478211867460 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a503f91a3645651a39baf97f1aed90d5d9f9bda9 Mon Sep 17 00:00:00 2001
From: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Date: Fri, 5 Jul 2024 13:51:35 +0200
Subject: [PATCH] mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks
The dma_request_chan() returns error pointer in case of error, while
dma_request_channel() returns NULL in case of error therefore different
error checks are needed for the two.
Fixes: 7326d3fb1ee3 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
Signed-off-by: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20240705115139.126522-1-piotr.wojtaszczyk…
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 92cebe871bb4..b9c3adc54c01 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -575,7 +575,7 @@ static int lpc32xx_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 3b7e3d259785..ade971e4cc3b 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -722,7 +722,7 @@ static int lpc32xx_nand_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x a503f91a3645651a39baf97f1aed90d5d9f9bda9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073057-gonad-crabgrass-4689@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
a503f91a3645 ("mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks")
478211867460 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a503f91a3645651a39baf97f1aed90d5d9f9bda9 Mon Sep 17 00:00:00 2001
From: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Date: Fri, 5 Jul 2024 13:51:35 +0200
Subject: [PATCH] mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks
The dma_request_chan() returns error pointer in case of error, while
dma_request_channel() returns NULL in case of error therefore different
error checks are needed for the two.
Fixes: 7326d3fb1ee3 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
Signed-off-by: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20240705115139.126522-1-piotr.wojtaszczyk…
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 92cebe871bb4..b9c3adc54c01 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -575,7 +575,7 @@ static int lpc32xx_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 3b7e3d259785..ade971e4cc3b 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -722,7 +722,7 @@ static int lpc32xx_nand_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x a503f91a3645651a39baf97f1aed90d5d9f9bda9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073057-pasture-amused-2fe9@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
a503f91a3645 ("mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks")
478211867460 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a503f91a3645651a39baf97f1aed90d5d9f9bda9 Mon Sep 17 00:00:00 2001
From: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Date: Fri, 5 Jul 2024 13:51:35 +0200
Subject: [PATCH] mtd: rawnand: lpx32xx: Fix dma_request_chan() error checks
The dma_request_chan() returns error pointer in case of error, while
dma_request_channel() returns NULL in case of error therefore different
error checks are needed for the two.
Fixes: 7326d3fb1ee3 ("mtd: rawnand: lpx32xx: Request DMA channels using DT entries")
Signed-off-by: Piotr Wojtaszczyk <piotr.wojtaszczyk(a)timesys.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20240705115139.126522-1-piotr.wojtaszczyk…
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 92cebe871bb4..b9c3adc54c01 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -575,7 +575,7 @@ static int lpc32xx_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 3b7e3d259785..ade971e4cc3b 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -722,7 +722,7 @@ static int lpc32xx_nand_dma_setup(struct lpc32xx_nand_host *host)
dma_cap_mask_t mask;
host->dma_chan = dma_request_chan(mtd->dev.parent, "rx-tx");
- if (!host->dma_chan) {
+ if (IS_ERR(host->dma_chan)) {
/* fallback to request using platform data */
if (!host->pdata || !host->pdata->dma_filter) {
dev_err(mtd->dev.parent, "no DMA platform data\n");
> If you're trying to backport something, I think you forgot to Cc stable
> and provide the proper upstream commit.
>
> As is this isn't something I can do anything with. The patch does not
> apply to any recent kernel and AFAICT this issue has long since been
> fixed.
When fixing this bug, I didn't pay much attention to upstream changes.
Upon reviewing the history of relevant commits, I found that they have
been merged and reverted multiple times:
```bash
git log -S 'cpumask_test_cpu(cpu, sched_domain_span(sd))' --oneline \
kernel/sched/fair.c
8aeaffef8c6e sched/fair: Take the scheduling domain into account in select_idle_smt()
3e6efe87cd5c sched/fair: Remove redundant check in select_idle_smt()
3e8c6c9aac42 sched/fair: Remove task_util from effective utilization in feec()
c722f35b513f sched/fair: Bring back select_idle_smt(), but differently
6cd56ef1df39 sched/fair: Remove select_idle_smt()
df3cb4ea1fb6 sched/fair: Fix wrong cpu selecting from isolated domain
```
The latest upstream commit 8aeaffef8c6e is not applicable to linux-4.19.y.
The current patch has been tested on linux-4.19.y, and I am looking forward
to its inclusion in the stable tree.
From: Ira Weiny <ira.weiny(a)intel.com>
commit ced085ef369af7a2b6da962ec2fbd01339f60693 upstream.
The "goto error" pattern is notorious for introducing subtle resource
leaks. Use the new cleanup.h helpers for PCI device reference counts and
locks.
Similar to the new put_device() and device_lock() cleanup helpers,
__free(put_device) and guard(device), define the same for PCI devices,
__free(pci_dev_put) and guard(pci_dev). These helpers eliminate the
need for "goto free;" and "goto unlock;" patterns. For example, A
'struct pci_dev *' instance declared as:
struct pci_dev *pdev __free(pci_dev_put) = NULL;
...will automatically call pci_dev_put() if @pdev is non-NULL when @pdev
goes out of scope (automatic variable scope). If a function wants to
invoke pci_dev_put() on error, but return @pdev on success, it can do:
return no_free_ptr(pdev);
...or:
return_ptr(pdev);
For potential cleanup opportunity there are 587 open-coded calls to
pci_dev_put() in the kernel with 65 instances within 10 lines of a goto
statement with the CXL driver threatening to add another one.
The guard() helper holds the associated lock for the remainder of the
current scope in which it was invoked. So, for example:
func(...)
{
if (...) {
...
guard(pci_dev); /* pci_dev_lock() invoked here */
...
} /* <- implied pci_dev_unlock() triggered here */
}
There are 15 invocations of pci_dev_unlock() in the kernel with 5
instances within 10 lines of a goto statement. Again, the CXL driver is
threatening to add another.
Introduce these helpers to preclude the addition of new more error prone
goto put; / goto unlock; sequences. For now, these helpers are used in
drivers/cxl/pci.c to allow ACPI error reports to be fed back into the
CXL driver associated with the PCI device identified in the report.
Cc: Bjorn Helgaas <bhelgaas(a)google.com>
Signed-off-by: Ira Weiny <ira.weiny(a)intel.com>
Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-8-1bb8a4ca2c7a@intel.com
[djbw: rewrite changelog]
Acked-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Acked-by: Ard Biesheuvel <ardb(a)kernel.org>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
---
include/linux/pci.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 512cb40150df..f14130011621 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1146,6 +1146,7 @@ int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp);
struct pci_dev *pci_dev_get(struct pci_dev *dev);
void pci_dev_put(struct pci_dev *dev);
+DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T))
void pci_remove_bus(struct pci_bus *b);
void pci_stop_and_remove_bus_device(struct pci_dev *dev);
void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev);
@@ -1851,6 +1852,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev);
void pci_dev_lock(struct pci_dev *dev);
int pci_dev_trylock(struct pci_dev *dev);
void pci_dev_unlock(struct pci_dev *dev);
+DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
/*
* PCI domain support. Sometimes called PCI segment (eg by ACPI),
--
2.43.0
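The __free()/guard() helpers introduced above are built on the compiler's scope-based cleanup attribute. A minimal stand-alone sketch of that mechanism, using a made-up `struct resource` rather than struct pci_dev, shows the principle:
```c
/*
 * Minimal model of the scope-based cleanup that DEFINE_FREE()/guard()
 * rely on: __attribute__((cleanup)) runs a function when the variable
 * goes out of scope.  "struct resource" here is a stand-in type, not a
 * kernel object.
 */
#include <stdio.h>
#include <stdlib.h>

struct resource {
	int id;
};

static struct resource *resource_get(int id)
{
	struct resource *r = malloc(sizeof(*r));

	if (r)
		r->id = id;
	return r;
}

/* Cleanup callbacks receive a pointer to the variable being destroyed. */
static void resource_put(struct resource **rp)
{
	if (*rp) {
		printf("put resource %d\n", (*rp)->id);
		free(*rp);
	}
}

/* Rough analogue of: struct pci_dev *pdev __free(pci_dev_put) = ...; */
#define __auto_put __attribute__((cleanup(resource_put)))

static int use_resource(int id)
{
	__auto_put struct resource *r = resource_get(id);

	if (!r)
		return -1;	/* no "goto free;" needed on the error path */

	printf("using resource %d\n", r->id);
	return 0;		/* resource_put() runs automatically here */
}

int main(void)
{
	return use_resource(42);
}
```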
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x c7d0b2db5bc5e8c0fdc67b3c8f463c3dfec92f77
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073024-passable-cardigan-cd15@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
c7d0b2db5bc5 ("bus: mhi: ep: Do not allocate memory for MHI objects from DMA zone")
2547beb00ddb ("bus: mhi: ep: Add support for async DMA read operation")
ee08acb58fe4 ("bus: mhi: ep: Add support for async DMA write operation")
8b786ed8fb08 ("bus: mhi: ep: Introduce async read/write callbacks")
927105244f8b ("bus: mhi: ep: Rename read_from_host() and write_to_host() APIs")
b08ded2ef2e9 ("bus: mhi: ep: Pass mhi_ep_buf_info struct to read/write APIs")
62210a26cd4f ("bus: mhi: ep: Use slab allocator where applicable")
987fdb5a43a6 ("bus: mhi: ep: Do not allocate event ring element on stack")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7d0b2db5bc5e8c0fdc67b3c8f463c3dfec92f77 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Date: Mon, 3 Jun 2024 22:13:54 +0530
Subject: [PATCH] bus: mhi: ep: Do not allocate memory for MHI objects from DMA
zone
MHI endpoint stack accidentally started allocating memory for objects from
DMA zone since commit 62210a26cd4f ("bus: mhi: ep: Use slab allocator
where applicable"). But there is no real need to allocate memory from this
naturally limited DMA zone. This also causes the MHI endpoint stack to run
out of memory while doing high bandwidth transfers.
So let's switch over to normal memory.
Cc: <stable(a)vger.kernel.org> # 6.8
Fixes: 62210a26cd4f ("bus: mhi: ep: Use slab allocator where applicable")
Reviewed-by: Mayank Rana <quic_mrana(a)quicinc.com>
Link: https://lore.kernel.org/r/20240603164354.79035-1-manivannan.sadhasivam@lina…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
diff --git a/drivers/bus/mhi/ep/main.c b/drivers/bus/mhi/ep/main.c
index f8f674adf1d4..4acfac73ca9a 100644
--- a/drivers/bus/mhi/ep/main.c
+++ b/drivers/bus/mhi/ep/main.c
@@ -90,7 +90,7 @@ static int mhi_ep_send_completion_event(struct mhi_ep_cntrl *mhi_cntrl, struct m
struct mhi_ring_element *event;
int ret;
- event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL | GFP_DMA);
+ event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL);
if (!event)
return -ENOMEM;
@@ -109,7 +109,7 @@ int mhi_ep_send_state_change_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_stat
struct mhi_ring_element *event;
int ret;
- event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL | GFP_DMA);
+ event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL);
if (!event)
return -ENOMEM;
@@ -127,7 +127,7 @@ int mhi_ep_send_ee_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_ee_type exec_e
struct mhi_ring_element *event;
int ret;
- event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL | GFP_DMA);
+ event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL);
if (!event)
return -ENOMEM;
@@ -146,7 +146,7 @@ static int mhi_ep_send_cmd_comp_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_e
struct mhi_ring_element *event;
int ret;
- event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL | GFP_DMA);
+ event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL);
if (!event)
return -ENOMEM;
@@ -438,7 +438,7 @@ static int mhi_ep_read_channel(struct mhi_ep_cntrl *mhi_cntrl,
read_offset = mhi_chan->tre_size - mhi_chan->tre_bytes_left;
write_offset = len - buf_left;
- buf_addr = kmem_cache_zalloc(mhi_cntrl->tre_buf_cache, GFP_KERNEL | GFP_DMA);
+ buf_addr = kmem_cache_zalloc(mhi_cntrl->tre_buf_cache, GFP_KERNEL);
if (!buf_addr)
return -ENOMEM;
@@ -1481,14 +1481,14 @@ int mhi_ep_register_controller(struct mhi_ep_cntrl *mhi_cntrl,
mhi_cntrl->ev_ring_el_cache = kmem_cache_create("mhi_ep_event_ring_el",
sizeof(struct mhi_ring_element), 0,
- SLAB_CACHE_DMA, NULL);
+ 0, NULL);
if (!mhi_cntrl->ev_ring_el_cache) {
ret = -ENOMEM;
goto err_free_cmd;
}
mhi_cntrl->tre_buf_cache = kmem_cache_create("mhi_ep_tre_buf", MHI_EP_DEFAULT_MTU, 0,
- SLAB_CACHE_DMA, NULL);
+ 0, NULL);
if (!mhi_cntrl->tre_buf_cache) {
ret = -ENOMEM;
goto err_destroy_ev_ring_el_cache;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 509580fad7323b6a5da27e8365cd488f3b57210e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073042-crux-ditch-2ce7@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
509580fad732 ("drm/i915/dp: Don't switch the LTTPR mode on an active link")
657586e474bd ("drm/i915: Add a DP1.2 compatible way to read LTTPR capabilities")
77f4ec2a4111 ("drm/i915/dp: remove accidental static on what should be a local variable")
a421d8a99216 ("drm/i915/dp: rewrite DP 2.0 128b/132b link training based on errata")
92e438619d16 ("drm/i915/dp: move intel_dp_prepare_link_train() call")
6c4d46523bf3 ("drm/i915: Pimp link training debug prints")
1f662675335b ("drm/i915: Print the DP vswing adjustment request")
be1525048c58 ("drm/i915: Show LTTPR in the TPS debug print")
c6921d484d3f ("drm/i915: Prepare link training for per-lane drive settings")
e722ab8b6968 ("drm/i915: Generalize .set_signal_levels()")
5bafd85dd770 ("drm/i915: Introduce has_buf_trans_select()")
f820693bc238 ("drm/i915: Introduce has_iboost()")
f6e3be98654e ("drm/i915: Fix DP clock recovery "voltage_tries" handling")
3b4da8315add ("drm/i915/dg2: use existing mechanisms for SNPS PHY translations")
0707570248b8 ("drm/i915/dp: pass crtc_state to intel_ddi_dp_level()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 509580fad7323b6a5da27e8365cd488f3b57210e Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak(a)intel.com>
Date: Mon, 8 Jul 2024 22:00:25 +0300
Subject: [PATCH] drm/i915/dp: Don't switch the LTTPR mode on an active link
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Switching to transparent mode leads to a loss of link synchronization,
so prevent doing this on an active link. This happened at least on an
Intel N100 system / DELL UD22 dock, the LTTPR residing either on the
host or the dock. To fix the issue, keep the current mode on an active
link, adjusting the LTTPR count accordingly (resetting it to 0 in
transparent mode).
v2: Adjust code comment during link training about reiniting the LTTPRs.
(Ville)
Fixes: 7b2a4ab8b0ef ("drm/i915: Switch to LTTPR transparent mode link training")
Reported-and-tested-by: Gareth Yu <gareth.yu(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10902
Cc: <stable(a)vger.kernel.org> # v5.15+
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal(a)intel.com>
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-3-imre.…
(cherry picked from commit 211ad49cf8ccfdc798a719b4d1e000d0a8a9e588)
Signed-off-by: Tvrtko Ursulin <tursulin(a)ursulin.net>
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 1bc4ef84ff3b..d044c8e36bb3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -117,10 +117,24 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable)
return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1;
}
-static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp)
+{
+ return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE -
+ DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] ==
+ DP_PHY_REPEATER_MODE_TRANSPARENT;
+}
+
+/*
+ * Read the LTTPR common capabilities and switch the LTTPR PHYs to
+ * non-transparent mode if this is supported. Preserve the
+ * transparent/non-transparent mode on an active link.
+ *
+ * Return the number of detected LTTPRs in non-transparent mode or 0 if the
+ * LTTPRs are in transparent mode or the detection failed.
+ */
+static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
{
int lttpr_count;
- int i;
if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd))
return 0;
@@ -134,6 +148,19 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI
if (lttpr_count == 0)
return 0;
+ /*
+ * Don't change the mode on an active link, to prevent a loss of link
+ * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR
+ * resetting its internal state when the mode is changed from
+ * non-transparent to transparent.
+ */
+ if (intel_dp->link_trained) {
+ if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp))
+ goto out_reset_lttpr_count;
+
+ return lttpr_count;
+ }
+
/*
* See DP Standard v2.0 3.6.6.1. about the explicit disabling of
* non-transparent mode and the disable->enable non-transparent mode
@@ -154,11 +181,25 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI
"Switching to LTTPR non-transparent LT mode failed, fall-back to transparent mode\n");
intel_dp_set_lttpr_transparent_mode(intel_dp, true);
- intel_dp_reset_lttpr_count(intel_dp);
- return 0;
+ goto out_reset_lttpr_count;
}
+ return lttpr_count;
+
+out_reset_lttpr_count:
+ intel_dp_reset_lttpr_count(intel_dp);
+
+ return 0;
+}
+
+static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+ int lttpr_count;
+ int i;
+
+ lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd);
+
for (i = 0; i < lttpr_count; i++)
intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i));
@@ -1482,10 +1523,10 @@ void intel_dp_start_link_train(struct intel_atomic_state *state,
struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
struct intel_encoder *encoder = &dig_port->base;
bool passed;
-
/*
- * TODO: Reiniting LTTPRs here won't be needed once proper connector
- * HW state readout is added.
+ * Reinit the LTTPRs here to ensure that they are switched to
+ * non-transparent mode. During an earlier LTTPR detection this
+ * could've been prevented by an active link.
*/
int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp);
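The behavioural core of the i915 patch above is a small decision: on an already-trained link the current LTTPR mode is preserved, and the LTTPR count is reported as 0 whenever the link is left in transparent mode. A simplified stand-alone model of that decision (illustrative names, fallback path not modelled) is:
```c
/*
 * Stand-alone model of the decision made by the i915 fix above: never
 * switch LTTPR mode on an active link; report 0 LTTPRs when the link
 * stays in transparent mode.  Names are illustrative only.
 */
#include <stdbool.h>
#include <stdio.h>

static int lttpr_count_for_link(int detected_lttprs, bool link_trained,
				bool transparent_mode)
{
	if (detected_lttprs <= 0)
		return 0;

	if (link_trained) {
		/* Preserve whatever mode the active link already uses. */
		return transparent_mode ? 0 : detected_lttprs;
	}

	/*
	 * Inactive link: the driver may switch to non-transparent mode
	 * here; if that fails it falls back to transparent mode and
	 * reports 0 (that fallback is not modelled).
	 */
	return detected_lttprs;
}

int main(void)
{
	printf("%d\n", lttpr_count_for_link(2, true, false));  /* 2: keep non-transparent */
	printf("%d\n", lttpr_count_for_link(2, true, true));   /* 0: keep transparent */
	printf("%d\n", lttpr_count_for_link(2, false, false)); /* 2: free to switch mode */
	return 0;
}
```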
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 509580fad7323b6a5da27e8365cd488f3b57210e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073041-suffocate-marrow-e9ec@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
509580fad732 ("drm/i915/dp: Don't switch the LTTPR mode on an active link")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 509580fad7323b6a5da27e8365cd488f3b57210e Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak(a)intel.com>
Date: Mon, 8 Jul 2024 22:00:25 +0300
Subject: [PATCH] drm/i915/dp: Don't switch the LTTPR mode on an active link
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Switching to transparent mode leads to a loss of link synchronization,
so prevent doing this on an active link. This happened at least on an
Intel N100 system / DELL UD22 dock, the LTTPR residing either on the
host or the dock. To fix the issue, keep the current mode on an active
link, adjusting the LTTPR count accordingly (resetting it to 0 in
transparent mode).
v2: Adjust code comment during link training about reiniting the LTTPRs.
(Ville)
Fixes: 7b2a4ab8b0ef ("drm/i915: Switch to LTTPR transparent mode link training")
Reported-and-tested-by: Gareth Yu <gareth.yu(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10902
Cc: <stable(a)vger.kernel.org> # v5.15+
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal(a)intel.com>
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-3-imre.…
(cherry picked from commit 211ad49cf8ccfdc798a719b4d1e000d0a8a9e588)
Signed-off-by: Tvrtko Ursulin <tursulin(a)ursulin.net>
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 1bc4ef84ff3b..d044c8e36bb3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -117,10 +117,24 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable)
return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1;
}
-static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp)
+{
+ return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE -
+ DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] ==
+ DP_PHY_REPEATER_MODE_TRANSPARENT;
+}
+
+/*
+ * Read the LTTPR common capabilities and switch the LTTPR PHYs to
+ * non-transparent mode if this is supported. Preserve the
+ * transparent/non-transparent mode on an active link.
+ *
+ * Return the number of detected LTTPRs in non-transparent mode or 0 if the
+ * LTTPRs are in transparent mode or the detection failed.
+ */
+static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
{
int lttpr_count;
- int i;
if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd))
return 0;
@@ -134,6 +148,19 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI
if (lttpr_count == 0)
return 0;
+ /*
+ * Don't change the mode on an active link, to prevent a loss of link
+ * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR
+ * resetting its internal state when the mode is changed from
+ * non-transparent to transparent.
+ */
+ if (intel_dp->link_trained) {
+ if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp))
+ goto out_reset_lttpr_count;
+
+ return lttpr_count;
+ }
+
/*
* See DP Standard v2.0 3.6.6.1. about the explicit disabling of
* non-transparent mode and the disable->enable non-transparent mode
@@ -154,11 +181,25 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI
"Switching to LTTPR non-transparent LT mode failed, fall-back to transparent mode\n");
intel_dp_set_lttpr_transparent_mode(intel_dp, true);
- intel_dp_reset_lttpr_count(intel_dp);
- return 0;
+ goto out_reset_lttpr_count;
}
+ return lttpr_count;
+
+out_reset_lttpr_count:
+ intel_dp_reset_lttpr_count(intel_dp);
+
+ return 0;
+}
+
+static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+ int lttpr_count;
+ int i;
+
+ lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd);
+
for (i = 0; i < lttpr_count; i++)
intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i));
@@ -1482,10 +1523,10 @@ void intel_dp_start_link_train(struct intel_atomic_state *state,
struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
struct intel_encoder *encoder = &dig_port->base;
bool passed;
-
/*
- * TODO: Reiniting LTTPRs here won't be needed once proper connector
- * HW state readout is added.
+ * Reinit the LTTPRs here to ensure that they are switched to
+ * non-transparent mode. During an earlier LTTPR detection this
+ * could've been prevented by an active link.
*/
int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d63d81094d208abb20fc444514b2d9ec2f4b7c4e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073032-gradient-gully-658f@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
d63d81094d20 ("drm/dp_mst: Fix all mstb marked as not probed after suspend/resume")
da68386d9edb ("drm: Rename dp/ to display/")
6c64ae228f08 ("Backmerge tag 'v5.17-rc6' into drm-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d63d81094d208abb20fc444514b2d9ec2f4b7c4e Mon Sep 17 00:00:00 2001
From: Wayne Lin <Wayne.Lin(a)amd.com>
Date: Wed, 26 Jun 2024 16:48:23 +0800
Subject: [PATCH] drm/dp_mst: Fix all mstb marked as not probed after
suspend/resume
[Why]
After suspend/resume, with the topology unchanged, observe that the
link_address_sent flag of every mstb is marked as false even though the
topology probing completed without any error.
It is caused by wrongly also treating the "ret == 0" case as a probing
failure.
[How]
Remove inappropriate checking conditions.
Cc: Lyude Paul <lyude(a)redhat.com>
Cc: Harry Wentland <hwentlan(a)amd.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: Imre Deak <imre.deak(a)intel.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: stable(a)vger.kernel.org
Fixes: 37dfdc55ffeb ("drm/dp_mst: Cleanup drm_dp_send_link_address() a bit")
Signed-off-by: Wayne Lin <Wayne.Lin(a)amd.com>
Reviewed-by: Lyude Paul <lyude(a)redhat.com>
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240626084825.878565-2-Wayne…
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 7f8e1cfbe19d..68831f4e502a 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -2929,7 +2929,7 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
/* FIXME: Actually do some real error handling here */
ret = drm_dp_mst_wait_tx_reply(mstb, txmsg);
- if (ret <= 0) {
+ if (ret < 0) {
drm_err(mgr->dev, "Sending link address failed with %d\n", ret);
goto out;
}
@@ -2981,7 +2981,7 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
mutex_unlock(&mgr->lock);
out:
- if (ret <= 0)
+ if (ret < 0)
mstb->link_address_sent = false;
kfree(txmsg);
return ret < 0 ? ret : changed;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d63d81094d208abb20fc444514b2d9ec2f4b7c4e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073031-survivor-unaudited-6efd@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
d63d81094d20 ("drm/dp_mst: Fix all mstb marked as not probed after suspend/resume")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d63d81094d208abb20fc444514b2d9ec2f4b7c4e Mon Sep 17 00:00:00 2001
From: Wayne Lin <Wayne.Lin(a)amd.com>
Date: Wed, 26 Jun 2024 16:48:23 +0800
Subject: [PATCH] drm/dp_mst: Fix all mstb marked as not probed after
suspend/resume
[Why]
After suspend/resume, with the topology unchanged, observe that the
link_address_sent flag of every mstb is marked as false even though the
topology probing completed without any error.
It is caused by wrongly also treating the "ret == 0" case as a probing
failure.
[How]
Remove inappropriate checking conditions.
Cc: Lyude Paul <lyude(a)redhat.com>
Cc: Harry Wentland <hwentlan(a)amd.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: Imre Deak <imre.deak(a)intel.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: stable(a)vger.kernel.org
Fixes: 37dfdc55ffeb ("drm/dp_mst: Cleanup drm_dp_send_link_address() a bit")
Signed-off-by: Wayne Lin <Wayne.Lin(a)amd.com>
Reviewed-by: Lyude Paul <lyude(a)redhat.com>
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240626084825.878565-2-Wayne…
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 7f8e1cfbe19d..68831f4e502a 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -2929,7 +2929,7 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
/* FIXME: Actually do some real error handling here */
ret = drm_dp_mst_wait_tx_reply(mstb, txmsg);
- if (ret <= 0) {
+ if (ret < 0) {
drm_err(mgr->dev, "Sending link address failed with %d\n", ret);
goto out;
}
@@ -2981,7 +2981,7 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
mutex_unlock(&mgr->lock);
out:
- if (ret <= 0)
+ if (ret < 0)
mstb->link_address_sent = false;
kfree(txmsg);
return ret < 0 ? ret : changed;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073011-enigmatic-overstep-1479@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD")
0862cfd3e22f ("drm/udl: Move connector to modesetting code")
43858eb41e0d ("drm/udl: Various improvements to the connector")
2c1eafc40e53 ("drm/udl: Use USB timeout constant when reading EDID")
c020f66013b6 ("drm/udl: Test pixel limit in mode-config's mode-valid function")
59a811faa74f ("drm/udl: Rename struct udl_drm_connector to struct udl_connector")
255490f9150d ("drm: Drop drm_edid.h from drm_crtc.h")
0f95ee9a0c57 ("Merge tag 'drm-misc-next-2022-06-08' of git://anongit.freedesktop.org/drm/drm-misc into drm-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Fri, 10 May 2024 17:47:08 +0200
Subject: [PATCH] drm/udl: Remove DRM_CONNECTOR_POLL_HPD
DisplayLink devices do not generate hotplug events. Remove the poll
flag DRM_CONNECTOR_POLL_HPD, as it may not be specified together with
DRM_CONNECTOR_POLL_CONNECT or DRM_CONNECTOR_POLL_DISCONNECT.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: afdfc4c6f55f ("drm/udl: Fixed problem with UDL adpater reconnection")
Reviewed-by: Jani Nikula <jani.nikula(a)intel.com>
Cc: Robert Tarasov <tutankhamen(a)chromium.org>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Dave Airlie <airlied(a)redhat.com>
Cc: Sean Paul <sean(a)poorly.run>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.15+
Link: https://patchwork.freedesktop.org/patch/msgid/20240510154841.11370-2-tzimme…
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 7702359c90c2..751da3a294c4 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -527,8 +527,7 @@ struct drm_connector *udl_connector_init(struct drm_device *dev)
drm_connector_helper_add(connector, &udl_connector_helper_funcs);
- connector->polled = DRM_CONNECTOR_POLL_HPD |
- DRM_CONNECTOR_POLL_CONNECT |
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
DRM_CONNECTOR_POLL_DISCONNECT;
return connector;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073010-pond-chamber-4302@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD")
0862cfd3e22f ("drm/udl: Move connector to modesetting code")
43858eb41e0d ("drm/udl: Various improvements to the connector")
2c1eafc40e53 ("drm/udl: Use USB timeout constant when reading EDID")
c020f66013b6 ("drm/udl: Test pixel limit in mode-config's mode-valid function")
59a811faa74f ("drm/udl: Rename struct udl_drm_connector to struct udl_connector")
255490f9150d ("drm: Drop drm_edid.h from drm_crtc.h")
0f95ee9a0c57 ("Merge tag 'drm-misc-next-2022-06-08' of git://anongit.freedesktop.org/drm/drm-misc into drm-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Fri, 10 May 2024 17:47:08 +0200
Subject: [PATCH] drm/udl: Remove DRM_CONNECTOR_POLL_HPD
DisplayLink devices do not generate hotplug events. Remove the poll
flag DRM_CONNECTOR_POLL_HPD, as it may not be specified together with
DRM_CONNECTOR_POLL_CONNECT or DRM_CONNECTOR_POLL_DISCONNECT.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: afdfc4c6f55f ("drm/udl: Fixed problem with UDL adpater reconnection")
Reviewed-by: Jani Nikula <jani.nikula(a)intel.com>
Cc: Robert Tarasov <tutankhamen(a)chromium.org>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Dave Airlie <airlied(a)redhat.com>
Cc: Sean Paul <sean(a)poorly.run>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.15+
Link: https://patchwork.freedesktop.org/patch/msgid/20240510154841.11370-2-tzimme…
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 7702359c90c2..751da3a294c4 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -527,8 +527,7 @@ struct drm_connector *udl_connector_init(struct drm_device *dev)
drm_connector_helper_add(connector, &udl_connector_helper_funcs);
- connector->polled = DRM_CONNECTOR_POLL_HPD |
- DRM_CONNECTOR_POLL_CONNECT |
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
DRM_CONNECTOR_POLL_DISCONNECT;
return connector;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073010-deserve-askew-b1bf@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD")
0862cfd3e22f ("drm/udl: Move connector to modesetting code")
43858eb41e0d ("drm/udl: Various improvements to the connector")
2c1eafc40e53 ("drm/udl: Use USB timeout constant when reading EDID")
c020f66013b6 ("drm/udl: Test pixel limit in mode-config's mode-valid function")
59a811faa74f ("drm/udl: Rename struct udl_drm_connector to struct udl_connector")
255490f9150d ("drm: Drop drm_edid.h from drm_crtc.h")
0f95ee9a0c57 ("Merge tag 'drm-misc-next-2022-06-08' of git://anongit.freedesktop.org/drm/drm-misc into drm-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Fri, 10 May 2024 17:47:08 +0200
Subject: [PATCH] drm/udl: Remove DRM_CONNECTOR_POLL_HPD
DisplayLink devices do not generate hotplug events. Remove the poll
flag DRM_CONNECTOR_POLL_HPD, as it may not be specified together with
DRM_CONNECTOR_POLL_CONNECT or DRM_CONNECTOR_POLL_DISCONNECT.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: afdfc4c6f55f ("drm/udl: Fixed problem with UDL adpater reconnection")
Reviewed-by: Jani Nikula <jani.nikula(a)intel.com>
Cc: Robert Tarasov <tutankhamen(a)chromium.org>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Dave Airlie <airlied(a)redhat.com>
Cc: Sean Paul <sean(a)poorly.run>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.15+
Link: https://patchwork.freedesktop.org/patch/msgid/20240510154841.11370-2-tzimme…
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 7702359c90c2..751da3a294c4 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -527,8 +527,7 @@ struct drm_connector *udl_connector_init(struct drm_device *dev)
drm_connector_helper_add(connector, &udl_connector_helper_funcs);
- connector->polled = DRM_CONNECTOR_POLL_HPD |
- DRM_CONNECTOR_POLL_CONNECT |
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
DRM_CONNECTOR_POLL_DISCONNECT;
return connector;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073009-turmoil-zombie-8941@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD")
0862cfd3e22f ("drm/udl: Move connector to modesetting code")
43858eb41e0d ("drm/udl: Various improvements to the connector")
2c1eafc40e53 ("drm/udl: Use USB timeout constant when reading EDID")
c020f66013b6 ("drm/udl: Test pixel limit in mode-config's mode-valid function")
59a811faa74f ("drm/udl: Rename struct udl_drm_connector to struct udl_connector")
255490f9150d ("drm: Drop drm_edid.h from drm_crtc.h")
0f95ee9a0c57 ("Merge tag 'drm-misc-next-2022-06-08' of git://anongit.freedesktop.org/drm/drm-misc into drm-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Fri, 10 May 2024 17:47:08 +0200
Subject: [PATCH] drm/udl: Remove DRM_CONNECTOR_POLL_HPD
DisplayLink devices do not generate hotplug events. Remove the poll
flag DRM_CONNECTOR_POLL_HPD, as it may not be specified together with
DRM_CONNECTOR_POLL_CONNECT or DRM_CONNECTOR_POLL_DISCONNECT.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: afdfc4c6f55f ("drm/udl: Fixed problem with UDL adpater reconnection")
Reviewed-by: Jani Nikula <jani.nikula(a)intel.com>
Cc: Robert Tarasov <tutankhamen(a)chromium.org>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Dave Airlie <airlied(a)redhat.com>
Cc: Sean Paul <sean(a)poorly.run>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.15+
Link: https://patchwork.freedesktop.org/patch/msgid/20240510154841.11370-2-tzimme…
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 7702359c90c2..751da3a294c4 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -527,8 +527,7 @@ struct drm_connector *udl_connector_init(struct drm_device *dev)
drm_connector_helper_add(connector, &udl_connector_helper_funcs);
- connector->polled = DRM_CONNECTOR_POLL_HPD |
- DRM_CONNECTOR_POLL_CONNECT |
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
DRM_CONNECTOR_POLL_DISCONNECT;
return connector;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073008-bakeshop-unwarlike-9346@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD")
0862cfd3e22f ("drm/udl: Move connector to modesetting code")
43858eb41e0d ("drm/udl: Various improvements to the connector")
2c1eafc40e53 ("drm/udl: Use USB timeout constant when reading EDID")
c020f66013b6 ("drm/udl: Test pixel limit in mode-config's mode-valid function")
59a811faa74f ("drm/udl: Rename struct udl_drm_connector to struct udl_connector")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Fri, 10 May 2024 17:47:08 +0200
Subject: [PATCH] drm/udl: Remove DRM_CONNECTOR_POLL_HPD
DisplayLink devices do not generate hotplug events. Remove the poll
flag DRM_CONNECTOR_POLL_HPD, as it may not be specified together with
DRM_CONNECTOR_POLL_CONNECT or DRM_CONNECTOR_POLL_DISCONNECT.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: afdfc4c6f55f ("drm/udl: Fixed problem with UDL adpater reconnection")
Reviewed-by: Jani Nikula <jani.nikula(a)intel.com>
Cc: Robert Tarasov <tutankhamen(a)chromium.org>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Dave Airlie <airlied(a)redhat.com>
Cc: Sean Paul <sean(a)poorly.run>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.15+
Link: https://patchwork.freedesktop.org/patch/msgid/20240510154841.11370-2-tzimme…
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 7702359c90c2..751da3a294c4 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -527,8 +527,7 @@ struct drm_connector *udl_connector_init(struct drm_device *dev)
drm_connector_helper_add(connector, &udl_connector_helper_funcs);
- connector->polled = DRM_CONNECTOR_POLL_HPD |
- DRM_CONNECTOR_POLL_CONNECT |
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
DRM_CONNECTOR_POLL_DISCONNECT;
return connector;
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x fa0c1c9d283b37fdb7fc1dcccbb88fc8f48a4aa4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073042-perennial-patio-0790@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
fa0c1c9d283b ("perf/x86/intel: Add a distinct name for Granite Rapids")
d142df13f357 ("perf/x86/intel: Switch to new Intel CPU model defines")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fa0c1c9d283b37fdb7fc1dcccbb88fc8f48a4aa4 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang(a)linux.intel.com>
Date: Mon, 8 Jul 2024 12:33:35 -0700
Subject: [PATCH] perf/x86/intel: Add a distinct name for Granite Rapids
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Currently, Sapphire Rapids and Granite Rapids share the same PMU name,
sapphire_rapids, because from the kernel’s perspective GNR is similar to
SPR. The only key difference is that they support different extra MSRs,
so the code path and the PMU name are shared.
However, from the end users' perspective they are quite different.
Besides the extra MSRs, GNR has a newer PEBS format, supports Retire
Latency, supports a new CPUID enumeration architecture, doesn't require
the load-latency AUX event, has additional TMA Level 1 Architectural
Events, etc. The differences can be enumerated via CPUID or the
PERF_CAPABILITIES MSR, but they weren't reflected in the model-specific
kernel setup.
It is therefore worth having a distinct PMU name for GNR.
Fixes: a6742cb90b56 ("perf/x86/intel: Fix the FRONTEND encoding on GNR and MTL")
Suggested-by: Ahmad Yasin <ahmad.yasin(a)intel.com>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/20240708193336.1192217-3-kan.liang@linux.intel.com
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index b61367991a16..0c9c2706d4ec 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6943,12 +6943,18 @@ __init int intel_pmu_init(void)
case INTEL_EMERALDRAPIDS_X:
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.extra_regs = intel_glc_extra_regs;
- fallthrough;
+ pr_cont("Sapphire Rapids events, ");
+ name = "sapphire_rapids";
+ goto glc_common;
+
case INTEL_GRANITERAPIDS_X:
case INTEL_GRANITERAPIDS_D:
+ x86_pmu.extra_regs = intel_rwc_extra_regs;
+ pr_cont("Granite Rapids events, ");
+ name = "granite_rapids";
+
+ glc_common:
intel_pmu_init_glc(NULL);
- if (!x86_pmu.extra_regs)
- x86_pmu.extra_regs = intel_rwc_extra_regs;
x86_pmu.pebs_ept = 1;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = glc_get_event_constraints;
@@ -6959,8 +6965,6 @@ __init int intel_pmu_init(void)
td_attr = glc_td_events_attrs;
tsx_attr = glc_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(true);
- pr_cont("Sapphire Rapids events, ");
- name = "sapphire_rapids";
break;
case INTEL_ALDERLAKE:
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x fa0c1c9d283b37fdb7fc1dcccbb88fc8f48a4aa4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073043-shortlist-silica-557d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
fa0c1c9d283b ("perf/x86/intel: Add a distinct name for Granite Rapids")
d142df13f357 ("perf/x86/intel: Switch to new Intel CPU model defines")
97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()")
b0560bfd4b70 ("perf/x86/intel: Clean up the hybrid CPU type handling code")
299a5fc8e783 ("perf/x86/intel: Apply the common initialization code for ADL")
d87d221f854b ("perf/x86/intel: Factor out the initialization code for ADL e-core")
0ba0c03528e9 ("perf/x86/intel: Factor out the initialization code for SPR")
d4b5694c75d4 ("perf/x86/intel: Use the common uarch name for the shared functions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fa0c1c9d283b37fdb7fc1dcccbb88fc8f48a4aa4 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang(a)linux.intel.com>
Date: Mon, 8 Jul 2024 12:33:35 -0700
Subject: [PATCH] perf/x86/intel: Add a distinct name for Granite Rapids
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Currently, Sapphire Rapids and Granite Rapids share the same PMU name,
sapphire_rapids, because from the kernel’s perspective GNR is similar to
SPR. The only key difference is that they support different extra MSRs,
so the code path and the PMU name are shared.
However, from the end users' perspective they are quite different.
Besides the extra MSRs, GNR has a newer PEBS format, supports Retire
Latency, supports a new CPUID enumeration architecture, doesn't require
the load-latency AUX event, has additional TMA Level 1 Architectural
Events, etc. The differences can be enumerated via CPUID or the
PERF_CAPABILITIES MSR, but they weren't reflected in the model-specific
kernel setup.
It is therefore worth having a distinct PMU name for GNR.
Fixes: a6742cb90b56 ("perf/x86/intel: Fix the FRONTEND encoding on GNR and MTL")
Suggested-by: Ahmad Yasin <ahmad.yasin(a)intel.com>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/20240708193336.1192217-3-kan.liang@linux.intel.com
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index b61367991a16..0c9c2706d4ec 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6943,12 +6943,18 @@ __init int intel_pmu_init(void)
case INTEL_EMERALDRAPIDS_X:
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.extra_regs = intel_glc_extra_regs;
- fallthrough;
+ pr_cont("Sapphire Rapids events, ");
+ name = "sapphire_rapids";
+ goto glc_common;
+
case INTEL_GRANITERAPIDS_X:
case INTEL_GRANITERAPIDS_D:
+ x86_pmu.extra_regs = intel_rwc_extra_regs;
+ pr_cont("Granite Rapids events, ");
+ name = "granite_rapids";
+
+ glc_common:
intel_pmu_init_glc(NULL);
- if (!x86_pmu.extra_regs)
- x86_pmu.extra_regs = intel_rwc_extra_regs;
x86_pmu.pebs_ept = 1;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = glc_get_event_constraints;
@@ -6959,8 +6965,6 @@ __init int intel_pmu_init(void)
td_attr = glc_td_events_attrs;
tsx_attr = glc_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(true);
- pr_cont("Sapphire Rapids events, ");
- name = "sapphire_rapids";
break;
case INTEL_ALDERLAKE:
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x ac9aa295f7a89d38656739628796f086f0b160e2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073034-senator-fringe-8188@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
ac9aa295f7a8 ("perf: imx_perf: fix counter start and config sequence")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ac9aa295f7a89d38656739628796f086f0b160e2 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2(a)nxp.com>
Date: Wed, 29 May 2024 16:03:55 +0800
Subject: [PATCH] perf: imx_perf: fix counter start and config sequence
In the current driver, the counter is started first and then configured.
This sequence is not correct for AXI filter events, since the correct
AXI_MASK and AXI_ID are not set yet, so the results may be inaccurate.
Reviewed-by: Frank Li <Frank.Li(a)nxp.com>
Fixes: 55691f99d417 ("drivers/perf: imx_ddr: Add support for NXP i.MX9 SoC DDRC PMU driver")
cc: stable(a)vger.kernel.org
Signed-off-by: Xu Yang <xu.yang_2(a)nxp.com>
Link: https://lore.kernel.org/r/20240529080358.703784-5-xu.yang_2@nxp.com
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 5433c52a9872..7b43b54920da 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -541,12 +541,12 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
hwc->idx = counter;
hwc->state |= PERF_HES_STOPPED;
- if (flags & PERF_EF_START)
- ddr_perf_event_start(event, flags);
-
/* read trans, write trans, read beat */
imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2);
+ if (flags & PERF_EF_START)
+ ddr_perf_event_start(event, flags);
+
return 0;
}
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x ac9aa295f7a89d38656739628796f086f0b160e2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073034-defender-boastful-74f0@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
ac9aa295f7a8 ("perf: imx_perf: fix counter start and config sequence")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ac9aa295f7a89d38656739628796f086f0b160e2 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2(a)nxp.com>
Date: Wed, 29 May 2024 16:03:55 +0800
Subject: [PATCH] perf: imx_perf: fix counter start and config sequence
In the current driver, the counter is started first and then configured.
This sequence is not correct for AXI filter events, since the correct
AXI_MASK and AXI_ID are not set yet, so the results may be inaccurate.
Reviewed-by: Frank Li <Frank.Li(a)nxp.com>
Fixes: 55691f99d417 ("drivers/perf: imx_ddr: Add support for NXP i.MX9 SoC DDRC PMU driver")
cc: stable(a)vger.kernel.org
Signed-off-by: Xu Yang <xu.yang_2(a)nxp.com>
Link: https://lore.kernel.org/r/20240529080358.703784-5-xu.yang_2@nxp.com
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 5433c52a9872..7b43b54920da 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -541,12 +541,12 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
hwc->idx = counter;
hwc->state |= PERF_HES_STOPPED;
- if (flags & PERF_EF_START)
- ddr_perf_event_start(event, flags);
-
/* read trans, write trans, read beat */
imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2);
+ if (flags & PERF_EF_START)
+ ddr_perf_event_start(event, flags);
+
return 0;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x ad97196379d0b8cb24ef3d5006978a6554e6467f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073023-blame-activist-10a9@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
ad97196379d0 ("perf/x86/intel/pt: Fix a topa_entry base address calculation")
38bb8d77d0b9 ("perf/x86/intel/pt: Split ToPA metadata and page layout")
539f7c26b41d ("perf/x86/intel/pt: Use pointer arithmetics instead in ToPA entry calculation")
fffec50f541a ("perf/x86/intel/pt: Use helpers to obtain ToPA entry size")
f6d079ce867d ("perf/x86/intel/pt: Export pt_cap_get()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ad97196379d0b8cb24ef3d5006978a6554e6467f Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter(a)intel.com>
Date: Mon, 24 Jun 2024 23:10:56 +0300
Subject: [PATCH] perf/x86/intel/pt: Fix a topa_entry base address calculation
topa_entry->base is a bit-field. Bit-fields are not promoted to a 64-bit
type, even if the underlying type is 64-bit, and so, if necessary, must
be cast to a larger type when calculations are done.
Fix a topa_entry->base address calculation by adding a cast.
Without the cast, the address was limited to 36-bits i.e. 64GiB.
The address calculation is used on systems that do not support Multiple
Entry ToPA (only Broadwell), and affects physical addresses on or above
64GiB. Instead of writing to the correct address, the write would go to
the address formed from only the low 36 bits.
Intel PT snapshot and sampling modes are not affected.
Fixes: 52ca9ced3f70 ("perf/x86/intel/pt: Add Intel PT PMU driver")
Reported-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter(a)intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240624201101.60186-3-adrian.hunter@intel.com
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 14db6d9d318b..047a2cd5b3fe 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -878,7 +878,7 @@ static void pt_update_head(struct pt *pt)
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
- return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
+ return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
}
/**
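For illustration only, here is a minimal user-space C sketch of the
integer-promotion pitfall the patch above addresses. It deliberately uses
a 32-bit unsigned bit-field and a made-up shift value rather than the
kernel's 36-bit topa_entry->base, so the narrowing arithmetic is well
defined and easy to observe; the lesson is the same: cast the bit-field
to a wide type before shifting it up into an address.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a ToPA-style entry; the field width is chosen
 * only for this demo and is not the kernel layout. */
struct demo_entry {
	unsigned int base : 32;
};

int main(void)
{
	struct demo_entry e = { .base = 0xFFFFFFFFu };	/* largest value the field holds */

	/* Without a cast the shift is performed in 32-bit unsigned
	 * arithmetic, so bits shifted above bit 31 are silently lost. */
	uint64_t truncated = e.base << 12;

	/* Casting first, as the fix does with (phys_addr_t), keeps all bits. */
	uint64_t correct = (uint64_t)e.base << 12;

	printf("without cast: %#" PRIx64 "\n", truncated);	/* 0xfffff000 */
	printf("with cast:    %#" PRIx64 "\n", correct);	/* 0xffffffff000 */
	return 0;
}

Compiled as-is, the first line prints 0xfffff000 while the second prints
0xffffffff000, which mirrors the way upper address bits were dropped
before the fix.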
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 76ba6acfcce871db13ad51c6dc8f56fec2e92853
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073058-game-plant-575c@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
76ba6acfcce8 ("mm: optimize the redundant loop of mm_update_owner_next()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76ba6acfcce871db13ad51c6dc8f56fec2e92853 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng(a)tencent.com>
Date: Thu, 20 Jun 2024 20:21:24 +0800
Subject: [PATCH] mm: optimize the redundant loop of mm_update_owner_next()
When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or
/proc or ptrace or page migration (get_task_mm()), it is impossible to
find an appropriate task_struct in the loop whose mm_struct is the same as
the target mm_struct.
If the above race condition is combined with the stress-ng-zombie and
stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in
write_lock_irq() for tasklist_lock.
Recognize this situation in advance and exit early.
Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com
Signed-off-by: Jinliang Zheng <alexjlzheng(a)tencent.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mateusz Guzik <mjguzik(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Tycho Andersen <tandersen(a)netflix.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 76ba6acfcce871db13ad51c6dc8f56fec2e92853
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073056-hypertext-proving-0c10@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
76ba6acfcce8 ("mm: optimize the redundant loop of mm_update_owner_next()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76ba6acfcce871db13ad51c6dc8f56fec2e92853 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng(a)tencent.com>
Date: Thu, 20 Jun 2024 20:21:24 +0800
Subject: [PATCH] mm: optimize the redundant loop of mm_update_owner_next()
When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or
/proc or ptrace or page migration (get_task_mm()), it is impossible to
find an appropriate task_struct in the loop whose mm_struct is the same as
the target mm_struct.
If the above race condition is combined with the stress-ng-zombie and
stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in
write_lock_irq() for tasklist_lock.
Recognize this situation in advance and exit early.
Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com
Signed-off-by: Jinliang Zheng <alexjlzheng(a)tencent.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mateusz Guzik <mjguzik(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Tycho Andersen <tandersen(a)netflix.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 76ba6acfcce871db13ad51c6dc8f56fec2e92853
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073057-ogle-vocalist-31c7@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
76ba6acfcce8 ("mm: optimize the redundant loop of mm_update_owner_next()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76ba6acfcce871db13ad51c6dc8f56fec2e92853 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng(a)tencent.com>
Date: Thu, 20 Jun 2024 20:21:24 +0800
Subject: [PATCH] mm: optimize the redundant loop of mm_update_owner_next()
When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or
/proc or ptrace or page migration (get_task_mm()), it is impossible to
find an appropriate task_struct in the loop whose mm_struct is the same as
the target mm_struct.
If the above race condition is combined with the stress-ng-zombie and
stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in
write_lock_irq() for tasklist_lock.
Recognize this situation in advance and exit early.
Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com
Signed-off-by: Jinliang Zheng <alexjlzheng(a)tencent.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mateusz Guzik <mjguzik(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Tycho Andersen <tandersen(a)netflix.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 76ba6acfcce871db13ad51c6dc8f56fec2e92853
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073055-coaster-antitrust-74f5@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
76ba6acfcce8 ("mm: optimize the redundant loop of mm_update_owner_next()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76ba6acfcce871db13ad51c6dc8f56fec2e92853 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng(a)tencent.com>
Date: Thu, 20 Jun 2024 20:21:24 +0800
Subject: [PATCH] mm: optimize the redundant loop of mm_update_owner_next()
When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or
/proc or ptrace or page migration (get_task_mm()), it is impossible to
find an appropriate task_struct in the loop whose mm_struct is the same as
the target mm_struct.
If the above race condition is combined with the stress-ng-zombie and
stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in
write_lock_irq() for tasklist_lock.
Recognize this situation in advance and exit early.
Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com
Signed-off-by: Jinliang Zheng <alexjlzheng(a)tencent.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mateusz Guzik <mjguzik(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Tycho Andersen <tandersen(a)netflix.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 76ba6acfcce871db13ad51c6dc8f56fec2e92853
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073055-planner-cabana-cb16@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
76ba6acfcce8 ("mm: optimize the redundant loop of mm_update_owner_next()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76ba6acfcce871db13ad51c6dc8f56fec2e92853 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng(a)tencent.com>
Date: Thu, 20 Jun 2024 20:21:24 +0800
Subject: [PATCH] mm: optimize the redundant loop of mm_update_owner_next()
When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or
/proc or ptrace or page migration (get_task_mm()), it is impossible to
find an appropriate task_struct in the loop whose mm_struct is the same as
the target mm_struct.
If the above race condition is combined with the stress-ng-zombie and
stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in
write_lock_irq() for tasklist_lock.
Recognize this situation in advance and exit early.
Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com
Signed-off-by: Jinliang Zheng <alexjlzheng(a)tencent.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mateusz Guzik <mjguzik(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Tycho Andersen <tandersen(a)netflix.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 76ba6acfcce871db13ad51c6dc8f56fec2e92853
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073054-emblaze-cufflink-f604@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
76ba6acfcce8 ("mm: optimize the redundant loop of mm_update_owner_next()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76ba6acfcce871db13ad51c6dc8f56fec2e92853 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng(a)tencent.com>
Date: Thu, 20 Jun 2024 20:21:24 +0800
Subject: [PATCH] mm: optimize the redundant loop of mm_update_owner_next()
When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or
/proc or ptrace or page migration (get_task_mm()), it is impossible to
find an appropriate task_struct in the loop whose mm_struct is the same as
the target mm_struct.
If the above race condition is combined with the stress-ng-zombie and
stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in
write_lock_irq() for tasklist_lock.
Recognize this situation in advance and exit early.
Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com
Signed-off-by: Jinliang Zheng <alexjlzheng(a)tencent.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mateusz Guzik <mjguzik(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Tycho Andersen <tandersen(a)netflix.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 76ba6acfcce871db13ad51c6dc8f56fec2e92853
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073053-sterilize-garland-3014@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
76ba6acfcce8 ("mm: optimize the redundant loop of mm_update_owner_next()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76ba6acfcce871db13ad51c6dc8f56fec2e92853 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng(a)tencent.com>
Date: Thu, 20 Jun 2024 20:21:24 +0800
Subject: [PATCH] mm: optimize the redundant loop of mm_update_owner_next()
When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or
/proc or ptrace or page migration (get_task_mm()), it is impossible to
find an appropriate task_struct in the loop whose mm_struct is the same as
the target mm_struct.
If the above race condition is combined with the stress-ng-zombie and
stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in
write_lock_irq() for tasklist_lock.
Recognize this situation in advance and exit early.
Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com
Signed-off-by: Jinliang Zheng <alexjlzheng(a)tencent.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mateusz Guzik <mjguzik(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Tycho Andersen <tandersen(a)netflix.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 63d9866ab01ffd0d0835d5564107283a4afc0a38
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073048-jurist-stem-a276@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
63d9866ab01f ("mm: shmem: rename mTHP shmem counters")
f216c845f3c7 ("mm: add per-order mTHP split counters")
66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous shmem")
e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem")
3d95bc21cea5 ("mm: shmem: add THP validation for PMD-mapped THP related statistics")
6f775463d002 ("mm: shmem: use folio_alloc_mpol() in shmem_alloc_folio()")
0d648dd5c899 ("mm: drop the 'anon_' prefix for swap-out mTHP counters")
42248b9d34ea ("mm: add docs for per-order mTHP counters and transhuge_page ABI")
d0f048ac39f6 ("mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters")
ec33687c6749 ("mm: add per-order mTHP anon_fault_alloc and anon_fault_fallback counters")
5ed890ce5147 ("mm: vmscan: avoid split during shrink_folio_list()")
835c3a25aa37 ("mm: huge_memory: add the missing folio_test_pmd_mappable() for THP split statistics")
085ff35e7636 ("mm: memory: move mem_cgroup_charge() into alloc_anon_folio()")
19eaf44954df ("mm: thp: support allocation of anonymous multi-size THP")
3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface")
ddc1a5cbc05d ("mempolicy: alloc_pages_mpol() for NUMA policy without vma")
23e4883248f0 ("mm: add page_rmappable_folio() wrapper")
c36f6e6dff4d ("mempolicy trivia: slightly more consistent naming")
7f1ee4e20708 ("mempolicy trivia: delete those ancient pr_debug()s")
1cb5d11a370f ("mempolicy: fix migrate_pages(2) syscall return nr_failed")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63d9866ab01ffd0d0835d5564107283a4afc0a38 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Wed, 10 Jul 2024 10:55:01 +0100
Subject: [PATCH] mm: shmem: rename mTHP shmem counters
The legacy PMD-sized THP counters at /proc/vmstat include thp_file_alloc,
thp_file_fallback and thp_file_fallback_charge, which rather confusingly
refer to shmem THP and do not include any other types of file pages. This
is inconsistent since in most other places in the kernel, THP counters are
explicitly separated for anon, shmem and file flavours. However, we are
stuck with it since it constitutes a user ABI.
Recently, commit 66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous
shmem") added equivalent mTHP stats for shmem, keeping the same "file_"
prefix in the names. But in future, we may want to add extra stats to
cover actual file pages, at which point, it would all become very
confusing.
So let's take the opportunity to rename these new counters "shmem_" before
the change makes it upstream and the ABI becomes immutable. While we are
at it, let's improve the documentation for the legacy counters to make it
clear that they count shmem pages only.
Link: https://lkml.kernel.org/r/20240710095503.3193901-1-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reviewed-by: Lance Yang <ioworker0(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Barry Song <baohua(a)kernel.org>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Daniel Gomez <da.gomez(a)samsung.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index fe237825b95c..058485daf186 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -412,20 +412,23 @@ thp_collapse_alloc_failed
the allocation.
thp_file_alloc
- is incremented every time a file huge page is successfully
- allocated.
+ is incremented every time a shmem huge page is successfully
+ allocated (Note that despite being named after "file", the counter
+ measures only shmem).
thp_file_fallback
- is incremented if a file huge page is attempted to be allocated
- but fails and instead falls back to using small pages.
+ is incremented if a shmem huge page is attempted to be allocated
+ but fails and instead falls back to using small pages. (Note that
+ despite being named after "file", the counter measures only shmem).
thp_file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
- successful.
+ successful. (Note that despite being named after "file", the
+ counter measures only shmem).
thp_file_mapped
- is incremented every time a file huge page is mapped into
+ is incremented every time a file or shmem huge page is mapped into
user address space.
thp_split_page
@@ -496,16 +499,16 @@ swpout_fallback
Usually because failed to allocate some continuous swap space
for the huge page.
-file_alloc
- is incremented every time a file huge page is successfully
+shmem_alloc
+ is incremented every time a shmem huge page is successfully
allocated.
-file_fallback
- is incremented if a file huge page is attempted to be allocated
+shmem_fallback
+ is incremented if a shmem huge page is attempted to be allocated
but fails and instead falls back to using small pages.
-file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+shmem_fallback_charge
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
successful.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index acb6ac24a07e..cff002be83eb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -269,9 +269,9 @@ enum mthp_stat_item {
MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
MTHP_STAT_SWPOUT,
MTHP_STAT_SWPOUT_FALLBACK,
- MTHP_STAT_FILE_ALLOC,
- MTHP_STAT_FILE_FALLBACK,
- MTHP_STAT_FILE_FALLBACK_CHARGE,
+ MTHP_STAT_SHMEM_ALLOC,
+ MTHP_STAT_SHMEM_FALLBACK,
+ MTHP_STAT_SHMEM_FALLBACK_CHARGE,
MTHP_STAT_SPLIT,
MTHP_STAT_SPLIT_FAILED,
MTHP_STAT_SPLIT_DEFERRED,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9ec64aa2be94..f9696c94e211 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -568,9 +568,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
-DEFINE_MTHP_STAT_ATTR(file_alloc, MTHP_STAT_FILE_ALLOC);
-DEFINE_MTHP_STAT_ATTR(file_fallback, MTHP_STAT_FILE_FALLBACK);
-DEFINE_MTHP_STAT_ATTR(file_fallback_charge, MTHP_STAT_FILE_FALLBACK_CHARGE);
+DEFINE_MTHP_STAT_ATTR(shmem_alloc, MTHP_STAT_SHMEM_ALLOC);
+DEFINE_MTHP_STAT_ATTR(shmem_fallback, MTHP_STAT_SHMEM_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT);
DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED);
DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED);
@@ -581,9 +581,9 @@ static struct attribute *stats_attrs[] = {
&anon_fault_fallback_charge_attr.attr,
&swpout_attr.attr,
&swpout_fallback_attr.attr,
- &file_alloc_attr.attr,
- &file_fallback_attr.attr,
- &file_fallback_charge_attr.attr,
+ &shmem_alloc_attr.attr,
+ &shmem_fallback_attr.attr,
+ &shmem_fallback_charge_attr.attr,
&split_attr.attr,
&split_failed_attr.attr,
&split_deferred_attr.attr,
diff --git a/mm/shmem.c b/mm/shmem.c
index 921d59c3d669..f24dfbd387ba 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1777,7 +1777,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
if (pages == HPAGE_PMD_NR)
count_vm_event(THP_FILE_FALLBACK);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(order, MTHP_STAT_FILE_FALLBACK);
+ count_mthp_stat(order, MTHP_STAT_SHMEM_FALLBACK);
#endif
order = next_order(&suitable_orders, order);
}
@@ -1804,8 +1804,8 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
count_vm_event(THP_FILE_FALLBACK_CHARGE);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK);
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK_CHARGE);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK_CHARGE);
#endif
}
goto unlock;
@@ -2181,7 +2181,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
if (folio_test_pmd_mappable(folio))
count_vm_event(THP_FILE_ALLOC);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_ALLOC);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_ALLOC);
#endif
goto alloced;
}
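For anyone testing this rename on a stable tree, the quickest sanity check is to read the affected counters directly. A rough, untested sketch follows; it assumes a kernel with CONFIG_TRANSPARENT_HUGEPAGE and the per-order stats directory described in the transhuge.rst hunk above, and uses only the counter names that appear in this patch:
grep '^thp_file_' /proc/vmstat
for d in /sys/kernel/mm/transparent_hugepage/hugepages-*kB/stats; do
    echo "== $d"
    for f in shmem_alloc shmem_fallback shmem_fallback_charge; do
        # These attributes exist only once the rename is applied; older
        # kernels expose the same values under the file_* names.
        [ -r "$d/$f" ] && echo "$f: $(cat "$d/$f")"
    done
done
Note that the legacy /proc/vmstat names keep their thp_file_ prefix because they are already user ABI; only the new per-order sysfs attributes are renamed.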
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 63d9866ab01ffd0d0835d5564107283a4afc0a38
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073046-feminine-relapsing-3d49@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
63d9866ab01f ("mm: shmem: rename mTHP shmem counters")
f216c845f3c7 ("mm: add per-order mTHP split counters")
66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous shmem")
e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem")
3d95bc21cea5 ("mm: shmem: add THP validation for PMD-mapped THP related statistics")
6f775463d002 ("mm: shmem: use folio_alloc_mpol() in shmem_alloc_folio()")
0d648dd5c899 ("mm: drop the 'anon_' prefix for swap-out mTHP counters")
42248b9d34ea ("mm: add docs for per-order mTHP counters and transhuge_page ABI")
d0f048ac39f6 ("mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters")
ec33687c6749 ("mm: add per-order mTHP anon_fault_alloc and anon_fault_fallback counters")
5ed890ce5147 ("mm: vmscan: avoid split during shrink_folio_list()")
835c3a25aa37 ("mm: huge_memory: add the missing folio_test_pmd_mappable() for THP split statistics")
085ff35e7636 ("mm: memory: move mem_cgroup_charge() into alloc_anon_folio()")
19eaf44954df ("mm: thp: support allocation of anonymous multi-size THP")
3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface")
ddc1a5cbc05d ("mempolicy: alloc_pages_mpol() for NUMA policy without vma")
23e4883248f0 ("mm: add page_rmappable_folio() wrapper")
c36f6e6dff4d ("mempolicy trivia: slightly more consistent naming")
7f1ee4e20708 ("mempolicy trivia: delete those ancient pr_debug()s")
1cb5d11a370f ("mempolicy: fix migrate_pages(2) syscall return nr_failed")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63d9866ab01ffd0d0835d5564107283a4afc0a38 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Wed, 10 Jul 2024 10:55:01 +0100
Subject: [PATCH] mm: shmem: rename mTHP shmem counters
The legacy PMD-sized THP counters at /proc/vmstat include thp_file_alloc,
thp_file_fallback and thp_file_fallback_charge, which rather confusingly
refer to shmem THP and do not include any other types of file pages. This
is inconsistent since in most other places in the kernel, THP counters are
explicitly separated for anon, shmem and file flavours. However, we are
stuck with it since it constitutes a user ABI.
Recently, commit 66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous
shmem") added equivalent mTHP stats for shmem, keeping the same "file_"
prefix in the names. But in future, we may want to add extra stats to
cover actual file pages, at which point, it would all become very
confusing.
So let's take the opportunity to rename these new counters "shmem_" before
the change makes it upstream and the ABI becomes immutable. While we are
at it, let's improve the documentation for the legacy counters to make it
clear that they count shmem pages only.
Link: https://lkml.kernel.org/r/20240710095503.3193901-1-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reviewed-by: Lance Yang <ioworker0(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Barry Song <baohua(a)kernel.org>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Daniel Gomez <da.gomez(a)samsung.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index fe237825b95c..058485daf186 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -412,20 +412,23 @@ thp_collapse_alloc_failed
the allocation.
thp_file_alloc
- is incremented every time a file huge page is successfully
- allocated.
+ is incremented every time a shmem huge page is successfully
+ allocated (Note that despite being named after "file", the counter
+ measures only shmem).
thp_file_fallback
- is incremented if a file huge page is attempted to be allocated
- but fails and instead falls back to using small pages.
+ is incremented if a shmem huge page is attempted to be allocated
+ but fails and instead falls back to using small pages. (Note that
+ despite being named after "file", the counter measures only shmem).
thp_file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
- successful.
+ successful. (Note that despite being named after "file", the
+ counter measures only shmem).
thp_file_mapped
- is incremented every time a file huge page is mapped into
+ is incremented every time a file or shmem huge page is mapped into
user address space.
thp_split_page
@@ -496,16 +499,16 @@ swpout_fallback
Usually because failed to allocate some continuous swap space
for the huge page.
-file_alloc
- is incremented every time a file huge page is successfully
+shmem_alloc
+ is incremented every time a shmem huge page is successfully
allocated.
-file_fallback
- is incremented if a file huge page is attempted to be allocated
+shmem_fallback
+ is incremented if a shmem huge page is attempted to be allocated
but fails and instead falls back to using small pages.
-file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+shmem_fallback_charge
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
successful.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index acb6ac24a07e..cff002be83eb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -269,9 +269,9 @@ enum mthp_stat_item {
MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
MTHP_STAT_SWPOUT,
MTHP_STAT_SWPOUT_FALLBACK,
- MTHP_STAT_FILE_ALLOC,
- MTHP_STAT_FILE_FALLBACK,
- MTHP_STAT_FILE_FALLBACK_CHARGE,
+ MTHP_STAT_SHMEM_ALLOC,
+ MTHP_STAT_SHMEM_FALLBACK,
+ MTHP_STAT_SHMEM_FALLBACK_CHARGE,
MTHP_STAT_SPLIT,
MTHP_STAT_SPLIT_FAILED,
MTHP_STAT_SPLIT_DEFERRED,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9ec64aa2be94..f9696c94e211 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -568,9 +568,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
-DEFINE_MTHP_STAT_ATTR(file_alloc, MTHP_STAT_FILE_ALLOC);
-DEFINE_MTHP_STAT_ATTR(file_fallback, MTHP_STAT_FILE_FALLBACK);
-DEFINE_MTHP_STAT_ATTR(file_fallback_charge, MTHP_STAT_FILE_FALLBACK_CHARGE);
+DEFINE_MTHP_STAT_ATTR(shmem_alloc, MTHP_STAT_SHMEM_ALLOC);
+DEFINE_MTHP_STAT_ATTR(shmem_fallback, MTHP_STAT_SHMEM_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT);
DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED);
DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED);
@@ -581,9 +581,9 @@ static struct attribute *stats_attrs[] = {
&anon_fault_fallback_charge_attr.attr,
&swpout_attr.attr,
&swpout_fallback_attr.attr,
- &file_alloc_attr.attr,
- &file_fallback_attr.attr,
- &file_fallback_charge_attr.attr,
+ &shmem_alloc_attr.attr,
+ &shmem_fallback_attr.attr,
+ &shmem_fallback_charge_attr.attr,
&split_attr.attr,
&split_failed_attr.attr,
&split_deferred_attr.attr,
diff --git a/mm/shmem.c b/mm/shmem.c
index 921d59c3d669..f24dfbd387ba 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1777,7 +1777,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
if (pages == HPAGE_PMD_NR)
count_vm_event(THP_FILE_FALLBACK);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(order, MTHP_STAT_FILE_FALLBACK);
+ count_mthp_stat(order, MTHP_STAT_SHMEM_FALLBACK);
#endif
order = next_order(&suitable_orders, order);
}
@@ -1804,8 +1804,8 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
count_vm_event(THP_FILE_FALLBACK_CHARGE);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK);
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK_CHARGE);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK_CHARGE);
#endif
}
goto unlock;
@@ -2181,7 +2181,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
if (folio_test_pmd_mappable(folio))
count_vm_event(THP_FILE_ALLOC);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_ALLOC);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_ALLOC);
#endif
goto alloced;
}
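Before attempting the 5.4 backport, it may be worth checking how much of the mTHP infrastructure from the dependency list is already in the branch; if most of it is missing, carrying the rename alone is unlikely to be worthwhile. A rough sketch, assuming the linux-5.4.y FETCH_HEAD from the commands above is checked out (the abbreviated ids are copied from the dependency list; extend the loop as needed):
for c in f216c845f3c7 66f44583f9b6 e7a2ab7b3bb5 ec33687c6749 3485b88390b0; do
    # merge-base exits 0 only if $c is already an ancestor of HEAD;
    # commits unknown to this tree fail and are reported as missing.
    if git merge-base --is-ancestor "$c" HEAD 2>/dev/null; then
        echo "already present: $c"
    else
        echo "still missing:   $c"
    fi
done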
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 63d9866ab01ffd0d0835d5564107283a4afc0a38
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073045-browbeat-banshee-05db@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
63d9866ab01f ("mm: shmem: rename mTHP shmem counters")
f216c845f3c7 ("mm: add per-order mTHP split counters")
66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous shmem")
e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem")
3d95bc21cea5 ("mm: shmem: add THP validation for PMD-mapped THP related statistics")
6f775463d002 ("mm: shmem: use folio_alloc_mpol() in shmem_alloc_folio()")
0d648dd5c899 ("mm: drop the 'anon_' prefix for swap-out mTHP counters")
42248b9d34ea ("mm: add docs for per-order mTHP counters and transhuge_page ABI")
d0f048ac39f6 ("mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters")
ec33687c6749 ("mm: add per-order mTHP anon_fault_alloc and anon_fault_fallback counters")
5ed890ce5147 ("mm: vmscan: avoid split during shrink_folio_list()")
835c3a25aa37 ("mm: huge_memory: add the missing folio_test_pmd_mappable() for THP split statistics")
085ff35e7636 ("mm: memory: move mem_cgroup_charge() into alloc_anon_folio()")
19eaf44954df ("mm: thp: support allocation of anonymous multi-size THP")
3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface")
ddc1a5cbc05d ("mempolicy: alloc_pages_mpol() for NUMA policy without vma")
23e4883248f0 ("mm: add page_rmappable_folio() wrapper")
c36f6e6dff4d ("mempolicy trivia: slightly more consistent naming")
7f1ee4e20708 ("mempolicy trivia: delete those ancient pr_debug()s")
1cb5d11a370f ("mempolicy: fix migrate_pages(2) syscall return nr_failed")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63d9866ab01ffd0d0835d5564107283a4afc0a38 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Wed, 10 Jul 2024 10:55:01 +0100
Subject: [PATCH] mm: shmem: rename mTHP shmem counters
The legacy PMD-sized THP counters at /proc/vmstat include thp_file_alloc,
thp_file_fallback and thp_file_fallback_charge, which rather confusingly
refer to shmem THP and do not include any other types of file pages. This
is inconsistent since in most other places in the kernel, THP counters are
explicitly separated for anon, shmem and file flavours. However, we are
stuck with it since it constitutes a user ABI.
Recently, commit 66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous
shmem") added equivalent mTHP stats for shmem, keeping the same "file_"
prefix in the names. But in future, we may want to add extra stats to
cover actual file pages, at which point, it would all become very
confusing.
So let's take the opportunity to rename these new counters "shmem_" before
the change makes it upstream and the ABI becomes immutable. While we are
at it, let's improve the documentation for the legacy counters to make it
clear that they count shmem pages only.
Link: https://lkml.kernel.org/r/20240710095503.3193901-1-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reviewed-by: Lance Yang <ioworker0(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Barry Song <baohua(a)kernel.org>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Daniel Gomez <da.gomez(a)samsung.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index fe237825b95c..058485daf186 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -412,20 +412,23 @@ thp_collapse_alloc_failed
the allocation.
thp_file_alloc
- is incremented every time a file huge page is successfully
- allocated.
+ is incremented every time a shmem huge page is successfully
+ allocated (Note that despite being named after "file", the counter
+ measures only shmem).
thp_file_fallback
- is incremented if a file huge page is attempted to be allocated
- but fails and instead falls back to using small pages.
+ is incremented if a shmem huge page is attempted to be allocated
+ but fails and instead falls back to using small pages. (Note that
+ despite being named after "file", the counter measures only shmem).
thp_file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
- successful.
+ successful. (Note that despite being named after "file", the
+ counter measures only shmem).
thp_file_mapped
- is incremented every time a file huge page is mapped into
+ is incremented every time a file or shmem huge page is mapped into
user address space.
thp_split_page
@@ -496,16 +499,16 @@ swpout_fallback
Usually because failed to allocate some continuous swap space
for the huge page.
-file_alloc
- is incremented every time a file huge page is successfully
+shmem_alloc
+ is incremented every time a shmem huge page is successfully
allocated.
-file_fallback
- is incremented if a file huge page is attempted to be allocated
+shmem_fallback
+ is incremented if a shmem huge page is attempted to be allocated
but fails and instead falls back to using small pages.
-file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+shmem_fallback_charge
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
successful.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index acb6ac24a07e..cff002be83eb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -269,9 +269,9 @@ enum mthp_stat_item {
MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
MTHP_STAT_SWPOUT,
MTHP_STAT_SWPOUT_FALLBACK,
- MTHP_STAT_FILE_ALLOC,
- MTHP_STAT_FILE_FALLBACK,
- MTHP_STAT_FILE_FALLBACK_CHARGE,
+ MTHP_STAT_SHMEM_ALLOC,
+ MTHP_STAT_SHMEM_FALLBACK,
+ MTHP_STAT_SHMEM_FALLBACK_CHARGE,
MTHP_STAT_SPLIT,
MTHP_STAT_SPLIT_FAILED,
MTHP_STAT_SPLIT_DEFERRED,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9ec64aa2be94..f9696c94e211 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -568,9 +568,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
-DEFINE_MTHP_STAT_ATTR(file_alloc, MTHP_STAT_FILE_ALLOC);
-DEFINE_MTHP_STAT_ATTR(file_fallback, MTHP_STAT_FILE_FALLBACK);
-DEFINE_MTHP_STAT_ATTR(file_fallback_charge, MTHP_STAT_FILE_FALLBACK_CHARGE);
+DEFINE_MTHP_STAT_ATTR(shmem_alloc, MTHP_STAT_SHMEM_ALLOC);
+DEFINE_MTHP_STAT_ATTR(shmem_fallback, MTHP_STAT_SHMEM_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT);
DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED);
DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED);
@@ -581,9 +581,9 @@ static struct attribute *stats_attrs[] = {
&anon_fault_fallback_charge_attr.attr,
&swpout_attr.attr,
&swpout_fallback_attr.attr,
- &file_alloc_attr.attr,
- &file_fallback_attr.attr,
- &file_fallback_charge_attr.attr,
+ &shmem_alloc_attr.attr,
+ &shmem_fallback_attr.attr,
+ &shmem_fallback_charge_attr.attr,
&split_attr.attr,
&split_failed_attr.attr,
&split_deferred_attr.attr,
diff --git a/mm/shmem.c b/mm/shmem.c
index 921d59c3d669..f24dfbd387ba 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1777,7 +1777,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
if (pages == HPAGE_PMD_NR)
count_vm_event(THP_FILE_FALLBACK);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(order, MTHP_STAT_FILE_FALLBACK);
+ count_mthp_stat(order, MTHP_STAT_SHMEM_FALLBACK);
#endif
order = next_order(&suitable_orders, order);
}
@@ -1804,8 +1804,8 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
count_vm_event(THP_FILE_FALLBACK_CHARGE);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK);
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK_CHARGE);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK_CHARGE);
#endif
}
goto unlock;
@@ -2181,7 +2181,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
if (folio_test_pmd_mappable(folio))
count_vm_event(THP_FILE_ALLOC);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_ALLOC);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_ALLOC);
#endif
goto alloced;
}
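To see the counters move while testing a backported kernel, allocating shmem huge pages is enough. A minimal sketch, assuming root, a tmpfs mount with huge=always, and that shmem THP is not otherwise disabled via transparent_hugepage/shmem_enabled:
mkdir -p /mnt/thp-test
mount -t tmpfs -o huge=always tmpfs /mnt/thp-test
grep thp_file_alloc /proc/vmstat    # value before
dd if=/dev/zero of=/mnt/thp-test/f bs=2M count=8 status=none
grep thp_file_alloc /proc/vmstat    # should grow if PMD-sized shmem folios were allocated
umount /mnt/thp-test
On kernels that carry the per-order mTHP counters, the shmem_alloc attributes under /sys/kernel/mm/transparent_hugepage/hugepages-*/stats should increase in the same run.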
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 63d9866ab01ffd0d0835d5564107283a4afc0a38
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024073044-compound-musky-a016@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
63d9866ab01f ("mm: shmem: rename mTHP shmem counters")
f216c845f3c7 ("mm: add per-order mTHP split counters")
66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous shmem")
e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem")
3d95bc21cea5 ("mm: shmem: add THP validation for PMD-mapped THP related statistics")
6f775463d002 ("mm: shmem: use folio_alloc_mpol() in shmem_alloc_folio()")
0d648dd5c899 ("mm: drop the 'anon_' prefix for swap-out mTHP counters")
42248b9d34ea ("mm: add docs for per-order mTHP counters and transhuge_page ABI")
d0f048ac39f6 ("mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters")
ec33687c6749 ("mm: add per-order mTHP anon_fault_alloc and anon_fault_fallback counters")
5ed890ce5147 ("mm: vmscan: avoid split during shrink_folio_list()")
835c3a25aa37 ("mm: huge_memory: add the missing folio_test_pmd_mappable() for THP split statistics")
085ff35e7636 ("mm: memory: move mem_cgroup_charge() into alloc_anon_folio()")
19eaf44954df ("mm: thp: support allocation of anonymous multi-size THP")
3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface")
ddc1a5cbc05d ("mempolicy: alloc_pages_mpol() for NUMA policy without vma")
23e4883248f0 ("mm: add page_rmappable_folio() wrapper")
c36f6e6dff4d ("mempolicy trivia: slightly more consistent naming")
7f1ee4e20708 ("mempolicy trivia: delete those ancient pr_debug()s")
1cb5d11a370f ("mempolicy: fix migrate_pages(2) syscall return nr_failed")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63d9866ab01ffd0d0835d5564107283a4afc0a38 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Wed, 10 Jul 2024 10:55:01 +0100
Subject: [PATCH] mm: shmem: rename mTHP shmem counters
The legacy PMD-sized THP counters at /proc/vmstat include thp_file_alloc,
thp_file_fallback and thp_file_fallback_charge, which rather confusingly
refer to shmem THP and do not include any other types of file pages. This
is inconsistent since in most other places in the kernel, THP counters are
explicitly separated for anon, shmem and file flavours. However, we are
stuck with it since it constitutes a user ABI.
Recently, commit 66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous
shmem") added equivalent mTHP stats for shmem, keeping the same "file_"
prefix in the names. But in future, we may want to add extra stats to
cover actual file pages, at which point, it would all become very
confusing.
So let's take the opportunity to rename these new counters "shmem_" before
the change makes it upstream and the ABI becomes immutable. While we are
at it, let's improve the documentation for the legacy counters to make it
clear that they count shmem pages only.
Link: https://lkml.kernel.org/r/20240710095503.3193901-1-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reviewed-by: Lance Yang <ioworker0(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Barry Song <baohua(a)kernel.org>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Daniel Gomez <da.gomez(a)samsung.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index fe237825b95c..058485daf186 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -412,20 +412,23 @@ thp_collapse_alloc_failed
the allocation.
thp_file_alloc
- is incremented every time a file huge page is successfully
- allocated.
+ is incremented every time a shmem huge page is successfully
+ allocated (Note that despite being named after "file", the counter
+ measures only shmem).
thp_file_fallback
- is incremented if a file huge page is attempted to be allocated
- but fails and instead falls back to using small pages.
+ is incremented if a shmem huge page is attempted to be allocated
+ but fails and instead falls back to using small pages. (Note that
+ despite being named after "file", the counter measures only shmem).
thp_file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
- successful.
+ successful. (Note that despite being named after "file", the
+ counter measures only shmem).
thp_file_mapped
- is incremented every time a file huge page is mapped into
+ is incremented every time a file or shmem huge page is mapped into
user address space.
thp_split_page
@@ -496,16 +499,16 @@ swpout_fallback
Usually because failed to allocate some continuous swap space
for the huge page.
-file_alloc
- is incremented every time a file huge page is successfully
+shmem_alloc
+ is incremented every time a shmem huge page is successfully
allocated.
-file_fallback
- is incremented if a file huge page is attempted to be allocated
+shmem_fallback
+ is incremented if a shmem huge page is attempted to be allocated
but fails and instead falls back to using small pages.
-file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+shmem_fallback_charge
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
successful.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index acb6ac24a07e..cff002be83eb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -269,9 +269,9 @@ enum mthp_stat_item {
MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
MTHP_STAT_SWPOUT,
MTHP_STAT_SWPOUT_FALLBACK,
- MTHP_STAT_FILE_ALLOC,
- MTHP_STAT_FILE_FALLBACK,
- MTHP_STAT_FILE_FALLBACK_CHARGE,
+ MTHP_STAT_SHMEM_ALLOC,
+ MTHP_STAT_SHMEM_FALLBACK,
+ MTHP_STAT_SHMEM_FALLBACK_CHARGE,
MTHP_STAT_SPLIT,
MTHP_STAT_SPLIT_FAILED,
MTHP_STAT_SPLIT_DEFERRED,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9ec64aa2be94..f9696c94e211 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -568,9 +568,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
-DEFINE_MTHP_STAT_ATTR(file_alloc, MTHP_STAT_FILE_ALLOC);
-DEFINE_MTHP_STAT_ATTR(file_fallback, MTHP_STAT_FILE_FALLBACK);
-DEFINE_MTHP_STAT_ATTR(file_fallback_charge, MTHP_STAT_FILE_FALLBACK_CHARGE);
+DEFINE_MTHP_STAT_ATTR(shmem_alloc, MTHP_STAT_SHMEM_ALLOC);
+DEFINE_MTHP_STAT_ATTR(shmem_fallback, MTHP_STAT_SHMEM_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT);
DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED);
DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED);
@@ -581,9 +581,9 @@ static struct attribute *stats_attrs[] = {
&anon_fault_fallback_charge_attr.attr,
&swpout_attr.attr,
&swpout_fallback_attr.attr,
- &file_alloc_attr.attr,
- &file_fallback_attr.attr,
- &file_fallback_charge_attr.attr,
+ &shmem_alloc_attr.attr,
+ &shmem_fallback_attr.attr,
+ &shmem_fallback_charge_attr.attr,
&split_attr.attr,
&split_failed_attr.attr,
&split_deferred_attr.attr,
diff --git a/mm/shmem.c b/mm/shmem.c
index 921d59c3d669..f24dfbd387ba 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1777,7 +1777,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
if (pages == HPAGE_PMD_NR)
count_vm_event(THP_FILE_FALLBACK);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(order, MTHP_STAT_FILE_FALLBACK);
+ count_mthp_stat(order, MTHP_STAT_SHMEM_FALLBACK);
#endif
order = next_order(&suitable_orders, order);
}
@@ -1804,8 +1804,8 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
count_vm_event(THP_FILE_FALLBACK_CHARGE);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK);
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK_CHARGE);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK_CHARGE);
#endif
}
goto unlock;
@@ -2181,7 +2181,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
if (folio_test_pmd_mappable(folio))
count_vm_event(THP_FILE_ALLOC);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_ALLOC);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_ALLOC);
#endif
goto alloced;
}
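Because the point of the rename is to fix the per-order names before they become immutable ABI, it is also useful to confirm which naming a given kernel exposes. A small sketch; the hugepages-2048kB directory assumes a 2 MiB PMD size, and the path exists only when the per-order mTHP stats are present:
d=/sys/kernel/mm/transparent_hugepage/hugepages-2048kB/stats
if [ -e "$d/shmem_alloc" ]; then
    echo "renamed counters present (shmem_alloc and friends)"
elif [ -e "$d/file_alloc" ]; then
    echo "pre-rename counters present (file_alloc and friends)"
else
    echo "per-order mTHP stats not available on this kernel"
fi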