The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x ff7c76f66d8bad4e694c264c789249e1d3a8205d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16782668406257(a)kroah.com' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
ff7c76f66d8b ("powerpc/boot: Don't always pass -mcpu=powerpc when building 32-bit uImage")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ff7c76f66d8bad4e694c264c789249e1d3a8205d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali(a)kernel.org>
Date: Wed, 25 Jan 2023 08:39:00 +0100
Subject: [PATCH] powerpc/boot: Don't always pass -mcpu=powerpc when building
32-bit uImage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When CONFIG_TARGET_CPU is specified then pass its value to the compiler
-mcpu option. This fixes following build error when building kernel with
powerpc e500 SPE capable cross compilers:
BOOTAS arch/powerpc/boot/crt0.o
powerpc-linux-gnuspe-gcc: error: unrecognized argument in option ‘-mcpu=powerpc’
powerpc-linux-gnuspe-gcc: note: valid arguments to ‘-mcpu=’ are: 8540 8548 native
make[1]: *** [arch/powerpc/boot/Makefile:231: arch/powerpc/boot/crt0.o] Error 1
Similar change was already introduced for the main powerpc Makefile in
commit 446cda1b21d9 ("powerpc/32: Don't always pass -mcpu=powerpc to the
compiler").
Fixes: 40a75584e526 ("powerpc/boot: Build wrapper for an appropriate CPU")
Cc: stable(a)vger.kernel.org # v5.19+
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/2ae3ae5887babfdacc34435bff0944b3f336100a.16746323…
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index d32d95aea5d6..295f76df13b5 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -39,13 +39,19 @@ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
$(LINUXINCLUDE)
ifdef CONFIG_PPC64_BOOT_WRAPPER
-ifdef CONFIG_CPU_LITTLE_ENDIAN
-BOOTCFLAGS += -m64 -mcpu=powerpc64le
+BOOTCFLAGS += -m64
else
-BOOTCFLAGS += -m64 -mcpu=powerpc64
+BOOTCFLAGS += -m32
endif
+
+ifdef CONFIG_TARGET_CPU_BOOL
+BOOTCFLAGS += -mcpu=$(CONFIG_TARGET_CPU)
+else ifdef CONFIG_PPC64_BOOT_WRAPPER
+ifdef CONFIG_CPU_LITTLE_ENDIAN
+BOOTCFLAGS += -mcpu=powerpc64le
else
-BOOTCFLAGS += -m32 -mcpu=powerpc
+BOOTCFLAGS += -mcpu=powerpc64
+endif
endif
BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include)
The patch below does not apply to the 6.2-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.2.y
git checkout FETCH_HEAD
git cherry-pick -x ff7c76f66d8bad4e694c264c789249e1d3a8205d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167826683916519(a)kroah.com' --subject-prefix 'PATCH 6.2.y' HEAD^..
Possible dependencies:
ff7c76f66d8b ("powerpc/boot: Don't always pass -mcpu=powerpc when building 32-bit uImage")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ff7c76f66d8bad4e694c264c789249e1d3a8205d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali(a)kernel.org>
Date: Wed, 25 Jan 2023 08:39:00 +0100
Subject: [PATCH] powerpc/boot: Don't always pass -mcpu=powerpc when building
32-bit uImage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When CONFIG_TARGET_CPU is specified then pass its value to the compiler
-mcpu option. This fixes following build error when building kernel with
powerpc e500 SPE capable cross compilers:
BOOTAS arch/powerpc/boot/crt0.o
powerpc-linux-gnuspe-gcc: error: unrecognized argument in option ‘-mcpu=powerpc’
powerpc-linux-gnuspe-gcc: note: valid arguments to ‘-mcpu=’ are: 8540 8548 native
make[1]: *** [arch/powerpc/boot/Makefile:231: arch/powerpc/boot/crt0.o] Error 1
Similar change was already introduced for the main powerpc Makefile in
commit 446cda1b21d9 ("powerpc/32: Don't always pass -mcpu=powerpc to the
compiler").
Fixes: 40a75584e526 ("powerpc/boot: Build wrapper for an appropriate CPU")
Cc: stable(a)vger.kernel.org # v5.19+
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/2ae3ae5887babfdacc34435bff0944b3f336100a.16746323…
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index d32d95aea5d6..295f76df13b5 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -39,13 +39,19 @@ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
$(LINUXINCLUDE)
ifdef CONFIG_PPC64_BOOT_WRAPPER
-ifdef CONFIG_CPU_LITTLE_ENDIAN
-BOOTCFLAGS += -m64 -mcpu=powerpc64le
+BOOTCFLAGS += -m64
else
-BOOTCFLAGS += -m64 -mcpu=powerpc64
+BOOTCFLAGS += -m32
endif
+
+ifdef CONFIG_TARGET_CPU_BOOL
+BOOTCFLAGS += -mcpu=$(CONFIG_TARGET_CPU)
+else ifdef CONFIG_PPC64_BOOT_WRAPPER
+ifdef CONFIG_CPU_LITTLE_ENDIAN
+BOOTCFLAGS += -mcpu=powerpc64le
else
-BOOTCFLAGS += -m32 -mcpu=powerpc
+BOOTCFLAGS += -mcpu=powerpc64
+endif
endif
BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include)
Andrew, this should replace these patches in mm-hotfixes-unstable:
400922513f30 ("maple_tree: fix mas_skip_node() end slot detection")
4d4ec28ef3a4 ("test_maple_tree: add more testing for mas_empty_area()")
mas_empty_area() was incorrectly returning an error when there was room.
The issue was tracked down to mas_skip_node() using the incorrect
end-of-slot count. Instead of using the nodes hard limit, the limit of
data should be used.
mas_skip_node() was also setting the min and max to that of the child
node, which was unnecessary. Within these limits being set, there was
also a bug that corrupted the maple state's max if the offset was set to
the maximum node pivot. The bug was without consequence unless there
was a sufficient gap in the next child node which would cause an error
to be returned.
This patch set fixes these errors by removing the limit setting from
mas_skip_node() and uses the mas_data_end() for slot limits, and adds
tests for all failures discovered.
Liam R. Howlett (2):
maple_tree: Fix mas_skip_node() end slot detection
test_maple_tree: Add more testing for mas_empty_area()
lib/maple_tree.c | 24 +++++-----------------
lib/test_maple_tree.c | 48 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 53 insertions(+), 19 deletions(-)
--
2.39.2
From: Eric Biggers <ebiggers(a)google.com>
When writing a page from an encrypted file that is using
filesystem-layer encryption (not inline encryption), ext4 encrypts the
pagecache page into a bounce page, then writes the bounce page.
It also passes the bounce page to wbc_account_cgroup_owner(). That's
incorrect, because the bounce page is a newly allocated temporary page
that doesn't have the memory cgroup of the original pagecache page.
This makes wbc_account_cgroup_owner() not account the I/O to the owner
of the pagecache page as it should.
Fix this by always passing the pagecache page to
wbc_account_cgroup_owner().
Fixes: 001e4a8775f6 ("ext4: implement cgroup writeback support")
Cc: stable(a)vger.kernel.org
Reported-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/ext4/page-io.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index beaec6d81074a..1e4db96a04e63 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -409,7 +409,8 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
static void io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
- struct page *page,
+ struct page *pagecache_page,
+ struct page *bounce_page,
struct buffer_head *bh)
{
int ret;
@@ -421,10 +422,11 @@ static void io_submit_add_bh(struct ext4_io_submit *io,
}
if (io->io_bio == NULL)
io_submit_init_bio(io, bh);
- ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
+ ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
+ bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
- wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
+ wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
io->io_next_block++;
}
@@ -561,8 +563,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- io_submit_add_bh(io, inode,
- bounce_page ? bounce_page : page, bh);
+ io_submit_add_bh(io, inode, page, bounce_page, bh);
} while ((bh = bh->b_this_page) != head);
unlock:
unlock_page(page);
base-commit: 6d796c50f84ca79f1722bb131799e5a5710c4700
--
2.39.1
commit fdaf88531cfd17b2a710cceb3141ef6f9085ff40 on 5.15.y
When we backport dadd0dcaa67d ("net/ulp: prevent ULP without clone op from
entering the LISTEN status"), we have accidentally backported a part of
7a7160edf1bf ("net: Return errno in sk->sk_prot->get_port().") and removed
err = -EADDRINUSE in inet_csk_listen_start().
Thus, listen() no longer returns -EADDRINUSE even if ->get_port() failed
as reported in [0].
We set -EADDRINUSE to err just before ->get_port() to fix the regression.
[0]: https://lore.kernel.org/stable/EF8A45D0-768A-4CD5-9A8A-0FA6E610ABF7@winter.…
Reported-by: Winter <winter(a)winter.cafe>
Signed-off-by: Kuniyuki Iwashima <kuniyu(a)amazon.com>
---
net/ipv4/inet_connection_sock.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6a1774b4609b..c7306bbc13df 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -925,6 +925,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
* It is OK, because this socket enters to hash table only
* after validation is complete.
*/
+ err = -EADDRINUSE;
sk_state_store(sk, TCP_LISTEN);
if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
inet->inet_sport = htons(inet->inet_num);
--
2.30.2
The quilt patch titled
Subject: mm/damon/paddr: fix folio_size() call after folio_put() in damon_pa_young()
has been removed from the -mm tree. Its filename was
mm-damon-paddr-fix-folio_size-call-after-folio_put-in-damon_pa_young.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/paddr: fix folio_size() call after folio_put() in damon_pa_young()
Date: Sat, 4 Mar 2023 19:39:48 +0000
Patch series "mm/damon/paddr: Fix folio-use-after-put bugs".
There are two folio accesses after folio_put() in mm/damon/paddr.c file.
Fix those.
This patch (of 2):
damon_pa_young() is accessing a folio via folio_size() after folio_put()
for the folio has invoked. Fix it.
Link: https://lkml.kernel.org/r/20230304193949.296391-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20230304193949.296391-2-sj@kernel.org
Fixes: 397b0c3a584b ("mm/damon/paddr: remove folio_sz field from damon_pa_access_chk_result")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [6.2.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/damon/paddr.c~mm-damon-paddr-fix-folio_size-call-after-folio_put-in-damon_pa_young
+++ a/mm/damon/paddr.c
@@ -130,7 +130,6 @@ static bool damon_pa_young(unsigned long
accessed = false;
else
accessed = true;
- folio_put(folio);
goto out;
}
@@ -144,10 +143,10 @@ static bool damon_pa_young(unsigned long
if (need_lock)
folio_unlock(folio);
- folio_put(folio);
out:
*folio_sz = folio_size(folio);
+ folio_put(folio);
return accessed;
}
_
Patches currently in -mm which might be from sj(a)kernel.org are
The quilt patch titled
Subject: ocfs2: fix data corruption after failed write
has been removed from the -mm tree. Its filename was
ocfs2-fix-data-corruption-after-failed-write.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jan Kara via Ocfs2-devel <ocfs2-devel(a)oss.oracle.com>
Subject: ocfs2: fix data corruption after failed write
Date: Thu, 2 Mar 2023 16:38:43 +0100
When buffered write fails to copy data into underlying page cache page,
ocfs2_write_end_nolock() just zeroes out and dirties the page. This can
leave dirty page beyond EOF and if page writeback tries to write this page
before write succeeds and expands i_size, page gets into inconsistent
state where page dirty bit is clear but buffer dirty bits stay set
resulting in page data never getting written and so data copied to the
page is lost. Fix the problem by invalidating page beyond EOF after
failed write.
Link: https://lkml.kernel.org/r/20230302153843.18499-1-jack@suse.cz
Fixes: 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/fs/ocfs2/aops.c~ocfs2-fix-data-corruption-after-failed-write
+++ a/fs/ocfs2/aops.c
@@ -1977,11 +1977,26 @@ int ocfs2_write_end_nolock(struct addres
}
if (unlikely(copied < len) && wc->w_target_page) {
+ loff_t new_isize;
+
if (!PageUptodate(wc->w_target_page))
copied = 0;
- ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
- start+len);
+ new_isize = max_t(loff_t, i_size_read(inode), pos + copied);
+ if (new_isize > page_offset(wc->w_target_page))
+ ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
+ start+len);
+ else {
+ /*
+ * When page is fully beyond new isize (data copy
+ * failed), do not bother zeroing the page. Invalidate
+ * it instead so that writeback does not get confused
+ * put page & buffer dirty bits into inconsistent
+ * state.
+ */
+ block_invalidate_folio(page_folio(wc->w_target_page),
+ 0, PAGE_SIZE);
+ }
}
if (wc->w_target_page)
flush_dcache_page(wc->w_target_page);
_
Patches currently in -mm which might be from ocfs2-devel(a)oss.oracle.com are
The quilt patch titled
Subject: mm: teach mincore_hugetlb about pte markers
has been removed from the -mm tree. Its filename was
mm-teach-mincore_hugetlb-about-pte-markers.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: James Houghton <jthoughton(a)google.com>
Subject: mm: teach mincore_hugetlb about pte markers
Date: Thu, 2 Mar 2023 22:24:04 +0000
By checking huge_pte_none(), we incorrectly classify PTE markers as
"present". Instead, check huge_pte_none_mostly(), classifying PTE markers
the same as if the PTE were completely blank.
PTE markers, unlike other kinds of swap entries, don't reference any
physical page and don't indicate that a physical page was mapped
previously. As such, treat them as non-present for the sake of mincore().
Link: https://lkml.kernel.org/r/20230302222404.175303-1-jthoughton@google.com
Fixes: 5c041f5d1f23 ("mm: teach core mm about pte markers")
Signed-off-by: James Houghton <jthoughton(a)google.com>
Acked-by: Peter Xu <peterx(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: James Houghton <jthoughton(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/mincore.c~mm-teach-mincore_hugetlb-about-pte-markers
+++ a/mm/mincore.c
@@ -33,7 +33,7 @@ static int mincore_hugetlb(pte_t *pte, u
* Hugepages under user process are always in RAM and never
* swapped out, but theoretically it needs to be checked.
*/
- present = pte && !huge_pte_none(huge_ptep_get(pte));
+ present = pte && !huge_pte_none_mostly(huge_ptep_get(pte));
for (; addr != end; vec++, addr += PAGE_SIZE)
*vec = present;
walk->private = vec;
_
Patches currently in -mm which might be from jthoughton(a)google.com are
When ath11k runs into internal errors upon suspend,
it returns an error code to pci_pm_suspend, which
aborts the entire system suspend.
The driver should not abort system suspend, but should
keep its internal errors to itself, and allow the system
to suspend. Otherwise, a user can suspend a laptop
by closing the lid and sealing it into a case, assuming
that is will suspend, rather than heating up and draining
the battery when in transit.
In practice, the ath11k device seems to have plenty of transient
errors, and subsequent suspend cycles after this failure
often succeed.
https://bugzilla.kernel.org/show_bug.cgi?id=216968
Fixes: d1b0c33850d29 ("ath11k: implement suspend for QCA6390 PCI devices")
Signed-off-by: Len Brown <len.brown(a)intel.com>
Cc: Carl Huang <cjhuang(a)codeaurora.org>
Cc: Kalle Valo <kvalo(a)codeaurora.org>
Cc: stable(a)vger.kernel.org
---
drivers/net/wireless/ath/ath11k/pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 99cf3357c66e..3c6005ab9a71 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -979,7 +979,7 @@ static __maybe_unused int ath11k_pci_pm_suspend(struct device *dev)
if (ret)
ath11k_warn(ab, "failed to suspend core: %d\n", ret);
- return ret;
+ return 0;
}
static __maybe_unused int ath11k_pci_pm_resume(struct device *dev)
--
2.37.2
The patch titled
Subject: mm/ksm: fix race with ksm_exit() in VMA iteration
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-ksm-fix-race-with-ksm_exit-in-vma-iteration.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Subject: mm/ksm: fix race with ksm_exit() in VMA iteration
Date: Tue, 7 Mar 2023 15:59:51 -0500
ksm_exit() may remove the mm from the ksm_scan between the unlocking of
the ksm_mmlist and the start of the VMA iteration. This results in the
mmap_read_lock() not being taken and a report from lockdep that the mm
isn't locked in the maple tree code.
Fix the race by checking if this mm has been removed before iterating the
VMAs. __ksm_exit() uses the mmap lock to synchronize the freeing of an
mm, so it is safe to keep iterating over the VMAs when it is going to be
freed.
This change will slow down the mm exit during the race condition, but will
speed up the non-race scenarios iteration over the VMA list, which should
be much more common.
Link: https://lkml.kernel.org/r/20230307205951.2465275-1-Liam.Howlett@oracle.com
Fixes: a5f18ba07276 ("mm/ksm: use vma iterators instead of vma linked list")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Link: https://lore.kernel.org/lkml/ZAdUUhSbaa6fHS36@xpf.sh.intel.com/
Reported-by: <syzbot+2ee18845e89ae76342c5(a)syzkaller.appspotmail.com>
Link: https://syzkaller.appspot.com/bug?id=64a3e95957cd3deab99df7cd7b5a9475af92c9…
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <heng.su(a)intel.com>
Cc: Pengfei Xu <pengfei.xu(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/ksm.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/mm/ksm.c~mm-ksm-fix-race-with-ksm_exit-in-vma-iteration
+++ a/mm/ksm.c
@@ -988,9 +988,10 @@ static int unmerge_and_remove_all_rmap_i
mm = mm_slot->slot.mm;
mmap_read_lock(mm);
+ if (ksm_test_exit(mm))
+ goto mm_exiting;
+
for_each_vma(vmi, vma) {
- if (ksm_test_exit(mm))
- break;
if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
continue;
err = unmerge_ksm_pages(vma,
@@ -999,6 +1000,7 @@ static int unmerge_and_remove_all_rmap_i
goto error;
}
+mm_exiting:
remove_trailing_rmap_items(&mm_slot->rmap_list);
mmap_read_unlock(mm);
_
Patches currently in -mm which might be from Liam.Howlett(a)oracle.com are
maple_tree-fix-mas_skip_node-end-slot-detection.patch
test_maple_tree-add-more-testing-for-mas_empty_area.patch
mm-ksm-fix-race-with-ksm_exit-in-vma-iteration.patch
maple_tree-be-more-cautious-about-dead-nodes.patch
maple_tree-detect-dead-nodes-in-mas_start.patch
maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch
maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch
maple_tree-fix-write-memory-barrier-of-nodes-once-dead-for-rcu-mode.patch
maple_tree-add-smp_rmb-to-dead-node-detection.patch
maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch
mm-enable-maple-tree-rcu-mode-by-default.patch
The patch titled
Subject: nilfs2: fix kernel-infoleak in nilfs_ioctl_wrap_copy()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
nilfs2-fix-kernel-infoleak-in-nilfs_ioctl_wrap_copy.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix kernel-infoleak in nilfs_ioctl_wrap_copy()
Date: Tue, 7 Mar 2023 17:55:48 +0900
The ioctl helper function nilfs_ioctl_wrap_copy(), which exchanges a
metadata array to/from user space, may copy uninitialized buffer regions
to user space memory for read-only ioctl commands NILFS_IOCTL_GET_SUINFO
and NILFS_IOCTL_GET_CPINFO.
This can occur when the element size of the user space metadata given by
the v_size member of the argument nilfs_argv structure is larger than the
size of the metadata element (nilfs_suinfo structure or nilfs_cpinfo
structure) on the file system side.
KMSAN-enabled kernels detect this issue as follows:
BUG: KMSAN: kernel-infoleak in instrument_copy_to_user
include/linux/instrumented.h:121 [inline]
BUG: KMSAN: kernel-infoleak in _copy_to_user+0xc0/0x100 lib/usercopy.c:33
instrument_copy_to_user include/linux/instrumented.h:121 [inline]
_copy_to_user+0xc0/0x100 lib/usercopy.c:33
copy_to_user include/linux/uaccess.h:169 [inline]
nilfs_ioctl_wrap_copy+0x6fa/0xc10 fs/nilfs2/ioctl.c:99
nilfs_ioctl_get_info fs/nilfs2/ioctl.c:1173 [inline]
nilfs_ioctl+0x2402/0x4450 fs/nilfs2/ioctl.c:1290
nilfs_compat_ioctl+0x1b8/0x200 fs/nilfs2/ioctl.c:1343
__do_compat_sys_ioctl fs/ioctl.c:968 [inline]
__se_compat_sys_ioctl+0x7dd/0x1000 fs/ioctl.c:910
__ia32_compat_sys_ioctl+0x93/0xd0 fs/ioctl.c:910
do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline]
__do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178
do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203
do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246
entry_SYSENTER_compat_after_hwframe+0x70/0x82
Uninit was created at:
__alloc_pages+0x9f6/0xe90 mm/page_alloc.c:5572
alloc_pages+0xab0/0xd80 mm/mempolicy.c:2287
__get_free_pages+0x34/0xc0 mm/page_alloc.c:5599
nilfs_ioctl_wrap_copy+0x223/0xc10 fs/nilfs2/ioctl.c:74
nilfs_ioctl_get_info fs/nilfs2/ioctl.c:1173 [inline]
nilfs_ioctl+0x2402/0x4450 fs/nilfs2/ioctl.c:1290
nilfs_compat_ioctl+0x1b8/0x200 fs/nilfs2/ioctl.c:1343
__do_compat_sys_ioctl fs/ioctl.c:968 [inline]
__se_compat_sys_ioctl+0x7dd/0x1000 fs/ioctl.c:910
__ia32_compat_sys_ioctl+0x93/0xd0 fs/ioctl.c:910
do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline]
__do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178
do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203
do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246
entry_SYSENTER_compat_after_hwframe+0x70/0x82
Bytes 16-127 of 3968 are uninitialized
...
This eliminates the leak issue by initializing the page allocated as
buffer using get_zeroed_page().
Link: https://lkml.kernel.org/r/20230307085548.6290-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+132fdd2f1e1805fdc591(a)syzkaller.appspotmail.com
Link: https://lkml.kernel.org/r/000000000000a5bd2d05f63f04ae@google.com
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/ioctl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/nilfs2/ioctl.c~nilfs2-fix-kernel-infoleak-in-nilfs_ioctl_wrap_copy
+++ a/fs/nilfs2/ioctl.c
@@ -71,7 +71,7 @@ static int nilfs_ioctl_wrap_copy(struct
if (argv->v_index > ~(__u64)0 - argv->v_nmembs)
return -EINVAL;
- buf = (void *)__get_free_pages(GFP_NOFS, 0);
+ buf = (void *)get_zeroed_page(GFP_NOFS);
if (unlikely(!buf))
return -ENOMEM;
maxmembs = PAGE_SIZE / argv->v_size;
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-fix-kernel-infoleak-in-nilfs_ioctl_wrap_copy.patch
The patch titled
Subject: test_maple_tree: add more testing for mas_empty_area()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
test_maple_tree-add-more-testing-for-mas_empty_area.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Subject: test_maple_tree: add more testing for mas_empty_area()
Date: Tue, 7 Mar 2023 13:02:47 -0500
Test robust filling of an entire area of the tree, then test one beyond.
This is to test the walking back up the tree at the end of nodes and error
condition. Test inspired by the reproducer code provided by Snild Dolkow.
The last test in the function tests for the case of a corrupted maple
state caused by the incorrect limits set during mas_skip_node(). There
needs to be a gap in the second last child and last child, but the search
must rule out the second last child's gap. This would avoid correcting
the maple state to the correct max limit and return an error.
Link: https://lkml.kernel.org/r/20230307180247.2220303-3-Liam.Howlett@oracle.com
Cc: Snild Dolkow <snild(a)sony.com>
Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.…
Fixes: e15e06a83923 ("lib/test_maple_tree: add testing for maple tree")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Peng Zhang <zhangpeng.00(a)bytedance.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/test_maple_tree.c | 48 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
--- a/lib/test_maple_tree.c~test_maple_tree-add-more-testing-for-mas_empty_area
+++ a/lib/test_maple_tree.c
@@ -2670,6 +2670,49 @@ static noinline void check_empty_area_wi
rcu_read_unlock();
}
+static noinline void check_empty_area_fill(struct maple_tree *mt)
+{
+ const unsigned long max = 0x25D78000;
+ unsigned long size;
+ int loop, shift;
+ MA_STATE(mas, mt, 0, 0);
+
+ mt_set_non_kernel(99999);
+ for (shift = 12; shift <= 16; shift++) {
+ loop = 5000;
+ size = 1 << shift;
+ while (loop--) {
+ mas_set(&mas, 0);
+ mas_lock(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != 0);
+ MT_BUG_ON(mt, mas.last != mas.index + size - 1);
+ mas_store_gfp(&mas, (void *)size, GFP_KERNEL);
+ mas_unlock(&mas);
+ mas_reset(&mas);
+ }
+ }
+
+ /* No space left. */
+ size = 0x1000;
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != -EBUSY);
+ rcu_read_unlock();
+
+ /* Fill a depth 3 node to the maximum */
+ for (unsigned long i = 629440511; i <= 629440800; i += 6)
+ mtree_store_range(mt, i, i + 5, (void *)i, GFP_KERNEL);
+ /* Make space in the second-last depth 4 node */
+ mtree_erase(mt, 631668735);
+ /* Make space in the last depth 4 node */
+ mtree_erase(mt, 629506047);
+ mas_reset(&mas);
+ /* Search from just after the gap in the second-last depth 4 */
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_empty_area(&mas, 629506048, 690000000, 0x5000) != 0);
+ rcu_read_unlock();
+ mt_set_non_kernel(0);
+}
+
static DEFINE_MTREE(tree);
static int maple_tree_seed(void)
{
@@ -2926,6 +2969,11 @@ static int maple_tree_seed(void)
check_empty_area_window(&tree);
mtree_destroy(&tree);
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_empty_area_fill(&tree);
+ mtree_destroy(&tree);
+
+
#if defined(BENCH)
skip:
#endif
_
Patches currently in -mm which might be from Liam.Howlett(a)oracle.com are
maple_tree-fix-mas_skip_node-end-slot-detection.patch
test_maple_tree-add-more-testing-for-mas_empty_area.patch
maple_tree-be-more-cautious-about-dead-nodes.patch
maple_tree-detect-dead-nodes-in-mas_start.patch
maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch
maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch
maple_tree-fix-write-memory-barrier-of-nodes-once-dead-for-rcu-mode.patch
maple_tree-add-smp_rmb-to-dead-node-detection.patch
maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch
mm-enable-maple-tree-rcu-mode-by-default.patch
The patch titled
Subject: maple_tree: fix mas_skip_node() end slot detection
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
maple_tree-fix-mas_skip_node-end-slot-detection.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Subject: maple_tree: fix mas_skip_node() end slot detection
Date: Tue, 7 Mar 2023 13:02:46 -0500
Patch series "Fix mas_skip_node() for mas_empty_area()", v2.
mas_empty_area() was incorrectly returning an error when there was room.
The issue was tracked down to mas_skip_node() using the incorrect
end-of-slot count. Instead of using the nodes hard limit, the limit of
data should be used.
mas_skip_node() was also setting the min and max to that of the child
node, which was unnecessary. Within these limits being set, there was
also a bug that corrupted the maple state's max if the offset was set to
the maximum node pivot. The bug was without consequence unless there was
a sufficient gap in the next child node which would cause an error to be
returned.
This patch set fixes these errors by removing the limit setting from
mas_skip_node() and uses the mas_data_end() for slot limits, and adds
tests for all failures discovered.
This patch (of 2):
mas_skip_node() is used to move the maple state to the node with a higher
limit. It does this by walking up the tree and increasing the slot count.
Since slot count may not be able to be increased, it may need to walk up
multiple times to find room to walk right to a higher limit node. The
limit of slots that was being used was the node limit and not the last
location of data in the node. This would cause the maple state to be
shifted outside actual data and enter an error state, thus returning
-EBUSY.
The result of the incorrect error state means that mas_awalk() would
return an error instead of finding the allocation space.
The fix is to use mas_data_end() in mas_skip_node() to detect the nodes
data end point and continue walking the tree up until it is safe to move
to a node with a higher limit.
The walk up the tree also sets the maple state limits so remove the buggy
code from mas_skip_node(). Setting the limits had the unfortunate side
effect of triggering another bug if the parent node was full and the there
was no suitable gap in the second last child, but room in the next child.
mas_skip_node() may also be passed a maple state in an error state from
mas_anode_descend() when no allocations are available. Return on such an
error state immediately.
Link: https://lkml.kernel.org/r/20230307180247.2220303-1-Liam.Howlett@oracle.com
Link: https://lkml.kernel.org/r/20230307180247.2220303-2-Liam.Howlett@oracle.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Reported-by: Snild Dolkow <snild(a)sony.com>
Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.…
Cc: Peng Zhang <zhangpeng.00(a)bytedance.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/maple_tree.c | 24 +++++-------------------
1 file changed, 5 insertions(+), 19 deletions(-)
--- a/lib/maple_tree.c~maple_tree-fix-mas_skip_node-end-slot-detection
+++ a/lib/maple_tree.c
@@ -5099,35 +5099,21 @@ static inline bool mas_rewind_node(struc
*/
static inline bool mas_skip_node(struct ma_state *mas)
{
- unsigned char slot, slot_count;
- unsigned long *pivots;
- enum maple_type mt;
+ if (mas_is_err(mas))
+ return false;
- mt = mte_node_type(mas->node);
- slot_count = mt_slots[mt] - 1;
do {
if (mte_is_root(mas->node)) {
- slot = mas->offset;
- if (slot > slot_count) {
+ if (mas->offset >= mas_data_end(mas)) {
mas_set_err(mas, -EBUSY);
return false;
}
} else {
mas_ascend(mas);
- slot = mas->offset;
- mt = mte_node_type(mas->node);
- slot_count = mt_slots[mt] - 1;
}
- } while (slot > slot_count);
-
- mas->offset = ++slot;
- pivots = ma_pivots(mas_mn(mas), mt);
- if (slot > 0)
- mas->min = pivots[slot - 1] + 1;
-
- if (slot <= slot_count)
- mas->max = pivots[slot];
+ } while (mas->offset >= mas_data_end(mas));
+ mas->offset++;
return true;
}
_
Patches currently in -mm which might be from Liam.Howlett(a)oracle.com are
maple_tree-fix-mas_skip_node-end-slot-detection.patch
test_maple_tree-add-more-testing-for-mas_empty_area.patch
maple_tree-be-more-cautious-about-dead-nodes.patch
maple_tree-detect-dead-nodes-in-mas_start.patch
maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch
maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch
maple_tree-fix-write-memory-barrier-of-nodes-once-dead-for-rcu-mode.patch
maple_tree-add-smp_rmb-to-dead-node-detection.patch
maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch
mm-enable-maple-tree-rcu-mode-by-default.patch
Commit c048b60d39e1 ("mtd: core: provide unique name for nvmem device")
tries to give the nvmem device a unique name, but fails badly if the mtd
device doesn't have a "struct device" associated with it, i.e. if
CONFIG_MTD_PARTITIONED_MASTER is not set. This will result in the name
"(null)-user-otp", which is not unique. It seems the best we can do is
to use the compatible name together with a unique identifier added by
the nvmem subsystem by using NVMEM_DEVID_AUTO.
Fixes: c048b60d39e1 ("mtd: core: provide unique name for nvmem device")
Cc: stable(a)vger.kernel.org
Signed-off-by: Michael Walle <michael(a)walle.cc>
---
drivers/mtd/mtdcore.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 0feacb9fbdac..3fe2825a85a1 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -888,8 +888,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd,
/* OTP nvmem will be registered on the physical device */
config.dev = mtd->dev.parent;
- config.name = kasprintf(GFP_KERNEL, "%s-%s", dev_name(&mtd->dev), compatible);
- config.id = NVMEM_DEVID_NONE;
+ config.name = compatible;
config.owner = THIS_MODULE;
config.type = NVMEM_TYPE_OTP;
config.root_only = true;
@@ -905,7 +904,6 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd,
nvmem = NULL;
of_node_put(np);
- kfree(config.name);
return nvmem;
}
--
2.30.2
Top of the day to you, I'm Yuliia Kadulina Deputy Chairman of the Management Board, and Personal Finance Director at Ukrsibbank Ukraine. I have a proposal for you. Kindly revert so I can fill you in on the details.
Thanks,
Yuliia
From: Valentin Schneider <vschneid(a)redhat.com>
commit 811d581194f7412eda97acc03d17fc77824b561f upstream.
Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI
panic() doesn't work. The cause of that lies in the PREEMPT_RT definition
of mutex_trylock():
if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
return 0;
This prevents an nmi_panic() from executing the main body of
__crash_kexec() which does the actual kexec into the kdump kernel. The
warning and return are explained by:
6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
[...]
The reasons for this are:
1) There is a potential deadlock in the slowpath
2) Another cpu which blocks on the rtmutex will boost the task
which allegedly locked the rtmutex, but that cannot work
because the hard/softirq context borrows the task context.
Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex
and replace it with an atomic variable. This is somewhat overzealous as
*some* callsites could keep using a mutex (e.g. the sysfs-facing ones
like crash_shrink_memory()), but this has the benefit of involving a
single unified lock and preventing any future NMI-related surprises.
Tested by triggering NMI panics via:
$ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
$ echo 1 > /proc/sys/kernel/unknown_nmi_panic
$ echo 1 > /proc/sys/kernel/panic
$ ipmitool power diag
Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com
Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
Signed-off-by: Valentin Schneider <vschneid(a)redhat.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: "Eric W . Biederman" <ebiederm(a)xmission.com>
Cc: Juri Lelli <jlelli(a)redhat.com>
Cc: Luis Claudio R. Goncalves <lgoncalv(a)redhat.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Wen Yang <wenyang.linux(a)foxmail.com>
---
kernel/kexec.c | 11 ++++-------
kernel/kexec_core.c | 20 ++++++++++----------
kernel/kexec_file.c | 4 ++--
kernel/kexec_internal.h | 15 ++++++++++++++-
4 files changed, 30 insertions(+), 20 deletions(-)
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 9c7aef8f4bb6..f0f0c6555454 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -112,13 +112,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
/*
* Because we write directly to the reserved memory region when loading
- * crash kernels we need a mutex here to prevent multiple crash kernels
- * from attempting to load simultaneously, and to prevent a crash kernel
- * from loading over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
+ * crash kernels we need a serialization here to prevent multiple crash
+ * kernels from attempting to load simultaneously.
*/
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (flags & KEXEC_ON_CRASH) {
@@ -184,7 +181,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
kimage_free(image);
out_unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index e47870f30728..7a8104d48997 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -45,7 +45,7 @@
#include <crypto/sha.h>
#include "kexec_internal.h"
-DEFINE_MUTEX(kexec_mutex);
+atomic_t __kexec_lock = ATOMIC_INIT(0);
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -943,7 +943,7 @@ int kexec_load_disabled;
*/
void __noclone __crash_kexec(struct pt_regs *regs)
{
- /* Take the kexec_mutex here to prevent sys_kexec_load
+ /* Take the kexec_lock here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
*
@@ -951,7 +951,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
* of memory the xchg(&kexec_crash_image) would be
* sufficient. But since I reuse the memory...
*/
- if (mutex_trylock(&kexec_mutex)) {
+ if (kexec_trylock()) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
@@ -960,7 +960,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -993,13 +993,13 @@ ssize_t crash_get_memory_size(void)
{
ssize_t size = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (crashk_res.end != crashk_res.start)
size = resource_size(&crashk_res);
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return size;
}
@@ -1019,7 +1019,7 @@ int crash_shrink_memory(unsigned long new_size)
unsigned long old_size;
struct resource *ram_res;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (kexec_crash_image) {
@@ -1058,7 +1058,7 @@ int crash_shrink_memory(unsigned long new_size)
insert_resource(&iomem_resource, ram_res);
unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
@@ -1130,7 +1130,7 @@ int kernel_kexec(void)
{
int error = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (!kexec_image) {
error = -EINVAL;
@@ -1205,7 +1205,7 @@ int kernel_kexec(void)
#endif
Unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return error;
}
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index fff11916aba3..b9c857782ada 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -343,7 +343,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
image = NULL;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
dest_image = &kexec_image;
@@ -415,7 +415,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres();
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
kimage_free(image);
return ret;
}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 39d30ccf8d87..49d4e3ab9c96 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,7 +15,20 @@ int kimage_is_destination_range(struct kimage *image,
int machine_kexec_post_load(struct kimage *image);
-extern struct mutex kexec_mutex;
+/*
+ * Whatever is used to serialize accesses to the kexec_crash_image needs to be
+ * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
+ * "simple" atomic variable that is acquired with a cmpxchg().
+ */
+extern atomic_t __kexec_lock;
+static inline bool kexec_trylock(void)
+{
+ return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
+}
+static inline void kexec_unlock(void)
+{
+ atomic_set_release(&__kexec_lock, 0);
+}
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
--
2.37.2
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 8e47363588377e1bdb65e2b020b409cfb44dd260
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819253422103(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
8e4736358837 ("thermal: intel: powerclamp: Fix cur_state for multi package system")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e47363588377e1bdb65e2b020b409cfb44dd260 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Date: Wed, 1 Feb 2023 12:39:41 -0800
Subject: [PATCH] thermal: intel: powerclamp: Fix cur_state for multi package
system
The powerclamp cooling device cur_state shows actual idle observed by
package C-state idle counters. But the implementation is not sufficient
for multi package or multi die system. The cur_state value is incorrect.
On these systems, these counters must be read from each package/die and
somehow aggregate them. But there is no good method for aggregation.
It was not a problem when explicit CPU model addition was required to
enable intel powerclamp. In this way certain CPU models could have
been avoided. But with the removal of CPU model check with the
availability of Package C-state counters, the driver is loaded on most
of the recent systems.
For multi package/die systems, just show the actual target idle state,
the system is trying to achieve. In powerclamp this is the user set
state minus one.
Also there is no use of starting a worker thread for polling package
C-state counters and applying any compensation for multiple package
or multiple die systems.
Fixes: b721ca0d1927 ("thermal/powerclamp: remove cpu whitelist")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: 4.14+ <stable(a)vger.kernel.org> # 4.14+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index b80e25ec1261..2f4cbfdf26a0 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -57,6 +57,7 @@
static unsigned int target_mwait;
static struct dentry *debug_dir;
+static bool poll_pkg_cstate_enable;
/* user selected target */
static unsigned int set_target_ratio;
@@ -261,6 +262,9 @@ static unsigned int get_compensation(int ratio)
{
unsigned int comp = 0;
+ if (!poll_pkg_cstate_enable)
+ return 0;
+
/* we only use compensation if all adjacent ones are good */
if (ratio == 1 &&
cal_data[ratio].confidence >= CONFIDENCE_OK &&
@@ -519,7 +523,8 @@ static int start_power_clamp(void)
control_cpu = cpumask_first(cpu_online_mask);
clamping = true;
- schedule_delayed_work(&poll_pkg_cstate_work, 0);
+ if (poll_pkg_cstate_enable)
+ schedule_delayed_work(&poll_pkg_cstate_work, 0);
/* start one kthread worker per online cpu */
for_each_online_cpu(cpu) {
@@ -585,11 +590,15 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
- if (true == clamping)
- *state = pkg_cstate_ratio_cur;
- else
+ if (clamping) {
+ if (poll_pkg_cstate_enable)
+ *state = pkg_cstate_ratio_cur;
+ else
+ *state = set_target_ratio;
+ } else {
/* to save power, do not poll idle ratio while not clamping */
*state = -1; /* indicates invalid state */
+ }
return 0;
}
@@ -712,6 +721,9 @@ static int __init powerclamp_init(void)
goto exit_unregister;
}
+ if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
+ poll_pkg_cstate_enable = true;
+
cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
&powerclamp_cooling_ops);
if (IS_ERR(cooling_dev)) {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8e47363588377e1bdb65e2b020b409cfb44dd260
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781926219650(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8e4736358837 ("thermal: intel: powerclamp: Fix cur_state for multi package system")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e47363588377e1bdb65e2b020b409cfb44dd260 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Date: Wed, 1 Feb 2023 12:39:41 -0800
Subject: [PATCH] thermal: intel: powerclamp: Fix cur_state for multi package
system
The powerclamp cooling device cur_state shows actual idle observed by
package C-state idle counters. But the implementation is not sufficient
for multi package or multi die system. The cur_state value is incorrect.
On these systems, these counters must be read from each package/die and
somehow aggregate them. But there is no good method for aggregation.
It was not a problem when explicit CPU model addition was required to
enable intel powerclamp. In this way certain CPU models could have
been avoided. But with the removal of CPU model check with the
availability of Package C-state counters, the driver is loaded on most
of the recent systems.
For multi package/die systems, just show the actual target idle state,
the system is trying to achieve. In powerclamp this is the user set
state minus one.
Also there is no use of starting a worker thread for polling package
C-state counters and applying any compensation for multiple package
or multiple die systems.
Fixes: b721ca0d1927 ("thermal/powerclamp: remove cpu whitelist")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: 4.14+ <stable(a)vger.kernel.org> # 4.14+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index b80e25ec1261..2f4cbfdf26a0 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -57,6 +57,7 @@
static unsigned int target_mwait;
static struct dentry *debug_dir;
+static bool poll_pkg_cstate_enable;
/* user selected target */
static unsigned int set_target_ratio;
@@ -261,6 +262,9 @@ static unsigned int get_compensation(int ratio)
{
unsigned int comp = 0;
+ if (!poll_pkg_cstate_enable)
+ return 0;
+
/* we only use compensation if all adjacent ones are good */
if (ratio == 1 &&
cal_data[ratio].confidence >= CONFIDENCE_OK &&
@@ -519,7 +523,8 @@ static int start_power_clamp(void)
control_cpu = cpumask_first(cpu_online_mask);
clamping = true;
- schedule_delayed_work(&poll_pkg_cstate_work, 0);
+ if (poll_pkg_cstate_enable)
+ schedule_delayed_work(&poll_pkg_cstate_work, 0);
/* start one kthread worker per online cpu */
for_each_online_cpu(cpu) {
@@ -585,11 +590,15 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
- if (true == clamping)
- *state = pkg_cstate_ratio_cur;
- else
+ if (clamping) {
+ if (poll_pkg_cstate_enable)
+ *state = pkg_cstate_ratio_cur;
+ else
+ *state = set_target_ratio;
+ } else {
/* to save power, do not poll idle ratio while not clamping */
*state = -1; /* indicates invalid state */
+ }
return 0;
}
@@ -712,6 +721,9 @@ static int __init powerclamp_init(void)
goto exit_unregister;
}
+ if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
+ poll_pkg_cstate_enable = true;
+
cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
&powerclamp_cooling_ops);
if (IS_ERR(cooling_dev)) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 194b3348bdbb7db65375c72f3f774aee4cc6614e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678206664158120(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency")
803766cbf85f ("iommu/vt-d: Fix lockdep splat in intel_pasid_get_entry()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 194b3348bdbb7db65375c72f3f774aee4cc6614e Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Date: Thu, 16 Feb 2023 21:08:15 +0800
Subject: [PATCH] iommu/vt-d: Fix PASID directory pointer coherency
On platforms that do not support IOMMU Extended capability bit 0
Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing
any translation structures. IOMMU access goes only directly to
memory. Intel IOMMU code was missing a flush for the PASID table
directory that resulted in the unrecoverable fault as shown below.
This patch adds clflush calls whenever allocating and updating
a PASID table directory to ensure cache coherency.
On the reverse direction, there's no need to clflush the PASID directory
pointer when we deactivate a context entry in that IOMMU hardware will
not see the old PASID directory pointer after we clear the context entry.
PASID directory entries are also never freed once allocated.
DMAR: DRHD: handling fault status reg 3
DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
[fault reason 0x51] SM: Present bit in Directory Entry is clear
DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
DMAR: pasid dir entry: 0x0000000101b4e001
DMAR: pasid table entry[0]: 0x0000000000000109
DMAR: pasid table entry[1]: 0x0000000000000001
DMAR: pasid table entry[2]: 0x0000000000000000
DMAR: pasid table entry[3]: 0x0000000000000000
DMAR: pasid table entry[4]: 0x0000000000000000
DMAR: pasid table entry[5]: 0x0000000000000000
DMAR: pasid table entry[6]: 0x0000000000000000
DMAR: pasid table entry[7]: 0x0000000000000000
DMAR: PTE not present at level 4
Cc: <stable(a)vger.kernel.org>
Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reported-by: Sukumar Ghorai <sukumar.ghorai(a)intel.com>
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.inte…
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index ec964ac7d797..9d2f05cf6164 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct device *dev)
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
+ if (!ecap_coherent(info->iommu->ecap))
+ clflush_cache_range(pasid_table->table, size);
+
return 0;
}
@@ -215,6 +218,10 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
free_pgtable_page(entries);
goto retry;
}
+ if (!ecap_coherent(info->iommu->ecap)) {
+ clflush_cache_range(entries, VTD_PAGE_SIZE);
+ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
+ }
}
return &entries[index];
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 194b3348bdbb7db65375c72f3f774aee4cc6614e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820662222233(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 194b3348bdbb7db65375c72f3f774aee4cc6614e Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Date: Thu, 16 Feb 2023 21:08:15 +0800
Subject: [PATCH] iommu/vt-d: Fix PASID directory pointer coherency
On platforms that do not support IOMMU Extended capability bit 0
Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing
any translation structures. IOMMU access goes only directly to
memory. Intel IOMMU code was missing a flush for the PASID table
directory that resulted in the unrecoverable fault as shown below.
This patch adds clflush calls whenever allocating and updating
a PASID table directory to ensure cache coherency.
On the reverse direction, there's no need to clflush the PASID directory
pointer when we deactivate a context entry in that IOMMU hardware will
not see the old PASID directory pointer after we clear the context entry.
PASID directory entries are also never freed once allocated.
DMAR: DRHD: handling fault status reg 3
DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
[fault reason 0x51] SM: Present bit in Directory Entry is clear
DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
DMAR: pasid dir entry: 0x0000000101b4e001
DMAR: pasid table entry[0]: 0x0000000000000109
DMAR: pasid table entry[1]: 0x0000000000000001
DMAR: pasid table entry[2]: 0x0000000000000000
DMAR: pasid table entry[3]: 0x0000000000000000
DMAR: pasid table entry[4]: 0x0000000000000000
DMAR: pasid table entry[5]: 0x0000000000000000
DMAR: pasid table entry[6]: 0x0000000000000000
DMAR: pasid table entry[7]: 0x0000000000000000
DMAR: PTE not present at level 4
Cc: <stable(a)vger.kernel.org>
Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reported-by: Sukumar Ghorai <sukumar.ghorai(a)intel.com>
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.inte…
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index ec964ac7d797..9d2f05cf6164 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct device *dev)
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
+ if (!ecap_coherent(info->iommu->ecap))
+ clflush_cache_range(pasid_table->table, size);
+
return 0;
}
@@ -215,6 +218,10 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
free_pgtable_page(entries);
goto retry;
}
+ if (!ecap_coherent(info->iommu->ecap)) {
+ clflush_cache_range(entries, VTD_PAGE_SIZE);
+ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
+ }
}
return &entries[index];
mas_skip_node() is used to move the maple state to the node with a
higher limit. It does this by walking up the tree and increasing the
slot count. Since slot count may not be able to be increased, it may
need to walk up multiple times to find room to walk right to a higher
limit node. The limit of slots that was being used was the node limit
and not the last location of data in the node. This would cause the
maple state to be shifted outside actual data and enter an error state,
thus returning -EBUSY.
The result of the incorrect error state means that mas_awalk() would
return an error instead of finding the allocation space.
The fix is to use mas_data_end() in mas_skip_node() to detect the nodes
data end point and continue walking the tree up until it is safe to move
to a node with a higher limit.
mas_skip_node() may also be passed a maple state in an error state from
mas_anode_descend() when no allocations are available. Return on such
an error state immediately.
Reported-by: Snild Dolkow <snild(a)sony.com>
Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.…
Cc: <Stable(a)vger.kernel.org>
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
---
lib/maple_tree.c | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 2be86368237d..2efe854946d6 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5188,34 +5188,29 @@ static inline bool mas_rewind_node(struct ma_state *mas)
*/
static inline bool mas_skip_node(struct ma_state *mas)
{
- unsigned char slot, slot_count;
unsigned long *pivots;
enum maple_type mt;
- mt = mte_node_type(mas->node);
- slot_count = mt_slots[mt] - 1;
+ if (mas_is_err(mas))
+ return false;
+
do {
if (mte_is_root(mas->node)) {
- slot = mas->offset;
- if (slot > slot_count) {
+ if (mas->offset >= mas_data_end(mas)) {
mas_set_err(mas, -EBUSY);
return false;
}
} else {
mas_ascend(mas);
- slot = mas->offset;
- mt = mte_node_type(mas->node);
- slot_count = mt_slots[mt] - 1;
}
- } while (slot > slot_count);
+ } while (mas->offset >= mas_data_end(mas));
- mas->offset = ++slot;
+ mt = mte_node_type(mas->node);
pivots = ma_pivots(mas_mn(mas), mt);
- if (slot > 0)
- mas->min = pivots[slot - 1] + 1;
-
- if (slot <= slot_count)
- mas->max = pivots[slot];
+ mas->min = pivots[mas->offset] + 1;
+ mas->offset++;
+ if (mas->offset < mt_slots[mt])
+ mas->max = pivots[mas->offset];
return true;
}
--
2.39.2
The patch below does not apply to the 6.2-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.2.y
git checkout FETCH_HEAD
git cherry-pick -x 1eac28201ac0725192f5ced34192d281a06692e5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820482516243(a)kroah.com' --subject-prefix 'PATCH 6.2.y' HEAD^..
Possible dependencies:
1eac28201ac0 ("RISC-V: fix ordering of Zbb extension")
80c200b34ee8 ("RISC-V: resort all extensions in consistent orders")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1eac28201ac0725192f5ced34192d281a06692e5 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko.stuebner(a)vrull.eu>
Date: Wed, 8 Feb 2023 23:53:27 +0100
Subject: [PATCH] RISC-V: fix ordering of Zbb extension
As Andrew reported,
Zb* comes after Zi* according 27.11 "Subset Naming Convention"
so fix the ordering accordingly.
Reported-by: Andrew Jones <ajones(a)ventanamicro.com>
Signed-off-by: Heiko Stuebner <heiko.stuebner(a)vrull.eu>
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Reviewed-by: Andrew Jones <ajones(a)ventanamicro.com>
Tested-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/r/20230208225328.1636017-2-heiko@sntech.de
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 420228e219f7..8400f0cc9704 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -185,9 +185,9 @@ arch_initcall(riscv_cpuinfo_init);
* New entries to this struct should follow the ordering rules described above.
*/
static struct riscv_isa_ext_data isa_ext_arr[] = {
- __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
+ __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 85636167e3206c3fbd52254fc432991cc4e90194
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16782054308810(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
85636167e320 ("drm/i915: Don't use BAR mappings for ring buffers with LLC")
fa85bfd19c26 ("drm/i915: Update the helper to set correct mapping")
e09e903a6e89 ("drm/i915/selftests: Prepare execlists and lrc selftests for obj->mm.lock removal")
17b7ab92bec3 ("drm/i915/selftests: Prepare hangcheck for obj->mm.lock removal")
d3ad29567d4e ("drm/i915/selftests: Prepare context selftest for obj->mm.lock removal")
c858ffa17716 ("drm/i915: Lock ww in ucode objects correctly")
c05258889ed4 ("drm/i915: Add igt_spinner_pin() to allow for ww locking around spinner.")
6895649bf13f ("drm/i915/selftests: Set error returns")
a0d3fdb628b8 ("drm/i915/gt: Split logical ring contexts from execlist submission")
d0d829e56674 ("drm/i915: split gen8+ flush and bb_start emission functions")
70a2b431c364 ("drm/i915/gt: Rename lrc.c to execlists_submission.c")
d33fcd798cb7 ("drm/i915/gt: Ignore dt==0 for reporting underflows")
09212e81e545 ("drm/i915/gt: Flush xcs before tgl breadcrumbs")
c10f6019d0b2 ("drm/i915/gt: Use the local HWSP offset during submission")
89db95377be4 ("drm/i915/gt: Confirm the context survives execution")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 85636167e3206c3fbd52254fc432991cc4e90194 Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison(a)Intel.com>
Date: Wed, 15 Feb 2023 17:11:01 -0800
Subject: [PATCH] drm/i915: Don't use BAR mappings for ring buffers with LLC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Direction from hardware is that ring buffers should never be mapped
via the BAR on systems with LLC. There are too many caching pitfalls
due to the way BAR accesses are routed. So it is safest to just not
use it.
Signed-off-by: John Harrison <John.C.Harrison(a)Intel.com>
Fixes: 9d80841ea4c9 ("drm/i915: Allow ringbuffers to be bound anywhere")
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: Jani Nikula <jani.nikula(a)linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Cc: intel-gfx(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.9+
Tested-by: Jouni Högander <jouni.hogander(a)intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230216011101.1909009-3-John…
(cherry picked from commit 65c08339db1ada87afd6cfe7db8e60bb4851d919)
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index fb1d2595392e..fb99143be98e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -53,7 +53,7 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
if (unlikely(ret))
goto err_unpin;
- if (i915_vma_is_map_and_fenceable(vma)) {
+ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
addr = (void __force *)i915_vma_pin_iomap(vma);
} else {
int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
@@ -98,7 +98,7 @@ void intel_ring_unpin(struct intel_ring *ring)
return;
i915_vma_unset_ggtt_write(vma);
- if (i915_vma_is_map_and_fenceable(vma))
+ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820515930221(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
046eca5018f8 ("vfio/type1: prevent underflow of locked_vm via exec()")
4ab4fcfce5b5 ("vfio/type1: fix vaddr_get_pfns() return in vfio_pin_page_external()")
4b6c33b32296 ("vfio/type1: Prepare for batched pinning with struct vfio_batch")
be16c1fd99f4 ("vfio/type1: Change success value of vaddr_get_pfn()")
aae7a75a821a ("vfio/type1: Add proper error unwind for vfio_iommu_replay()")
64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
bce617edecad ("mm: do page fault accounting in handle_mm_fault")
ed03d924587e ("mm/gup: use a standard migration target allocation callback")
bbe88753bd42 ("mm/hugetlb: make hugetlb migration callback CMA aware")
41b4dc14ee80 ("mm/gup: restrict CMA region by using allocation scope API")
19fc7bed252c ("mm/migrate: introduce a standard migration target allocation function")
d92bbc2719bd ("mm/hugetlb: unify migration callbacks")
b4b382238ed2 ("mm/migrate: move migration helper from .h to .c")
c7073bab5772 ("mm/page_isolation: prefer the node of the source page")
3e4e28c5a8f0 ("mmap locking API: convert mmap_sem API comments")
d8ed45c5dcd4 ("mmap locking API: use coccinelle to convert mmap_sem rwsem call sites")
ca5999fde0a1 ("mm: introduce include/linux/pgtable.h")
420c2091b65a ("mm/gup: introduce pin_user_pages_locked()")
5a36f0f3f518 ("Merge tag 'vfio-v5.8-rc1' of git://github.com/awilliam/linux-vfio")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 Mon Sep 17 00:00:00 2001
From: Steve Sistare <steven.sistare(a)oracle.com>
Date: Tue, 31 Jan 2023 08:58:04 -0800
Subject: [PATCH] vfio/type1: prevent underflow of locked_vm via exec()
When a vfio container is preserved across exec, the task does not change,
but it gets a new mm with locked_vm=0, and loses the count from existing
dma mappings. If the user later unmaps a dma mapping, locked_vm underflows
to a large unsigned value, and a subsequent dma map request fails with
ENOMEM in __account_locked_vm.
To avoid underflow, grab and save the mm at the time a dma is mapped.
Use that mm when adjusting locked_vm, rather than re-acquiring the saved
task's mm, which may have changed. If the saved mm is dead, do nothing.
locked_vm is incremented for existing mappings in a subsequent patch.
Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Steve Sistare <steven.sistare(a)oracle.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Link: https://lore.kernel.org/r/1675184289-267876-3-git-send-email-steven.sistare…
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 144f5bb20fb8..6b757d035457 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -100,6 +100,7 @@ struct vfio_dma {
struct task_struct *task;
struct rb_root pfn_list; /* Ex-user pinned pfn list */
unsigned long *bitmap;
+ struct mm_struct *mm;
};
struct vfio_batch {
@@ -420,8 +421,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
if (!npage)
return 0;
- mm = async ? get_task_mm(dma->task) : dma->task->mm;
- if (!mm)
+ mm = dma->mm;
+ if (async && !mmget_not_zero(mm))
return -ESRCH; /* process exited */
ret = mmap_write_lock_killable(mm);
@@ -794,8 +795,8 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
struct mm_struct *mm;
int ret;
- mm = get_task_mm(dma->task);
- if (!mm)
+ mm = dma->mm;
+ if (!mmget_not_zero(mm))
return -ENODEV;
ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
@@ -805,7 +806,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
ret = 0;
if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
- ret = vfio_lock_acct(dma, 1, true);
+ ret = vfio_lock_acct(dma, 1, false);
if (ret) {
put_pfn(*pfn_base, dma->prot);
if (ret == -ENOMEM)
@@ -1180,6 +1181,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
vfio_unmap_unpin(iommu, dma, true);
vfio_unlink_dma(iommu, dma);
put_task_struct(dma->task);
+ mmdrop(dma->mm);
vfio_dma_bitmap_free(dma);
if (dma->vaddr_invalid) {
iommu->vaddr_invalid_count--;
@@ -1664,29 +1666,15 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
* against the locked memory limit and we need to be able to do both
* outside of this call path as pinning can be asynchronous via the
* external interfaces for mdev devices. RLIMIT_MEMLOCK requires a
- * task_struct and VM locked pages requires an mm_struct, however
- * holding an indefinite mm reference is not recommended, therefore we
- * only hold a reference to a task. We could hold a reference to
- * current, however QEMU uses this call path through vCPU threads,
- * which can be killed resulting in a NULL mm and failure in the unmap
- * path when called via a different thread. Avoid this problem by
- * using the group_leader as threads within the same group require
- * both CLONE_THREAD and CLONE_VM and will therefore use the same
- * mm_struct.
- *
- * Previously we also used the task for testing CAP_IPC_LOCK at the
- * time of pinning and accounting, however has_capability() makes use
- * of real_cred, a copy-on-write field, so we can't guarantee that it
- * matches group_leader, or in fact that it might not change by the
- * time it's evaluated. If a process were to call MAP_DMA with
- * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
- * possibly see different results for an iommu_mapped vfio_dma vs
- * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the
- * time of calling MAP_DMA.
+ * task_struct. Save the group_leader so that all DMA tracking uses
+ * the same task, to make debugging easier. VM locked pages requires
+ * an mm_struct, so grab the mm in case the task dies.
*/
get_task_struct(current->group_leader);
dma->task = current->group_leader;
dma->lock_cap = capable(CAP_IPC_LOCK);
+ dma->mm = current->mm;
+ mmgrab(dma->mm);
dma->pfn_list = RB_ROOT;
@@ -3122,9 +3110,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
!(dma->prot & IOMMU_READ))
return -EPERM;
- mm = get_task_mm(dma->task);
-
- if (!mm)
+ mm = dma->mm;
+ if (!mmget_not_zero(mm))
return -EPERM;
if (kthread)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820515320650(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
046eca5018f8 ("vfio/type1: prevent underflow of locked_vm via exec()")
4ab4fcfce5b5 ("vfio/type1: fix vaddr_get_pfns() return in vfio_pin_page_external()")
4b6c33b32296 ("vfio/type1: Prepare for batched pinning with struct vfio_batch")
be16c1fd99f4 ("vfio/type1: Change success value of vaddr_get_pfn()")
aae7a75a821a ("vfio/type1: Add proper error unwind for vfio_iommu_replay()")
64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
bce617edecad ("mm: do page fault accounting in handle_mm_fault")
ed03d924587e ("mm/gup: use a standard migration target allocation callback")
bbe88753bd42 ("mm/hugetlb: make hugetlb migration callback CMA aware")
41b4dc14ee80 ("mm/gup: restrict CMA region by using allocation scope API")
19fc7bed252c ("mm/migrate: introduce a standard migration target allocation function")
d92bbc2719bd ("mm/hugetlb: unify migration callbacks")
b4b382238ed2 ("mm/migrate: move migration helper from .h to .c")
c7073bab5772 ("mm/page_isolation: prefer the node of the source page")
3e4e28c5a8f0 ("mmap locking API: convert mmap_sem API comments")
d8ed45c5dcd4 ("mmap locking API: use coccinelle to convert mmap_sem rwsem call sites")
ca5999fde0a1 ("mm: introduce include/linux/pgtable.h")
420c2091b65a ("mm/gup: introduce pin_user_pages_locked()")
5a36f0f3f518 ("Merge tag 'vfio-v5.8-rc1' of git://github.com/awilliam/linux-vfio")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 Mon Sep 17 00:00:00 2001
From: Steve Sistare <steven.sistare(a)oracle.com>
Date: Tue, 31 Jan 2023 08:58:04 -0800
Subject: [PATCH] vfio/type1: prevent underflow of locked_vm via exec()
When a vfio container is preserved across exec, the task does not change,
but it gets a new mm with locked_vm=0, and loses the count from existing
dma mappings. If the user later unmaps a dma mapping, locked_vm underflows
to a large unsigned value, and a subsequent dma map request fails with
ENOMEM in __account_locked_vm.
To avoid underflow, grab and save the mm at the time a dma is mapped.
Use that mm when adjusting locked_vm, rather than re-acquiring the saved
task's mm, which may have changed. If the saved mm is dead, do nothing.
locked_vm is incremented for existing mappings in a subsequent patch.
Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Steve Sistare <steven.sistare(a)oracle.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Link: https://lore.kernel.org/r/1675184289-267876-3-git-send-email-steven.sistare…
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 144f5bb20fb8..6b757d035457 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -100,6 +100,7 @@ struct vfio_dma {
struct task_struct *task;
struct rb_root pfn_list; /* Ex-user pinned pfn list */
unsigned long *bitmap;
+ struct mm_struct *mm;
};
struct vfio_batch {
@@ -420,8 +421,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
if (!npage)
return 0;
- mm = async ? get_task_mm(dma->task) : dma->task->mm;
- if (!mm)
+ mm = dma->mm;
+ if (async && !mmget_not_zero(mm))
return -ESRCH; /* process exited */
ret = mmap_write_lock_killable(mm);
@@ -794,8 +795,8 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
struct mm_struct *mm;
int ret;
- mm = get_task_mm(dma->task);
- if (!mm)
+ mm = dma->mm;
+ if (!mmget_not_zero(mm))
return -ENODEV;
ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
@@ -805,7 +806,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
ret = 0;
if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
- ret = vfio_lock_acct(dma, 1, true);
+ ret = vfio_lock_acct(dma, 1, false);
if (ret) {
put_pfn(*pfn_base, dma->prot);
if (ret == -ENOMEM)
@@ -1180,6 +1181,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
vfio_unmap_unpin(iommu, dma, true);
vfio_unlink_dma(iommu, dma);
put_task_struct(dma->task);
+ mmdrop(dma->mm);
vfio_dma_bitmap_free(dma);
if (dma->vaddr_invalid) {
iommu->vaddr_invalid_count--;
@@ -1664,29 +1666,15 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
* against the locked memory limit and we need to be able to do both
* outside of this call path as pinning can be asynchronous via the
* external interfaces for mdev devices. RLIMIT_MEMLOCK requires a
- * task_struct and VM locked pages requires an mm_struct, however
- * holding an indefinite mm reference is not recommended, therefore we
- * only hold a reference to a task. We could hold a reference to
- * current, however QEMU uses this call path through vCPU threads,
- * which can be killed resulting in a NULL mm and failure in the unmap
- * path when called via a different thread. Avoid this problem by
- * using the group_leader as threads within the same group require
- * both CLONE_THREAD and CLONE_VM and will therefore use the same
- * mm_struct.
- *
- * Previously we also used the task for testing CAP_IPC_LOCK at the
- * time of pinning and accounting, however has_capability() makes use
- * of real_cred, a copy-on-write field, so we can't guarantee that it
- * matches group_leader, or in fact that it might not change by the
- * time it's evaluated. If a process were to call MAP_DMA with
- * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
- * possibly see different results for an iommu_mapped vfio_dma vs
- * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the
- * time of calling MAP_DMA.
+ * task_struct. Save the group_leader so that all DMA tracking uses
+ * the same task, to make debugging easier. VM locked pages requires
+ * an mm_struct, so grab the mm in case the task dies.
*/
get_task_struct(current->group_leader);
dma->task = current->group_leader;
dma->lock_cap = capable(CAP_IPC_LOCK);
+ dma->mm = current->mm;
+ mmgrab(dma->mm);
dma->pfn_list = RB_ROOT;
@@ -3122,9 +3110,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
!(dma->prot & IOMMU_READ))
return -EPERM;
- mm = get_task_mm(dma->task);
-
- if (!mm)
+ mm = dma->mm;
+ if (!mmget_not_zero(mm))
return -EPERM;
if (kthread)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820515210204(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
046eca5018f8 ("vfio/type1: prevent underflow of locked_vm via exec()")
4ab4fcfce5b5 ("vfio/type1: fix vaddr_get_pfns() return in vfio_pin_page_external()")
4b6c33b32296 ("vfio/type1: Prepare for batched pinning with struct vfio_batch")
be16c1fd99f4 ("vfio/type1: Change success value of vaddr_get_pfn()")
aae7a75a821a ("vfio/type1: Add proper error unwind for vfio_iommu_replay()")
64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
bce617edecad ("mm: do page fault accounting in handle_mm_fault")
ed03d924587e ("mm/gup: use a standard migration target allocation callback")
bbe88753bd42 ("mm/hugetlb: make hugetlb migration callback CMA aware")
41b4dc14ee80 ("mm/gup: restrict CMA region by using allocation scope API")
19fc7bed252c ("mm/migrate: introduce a standard migration target allocation function")
d92bbc2719bd ("mm/hugetlb: unify migration callbacks")
b4b382238ed2 ("mm/migrate: move migration helper from .h to .c")
c7073bab5772 ("mm/page_isolation: prefer the node of the source page")
3e4e28c5a8f0 ("mmap locking API: convert mmap_sem API comments")
d8ed45c5dcd4 ("mmap locking API: use coccinelle to convert mmap_sem rwsem call sites")
ca5999fde0a1 ("mm: introduce include/linux/pgtable.h")
420c2091b65a ("mm/gup: introduce pin_user_pages_locked()")
5a36f0f3f518 ("Merge tag 'vfio-v5.8-rc1' of git://github.com/awilliam/linux-vfio")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 Mon Sep 17 00:00:00 2001
From: Steve Sistare <steven.sistare(a)oracle.com>
Date: Tue, 31 Jan 2023 08:58:04 -0800
Subject: [PATCH] vfio/type1: prevent underflow of locked_vm via exec()
When a vfio container is preserved across exec, the task does not change,
but it gets a new mm with locked_vm=0, and loses the count from existing
dma mappings. If the user later unmaps a dma mapping, locked_vm underflows
to a large unsigned value, and a subsequent dma map request fails with
ENOMEM in __account_locked_vm.
To avoid underflow, grab and save the mm at the time a dma is mapped.
Use that mm when adjusting locked_vm, rather than re-acquiring the saved
task's mm, which may have changed. If the saved mm is dead, do nothing.
locked_vm is incremented for existing mappings in a subsequent patch.
Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Steve Sistare <steven.sistare(a)oracle.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Link: https://lore.kernel.org/r/1675184289-267876-3-git-send-email-steven.sistare…
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 144f5bb20fb8..6b757d035457 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -100,6 +100,7 @@ struct vfio_dma {
struct task_struct *task;
struct rb_root pfn_list; /* Ex-user pinned pfn list */
unsigned long *bitmap;
+ struct mm_struct *mm;
};
struct vfio_batch {
@@ -420,8 +421,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
if (!npage)
return 0;
- mm = async ? get_task_mm(dma->task) : dma->task->mm;
- if (!mm)
+ mm = dma->mm;
+ if (async && !mmget_not_zero(mm))
return -ESRCH; /* process exited */
ret = mmap_write_lock_killable(mm);
@@ -794,8 +795,8 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
struct mm_struct *mm;
int ret;
- mm = get_task_mm(dma->task);
- if (!mm)
+ mm = dma->mm;
+ if (!mmget_not_zero(mm))
return -ENODEV;
ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
@@ -805,7 +806,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
ret = 0;
if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
- ret = vfio_lock_acct(dma, 1, true);
+ ret = vfio_lock_acct(dma, 1, false);
if (ret) {
put_pfn(*pfn_base, dma->prot);
if (ret == -ENOMEM)
@@ -1180,6 +1181,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
vfio_unmap_unpin(iommu, dma, true);
vfio_unlink_dma(iommu, dma);
put_task_struct(dma->task);
+ mmdrop(dma->mm);
vfio_dma_bitmap_free(dma);
if (dma->vaddr_invalid) {
iommu->vaddr_invalid_count--;
@@ -1664,29 +1666,15 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
* against the locked memory limit and we need to be able to do both
* outside of this call path as pinning can be asynchronous via the
* external interfaces for mdev devices. RLIMIT_MEMLOCK requires a
- * task_struct and VM locked pages requires an mm_struct, however
- * holding an indefinite mm reference is not recommended, therefore we
- * only hold a reference to a task. We could hold a reference to
- * current, however QEMU uses this call path through vCPU threads,
- * which can be killed resulting in a NULL mm and failure in the unmap
- * path when called via a different thread. Avoid this problem by
- * using the group_leader as threads within the same group require
- * both CLONE_THREAD and CLONE_VM and will therefore use the same
- * mm_struct.
- *
- * Previously we also used the task for testing CAP_IPC_LOCK at the
- * time of pinning and accounting, however has_capability() makes use
- * of real_cred, a copy-on-write field, so we can't guarantee that it
- * matches group_leader, or in fact that it might not change by the
- * time it's evaluated. If a process were to call MAP_DMA with
- * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
- * possibly see different results for an iommu_mapped vfio_dma vs
- * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the
- * time of calling MAP_DMA.
+ * task_struct. Save the group_leader so that all DMA tracking uses
+ * the same task, to make debugging easier. VM locked pages requires
+ * an mm_struct, so grab the mm in case the task dies.
*/
get_task_struct(current->group_leader);
dma->task = current->group_leader;
dma->lock_cap = capable(CAP_IPC_LOCK);
+ dma->mm = current->mm;
+ mmgrab(dma->mm);
dma->pfn_list = RB_ROOT;
@@ -3122,9 +3110,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
!(dma->prot & IOMMU_READ))
return -EPERM;
- mm = get_task_mm(dma->task);
-
- if (!mm)
+ mm = dma->mm;
+ if (!mmget_not_zero(mm))
return -EPERM;
if (kthread)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x ef3a3f6a294ba65fd906a291553935881796f8a5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678205132164221(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
ef3a3f6a294b ("vfio/type1: exclude mdevs from VFIO_UPDATE_VADDR")
296e505baddf ("vfio/iommu_type1: remove the "external" domain")
65cdbf106337 ("vfio/iommu_type1: initialize pgsize_bitmap in ->open")
c3c0fa9d94f7 ("vfio: clean up the check for mediated device in vfio_iommu_type1")
fda49d97f2c4 ("vfio: remove the unused mdev iommu hook")
8cc02d22d7e1 ("vfio: move the vfio_iommu_driver_ops interface out of <linux/vfio.h>")
67462037872d ("vfio: remove unused method from vfio_iommu_driver_ops")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ef3a3f6a294ba65fd906a291553935881796f8a5 Mon Sep 17 00:00:00 2001
From: Steve Sistare <steven.sistare(a)oracle.com>
Date: Tue, 31 Jan 2023 08:58:03 -0800
Subject: [PATCH] vfio/type1: exclude mdevs from VFIO_UPDATE_VADDR
Disable the VFIO_UPDATE_VADDR capability if mediated devices are present.
Their kernel threads could be blocked indefinitely by a misbehaving
userland while trying to pin/unpin pages while vaddrs are being updated.
Do not allow groups to be added to the container while vaddr's are invalid,
so we never need to block user threads from pinning, and can delete the
vaddr-waiting code in a subsequent patch.
Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
Cc: stable(a)vger.kernel.org
Signed-off-by: Steve Sistare <steven.sistare(a)oracle.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Link: https://lore.kernel.org/r/1675184289-267876-2-git-send-email-steven.sistare…
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 23c24fe98c00..144f5bb20fb8 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -861,6 +861,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
mutex_lock(&iommu->lock);
+ if (WARN_ONCE(iommu->vaddr_invalid_count,
+ "vfio_pin_pages not allowed with VFIO_UPDATE_VADDR\n")) {
+ ret = -EBUSY;
+ goto pin_done;
+ }
+
/*
* Wait for all necessary vaddr's to be valid so they can be used in
* the main loop without dropping the lock, to avoid racing vs unmap.
@@ -1343,6 +1349,12 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
mutex_lock(&iommu->lock);
+ /* Cannot update vaddr if mdev is present. */
+ if (invalidate_vaddr && !list_empty(&iommu->emulated_iommu_groups)) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
pgshift = __ffs(iommu->pgsize_bitmap);
pgsize = (size_t)1 << pgshift;
@@ -2185,11 +2197,16 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
struct iommu_domain_geometry *geo;
LIST_HEAD(iova_copy);
LIST_HEAD(group_resv_regions);
- int ret = -EINVAL;
+ int ret = -EBUSY;
mutex_lock(&iommu->lock);
+ /* Attach could require pinning, so disallow while vaddr is invalid. */
+ if (iommu->vaddr_invalid_count)
+ goto out_unlock;
+
/* Check for duplicates */
+ ret = -EINVAL;
if (vfio_iommu_find_iommu_group(iommu, iommu_group))
goto out_unlock;
@@ -2660,6 +2677,16 @@ static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu)
return ret;
}
+static bool vfio_iommu_has_emulated(struct vfio_iommu *iommu)
+{
+ bool ret;
+
+ mutex_lock(&iommu->lock);
+ ret = !list_empty(&iommu->emulated_iommu_groups);
+ mutex_unlock(&iommu->lock);
+ return ret;
+}
+
static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
unsigned long arg)
{
@@ -2668,8 +2695,13 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_NESTING_IOMMU:
case VFIO_UNMAP_ALL:
- case VFIO_UPDATE_VADDR:
return 1;
+ case VFIO_UPDATE_VADDR:
+ /*
+ * Disable this feature if mdevs are present. They cannot
+ * safely pin/unpin/rw while vaddrs are being updated.
+ */
+ return iommu && !vfio_iommu_has_emulated(iommu);
case VFIO_DMA_CC_IOMMU:
if (!iommu)
return 0;
@@ -3138,6 +3170,13 @@ static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova,
size_t done;
mutex_lock(&iommu->lock);
+
+ if (WARN_ONCE(iommu->vaddr_invalid_count,
+ "vfio_dma_rw not allowed with VFIO_UPDATE_VADDR\n")) {
+ ret = -EBUSY;
+ goto out;
+ }
+
while (count > 0) {
ret = vfio_iommu_type1_dma_rw_chunk(iommu, user_iova, data,
count, write, &done);
@@ -3149,6 +3188,7 @@ static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova,
user_iova += done;
}
+out:
mutex_unlock(&iommu->lock);
return ret;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 23105eb036fa..0552e8dcf0cb 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -49,7 +49,11 @@
/* Supports VFIO_DMA_UNMAP_FLAG_ALL */
#define VFIO_UNMAP_ALL 9
-/* Supports the vaddr flag for DMA map and unmap */
+/*
+ * Supports the vaddr flag for DMA map and unmap. Not supported for mediated
+ * devices, so this capability is subject to change as groups are added or
+ * removed.
+ */
#define VFIO_UPDATE_VADDR 10
/*
@@ -1343,8 +1347,7 @@ struct vfio_iommu_type1_info_dma_avail {
* Map process virtual addresses to IO virtual addresses using the
* provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
*
- * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
- * unblock translation of host virtual addresses in the iova range. The vaddr
+ * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova. The vaddr
* must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To
* maintain memory consistency within the user application, the updated vaddr
* must address the same memory object as originally mapped. Failure to do so
@@ -1395,9 +1398,9 @@ struct vfio_bitmap {
* must be 0. This cannot be combined with the get-dirty-bitmap flag.
*
* If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
- * virtual addresses in the iova range. Tasks that attempt to translate an
- * iova's vaddr will block. DMA to already-mapped pages continues. This
- * cannot be combined with the get-dirty-bitmap flag.
+ * virtual addresses in the iova range. DMA to already-mapped pages continues.
+ * Groups may not be added to the container while any addresses are invalid.
+ * This cannot be combined with the get-dirty-bitmap flag.
*/
struct vfio_iommu_type1_dma_unmap {
__u32 argsz;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 194b3348bdbb7db65375c72f3f774aee4cc6614e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820509210430(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency")
803766cbf85f ("iommu/vt-d: Fix lockdep splat in intel_pasid_get_entry()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 194b3348bdbb7db65375c72f3f774aee4cc6614e Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Date: Thu, 16 Feb 2023 21:08:15 +0800
Subject: [PATCH] iommu/vt-d: Fix PASID directory pointer coherency
On platforms that do not support IOMMU Extended capability bit 0
Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing
any translation structures. IOMMU access goes only directly to
memory. Intel IOMMU code was missing a flush for the PASID table
directory that resulted in the unrecoverable fault as shown below.
This patch adds clflush calls whenever allocating and updating
a PASID table directory to ensure cache coherency.
On the reverse direction, there's no need to clflush the PASID directory
pointer when we deactivate a context entry in that IOMMU hardware will
not see the old PASID directory pointer after we clear the context entry.
PASID directory entries are also never freed once allocated.
DMAR: DRHD: handling fault status reg 3
DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
[fault reason 0x51] SM: Present bit in Directory Entry is clear
DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
DMAR: pasid dir entry: 0x0000000101b4e001
DMAR: pasid table entry[0]: 0x0000000000000109
DMAR: pasid table entry[1]: 0x0000000000000001
DMAR: pasid table entry[2]: 0x0000000000000000
DMAR: pasid table entry[3]: 0x0000000000000000
DMAR: pasid table entry[4]: 0x0000000000000000
DMAR: pasid table entry[5]: 0x0000000000000000
DMAR: pasid table entry[6]: 0x0000000000000000
DMAR: pasid table entry[7]: 0x0000000000000000
DMAR: PTE not present at level 4
Cc: <stable(a)vger.kernel.org>
Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reported-by: Sukumar Ghorai <sukumar.ghorai(a)intel.com>
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.inte…
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index ec964ac7d797..9d2f05cf6164 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct device *dev)
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
+ if (!ecap_coherent(info->iommu->ecap))
+ clflush_cache_range(pasid_table->table, size);
+
return 0;
}
@@ -215,6 +218,10 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
free_pgtable_page(entries);
goto retry;
}
+ if (!ecap_coherent(info->iommu->ecap)) {
+ clflush_cache_range(entries, VTD_PAGE_SIZE);
+ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
+ }
}
return &entries[index];
The patch below does not apply to the 6.2-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.2.y
git checkout FETCH_HEAD
git cherry-pick -x 1ddc7618294084fff8d673217a9479550990ee84
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820500825397(a)kroah.com' --subject-prefix 'PATCH 6.2.y' HEAD^..
Possible dependencies:
1ddc76182940 ("bus: mhi: ep: Change state_lock to mutex")
47a1dcaea073 ("bus: mhi: ep: Power up/down MHI stack during MHI RESET")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ddc7618294084fff8d673217a9479550990ee84 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <mani(a)kernel.org>
Date: Mon, 23 Jan 2023 12:59:45 +0530
Subject: [PATCH] bus: mhi: ep: Change state_lock to mutex
state_lock, the spinlock type is meant to protect race against concurrent
MHI state transitions. In mhi_ep_set_m0_state(), while the state_lock is
being held, the channels are resumed in mhi_ep_resume_channels() if the
previous state was M3. This causes sleeping in atomic bug, since
mhi_ep_resume_channels() use mutex internally.
Since the state_lock is supposed to be held throughout the state change,
it is not ideal to drop the lock before calling mhi_ep_resume_channels().
So to fix this issue, let's change the type of state_lock to mutex. This
would also allow holding the lock throughout all state transitions thereby
avoiding any potential race.
Cc: <stable(a)vger.kernel.org> # 5.19
Fixes: e4b7b5f0f30a ("bus: mhi: ep: Add support for suspending and resuming channels")
Reported-by: Dan Carpenter <error27(a)gmail.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo(a)quicinc.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
diff --git a/drivers/bus/mhi/ep/main.c b/drivers/bus/mhi/ep/main.c
index bcaaba97ef63..528c00b232bf 100644
--- a/drivers/bus/mhi/ep/main.c
+++ b/drivers/bus/mhi/ep/main.c
@@ -1001,11 +1001,11 @@ static void mhi_ep_reset_worker(struct work_struct *work)
mhi_ep_power_down(mhi_cntrl);
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
/* Reset MMIO to signal host that the MHI_RESET is completed in endpoint */
mhi_ep_mmio_reset(mhi_cntrl);
cur_state = mhi_cntrl->mhi_state;
- spin_unlock_bh(&mhi_cntrl->state_lock);
/*
* Only proceed further if the reset is due to SYS_ERR. The host will
@@ -1014,6 +1014,8 @@ static void mhi_ep_reset_worker(struct work_struct *work)
*/
if (cur_state == MHI_STATE_SYS_ERR)
mhi_ep_power_up(mhi_cntrl);
+
+ mutex_unlock(&mhi_cntrl->state_lock);
}
/*
@@ -1386,8 +1388,8 @@ int mhi_ep_register_controller(struct mhi_ep_cntrl *mhi_cntrl,
INIT_LIST_HEAD(&mhi_cntrl->st_transition_list);
INIT_LIST_HEAD(&mhi_cntrl->ch_db_list);
- spin_lock_init(&mhi_cntrl->state_lock);
spin_lock_init(&mhi_cntrl->list_lock);
+ mutex_init(&mhi_cntrl->state_lock);
mutex_init(&mhi_cntrl->event_lock);
/* Set MHI version and AMSS EE before enumeration */
diff --git a/drivers/bus/mhi/ep/sm.c b/drivers/bus/mhi/ep/sm.c
index 3655c19e23c7..fd200b2ac0bb 100644
--- a/drivers/bus/mhi/ep/sm.c
+++ b/drivers/bus/mhi/ep/sm.c
@@ -63,24 +63,23 @@ int mhi_ep_set_m0_state(struct mhi_ep_cntrl *mhi_cntrl)
int ret;
/* If MHI is in M3, resume suspended channels */
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
old_state = mhi_cntrl->mhi_state;
if (old_state == MHI_STATE_M3)
mhi_ep_resume_channels(mhi_cntrl);
ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M0);
- spin_unlock_bh(&mhi_cntrl->state_lock);
-
if (ret) {
mhi_ep_handle_syserr(mhi_cntrl);
- return ret;
+ goto err_unlock;
}
/* Signal host that the device moved to M0 */
ret = mhi_ep_send_state_change_event(mhi_cntrl, MHI_STATE_M0);
if (ret) {
dev_err(dev, "Failed sending M0 state change event\n");
- return ret;
+ goto err_unlock;
}
if (old_state == MHI_STATE_READY) {
@@ -88,11 +87,14 @@ int mhi_ep_set_m0_state(struct mhi_ep_cntrl *mhi_cntrl)
ret = mhi_ep_send_ee_event(mhi_cntrl, MHI_EE_AMSS);
if (ret) {
dev_err(dev, "Failed sending AMSS EE event\n");
- return ret;
+ goto err_unlock;
}
}
- return 0;
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
+ return ret;
}
int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
@@ -100,13 +102,12 @@ int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
struct device *dev = &mhi_cntrl->mhi_dev->dev;
int ret;
- spin_lock_bh(&mhi_cntrl->state_lock);
- ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M3);
- spin_unlock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+ ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M3);
if (ret) {
mhi_ep_handle_syserr(mhi_cntrl);
- return ret;
+ goto err_unlock;
}
mhi_ep_suspend_channels(mhi_cntrl);
@@ -115,10 +116,13 @@ int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
ret = mhi_ep_send_state_change_event(mhi_cntrl, MHI_STATE_M3);
if (ret) {
dev_err(dev, "Failed sending M3 state change event\n");
- return ret;
+ goto err_unlock;
}
- return 0;
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
+ return ret;
}
int mhi_ep_set_ready_state(struct mhi_ep_cntrl *mhi_cntrl)
@@ -127,22 +131,24 @@ int mhi_ep_set_ready_state(struct mhi_ep_cntrl *mhi_cntrl)
enum mhi_state mhi_state;
int ret, is_ready;
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
/* Ensure that the MHISTATUS is set to RESET by host */
mhi_state = mhi_ep_mmio_masked_read(mhi_cntrl, EP_MHISTATUS, MHISTATUS_MHISTATE_MASK);
is_ready = mhi_ep_mmio_masked_read(mhi_cntrl, EP_MHISTATUS, MHISTATUS_READY_MASK);
if (mhi_state != MHI_STATE_RESET || is_ready) {
dev_err(dev, "READY state transition failed. MHI host not in RESET state\n");
- spin_unlock_bh(&mhi_cntrl->state_lock);
- return -EIO;
+ ret = -EIO;
+ goto err_unlock;
}
ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_READY);
- spin_unlock_bh(&mhi_cntrl->state_lock);
-
if (ret)
mhi_ep_handle_syserr(mhi_cntrl);
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
return ret;
}
diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h
index 478aece17046..f198a8ac7ee7 100644
--- a/include/linux/mhi_ep.h
+++ b/include/linux/mhi_ep.h
@@ -70,8 +70,8 @@ struct mhi_ep_db_info {
* @cmd_ctx_cache_phys: Physical address of the host command context cache
* @chdb: Array of channel doorbell interrupt info
* @event_lock: Lock for protecting event rings
- * @list_lock: Lock for protecting state transition and channel doorbell lists
* @state_lock: Lock for protecting state transitions
+ * @list_lock: Lock for protecting state transition and channel doorbell lists
* @st_transition_list: List of state transitions
* @ch_db_list: List of queued channel doorbells
* @wq: Dedicated workqueue for handling rings and state changes
@@ -117,8 +117,8 @@ struct mhi_ep_cntrl {
struct mhi_ep_db_info chdb[4];
struct mutex event_lock;
+ struct mutex state_lock;
spinlock_t list_lock;
- spinlock_t state_lock;
struct list_head st_transition_list;
struct list_head ch_db_list;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1ddc7618294084fff8d673217a9479550990ee84
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820500829207(a)kroah.com' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
1ddc76182940 ("bus: mhi: ep: Change state_lock to mutex")
47a1dcaea073 ("bus: mhi: ep: Power up/down MHI stack during MHI RESET")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ddc7618294084fff8d673217a9479550990ee84 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <mani(a)kernel.org>
Date: Mon, 23 Jan 2023 12:59:45 +0530
Subject: [PATCH] bus: mhi: ep: Change state_lock to mutex
state_lock, the spinlock type is meant to protect race against concurrent
MHI state transitions. In mhi_ep_set_m0_state(), while the state_lock is
being held, the channels are resumed in mhi_ep_resume_channels() if the
previous state was M3. This causes sleeping in atomic bug, since
mhi_ep_resume_channels() use mutex internally.
Since the state_lock is supposed to be held throughout the state change,
it is not ideal to drop the lock before calling mhi_ep_resume_channels().
So to fix this issue, let's change the type of state_lock to mutex. This
would also allow holding the lock throughout all state transitions thereby
avoiding any potential race.
Cc: <stable(a)vger.kernel.org> # 5.19
Fixes: e4b7b5f0f30a ("bus: mhi: ep: Add support for suspending and resuming channels")
Reported-by: Dan Carpenter <error27(a)gmail.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo(a)quicinc.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
diff --git a/drivers/bus/mhi/ep/main.c b/drivers/bus/mhi/ep/main.c
index bcaaba97ef63..528c00b232bf 100644
--- a/drivers/bus/mhi/ep/main.c
+++ b/drivers/bus/mhi/ep/main.c
@@ -1001,11 +1001,11 @@ static void mhi_ep_reset_worker(struct work_struct *work)
mhi_ep_power_down(mhi_cntrl);
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
/* Reset MMIO to signal host that the MHI_RESET is completed in endpoint */
mhi_ep_mmio_reset(mhi_cntrl);
cur_state = mhi_cntrl->mhi_state;
- spin_unlock_bh(&mhi_cntrl->state_lock);
/*
* Only proceed further if the reset is due to SYS_ERR. The host will
@@ -1014,6 +1014,8 @@ static void mhi_ep_reset_worker(struct work_struct *work)
*/
if (cur_state == MHI_STATE_SYS_ERR)
mhi_ep_power_up(mhi_cntrl);
+
+ mutex_unlock(&mhi_cntrl->state_lock);
}
/*
@@ -1386,8 +1388,8 @@ int mhi_ep_register_controller(struct mhi_ep_cntrl *mhi_cntrl,
INIT_LIST_HEAD(&mhi_cntrl->st_transition_list);
INIT_LIST_HEAD(&mhi_cntrl->ch_db_list);
- spin_lock_init(&mhi_cntrl->state_lock);
spin_lock_init(&mhi_cntrl->list_lock);
+ mutex_init(&mhi_cntrl->state_lock);
mutex_init(&mhi_cntrl->event_lock);
/* Set MHI version and AMSS EE before enumeration */
diff --git a/drivers/bus/mhi/ep/sm.c b/drivers/bus/mhi/ep/sm.c
index 3655c19e23c7..fd200b2ac0bb 100644
--- a/drivers/bus/mhi/ep/sm.c
+++ b/drivers/bus/mhi/ep/sm.c
@@ -63,24 +63,23 @@ int mhi_ep_set_m0_state(struct mhi_ep_cntrl *mhi_cntrl)
int ret;
/* If MHI is in M3, resume suspended channels */
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
old_state = mhi_cntrl->mhi_state;
if (old_state == MHI_STATE_M3)
mhi_ep_resume_channels(mhi_cntrl);
ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M0);
- spin_unlock_bh(&mhi_cntrl->state_lock);
-
if (ret) {
mhi_ep_handle_syserr(mhi_cntrl);
- return ret;
+ goto err_unlock;
}
/* Signal host that the device moved to M0 */
ret = mhi_ep_send_state_change_event(mhi_cntrl, MHI_STATE_M0);
if (ret) {
dev_err(dev, "Failed sending M0 state change event\n");
- return ret;
+ goto err_unlock;
}
if (old_state == MHI_STATE_READY) {
@@ -88,11 +87,14 @@ int mhi_ep_set_m0_state(struct mhi_ep_cntrl *mhi_cntrl)
ret = mhi_ep_send_ee_event(mhi_cntrl, MHI_EE_AMSS);
if (ret) {
dev_err(dev, "Failed sending AMSS EE event\n");
- return ret;
+ goto err_unlock;
}
}
- return 0;
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
+ return ret;
}
int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
@@ -100,13 +102,12 @@ int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
struct device *dev = &mhi_cntrl->mhi_dev->dev;
int ret;
- spin_lock_bh(&mhi_cntrl->state_lock);
- ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M3);
- spin_unlock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+ ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M3);
if (ret) {
mhi_ep_handle_syserr(mhi_cntrl);
- return ret;
+ goto err_unlock;
}
mhi_ep_suspend_channels(mhi_cntrl);
@@ -115,10 +116,13 @@ int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
ret = mhi_ep_send_state_change_event(mhi_cntrl, MHI_STATE_M3);
if (ret) {
dev_err(dev, "Failed sending M3 state change event\n");
- return ret;
+ goto err_unlock;
}
- return 0;
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
+ return ret;
}
int mhi_ep_set_ready_state(struct mhi_ep_cntrl *mhi_cntrl)
@@ -127,22 +131,24 @@ int mhi_ep_set_ready_state(struct mhi_ep_cntrl *mhi_cntrl)
enum mhi_state mhi_state;
int ret, is_ready;
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
/* Ensure that the MHISTATUS is set to RESET by host */
mhi_state = mhi_ep_mmio_masked_read(mhi_cntrl, EP_MHISTATUS, MHISTATUS_MHISTATE_MASK);
is_ready = mhi_ep_mmio_masked_read(mhi_cntrl, EP_MHISTATUS, MHISTATUS_READY_MASK);
if (mhi_state != MHI_STATE_RESET || is_ready) {
dev_err(dev, "READY state transition failed. MHI host not in RESET state\n");
- spin_unlock_bh(&mhi_cntrl->state_lock);
- return -EIO;
+ ret = -EIO;
+ goto err_unlock;
}
ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_READY);
- spin_unlock_bh(&mhi_cntrl->state_lock);
-
if (ret)
mhi_ep_handle_syserr(mhi_cntrl);
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
return ret;
}
diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h
index 478aece17046..f198a8ac7ee7 100644
--- a/include/linux/mhi_ep.h
+++ b/include/linux/mhi_ep.h
@@ -70,8 +70,8 @@ struct mhi_ep_db_info {
* @cmd_ctx_cache_phys: Physical address of the host command context cache
* @chdb: Array of channel doorbell interrupt info
* @event_lock: Lock for protecting event rings
- * @list_lock: Lock for protecting state transition and channel doorbell lists
* @state_lock: Lock for protecting state transitions
+ * @list_lock: Lock for protecting state transition and channel doorbell lists
* @st_transition_list: List of state transitions
* @ch_db_list: List of queued channel doorbells
* @wq: Dedicated workqueue for handling rings and state changes
@@ -117,8 +117,8 @@ struct mhi_ep_cntrl {
struct mhi_ep_db_info chdb[4];
struct mutex event_lock;
+ struct mutex state_lock;
spinlock_t list_lock;
- spinlock_t state_lock;
struct list_head st_transition_list;
struct list_head ch_db_list;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x ac91e6980563ed53afadd925fa6585ffd2bc4a2c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820491313734(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
ac91e6980563 ("PCI: Unify delay handling for reset and resume")
8ef0217227b4 ("PCI/PM: Observe reset delay irrespective of bridge_d3")
730643d33e2d ("PCI/PM: Resume subordinate bus in bus type callbacks")
18a94192e20d ("PCI/PM: Define pci_restore_standard_config() only for CONFIG_PM_SLEEP")
0c5c62ddf88c ("Merge tag 'pci-v5.16-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ac91e6980563ed53afadd925fa6585ffd2bc4a2c Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Sun, 15 Jan 2023 09:20:32 +0100
Subject: [PATCH] PCI: Unify delay handling for reset and resume
Sheng Bi reports that pci_bridge_secondary_bus_reset() may fail to wait
for devices on the secondary bus to become accessible after reset:
Although it does call pci_dev_wait(), it erroneously passes the bridge's
pci_dev rather than that of a child. The bridge of course is always
accessible while its secondary bus is reset, so pci_dev_wait() returns
immediately.
Sheng Bi proposes introducing a new pci_bridge_secondary_bus_wait()
function which is called from pci_bridge_secondary_bus_reset():
https://lore.kernel.org/linux-pci/20220523171517.32407-1-windy.bi.enflame@g…
However we already have pci_bridge_wait_for_secondary_bus() which does
almost exactly what we need. So far it's only called on resume from
D3cold (which implies a Fundamental Reset per PCIe r6.0 sec 5.8).
Re-using it for Secondary Bus Resets is a leaner and more rational
approach than introducing a new function.
That only requires a few minor tweaks:
- Amend pci_bridge_wait_for_secondary_bus() to await accessibility of
the first device on the secondary bus by calling pci_dev_wait() after
performing the prescribed delays. pci_dev_wait() needs two parameters,
a reset reason and a timeout, which callers must now pass to
pci_bridge_wait_for_secondary_bus(). The timeout is 1 sec for resume
(PCIe r6.0 sec 6.6.1) and 60 sec for reset (commit 821cdad5c46c ("PCI:
Wait up to 60 seconds for device to become ready after FLR")).
Introduce a PCI_RESET_WAIT macro for the 1 sec timeout.
- Amend pci_bridge_wait_for_secondary_bus() to return 0 on success or
-ENOTTY on error for consumption by pci_bridge_secondary_bus_reset().
- Drop an unnecessary 1 sec delay from pci_reset_secondary_bus() which
is now performed by pci_bridge_wait_for_secondary_bus(). A static
delay this long is only necessary for Conventional PCI, so modern
PCIe systems benefit from shorter reset times as a side effect.
Fixes: 6b2f1351af56 ("PCI: Wait for device to become ready after secondary bus reset")
Link: https://lore.kernel.org/r/da77c92796b99ec568bd070cbe4725074a117038.16737695…
Reported-by: Sheng Bi <windy.bi.enflame(a)gmail.com>
Tested-by: Ravi Kishore Koppuravuri <ravi.kishore.koppuravuri(a)intel.com>
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v4.17+
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index a2ceeacc33eb..7a19f11daca3 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -572,7 +572,7 @@ static void pci_pm_default_resume_early(struct pci_dev *pci_dev)
static void pci_pm_bridge_power_up_actions(struct pci_dev *pci_dev)
{
- pci_bridge_wait_for_secondary_bus(pci_dev);
+ pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT);
/*
* When powering on a bridge from D3cold, the whole hierarchy may be
* powered on into D0uninitialized state, resume them to give them a
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index f43f3e84f634..509f6b5c9e14 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1174,7 +1174,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
return -ENOTTY;
}
- if (delay > 1000)
+ if (delay > PCI_RESET_WAIT)
pci_info(dev, "not ready %dms after %s; waiting\n",
delay - 1, reset_type);
@@ -1183,7 +1183,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
pci_read_config_dword(dev, PCI_COMMAND, &id);
}
- if (delay > 1000)
+ if (delay > PCI_RESET_WAIT)
pci_info(dev, "ready %dms after %s\n", delay - 1,
reset_type);
@@ -4948,24 +4948,31 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
/**
* pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
* @dev: PCI bridge
+ * @reset_type: reset type in human-readable form
+ * @timeout: maximum time to wait for devices on secondary bus (milliseconds)
*
* Handle necessary delays before access to the devices on the secondary
- * side of the bridge are permitted after D3cold to D0 transition.
+ * side of the bridge are permitted after D3cold to D0 transition
+ * or Conventional Reset.
*
* For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
* conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
* 4.3.2.
+ *
+ * Return 0 on success or -ENOTTY if the first device on the secondary bus
+ * failed to become accessible.
*/
-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
+ int timeout)
{
struct pci_dev *child;
int delay;
if (pci_dev_is_disconnected(dev))
- return;
+ return 0;
if (!pci_is_bridge(dev))
- return;
+ return 0;
down_read(&pci_bus_sem);
@@ -4977,14 +4984,14 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
*/
if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
up_read(&pci_bus_sem);
- return;
+ return 0;
}
/* Take d3cold_delay requirements into account */
delay = pci_bus_max_d3cold_delay(dev->subordinate);
if (!delay) {
up_read(&pci_bus_sem);
- return;
+ return 0;
}
child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
@@ -4993,14 +5000,12 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
/*
* Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before
- * accessing the device after reset (that is 1000 ms + 100 ms). In
- * practice this should not be needed because we don't do power
- * management for them (see pci_bridge_d3_possible()).
+ * accessing the device after reset (that is 1000 ms + 100 ms).
*/
if (!pci_is_pcie(dev)) {
pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
msleep(1000 + delay);
- return;
+ return 0;
}
/*
@@ -5017,11 +5022,11 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
* configuration requests if we only wait for 100 ms (see
* https://bugzilla.kernel.org/show_bug.cgi?id=203885).
*
- * Therefore we wait for 100 ms and check for the device presence.
- * If it is still not present give it an additional 100 ms.
+ * Therefore we wait for 100 ms and check for the device presence
+ * until the timeout expires.
*/
if (!pcie_downstream_port(dev))
- return;
+ return 0;
if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
@@ -5032,14 +5037,11 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
if (!pcie_wait_for_link_delay(dev, true, delay)) {
/* Did not train, no need to wait any further */
pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n");
- return;
+ return -ENOTTY;
}
}
- if (!pci_device_is_present(child)) {
- pci_dbg(child, "waiting additional %d ms to become accessible\n", delay);
- msleep(delay);
- }
+ return pci_dev_wait(child, reset_type, timeout - delay);
}
void pci_reset_secondary_bus(struct pci_dev *dev)
@@ -5058,15 +5060,6 @@ void pci_reset_secondary_bus(struct pci_dev *dev)
ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
-
- /*
- * Trhfa for conventional PCI is 2^25 clock cycles.
- * Assuming a minimum 33MHz clock this results in a 1s
- * delay before we can consider subordinate devices to
- * be re-initialized. PCIe has some ways to shorten this,
- * but we don't make use of them yet.
- */
- ssleep(1);
}
void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
@@ -5085,7 +5078,8 @@ int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
{
pcibios_reset_secondary_bus(dev);
- return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
+ return pci_bridge_wait_for_secondary_bus(dev, "bus reset",
+ PCIE_RESET_READY_POLL_MS);
}
EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9ed3b5550043..ce1fc3a90b3f 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -64,6 +64,13 @@ struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev,
#define PCI_PM_D3HOT_WAIT 10 /* msec */
#define PCI_PM_D3COLD_WAIT 100 /* msec */
+/*
+ * Following exit from Conventional Reset, devices must be ready within 1 sec
+ * (PCIe r6.0 sec 6.6.1). A D3cold to D0 transition implies a Conventional
+ * Reset (PCIe r6.0 sec 5.8).
+ */
+#define PCI_RESET_WAIT 1000 /* msec */
+
void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
void pci_refresh_power_state(struct pci_dev *dev);
int pci_power_up(struct pci_dev *dev);
@@ -86,8 +93,9 @@ void pci_msi_init(struct pci_dev *dev);
void pci_msix_init(struct pci_dev *dev);
bool pci_bridge_d3_possible(struct pci_dev *dev);
void pci_bridge_d3_update(struct pci_dev *dev);
-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
+int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
+ int timeout);
static inline void pci_wakeup_event(struct pci_dev *dev)
{
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1eac28201ac0725192f5ced34192d281a06692e5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678204825350(a)kroah.com' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
1eac28201ac0 ("RISC-V: fix ordering of Zbb extension")
80c200b34ee8 ("RISC-V: resort all extensions in consistent orders")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1eac28201ac0725192f5ced34192d281a06692e5 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko.stuebner(a)vrull.eu>
Date: Wed, 8 Feb 2023 23:53:27 +0100
Subject: [PATCH] RISC-V: fix ordering of Zbb extension
As Andrew reported,
Zb* comes after Zi* according 27.11 "Subset Naming Convention"
so fix the ordering accordingly.
Reported-by: Andrew Jones <ajones(a)ventanamicro.com>
Signed-off-by: Heiko Stuebner <heiko.stuebner(a)vrull.eu>
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Reviewed-by: Andrew Jones <ajones(a)ventanamicro.com>
Tested-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/r/20230208225328.1636017-2-heiko@sntech.de
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 420228e219f7..8400f0cc9704 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -185,9 +185,9 @@ arch_initcall(riscv_cpuinfo_init);
* New entries to this struct should follow the ordering rules described above.
*/
static struct riscv_isa_ext_data isa_ext_arr[] = {
- __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
+ __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
The patch below does not apply to the 6.2-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.2.y
git checkout FETCH_HEAD
git cherry-pick -x dac8bf14bb49aecd1de99ebb5498fa03152f2d40
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820481563250(a)kroah.com' --subject-prefix 'PATCH 6.2.y' HEAD^..
Possible dependencies:
dac8bf14bb49 ("riscv: hwcap: Don't alphabetize ISA extension IDs")
9daca9a5b9ac ("Merge patch series "riscv: improve boot time isa extensions handling"")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dac8bf14bb49aecd1de99ebb5498fa03152f2d40 Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones(a)ventanamicro.com>
Date: Thu, 9 Feb 2023 13:36:36 +0100
Subject: [PATCH] riscv: hwcap: Don't alphabetize ISA extension IDs
While the comment above the ISA extension ID definitions says
"Entries are sorted alphabetically.", this stopped being good
advice with commit d8a3d8a75206 ("riscv: hwcap: make ISA extension
ids can be used in asm"), as we now use macros instead of enums.
Reshuffling defines is error-prone, so, since they don't need to be
in any particular order, change the advice to just adding new
extensions at the bottom. Also, take the opportunity to change
spaces to tabs, merge three comments into one, and move the base
and max defines into more logical locations wrt the ID definitions.
Signed-off-by: Andrew Jones <ajones(a)ventanamicro.com>
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/r/20230209123636.123537-1-ajones@ventanamicro.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 831bebacb7fb..8f3994a7f0ca 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -24,29 +24,27 @@
#define RISCV_ISA_EXT_u ('u' - 'a')
/*
- * Increse this to higher value as kernel support more ISA extensions.
- */
-#define RISCV_ISA_EXT_MAX 64
-#define RISCV_ISA_EXT_NAME_LEN_MAX 32
-
-/* The base ID for multi-letter ISA extensions */
-#define RISCV_ISA_EXT_BASE 26
-
-/*
- * These macros represent the logical ID for each multi-letter RISC-V ISA extension.
- * The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed
- * RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter
- * extensions while all the multi-letter extensions should define the next
- * available logical extension id.
- * Entries are sorted alphabetically.
+ * These macros represent the logical IDs of each multi-letter RISC-V ISA
+ * extension and are used in the ISA bitmap. The logical IDs start from
+ * RISCV_ISA_EXT_BASE, which allows the 0-25 range to be reserved for single
+ * letter extensions. The maximum, RISCV_ISA_EXT_MAX, is defined in order
+ * to allocate the bitmap and may be increased when necessary.
+ *
+ * New extensions should just be added to the bottom, rather than added
+ * alphabetically, in order to avoid unnecessary shuffling.
*/
-#define RISCV_ISA_EXT_SSCOFPMF 26
-#define RISCV_ISA_EXT_SSTC 27
-#define RISCV_ISA_EXT_SVINVAL 28
-#define RISCV_ISA_EXT_SVPBMT 29
-#define RISCV_ISA_EXT_ZBB 30
-#define RISCV_ISA_EXT_ZICBOM 31
-#define RISCV_ISA_EXT_ZIHINTPAUSE 32
+#define RISCV_ISA_EXT_BASE 26
+
+#define RISCV_ISA_EXT_SSCOFPMF 26
+#define RISCV_ISA_EXT_SSTC 27
+#define RISCV_ISA_EXT_SVINVAL 28
+#define RISCV_ISA_EXT_SVPBMT 29
+#define RISCV_ISA_EXT_ZBB 30
+#define RISCV_ISA_EXT_ZICBOM 31
+#define RISCV_ISA_EXT_ZIHINTPAUSE 32
+
+#define RISCV_ISA_EXT_MAX 64
+#define RISCV_ISA_EXT_NAME_LEN_MAX 32
#ifndef __ASSEMBLY__
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x dac8bf14bb49aecd1de99ebb5498fa03152f2d40
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678204814209236(a)kroah.com' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
dac8bf14bb49 ("riscv: hwcap: Don't alphabetize ISA extension IDs")
9daca9a5b9ac ("Merge patch series "riscv: improve boot time isa extensions handling"")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dac8bf14bb49aecd1de99ebb5498fa03152f2d40 Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones(a)ventanamicro.com>
Date: Thu, 9 Feb 2023 13:36:36 +0100
Subject: [PATCH] riscv: hwcap: Don't alphabetize ISA extension IDs
While the comment above the ISA extension ID definitions says
"Entries are sorted alphabetically.", this stopped being good
advice with commit d8a3d8a75206 ("riscv: hwcap: make ISA extension
ids can be used in asm"), as we now use macros instead of enums.
Reshuffling defines is error-prone, so, since they don't need to be
in any particular order, change the advice to just adding new
extensions at the bottom. Also, take the opportunity to change
spaces to tabs, merge three comments into one, and move the base
and max defines into more logical locations wrt the ID definitions.
Signed-off-by: Andrew Jones <ajones(a)ventanamicro.com>
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/r/20230209123636.123537-1-ajones@ventanamicro.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 831bebacb7fb..8f3994a7f0ca 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -24,29 +24,27 @@
#define RISCV_ISA_EXT_u ('u' - 'a')
/*
- * Increse this to higher value as kernel support more ISA extensions.
- */
-#define RISCV_ISA_EXT_MAX 64
-#define RISCV_ISA_EXT_NAME_LEN_MAX 32
-
-/* The base ID for multi-letter ISA extensions */
-#define RISCV_ISA_EXT_BASE 26
-
-/*
- * These macros represent the logical ID for each multi-letter RISC-V ISA extension.
- * The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed
- * RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter
- * extensions while all the multi-letter extensions should define the next
- * available logical extension id.
- * Entries are sorted alphabetically.
+ * These macros represent the logical IDs of each multi-letter RISC-V ISA
+ * extension and are used in the ISA bitmap. The logical IDs start from
+ * RISCV_ISA_EXT_BASE, which allows the 0-25 range to be reserved for single
+ * letter extensions. The maximum, RISCV_ISA_EXT_MAX, is defined in order
+ * to allocate the bitmap and may be increased when necessary.
+ *
+ * New extensions should just be added to the bottom, rather than added
+ * alphabetically, in order to avoid unnecessary shuffling.
*/
-#define RISCV_ISA_EXT_SSCOFPMF 26
-#define RISCV_ISA_EXT_SSTC 27
-#define RISCV_ISA_EXT_SVINVAL 28
-#define RISCV_ISA_EXT_SVPBMT 29
-#define RISCV_ISA_EXT_ZBB 30
-#define RISCV_ISA_EXT_ZICBOM 31
-#define RISCV_ISA_EXT_ZIHINTPAUSE 32
+#define RISCV_ISA_EXT_BASE 26
+
+#define RISCV_ISA_EXT_SSCOFPMF 26
+#define RISCV_ISA_EXT_SSTC 27
+#define RISCV_ISA_EXT_SVINVAL 28
+#define RISCV_ISA_EXT_SVPBMT 29
+#define RISCV_ISA_EXT_ZBB 30
+#define RISCV_ISA_EXT_ZICBOM 31
+#define RISCV_ISA_EXT_ZIHINTPAUSE 32
+
+#define RISCV_ISA_EXT_MAX 64
+#define RISCV_ISA_EXT_NAME_LEN_MAX 32
#ifndef __ASSEMBLY__
The patch below does not apply to the 6.2-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.2.y
git checkout FETCH_HEAD
git cherry-pick -x 8ac6e619d9d51b3eb5bae817db8aa94e780a0db4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678204801201186(a)kroah.com' --subject-prefix 'PATCH 6.2.y' HEAD^..
Possible dependencies:
8ac6e619d9d5 ("riscv: Add header include guards to insn.h")
47f05757d3d8 ("RISC-V: add helpers for handling immediates in U-type and I-type pairs")
c9c1af3f186a ("RISC-V: rename parse_asm.h to insn.h")
ec5f90877516 ("RISC-V: Move riscv_insn_is_* macros into a common header")
bf0cc402d7cd ("RISC-V: add prefix to all constants/macros in parse_asm.h")
a3775634f6da ("RISC-V: fix funct4 definition for c.jalr in parse_asm.h")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8ac6e619d9d51b3eb5bae817db8aa94e780a0db4 Mon Sep 17 00:00:00 2001
From: Liao Chang <liaochang1(a)huawei.com>
Date: Sun, 29 Jan 2023 17:42:42 +0800
Subject: [PATCH] riscv: Add header include guards to insn.h
Add header include guards to insn.h to prevent repeating declaration of
any identifiers in insn.h.
Fixes: edde5584c7ab ("riscv: Add SW single-step support for KDB")
Signed-off-by: Liao Chang <liaochang1(a)huawei.com>
Reviewed-by: Andrew Jones <ajones(a)ventanamicro.com>
Fixes: c9c1af3f186a ("RISC-V: rename parse_asm.h to insn.h")
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/r/20230129094242.282620-1-liaochang1@huawei.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 6567cd5ed6ba..8d5c84f2d5ef 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -3,6 +3,9 @@
* Copyright (C) 2020 SiFive
*/
+#ifndef _ASM_RISCV_INSN_H
+#define _ASM_RISCV_INSN_H
+
#include <linux/bits.h>
#define RV_INSN_FUNCT3_MASK GENMASK(14, 12)
@@ -375,3 +378,4 @@ static inline void riscv_insn_insert_utype_itype_imm(u32 *utype_insn, u32 *itype
*utype_insn |= (imm & RV_U_IMM_31_12_MASK) + ((imm & BIT(11)) << 1);
*itype_insn |= ((imm & RV_I_IMM_11_0_MASK) << RV_I_IMM_11_0_OPOFF);
}
+#endif /* _ASM_RISCV_INSN_H */
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 8ac6e619d9d51b3eb5bae817db8aa94e780a0db4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16782048001545(a)kroah.com' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
8ac6e619d9d5 ("riscv: Add header include guards to insn.h")
47f05757d3d8 ("RISC-V: add helpers for handling immediates in U-type and I-type pairs")
c9c1af3f186a ("RISC-V: rename parse_asm.h to insn.h")
ec5f90877516 ("RISC-V: Move riscv_insn_is_* macros into a common header")
bf0cc402d7cd ("RISC-V: add prefix to all constants/macros in parse_asm.h")
a3775634f6da ("RISC-V: fix funct4 definition for c.jalr in parse_asm.h")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8ac6e619d9d51b3eb5bae817db8aa94e780a0db4 Mon Sep 17 00:00:00 2001
From: Liao Chang <liaochang1(a)huawei.com>
Date: Sun, 29 Jan 2023 17:42:42 +0800
Subject: [PATCH] riscv: Add header include guards to insn.h
Add header include guards to insn.h to prevent repeating declaration of
any identifiers in insn.h.
Fixes: edde5584c7ab ("riscv: Add SW single-step support for KDB")
Signed-off-by: Liao Chang <liaochang1(a)huawei.com>
Reviewed-by: Andrew Jones <ajones(a)ventanamicro.com>
Fixes: c9c1af3f186a ("RISC-V: rename parse_asm.h to insn.h")
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/r/20230129094242.282620-1-liaochang1@huawei.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 6567cd5ed6ba..8d5c84f2d5ef 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -3,6 +3,9 @@
* Copyright (C) 2020 SiFive
*/
+#ifndef _ASM_RISCV_INSN_H
+#define _ASM_RISCV_INSN_H
+
#include <linux/bits.h>
#define RV_INSN_FUNCT3_MASK GENMASK(14, 12)
@@ -375,3 +378,4 @@ static inline void riscv_insn_insert_utype_itype_imm(u32 *utype_insn, u32 *itype
*utype_insn |= (imm & RV_U_IMM_31_12_MASK) + ((imm & BIT(11)) << 1);
*itype_insn |= ((imm & RV_I_IMM_11_0_MASK) << RV_I_IMM_11_0_OPOFF);
}
+#endif /* _ASM_RISCV_INSN_H */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 8547649981e6631328cd64f583667501ae385531
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820471419475(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
8547649981e6 ("riscv: ftrace: Fixup panic by disabling preemption")
f32b4b467ebd ("RISC-V: enable dynamic ftrace for RV32I")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8547649981e6631328cd64f583667501ae385531 Mon Sep 17 00:00:00 2001
From: Andy Chiu <andy.chiu(a)sifive.com>
Date: Thu, 12 Jan 2023 04:05:57 -0500
Subject: [PATCH] riscv: ftrace: Fixup panic by disabling preemption
In RISCV, we must use an AUIPC + JALR pair to encode an immediate,
forming a jump that jumps to an address over 4K. This may cause errors
if we want to enable kernel preemption and remove dependency from
patching code with stop_machine(). For example, if a task was switched
out on auipc. And, if we changed the ftrace function before it was
switched back, then it would jump to an address that has updated 11:0
bits mixing with previous XLEN:12 part.
p: patched area performed by dynamic ftrace
ftrace_prologue:
p| REG_S ra, -SZREG(sp)
p| auipc ra, 0x? ------------> preempted
...
change ftrace function
...
p| jalr -?(ra) <------------- switched back
p| REG_L ra, -SZREG(sp)
func:
xxx
ret
Fixes: afc76b8b8011 ("riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")
Signed-off-by: Andy Chiu <andy.chiu(a)sifive.com>
Signed-off-by: Guo Ren <guoren(a)kernel.org>
Link: https://lore.kernel.org/r/20230112090603.1295340-2-guoren@kernel.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e2b656043abf..ee0d39b26794 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -138,7 +138,7 @@ config RISCV
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_FUNCTION_TRACER if !XIP_KERNEL
+ select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 8547649981e6631328cd64f583667501ae385531
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678204713100113(a)kroah.com' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
8547649981e6 ("riscv: ftrace: Fixup panic by disabling preemption")
f32b4b467ebd ("RISC-V: enable dynamic ftrace for RV32I")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8547649981e6631328cd64f583667501ae385531 Mon Sep 17 00:00:00 2001
From: Andy Chiu <andy.chiu(a)sifive.com>
Date: Thu, 12 Jan 2023 04:05:57 -0500
Subject: [PATCH] riscv: ftrace: Fixup panic by disabling preemption
In RISCV, we must use an AUIPC + JALR pair to encode an immediate,
forming a jump that jumps to an address over 4K. This may cause errors
if we want to enable kernel preemption and remove dependency from
patching code with stop_machine(). For example, if a task was switched
out on auipc. And, if we changed the ftrace function before it was
switched back, then it would jump to an address that has updated 11:0
bits mixing with previous XLEN:12 part.
p: patched area performed by dynamic ftrace
ftrace_prologue:
p| REG_S ra, -SZREG(sp)
p| auipc ra, 0x? ------------> preempted
...
change ftrace function
...
p| jalr -?(ra) <------------- switched back
p| REG_L ra, -SZREG(sp)
func:
xxx
ret
Fixes: afc76b8b8011 ("riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")
Signed-off-by: Andy Chiu <andy.chiu(a)sifive.com>
Signed-off-by: Guo Ren <guoren(a)kernel.org>
Link: https://lore.kernel.org/r/20230112090603.1295340-2-guoren@kernel.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e2b656043abf..ee0d39b26794 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -138,7 +138,7 @@ config RISCV
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_FUNCTION_TRACER if !XIP_KERNEL
+ select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 130aee3fd9981297ff9354e5d5609cd59aafbbea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678204667104220(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
130aee3fd998 ("riscv: Avoid enabling interrupts in die()")
f2913d006fcd ("RISC-V: Avoid dereferening NULL regs in die()")
3f1901110a89 ("RISC-V: Add fast call path of crash_kexec()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 130aee3fd9981297ff9354e5d5609cd59aafbbea Mon Sep 17 00:00:00 2001
From: Mattias Nissler <mnissler(a)rivosinc.com>
Date: Wed, 15 Feb 2023 14:48:28 +0000
Subject: [PATCH] riscv: Avoid enabling interrupts in die()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
While working on something else, I noticed that the kernel would start
accepting interrupts again after crashing in an interrupt handler. Since
the kernel is already in inconsistent state, enabling interrupts is
dangerous and opens up risk of kernel state deteriorating further.
Interrupts do get enabled via what looks like an unintended side effect of
spin_unlock_irq, so switch to the more cautious
spin_lock_irqsave/spin_unlock_irqrestore instead.
Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code")
Signed-off-by: Mattias Nissler <mnissler(a)rivosinc.com>
Reviewed-by: Björn Töpel <bjorn(a)kernel.org>
Link: https://lore.kernel.org/r/20230215144828.3370316-1-mnissler@rivosinc.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 549bde5c970a..70c98ce23be2 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -34,10 +34,11 @@ void die(struct pt_regs *regs, const char *str)
static int die_counter;
int ret;
long cause;
+ unsigned long flags;
oops_enter();
- spin_lock_irq(&die_lock);
+ spin_lock_irqsave(&die_lock, flags);
console_verbose();
bust_spinlocks(1);
@@ -54,7 +55,7 @@ void die(struct pt_regs *regs, const char *str)
bust_spinlocks(0);
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
- spin_unlock_irq(&die_lock);
+ spin_unlock_irqrestore(&die_lock, flags);
oops_exit();
if (in_interrupt())
Starting with release 10.38 PCRE2 drops default support for using \K in
lookaround patterns as described in [1]. Unfortunately, scripts/tags.sh
relies on such functionality to collect all_compiled_soures() leading to
the following error:
$ make COMPILED_SOURCE=1 tags
GEN tags
grep: \K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
The usage of \K for this pattern was introduced in commit 4f491bb6ea2a
("scripts/tags.sh: collect compiled source precisely") which speeds up
the generation of tags significantly.
In order to fix this issue without compromising the performance we can
switch over to an equivalent sed expression. The same matching pattern
is preserved here except \K is replaced with a backreference \1.
[1] https://www.pcre.org/current/doc/html/pcre2syntax.html#SEC11
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Cristian Ciocaltea <cristian.ciocaltea(a)collabora.com>
Cc: Masahiro Yamada <masahiroy(a)kernel.org>
Cc: Jialu Xu <xujialu(a)vimux.org>
Cc: Vipin Sharma <vipinsh(a)google.com>
Cc: stable(a)vger.kernel.org
Fixes: 4f491bb6ea2a ("scripts/tags.sh: collect compiled source precisely")
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
---
v2: add Fixes tag and cc stable
scripts/tags.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/tags.sh b/scripts/tags.sh
index e137cf15aae9..0d045182c08c 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -91,7 +91,7 @@ all_compiled_sources()
{
echo include/generated/autoconf.h
find $ignore -name "*.cmd" -exec \
- grep -Poh '(?(?=^source_.* \K).*|(?=^ \K\S).*(?= \\))' {} \+ |
+ sed -n -E 's/^source_.* (.*)/\1/p; s/^ (\S.*) \\/\1/p' {} \+ |
awk '!a[$0]++'
} | xargs realpath -esq $([ -z "$KBUILD_ABS_SRCTREE" ] && echo --relative-to=.) |
sort -u
--
2.39.1.637.g21b0678d19-goog
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819226214734(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
8eb29c4fbf96 ("dm flakey: fix a bug with 32-bit highmem systems")
f50714b57aec ("dm flakey: don't corrupt the zero page")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 22 Jan 2023 14:03:31 -0500
Subject: [PATCH] dm flakey: fix a bug with 32-bit highmem systems
The function page_address does not work with 32-bit systems with high
memory. Use bvec_kmap_local/kunmap_local instead.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index ff9ca5b2a47e..33608d436cec 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -307,8 +307,9 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
struct page *page = bio_iter_page(bio, iter);
if (unlikely(page == ZERO_PAGE(0)))
break;
- segment = (page_address(page) + bio_iter_offset(bio, iter));
+ segment = bvec_kmap_local(&bvec);
segment[corrupt_bio_byte] = fc->corrupt_bio_value;
+ kunmap_local(segment);
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819226210440(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
8eb29c4fbf96 ("dm flakey: fix a bug with 32-bit highmem systems")
f50714b57aec ("dm flakey: don't corrupt the zero page")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 22 Jan 2023 14:03:31 -0500
Subject: [PATCH] dm flakey: fix a bug with 32-bit highmem systems
The function page_address does not work with 32-bit systems with high
memory. Use bvec_kmap_local/kunmap_local instead.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index ff9ca5b2a47e..33608d436cec 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -307,8 +307,9 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
struct page *page = bio_iter_page(bio, iter);
if (unlikely(page == ZERO_PAGE(0)))
break;
- segment = (page_address(page) + bio_iter_offset(bio, iter));
+ segment = bvec_kmap_local(&bvec);
segment[corrupt_bio_byte] = fc->corrupt_bio_value;
+ kunmap_local(segment);
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678192261241235(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8eb29c4fbf96 ("dm flakey: fix a bug with 32-bit highmem systems")
f50714b57aec ("dm flakey: don't corrupt the zero page")
a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 22 Jan 2023 14:03:31 -0500
Subject: [PATCH] dm flakey: fix a bug with 32-bit highmem systems
The function page_address does not work with 32-bit systems with high
memory. Use bvec_kmap_local/kunmap_local instead.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index ff9ca5b2a47e..33608d436cec 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -307,8 +307,9 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
struct page *page = bio_iter_page(bio, iter);
if (unlikely(page == ZERO_PAGE(0)))
break;
- segment = (page_address(page) + bio_iter_offset(bio, iter));
+ segment = bvec_kmap_local(&bvec);
segment[corrupt_bio_byte] = fc->corrupt_bio_value;
+ kunmap_local(segment);
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819226054249(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
8eb29c4fbf96 ("dm flakey: fix a bug with 32-bit highmem systems")
f50714b57aec ("dm flakey: don't corrupt the zero page")
a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 22 Jan 2023 14:03:31 -0500
Subject: [PATCH] dm flakey: fix a bug with 32-bit highmem systems
The function page_address does not work with 32-bit systems with high
memory. Use bvec_kmap_local/kunmap_local instead.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index ff9ca5b2a47e..33608d436cec 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -307,8 +307,9 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
struct page *page = bio_iter_page(bio, iter);
if (unlikely(page == ZERO_PAGE(0)))
break;
- segment = (page_address(page) + bio_iter_offset(bio, iter));
+ segment = bvec_kmap_local(&bvec);
segment[corrupt_bio_byte] = fc->corrupt_bio_value;
+ kunmap_local(segment);
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f50714b57aecb6b3dc81d578e295f86d9c73f078
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678192217253116(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
f50714b57aec ("dm flakey: don't corrupt the zero page")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f50714b57aecb6b3dc81d578e295f86d9c73f078 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 22 Jan 2023 14:02:57 -0500
Subject: [PATCH] dm flakey: don't corrupt the zero page
When we need to zero some range on a block device, the function
__blkdev_issue_zero_pages submits a write bio with the bio vector pointing
to the zero page. If we use dm-flakey with corrupt bio writes option, it
will corrupt the content of the zero page which results in crashes of
various userspace programs. Glibc assumes that memory returned by mmap is
zeroed and it uses it for calloc implementation; if the newly mapped
memory is not zeroed, calloc will return non-zeroed memory.
Fix this bug by testing if the page is equal to ZERO_PAGE(0) and
avoiding the corruption in this case.
Cc: stable(a)vger.kernel.org
Fixes: a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.")
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 89fa7a68c6c4..ff9ca5b2a47e 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -303,8 +303,11 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
*/
bio_for_each_segment(bvec, bio, iter) {
if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
- char *segment = (page_address(bio_iter_page(bio, iter))
- + bio_iter_offset(bio, iter));
+ char *segment;
+ struct page *page = bio_iter_page(bio, iter);
+ if (unlikely(page == ZERO_PAGE(0)))
+ break;
+ segment = (page_address(page) + bio_iter_offset(bio, iter));
segment[corrupt_bio_byte] = fc->corrupt_bio_value;
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f50714b57aecb6b3dc81d578e295f86d9c73f078
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819221615494(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f50714b57aec ("dm flakey: don't corrupt the zero page")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f50714b57aecb6b3dc81d578e295f86d9c73f078 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 22 Jan 2023 14:02:57 -0500
Subject: [PATCH] dm flakey: don't corrupt the zero page
When we need to zero some range on a block device, the function
__blkdev_issue_zero_pages submits a write bio with the bio vector pointing
to the zero page. If we use dm-flakey with corrupt bio writes option, it
will corrupt the content of the zero page which results in crashes of
various userspace programs. Glibc assumes that memory returned by mmap is
zeroed and it uses it for calloc implementation; if the newly mapped
memory is not zeroed, calloc will return non-zeroed memory.
Fix this bug by testing if the page is equal to ZERO_PAGE(0) and
avoiding the corruption in this case.
Cc: stable(a)vger.kernel.org
Fixes: a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.")
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 89fa7a68c6c4..ff9ca5b2a47e 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -303,8 +303,11 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
*/
bio_for_each_segment(bvec, bio, iter) {
if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
- char *segment = (page_address(bio_iter_page(bio, iter))
- + bio_iter_offset(bio, iter));
+ char *segment;
+ struct page *page = bio_iter_page(bio, iter);
+ if (unlikely(page == ZERO_PAGE(0)))
+ break;
+ segment = (page_address(page) + bio_iter_offset(bio, iter));
segment[corrupt_bio_byte] = fc->corrupt_bio_value;
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x f50714b57aecb6b3dc81d578e295f86d9c73f078
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678192215115168(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
f50714b57aec ("dm flakey: don't corrupt the zero page")
a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f50714b57aecb6b3dc81d578e295f86d9c73f078 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 22 Jan 2023 14:02:57 -0500
Subject: [PATCH] dm flakey: don't corrupt the zero page
When we need to zero some range on a block device, the function
__blkdev_issue_zero_pages submits a write bio with the bio vector pointing
to the zero page. If we use dm-flakey with corrupt bio writes option, it
will corrupt the content of the zero page which results in crashes of
various userspace programs. Glibc assumes that memory returned by mmap is
zeroed and it uses it for calloc implementation; if the newly mapped
memory is not zeroed, calloc will return non-zeroed memory.
Fix this bug by testing if the page is equal to ZERO_PAGE(0) and
avoiding the corruption in this case.
Cc: stable(a)vger.kernel.org
Fixes: a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.")
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 89fa7a68c6c4..ff9ca5b2a47e 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -303,8 +303,11 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
*/
bio_for_each_segment(bvec, bio, iter) {
if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
- char *segment = (page_address(bio_iter_page(bio, iter))
- + bio_iter_offset(bio, iter));
+ char *segment;
+ struct page *page = bio_iter_page(bio, iter);
+ if (unlikely(page == ZERO_PAGE(0)))
+ break;
+ segment = (page_address(page) + bio_iter_offset(bio, iter));
segment[corrupt_bio_byte] = fc->corrupt_bio_value;
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x f50714b57aecb6b3dc81d578e295f86d9c73f078
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819221463150(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
f50714b57aec ("dm flakey: don't corrupt the zero page")
a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f50714b57aecb6b3dc81d578e295f86d9c73f078 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 22 Jan 2023 14:02:57 -0500
Subject: [PATCH] dm flakey: don't corrupt the zero page
When we need to zero some range on a block device, the function
__blkdev_issue_zero_pages submits a write bio with the bio vector pointing
to the zero page. If we use dm-flakey with corrupt bio writes option, it
will corrupt the content of the zero page which results in crashes of
various userspace programs. Glibc assumes that memory returned by mmap is
zeroed and it uses it for calloc implementation; if the newly mapped
memory is not zeroed, calloc will return non-zeroed memory.
Fix this bug by testing if the page is equal to ZERO_PAGE(0) and
avoiding the corruption in this case.
Cc: stable(a)vger.kernel.org
Fixes: a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.")
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 89fa7a68c6c4..ff9ca5b2a47e 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -303,8 +303,11 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
*/
bio_for_each_segment(bvec, bio, iter) {
if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
- char *segment = (page_address(bio_iter_page(bio, iter))
- + bio_iter_offset(bio, iter));
+ char *segment;
+ struct page *page = bio_iter_page(bio, iter);
+ if (unlikely(page == ZERO_PAGE(0)))
+ break;
+ segment = (page_address(page) + bio_iter_offset(bio, iter));
segment[corrupt_bio_byte] = fc->corrupt_bio_value;
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 7e8a936a2d0f98dd6e5d05d4838affabe606cabc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678191630243162(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
7e8a936a2d0f ("scsi: qla2xxx: Remove unintended flag clearing")
31e6cdbe0eae ("scsi: qla2xxx: Implement ref count for SRB")
d4523bd6fd5d ("scsi: qla2xxx: Refactor asynchronous command initialization")
2cabf10dbbe3 ("scsi: qla2xxx: Fix hang on NVMe command timeouts")
e3d2612f583b ("scsi: qla2xxx: Fix use after free in debug code")
9efea843a906 ("scsi: qla2xxx: edif: Add detection of secure device")
dd30706e73b7 ("scsi: qla2xxx: edif: Add key update")
fac2807946c1 ("scsi: qla2xxx: edif: Add extraction of auth_els from the wire")
84318a9f01ce ("scsi: qla2xxx: edif: Add send, receive, and accept for auth_els")
7878f22a2e03 ("scsi: qla2xxx: edif: Add getfcinfo and statistic bsgs")
7ebb336e45ef ("scsi: qla2xxx: edif: Add start + stop bsgs")
d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
f7a0ed479e66 ("scsi: qla2xxx: Fix crash in PCIe error handling")
2ce35c0821af ("scsi: qla2xxx: Fix use after free in bsg")
5777fef788a5 ("scsi: qla2xxx: Consolidate zio threshold setting for both FCP & NVMe")
960204ecca5e ("scsi: qla2xxx: Simplify if statement")
a04658594399 ("scsi: qla2xxx: Wait for ABTS response on I/O timeouts for NVMe")
dbf1f53cfd23 ("scsi: qla2xxx: Implementation to get and manage host, target stats and initiator port")
707531bc2626 ("scsi: qla2xxx: If fcport is undergoing deletion complete I/O with retry")
605e74025f95 ("scsi: qla2xxx: Move sess cmd list/lock to driver")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e8a936a2d0f98dd6e5d05d4838affabe606cabc Mon Sep 17 00:00:00 2001
From: Quinn Tran <qutran(a)marvell.com>
Date: Mon, 19 Dec 2022 03:07:44 -0800
Subject: [PATCH] scsi: qla2xxx: Remove unintended flag clearing
FCF_ASYNC_SENT flag is used in session management. This flag is cleared in
task management path by accident. Remove unintended flag clearing.
Fixes: 388a49959ee4 ("scsi: qla2xxx: Fix panic from use after free in qla2x00_async_tm_cmd")
Cc: stable(a)vger.kernel.org
Signed-off-by: Quinn Tran <qutran(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 745fee298d56..6968e8d08968 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2082,7 +2082,6 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
done_free_sp:
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
- fcport->flags &= ~FCF_ASYNC_SENT;
done:
return rval;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 7e8a936a2d0f98dd6e5d05d4838affabe606cabc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819162916579(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
7e8a936a2d0f ("scsi: qla2xxx: Remove unintended flag clearing")
31e6cdbe0eae ("scsi: qla2xxx: Implement ref count for SRB")
d4523bd6fd5d ("scsi: qla2xxx: Refactor asynchronous command initialization")
2cabf10dbbe3 ("scsi: qla2xxx: Fix hang on NVMe command timeouts")
e3d2612f583b ("scsi: qla2xxx: Fix use after free in debug code")
9efea843a906 ("scsi: qla2xxx: edif: Add detection of secure device")
dd30706e73b7 ("scsi: qla2xxx: edif: Add key update")
fac2807946c1 ("scsi: qla2xxx: edif: Add extraction of auth_els from the wire")
84318a9f01ce ("scsi: qla2xxx: edif: Add send, receive, and accept for auth_els")
7878f22a2e03 ("scsi: qla2xxx: edif: Add getfcinfo and statistic bsgs")
7ebb336e45ef ("scsi: qla2xxx: edif: Add start + stop bsgs")
d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
f7a0ed479e66 ("scsi: qla2xxx: Fix crash in PCIe error handling")
2ce35c0821af ("scsi: qla2xxx: Fix use after free in bsg")
5777fef788a5 ("scsi: qla2xxx: Consolidate zio threshold setting for both FCP & NVMe")
960204ecca5e ("scsi: qla2xxx: Simplify if statement")
a04658594399 ("scsi: qla2xxx: Wait for ABTS response on I/O timeouts for NVMe")
dbf1f53cfd23 ("scsi: qla2xxx: Implementation to get and manage host, target stats and initiator port")
707531bc2626 ("scsi: qla2xxx: If fcport is undergoing deletion complete I/O with retry")
605e74025f95 ("scsi: qla2xxx: Move sess cmd list/lock to driver")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e8a936a2d0f98dd6e5d05d4838affabe606cabc Mon Sep 17 00:00:00 2001
From: Quinn Tran <qutran(a)marvell.com>
Date: Mon, 19 Dec 2022 03:07:44 -0800
Subject: [PATCH] scsi: qla2xxx: Remove unintended flag clearing
FCF_ASYNC_SENT flag is used in session management. This flag is cleared in
task management path by accident. Remove unintended flag clearing.
Fixes: 388a49959ee4 ("scsi: qla2xxx: Fix panic from use after free in qla2x00_async_tm_cmd")
Cc: stable(a)vger.kernel.org
Signed-off-by: Quinn Tran <qutran(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 745fee298d56..6968e8d08968 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2082,7 +2082,6 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
done_free_sp:
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
- fcport->flags &= ~FCF_ASYNC_SENT;
done:
return rval;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 7e8a936a2d0f98dd6e5d05d4838affabe606cabc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678191628234164(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
7e8a936a2d0f ("scsi: qla2xxx: Remove unintended flag clearing")
31e6cdbe0eae ("scsi: qla2xxx: Implement ref count for SRB")
d4523bd6fd5d ("scsi: qla2xxx: Refactor asynchronous command initialization")
2cabf10dbbe3 ("scsi: qla2xxx: Fix hang on NVMe command timeouts")
e3d2612f583b ("scsi: qla2xxx: Fix use after free in debug code")
9efea843a906 ("scsi: qla2xxx: edif: Add detection of secure device")
dd30706e73b7 ("scsi: qla2xxx: edif: Add key update")
fac2807946c1 ("scsi: qla2xxx: edif: Add extraction of auth_els from the wire")
84318a9f01ce ("scsi: qla2xxx: edif: Add send, receive, and accept for auth_els")
7878f22a2e03 ("scsi: qla2xxx: edif: Add getfcinfo and statistic bsgs")
7ebb336e45ef ("scsi: qla2xxx: edif: Add start + stop bsgs")
d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
f7a0ed479e66 ("scsi: qla2xxx: Fix crash in PCIe error handling")
2ce35c0821af ("scsi: qla2xxx: Fix use after free in bsg")
5777fef788a5 ("scsi: qla2xxx: Consolidate zio threshold setting for both FCP & NVMe")
960204ecca5e ("scsi: qla2xxx: Simplify if statement")
a04658594399 ("scsi: qla2xxx: Wait for ABTS response on I/O timeouts for NVMe")
dbf1f53cfd23 ("scsi: qla2xxx: Implementation to get and manage host, target stats and initiator port")
707531bc2626 ("scsi: qla2xxx: If fcport is undergoing deletion complete I/O with retry")
605e74025f95 ("scsi: qla2xxx: Move sess cmd list/lock to driver")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e8a936a2d0f98dd6e5d05d4838affabe606cabc Mon Sep 17 00:00:00 2001
From: Quinn Tran <qutran(a)marvell.com>
Date: Mon, 19 Dec 2022 03:07:44 -0800
Subject: [PATCH] scsi: qla2xxx: Remove unintended flag clearing
FCF_ASYNC_SENT flag is used in session management. This flag is cleared in
task management path by accident. Remove unintended flag clearing.
Fixes: 388a49959ee4 ("scsi: qla2xxx: Fix panic from use after free in qla2x00_async_tm_cmd")
Cc: stable(a)vger.kernel.org
Signed-off-by: Quinn Tran <qutran(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 745fee298d56..6968e8d08968 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2082,7 +2082,6 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
done_free_sp:
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
- fcport->flags &= ~FCF_ASYNC_SENT;
done:
return rval;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 0c227dc22ca18856055983f27594feb2e0149965
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819159220160(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
0c227dc22ca1 ("scsi: qla2xxx: Check if port is online before sending ELS")
7fb223d0ad80 ("scsi: qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()")
84318a9f01ce ("scsi: qla2xxx: edif: Add send, receive, and accept for auth_els")
7ebb336e45ef ("scsi: qla2xxx: edif: Add start + stop bsgs")
d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
f7a0ed479e66 ("scsi: qla2xxx: Fix crash in PCIe error handling")
2ce35c0821af ("scsi: qla2xxx: Fix use after free in bsg")
5777fef788a5 ("scsi: qla2xxx: Consolidate zio threshold setting for both FCP & NVMe")
dbf1f53cfd23 ("scsi: qla2xxx: Implementation to get and manage host, target stats and initiator port")
707531bc2626 ("scsi: qla2xxx: If fcport is undergoing deletion complete I/O with retry")
3e6efab865ac ("scsi: qla2xxx: Fix reset of MPI firmware")
44f5a37d1e3e ("scsi: qla2xxx: Fix buffer-buffer credit extraction error")
89c72f4245a8 ("scsi: qla2xxx: Add IOCB resource tracking")
49db4d4e02aa ("scsi: qla2xxx: Performance tweak")
1e98fb0f9208 ("scsi: qla2xxx: Setup debugfs entries for remote ports")
897d68eb816b ("scsi: qla2xxx: Fix WARN_ON in qla_nvme_register_hba")
81b9d1e19d62 ("scsi: qla2xxx: Reduce noisy debug message")
9f2475fe7406 ("scsi: qla2xxx: SAN congestion management implementation")
62e9dd177732 ("scsi: qla2xxx: Change in PUREX to handle FPIN ELS requests")
818dbde78e0f ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c227dc22ca18856055983f27594feb2e0149965 Mon Sep 17 00:00:00 2001
From: Shreyas Deodhar <sdeodhar(a)marvell.com>
Date: Mon, 19 Dec 2022 03:07:38 -0800
Subject: [PATCH] scsi: qla2xxx: Check if port is online before sending ELS
CT Ping and ELS cmds fail for NVMe targets. Check if port is online before
sending ELS instead of sending login.
Cc: stable(a)vger.kernel.org
Signed-off-by: Shreyas Deodhar <sdeodhar(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index cd75b179410d..dba7bba788d7 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -278,8 +278,8 @@ qla2x00_process_els(struct bsg_job *bsg_job)
const char *type;
int req_sg_cnt, rsp_sg_cnt;
int rval = (DID_ERROR << 16);
- uint16_t nextlid = 0;
uint32_t els_cmd = 0;
+ int qla_port_allocated = 0;
if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
rport = fc_bsg_to_rport(bsg_job);
@@ -329,9 +329,9 @@ qla2x00_process_els(struct bsg_job *bsg_job)
/* make sure the rport is logged in,
* if not perform fabric login
*/
- if (qla2x00_fabric_login(vha, fcport, &nextlid)) {
+ if (atomic_read(&fcport->state) != FCS_ONLINE) {
ql_dbg(ql_dbg_user, vha, 0x7003,
- "Failed to login port %06X for ELS passthru.\n",
+ "Port %06X is not online for ELS passthru.\n",
fcport->d_id.b24);
rval = -EIO;
goto done;
@@ -348,6 +348,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
goto done;
}
+ qla_port_allocated = 1;
/* Initialize all required fields of fcport */
fcport->vha = vha;
fcport->d_id.b.al_pa =
@@ -432,7 +433,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
goto done_free_fcport;
done_free_fcport:
- if (bsg_request->msgcode != FC_BSG_RPT_ELS)
+ if (qla_port_allocated)
qla2x00_free_fcport(fcport);
done:
return rval;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 0c227dc22ca18856055983f27594feb2e0149965
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678191591182209(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
0c227dc22ca1 ("scsi: qla2xxx: Check if port is online before sending ELS")
7fb223d0ad80 ("scsi: qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()")
84318a9f01ce ("scsi: qla2xxx: edif: Add send, receive, and accept for auth_els")
7ebb336e45ef ("scsi: qla2xxx: edif: Add start + stop bsgs")
d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
f7a0ed479e66 ("scsi: qla2xxx: Fix crash in PCIe error handling")
2ce35c0821af ("scsi: qla2xxx: Fix use after free in bsg")
5777fef788a5 ("scsi: qla2xxx: Consolidate zio threshold setting for both FCP & NVMe")
dbf1f53cfd23 ("scsi: qla2xxx: Implementation to get and manage host, target stats and initiator port")
707531bc2626 ("scsi: qla2xxx: If fcport is undergoing deletion complete I/O with retry")
3e6efab865ac ("scsi: qla2xxx: Fix reset of MPI firmware")
44f5a37d1e3e ("scsi: qla2xxx: Fix buffer-buffer credit extraction error")
89c72f4245a8 ("scsi: qla2xxx: Add IOCB resource tracking")
49db4d4e02aa ("scsi: qla2xxx: Performance tweak")
1e98fb0f9208 ("scsi: qla2xxx: Setup debugfs entries for remote ports")
897d68eb816b ("scsi: qla2xxx: Fix WARN_ON in qla_nvme_register_hba")
81b9d1e19d62 ("scsi: qla2xxx: Reduce noisy debug message")
9f2475fe7406 ("scsi: qla2xxx: SAN congestion management implementation")
62e9dd177732 ("scsi: qla2xxx: Change in PUREX to handle FPIN ELS requests")
818dbde78e0f ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c227dc22ca18856055983f27594feb2e0149965 Mon Sep 17 00:00:00 2001
From: Shreyas Deodhar <sdeodhar(a)marvell.com>
Date: Mon, 19 Dec 2022 03:07:38 -0800
Subject: [PATCH] scsi: qla2xxx: Check if port is online before sending ELS
CT Ping and ELS cmds fail for NVMe targets. Check if port is online before
sending ELS instead of sending login.
Cc: stable(a)vger.kernel.org
Signed-off-by: Shreyas Deodhar <sdeodhar(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index cd75b179410d..dba7bba788d7 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -278,8 +278,8 @@ qla2x00_process_els(struct bsg_job *bsg_job)
const char *type;
int req_sg_cnt, rsp_sg_cnt;
int rval = (DID_ERROR << 16);
- uint16_t nextlid = 0;
uint32_t els_cmd = 0;
+ int qla_port_allocated = 0;
if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
rport = fc_bsg_to_rport(bsg_job);
@@ -329,9 +329,9 @@ qla2x00_process_els(struct bsg_job *bsg_job)
/* make sure the rport is logged in,
* if not perform fabric login
*/
- if (qla2x00_fabric_login(vha, fcport, &nextlid)) {
+ if (atomic_read(&fcport->state) != FCS_ONLINE) {
ql_dbg(ql_dbg_user, vha, 0x7003,
- "Failed to login port %06X for ELS passthru.\n",
+ "Port %06X is not online for ELS passthru.\n",
fcport->d_id.b24);
rval = -EIO;
goto done;
@@ -348,6 +348,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
goto done;
}
+ qla_port_allocated = 1;
/* Initialize all required fields of fcport */
fcport->vha = vha;
fcport->d_id.b.al_pa =
@@ -432,7 +433,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
goto done_free_fcport;
done_free_fcport:
- if (bsg_request->msgcode != FC_BSG_RPT_ELS)
+ if (qla_port_allocated)
qla2x00_free_fcport(fcport);
done:
return rval;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 0c227dc22ca18856055983f27594feb2e0149965
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819159023047(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
0c227dc22ca1 ("scsi: qla2xxx: Check if port is online before sending ELS")
7fb223d0ad80 ("scsi: qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()")
84318a9f01ce ("scsi: qla2xxx: edif: Add send, receive, and accept for auth_els")
7ebb336e45ef ("scsi: qla2xxx: edif: Add start + stop bsgs")
d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
f7a0ed479e66 ("scsi: qla2xxx: Fix crash in PCIe error handling")
2ce35c0821af ("scsi: qla2xxx: Fix use after free in bsg")
5777fef788a5 ("scsi: qla2xxx: Consolidate zio threshold setting for both FCP & NVMe")
dbf1f53cfd23 ("scsi: qla2xxx: Implementation to get and manage host, target stats and initiator port")
707531bc2626 ("scsi: qla2xxx: If fcport is undergoing deletion complete I/O with retry")
3e6efab865ac ("scsi: qla2xxx: Fix reset of MPI firmware")
44f5a37d1e3e ("scsi: qla2xxx: Fix buffer-buffer credit extraction error")
89c72f4245a8 ("scsi: qla2xxx: Add IOCB resource tracking")
49db4d4e02aa ("scsi: qla2xxx: Performance tweak")
1e98fb0f9208 ("scsi: qla2xxx: Setup debugfs entries for remote ports")
897d68eb816b ("scsi: qla2xxx: Fix WARN_ON in qla_nvme_register_hba")
81b9d1e19d62 ("scsi: qla2xxx: Reduce noisy debug message")
9f2475fe7406 ("scsi: qla2xxx: SAN congestion management implementation")
62e9dd177732 ("scsi: qla2xxx: Change in PUREX to handle FPIN ELS requests")
818dbde78e0f ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c227dc22ca18856055983f27594feb2e0149965 Mon Sep 17 00:00:00 2001
From: Shreyas Deodhar <sdeodhar(a)marvell.com>
Date: Mon, 19 Dec 2022 03:07:38 -0800
Subject: [PATCH] scsi: qla2xxx: Check if port is online before sending ELS
CT Ping and ELS cmds fail for NVMe targets. Check if port is online before
sending ELS instead of sending login.
Cc: stable(a)vger.kernel.org
Signed-off-by: Shreyas Deodhar <sdeodhar(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index cd75b179410d..dba7bba788d7 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -278,8 +278,8 @@ qla2x00_process_els(struct bsg_job *bsg_job)
const char *type;
int req_sg_cnt, rsp_sg_cnt;
int rval = (DID_ERROR << 16);
- uint16_t nextlid = 0;
uint32_t els_cmd = 0;
+ int qla_port_allocated = 0;
if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
rport = fc_bsg_to_rport(bsg_job);
@@ -329,9 +329,9 @@ qla2x00_process_els(struct bsg_job *bsg_job)
/* make sure the rport is logged in,
* if not perform fabric login
*/
- if (qla2x00_fabric_login(vha, fcport, &nextlid)) {
+ if (atomic_read(&fcport->state) != FCS_ONLINE) {
ql_dbg(ql_dbg_user, vha, 0x7003,
- "Failed to login port %06X for ELS passthru.\n",
+ "Port %06X is not online for ELS passthru.\n",
fcport->d_id.b24);
rval = -EIO;
goto done;
@@ -348,6 +348,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
goto done;
}
+ qla_port_allocated = 1;
/* Initialize all required fields of fcport */
fcport->vha = vha;
fcport->d_id.b.al_pa =
@@ -432,7 +433,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
goto done_free_fcport;
done_free_fcport:
- if (bsg_request->msgcode != FC_BSG_RPT_ELS)
+ if (qla_port_allocated)
qla2x00_free_fcport(fcport);
done:
return rval;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 0c227dc22ca18856055983f27594feb2e0149965
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819159028180(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
0c227dc22ca1 ("scsi: qla2xxx: Check if port is online before sending ELS")
7fb223d0ad80 ("scsi: qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()")
84318a9f01ce ("scsi: qla2xxx: edif: Add send, receive, and accept for auth_els")
7ebb336e45ef ("scsi: qla2xxx: edif: Add start + stop bsgs")
d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
f7a0ed479e66 ("scsi: qla2xxx: Fix crash in PCIe error handling")
2ce35c0821af ("scsi: qla2xxx: Fix use after free in bsg")
5777fef788a5 ("scsi: qla2xxx: Consolidate zio threshold setting for both FCP & NVMe")
dbf1f53cfd23 ("scsi: qla2xxx: Implementation to get and manage host, target stats and initiator port")
707531bc2626 ("scsi: qla2xxx: If fcport is undergoing deletion complete I/O with retry")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c227dc22ca18856055983f27594feb2e0149965 Mon Sep 17 00:00:00 2001
From: Shreyas Deodhar <sdeodhar(a)marvell.com>
Date: Mon, 19 Dec 2022 03:07:38 -0800
Subject: [PATCH] scsi: qla2xxx: Check if port is online before sending ELS
CT Ping and ELS cmds fail for NVMe targets. Check if port is online before
sending ELS instead of sending login.
Cc: stable(a)vger.kernel.org
Signed-off-by: Shreyas Deodhar <sdeodhar(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index cd75b179410d..dba7bba788d7 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -278,8 +278,8 @@ qla2x00_process_els(struct bsg_job *bsg_job)
const char *type;
int req_sg_cnt, rsp_sg_cnt;
int rval = (DID_ERROR << 16);
- uint16_t nextlid = 0;
uint32_t els_cmd = 0;
+ int qla_port_allocated = 0;
if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
rport = fc_bsg_to_rport(bsg_job);
@@ -329,9 +329,9 @@ qla2x00_process_els(struct bsg_job *bsg_job)
/* make sure the rport is logged in,
* if not perform fabric login
*/
- if (qla2x00_fabric_login(vha, fcport, &nextlid)) {
+ if (atomic_read(&fcport->state) != FCS_ONLINE) {
ql_dbg(ql_dbg_user, vha, 0x7003,
- "Failed to login port %06X for ELS passthru.\n",
+ "Port %06X is not online for ELS passthru.\n",
fcport->d_id.b24);
rval = -EIO;
goto done;
@@ -348,6 +348,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
goto done;
}
+ qla_port_allocated = 1;
/* Initialize all required fields of fcport */
fcport->vha = vha;
fcport->d_id.b.al_pa =
@@ -432,7 +433,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
goto done_free_fcport;
done_free_fcport:
- if (bsg_request->msgcode != FC_BSG_RPT_ELS)
+ if (qla_port_allocated)
qla2x00_free_fcport(fcport);
done:
return rval;
In some cases, driver trees to send Status Stage twice.
The first one from upper layer of gadget usb subsystem and
second time from controller driver.
This patch fixes this issue and remove tricky handling of
SET_INTERFACE from controller driver which is no longer
needed.
cc: <stable(a)vger.kernel.org>
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
drivers/usb/cdns3/cdnsp-ep0.c | 19 +------------------
1 file changed, 1 insertion(+), 18 deletions(-)
diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c
index 9b8325f82499..d63d5d92f255 100644
--- a/drivers/usb/cdns3/cdnsp-ep0.c
+++ b/drivers/usb/cdns3/cdnsp-ep0.c
@@ -403,20 +403,6 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev,
case USB_REQ_SET_ISOCH_DELAY:
ret = cdnsp_ep0_set_isoch_delay(pdev, ctrl);
break;
- case USB_REQ_SET_INTERFACE:
- /*
- * Add request into pending list to block sending status stage
- * by libcomposite.
- */
- list_add_tail(&pdev->ep0_preq.list,
- &pdev->ep0_preq.pep->pending_list);
-
- ret = cdnsp_ep0_delegate_req(pdev, ctrl);
- if (ret == -EBUSY)
- ret = 0;
-
- list_del(&pdev->ep0_preq.list);
- break;
default:
ret = cdnsp_ep0_delegate_req(pdev, ctrl);
break;
@@ -474,9 +460,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev)
else
ret = cdnsp_ep0_delegate_req(pdev, ctrl);
- if (!len)
- pdev->ep0_stage = CDNSP_STATUS_STAGE;
-
if (ret == USB_GADGET_DELAYED_STATUS) {
trace_cdnsp_ep0_status_stage("delayed");
return;
@@ -484,6 +467,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev)
out:
if (ret < 0)
cdnsp_ep0_stall(pdev);
- else if (pdev->ep0_stage == CDNSP_STATUS_STAGE)
+ else if (!len && pdev->ep0_stage != CDNSP_STATUS_STAGE)
cdnsp_status_stage(pdev);
}
--
2.25.1
Hello Dear
How are you doing?I'm van der kuil Johannes gerhardus Maria. I am a
lawyer from the Netherlands who reside in Belgium and I am working on
the donation file of my client, Mr. Bartos Pierre Nationality of
Belgium. I would like to know if you will accept my
client's donation Mr. Bartos Pierre?
Waiting to hear from you soon
Commit 130a96d698d7 ("usb: typec: ucsi: acpi: Increase command
completion timeout value") increased the timeout from 5 seconds
to 60 seconds due to issues related to alternate mode discovery.
After the alternate mode discovery switch to polled mode
the timeout was reduced, but instead of being set back to
5 seconds it was reduced to 1 second.
This is causing problems when using a Lenovo ThinkPad X1 yoga gen7
connected over Type-C to a LG 27UL850-W (charging DP over Type-C).
When the monitor is already connected at boot the following error
is logged: "PPM init failed (-110)", /sys/class/typec is empty and
on unplugging the NULL pointer deref fixed earlier in this series
happens.
When the monitor is connected after boot the following error
is logged instead: "GET_CONNECTOR_STATUS failed (-110)".
Setting the timeout back to 5 seconds fixes both cases.
Fixes: e08065069fc7 ("usb: typec: ucsi: acpi: Reduce the command completion timeout")
Cc: stable(a)vger.kernel.org
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/usb/typec/ucsi/ucsi_acpi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index ce0c8ef80c04..62206a6b8ea7 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -78,7 +78,7 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
if (ret)
goto out_clear_bit;
- if (!wait_for_completion_timeout(&ua->complete, HZ))
+ if (!wait_for_completion_timeout(&ua->complete, 5 * HZ))
ret = -ETIMEDOUT;
out_clear_bit:
--
2.39.1
When ucsi_init() fails, ucsi->connector is NULL, yet in case of
ucsi_acpi we may still get events which cause the ucs_acpi code to call
ucsi_connector_change(), which then derefs the NULL ucsi->connector
pointer.
Fix this by not setting ucsi->ntfy inside ucsi_init() until ucsi_init()
has succeeded, so that ucsi_connector_change() ignores the events
because UCSI_ENABLE_NTFY_CONNECTOR_CHANGE is not set in the ntfy mask.
Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217106
Cc: stable(a)vger.kernel.org
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes in v2:
- Delay setting ucsi->ntfy in ucsi_init() instead of adding a NULL pointer
check to ucsi_connector_change()
Changes in v3:
- Add Link tag to commitmsg
---
drivers/usb/typec/ucsi/ucsi.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 1cf8947c6d66..8cbbb002fefe 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -1205,7 +1205,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
static int ucsi_init(struct ucsi *ucsi)
{
struct ucsi_connector *con;
- u64 command;
+ u64 command, ntfy;
int ret;
int i;
@@ -1217,8 +1217,8 @@ static int ucsi_init(struct ucsi *ucsi)
}
/* Enable basic notifications */
- ucsi->ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR;
- command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
+ ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR;
+ command = UCSI_SET_NOTIFICATION_ENABLE | ntfy;
ret = ucsi_send_command(ucsi, command, NULL, 0);
if (ret < 0)
goto err_reset;
@@ -1250,12 +1250,13 @@ static int ucsi_init(struct ucsi *ucsi)
}
/* Enable all notifications */
- ucsi->ntfy = UCSI_ENABLE_NTFY_ALL;
- command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
+ ntfy = UCSI_ENABLE_NTFY_ALL;
+ command = UCSI_SET_NOTIFICATION_ENABLE | ntfy;
ret = ucsi_send_command(ucsi, command, NULL, 0);
if (ret < 0)
goto err_unregister;
+ ucsi->ntfy = ntfy;
return 0;
err_unregister:
--
2.39.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182911148202(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
03ebe48e235f ("Merge branches 'iommu/fixes', 'arm/renesas', 'arm/mediatek', 'arm/tegra', 'arm/omap', 'arm/smmu', 'x86/vt-d', 'x86/amd' and 'core' into next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182911197225(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
03ebe48e235f ("Merge branches 'iommu/fixes', 'arm/renesas', 'arm/mediatek', 'arm/tegra', 'arm/omap', 'arm/smmu', 'x86/vt-d', 'x86/amd' and 'core' into next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818291020229(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182909225188(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182908118156(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 4e7d2a8f0b52abf23b1dc13b3d88bc0923383cd5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818283215417(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
4e7d2a8f0b52 ("ktest.pl: Add RUN_TIMEOUT option with default unlimited")
3e1d3678844b ("ktest: Add CONNECT_TIMEOUT to change the connection timeout time")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4e7d2a8f0b52abf23b1dc13b3d88bc0923383cd5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Wed, 18 Jan 2023 16:37:25 -0500
Subject: [PATCH] ktest.pl: Add RUN_TIMEOUT option with default unlimited
There is a disconnect between the run_command function and the
wait_for_input. The wait_for_input has a default timeout of 2 minutes. But
if that happens, the run_command loop will exit out to the waitpid() of
the executing command. This fails in that it no longer monitors the
command, and also, the ssh to the test box can hang when its finished, as
it's waiting for the pipe it's writing to to flush, but the loop that
reads that pipe has already exited, leaving the command stuck, and the
test hangs.
Instead, make the default "wait_for_input" of the run_command infinite,
and allow the user to override it if they want with a default timeout
option "RUN_TIMEOUT".
But this fixes the hang that happens when the pipe is full and the ssh
session never exits.
Cc: stable(a)vger.kernel.org
Fixes: 6e98d1b4415fe ("ktest: Add timeout to ssh command")
Signed-off-by: Steven Rostedt <rostedt(a)goodmis.org>
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 74801811372f..822794ca4029 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -178,6 +178,7 @@ my $store_failures;
my $store_successes;
my $test_name;
my $timeout;
+my $run_timeout;
my $connect_timeout;
my $config_bisect_exec;
my $booted_timeout;
@@ -340,6 +341,7 @@ my %option_map = (
"STORE_SUCCESSES" => \$store_successes,
"TEST_NAME" => \$test_name,
"TIMEOUT" => \$timeout,
+ "RUN_TIMEOUT" => \$run_timeout,
"CONNECT_TIMEOUT" => \$connect_timeout,
"CONFIG_BISECT_EXEC" => \$config_bisect_exec,
"BOOTED_TIMEOUT" => \$booted_timeout,
@@ -1858,6 +1860,14 @@ sub run_command {
$command =~ s/\$SSH_USER/$ssh_user/g;
$command =~ s/\$MACHINE/$machine/g;
+ if (!defined($timeout)) {
+ $timeout = $run_timeout;
+ }
+
+ if (!defined($timeout)) {
+ $timeout = -1; # tell wait_for_input to wait indefinitely
+ }
+
doprint("$command ... ");
$start_time = time;
@@ -1884,13 +1894,10 @@ sub run_command {
while (1) {
my $fp = \*CMD;
- if (defined($timeout)) {
- doprint "timeout = $timeout\n";
- }
my $line = wait_for_input($fp, $timeout);
if (!defined($line)) {
my $now = time;
- if (defined($timeout) && (($now - $start_time) >= $timeout)) {
+ if ($timeout >= 0 && (($now - $start_time) >= $timeout)) {
doprint "Hit timeout of $timeout, killing process\n";
$hit_timeout = 1;
kill 9, $pid;
@@ -2062,6 +2069,11 @@ sub wait_for_input {
$time = $timeout;
}
+ if ($time < 0) {
+ # Negative number means wait indefinitely
+ undef $time;
+ }
+
$rin = '';
vec($rin, fileno($fp), 1) = 1;
vec($rin, fileno(\*STDIN), 1) = 1;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 2d0fe15a096d..f43477a9b857 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -817,6 +817,11 @@
# is issued instead of a reboot.
# CONNECT_TIMEOUT = 25
+# The timeout in seconds for how long to wait for any running command
+# to timeout. If not defined, it will let it go indefinitely.
+# (default undefined)
+#RUN_TIMEOUT = 600
+
# In between tests, a reboot of the box may occur, and this
# is the time to wait for the console after it stops producing
# output. Some machines may not produce a large lag on reboot
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182774218237(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
9840cfcb97fc ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818277111745(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
9840cfcb97fc ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182764221188(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
9840cfcb97fc ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781827635613(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
9840cfcb97fc ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781827629218(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 83d29d439cd3ef23041570d55841f814af2ecac0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818274565224(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
83d29d439cd3 ("ktest.pl: Give back console on Ctrt^C on monitor")
c2d84ddb338c ("ktest.pl: Add MAIL_COMMAND option to define how to send email")
59f89eb1e3dd ("ktest.pl: Use run_command to execute sending mail")
8604b0c4bc9a ("ktest.pl: Kill test if mailer is not supported")
be1546b87f3b ("ktest.pl: Add MAIL_PATH option to define where to find the mailer")
f5ef48855733 ("ktest.pl: No need to print no mailer is specified when mailto is not")
eaaa1e283ada ("Ktest: add email options to sample.config")
92db453e7eeb ("Ktest: Add SigInt handling")
2ceb2d85b669 ("Ktest: Add email support")
40667fb5fda0 ("ktest.pl: Make finding config-bisect.pl dynamic")
133087f0623e ("ktest.pl: Have ktest.pl pass -r to config-bisect.pl to reset bisect")
b337f9790a0c ("ktest.pl: Allow for the config-bisect.pl output to display to console")
a9adc261e978 ("ktest: Use config-bisect.pl in ktest.pl")
0f0db065999c ("ktest: Add standalone config-bisect.pl program")
3e1d3678844b ("ktest: Add CONNECT_TIMEOUT to change the connection timeout time")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 83d29d439cd3ef23041570d55841f814af2ecac0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Wed, 18 Jan 2023 16:32:13 -0500
Subject: [PATCH] ktest.pl: Give back console on Ctrt^C on monitor
When monitoring the console output, the stdout is being redirected to do
so. If Ctrl^C is hit during this mode, the stdout is not back to the
console, the user does not see anything they type (no echo).
Add "end_monitor" to the SIGINT interrupt handler to give back the console
on Ctrl^C.
Cc: stable(a)vger.kernel.org
Fixes: 9f2cdcbbb90e7 ("ktest: Give console process a dedicated tty")
Signed-off-by: Steven Rostedt <rostedt(a)goodmis.org>
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 62823a4232ab..74801811372f 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -4201,6 +4201,9 @@ sub send_email {
}
sub cancel_test {
+ if ($monitor_cnt) {
+ end_monitor;
+ }
if ($email_when_canceled) {
my $name = get_test_name;
send_email("KTEST: Your [$name] test was cancelled",
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e5cfefa97bccf956ea0bb6464c1f6c84fd7a8d9f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818241553113(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e5cfefa97bcc ("block: fix scan partition for exclusively open device again")
0f77b29ad14e ("block: Revert "block: Do not reread partition table on exclusively open device"")
36369f46e917 ("block: Do not reread partition table on exclusively open device")
704b914f15fb ("blk-mq: move srcu from blk_mq_hw_ctx to request_queue")
2a904d00855f ("blk-mq: remove hctx_lock and hctx_unlock")
1e9c23034d7b ("blk-mq: move more plug handling from blk_mq_submit_bio into blk_add_rq_to_plug")
0c5bcc92d94a ("blk-mq: simplify the plug handling in blk_mq_submit_bio")
e16e506ccd67 ("block: merge disk_scan_partitions and blkdev_reread_part")
95febeb61bf8 ("block: fix missing queue put in error path")
b637108a4022 ("blk-mq: fix filesystem I/O request allocation")
b131f2011115 ("blk-mq: rename blk_attempt_bio_merge")
9ef4d0209cba ("blk-mq: add one API for waiting until quiesce is done")
900e08075202 ("block: move queue enter logic into blk_mq_submit_bio()")
c98cb5bbdab1 ("block: make bio_queue_enter() fast-path available inline")
71539717c105 ("block: split request allocation components into helpers")
a1cb65377e70 ("blk-mq: only try to run plug merge if request has same queue with incoming bio")
781dd830ec4f ("block: move RQF_ELV setting into allocators")
a2247f19ee1c ("block: Add independent access ranges support")
e94f68527a35 ("block: kill extra rcu lock/unlock in queue enter")
179ae84f7ef5 ("block: clean up blk_mq_submit_bio() merging")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5cfefa97bccf956ea0bb6464c1f6c84fd7a8d9f Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Fri, 17 Feb 2023 10:22:00 +0800
Subject: [PATCH] block: fix scan partition for exclusively open device again
As explained in commit 36369f46e917 ("block: Do not reread partition table
on exclusively open device"), reread partition on the device that is
exclusively opened by someone else is problematic.
This patch will make sure partition scan will only be proceed if current
thread open the device exclusively, or the device is not opened
exclusively, and in the later case, other scanners and exclusive openers
will be blocked temporarily until partition scan is done.
Fixes: 10c70d95c0f2 ("block: remove the bd_openers checks in blk_drop_partitions")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Link: https://lore.kernel.org/r/20230217022200.3092987-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/genhd.c b/block/genhd.c
index 820e27d88cbf..c0a73cd76bb1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -359,6 +359,7 @@ EXPORT_SYMBOL_GPL(disk_uevent);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
{
struct block_device *bdev;
+ int ret = 0;
if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
return -EINVAL;
@@ -368,11 +369,27 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
return -EBUSY;
set_bit(GD_NEED_PART_SCAN, &disk->state);
- bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
+ /*
+ * If the device is opened exclusively by current thread already, it's
+ * safe to scan partitons, otherwise, use bd_prepare_to_claim() to
+ * synchronize with other exclusive openers and other partition
+ * scanners.
+ */
+ if (!(mode & FMODE_EXCL)) {
+ ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions);
+ if (ret)
+ return ret;
+ }
+
+ bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- blkdev_put(bdev, mode);
- return 0;
+ ret = PTR_ERR(bdev);
+ else
+ blkdev_put(bdev, mode);
+
+ if (!(mode & FMODE_EXCL))
+ bd_abort_claiming(disk->part0, disk_scan_partitions);
+ return ret;
}
/**
@@ -494,6 +511,11 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
if (ret)
goto out_unregister_bdi;
+ /* Make sure the first partition scan will be proceed */
+ if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
+ !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+
bdev_add(disk->part0, ddev->devt);
if (get_capacity(disk))
disk_scan_partitions(disk, FMODE_READ);
diff --git a/block/ioctl.c b/block/ioctl.c
index 6dd49d877584..9c5f637ff153 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -528,7 +528,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
return -EACCES;
if (bdev_is_partition(bdev))
return -EINVAL;
- return disk_scan_partitions(bdev->bd_disk, mode & ~FMODE_EXCL);
+ return disk_scan_partitions(bdev->bd_disk, mode);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACETEARDOWN:
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e5cfefa97bccf956ea0bb6464c1f6c84fd7a8d9f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182415126165(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e5cfefa97bcc ("block: fix scan partition for exclusively open device again")
0f77b29ad14e ("block: Revert "block: Do not reread partition table on exclusively open device"")
36369f46e917 ("block: Do not reread partition table on exclusively open device")
704b914f15fb ("blk-mq: move srcu from blk_mq_hw_ctx to request_queue")
2a904d00855f ("blk-mq: remove hctx_lock and hctx_unlock")
1e9c23034d7b ("blk-mq: move more plug handling from blk_mq_submit_bio into blk_add_rq_to_plug")
0c5bcc92d94a ("blk-mq: simplify the plug handling in blk_mq_submit_bio")
e16e506ccd67 ("block: merge disk_scan_partitions and blkdev_reread_part")
95febeb61bf8 ("block: fix missing queue put in error path")
b637108a4022 ("blk-mq: fix filesystem I/O request allocation")
b131f2011115 ("blk-mq: rename blk_attempt_bio_merge")
9ef4d0209cba ("blk-mq: add one API for waiting until quiesce is done")
900e08075202 ("block: move queue enter logic into blk_mq_submit_bio()")
c98cb5bbdab1 ("block: make bio_queue_enter() fast-path available inline")
71539717c105 ("block: split request allocation components into helpers")
a1cb65377e70 ("blk-mq: only try to run plug merge if request has same queue with incoming bio")
781dd830ec4f ("block: move RQF_ELV setting into allocators")
a2247f19ee1c ("block: Add independent access ranges support")
e94f68527a35 ("block: kill extra rcu lock/unlock in queue enter")
179ae84f7ef5 ("block: clean up blk_mq_submit_bio() merging")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5cfefa97bccf956ea0bb6464c1f6c84fd7a8d9f Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Fri, 17 Feb 2023 10:22:00 +0800
Subject: [PATCH] block: fix scan partition for exclusively open device again
As explained in commit 36369f46e917 ("block: Do not reread partition table
on exclusively open device"), reread partition on the device that is
exclusively opened by someone else is problematic.
This patch will make sure partition scan will only be proceed if current
thread open the device exclusively, or the device is not opened
exclusively, and in the later case, other scanners and exclusive openers
will be blocked temporarily until partition scan is done.
Fixes: 10c70d95c0f2 ("block: remove the bd_openers checks in blk_drop_partitions")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Link: https://lore.kernel.org/r/20230217022200.3092987-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/genhd.c b/block/genhd.c
index 820e27d88cbf..c0a73cd76bb1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -359,6 +359,7 @@ EXPORT_SYMBOL_GPL(disk_uevent);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
{
struct block_device *bdev;
+ int ret = 0;
if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
return -EINVAL;
@@ -368,11 +369,27 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
return -EBUSY;
set_bit(GD_NEED_PART_SCAN, &disk->state);
- bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
+ /*
+ * If the device is opened exclusively by current thread already, it's
+ * safe to scan partitons, otherwise, use bd_prepare_to_claim() to
+ * synchronize with other exclusive openers and other partition
+ * scanners.
+ */
+ if (!(mode & FMODE_EXCL)) {
+ ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions);
+ if (ret)
+ return ret;
+ }
+
+ bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- blkdev_put(bdev, mode);
- return 0;
+ ret = PTR_ERR(bdev);
+ else
+ blkdev_put(bdev, mode);
+
+ if (!(mode & FMODE_EXCL))
+ bd_abort_claiming(disk->part0, disk_scan_partitions);
+ return ret;
}
/**
@@ -494,6 +511,11 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
if (ret)
goto out_unregister_bdi;
+ /* Make sure the first partition scan will be proceed */
+ if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
+ !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+
bdev_add(disk->part0, ddev->devt);
if (get_capacity(disk))
disk_scan_partitions(disk, FMODE_READ);
diff --git a/block/ioctl.c b/block/ioctl.c
index 6dd49d877584..9c5f637ff153 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -528,7 +528,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
return -EACCES;
if (bdev_is_partition(bdev))
return -EINVAL;
- return disk_scan_partitions(bdev->bd_disk, mode & ~FMODE_EXCL);
+ return disk_scan_partitions(bdev->bd_disk, mode);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACETEARDOWN:
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 80d2c29e09e663761c2778167a625b25ffe01b6f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182329223204(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
80d2c29e09e6 ("regulator: core: Use ktime_get_boottime() to determine how long a regulator was off")
218320fec294 ("regulator: core: Fix off-on-delay-us for always-on/boot-on regulators")
261f06315cf7 ("regulator: Flag uncontrollable regulators as always_on")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 80d2c29e09e663761c2778167a625b25ffe01b6f Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka(a)chromium.org>
Date: Thu, 23 Feb 2023 00:33:30 +0000
Subject: [PATCH] regulator: core: Use ktime_get_boottime() to determine how
long a regulator was off
For regulators with 'off-on-delay-us' the regulator framework currently
uses ktime_get() to determine how long the regulator has been off
before re-enabling it (after a delay if needed). A problem with using
ktime_get() is that it doesn't account for the time the system is
suspended. As a result a regulator with a longer 'off-on-delay' (e.g.
500ms) that was switched off during suspend might still incurr in a
delay on resume before it is re-enabled, even though the regulator
might have been off for hours. ktime_get_boottime() accounts for
suspend time, use it instead of ktime_get().
Fixes: a8ce7bd89689 ("regulator: core: Fix off_on_delay handling")
Cc: stable(a)vger.kernel.org # 5.13+
Signed-off-by: Matthias Kaehlcke <mka(a)chromium.org>
Reviewed-by: Stephen Boyd <swboyd(a)chromium.org>
Link: https://lore.kernel.org/r/20230223003301.v2.1.I9719661b8eb0a73b8c416f9c26cf…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index ae69e493913d..4fcd36055b02 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1584,7 +1584,7 @@ static int set_machine_constraints(struct regulator_dev *rdev)
}
if (rdev->desc->off_on_delay)
- rdev->last_off = ktime_get();
+ rdev->last_off = ktime_get_boottime();
/* If the constraints say the regulator should be on at this point
* and we have control then make sure it is enabled.
@@ -2673,7 +2673,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
* this regulator was disabled.
*/
ktime_t end = ktime_add_us(rdev->last_off, rdev->desc->off_on_delay);
- s64 remaining = ktime_us_delta(end, ktime_get());
+ s64 remaining = ktime_us_delta(end, ktime_get_boottime());
if (remaining > 0)
_regulator_delay_helper(remaining);
@@ -2912,7 +2912,7 @@ static int _regulator_do_disable(struct regulator_dev *rdev)
}
if (rdev->desc->off_on_delay)
- rdev->last_off = ktime_get();
+ rdev->last_off = ktime_get_boottime();
trace_regulator_disable_complete(rdev_get_name(rdev));
Hi,
This series of two patches fixes the issue introduced in
cf586021642d80 ("drm/i915/gt: Pipelined page migration") where,
as reported by Matt, in a chain of requests an error is reported
only if happens in the last request.
However Chris noticed that without ensuring exclusivity in the
locking we might end up in some deadlock. That's why patch 1
throttles for the ringspace in order to make sure that no one is
holding it.
Version 1 of this patch has been reviewed by matt and this
version is adding Chris exclusive locking.
Thanks Chris for this work.
Andi
Changelog
=========
v1 -> v2
- Add patch 1 for ensuring exclusive locking of the timeline
- Reword git commit of patch 2.
Andi Shyti (1):
drm/i915/gt: Make sure that errors are propagated through request
chains
Chris Wilson (1):
drm/i915: Throttle for ringspace prior to taking the timeline mutex
drivers/gpu/drm/i915/gt/intel_context.c | 41 +++++++++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_context.h | 2 ++
drivers/gpu/drm/i915/gt/intel_migrate.c | 39 +++++++++++++++++------
drivers/gpu/drm/i915/i915_request.c | 3 ++
4 files changed, 75 insertions(+), 10 deletions(-)
--
2.39.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e74a68468062d7ebd8ce17069e12ccc64cc6a58c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781822685041(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e74a68468062 ("arm64: Reset KASAN tag in copy_highpage with HW tags only")
d77e59a8fccd ("arm64: mte: Lock a page for MTE tag initialisation")
e059853d14ca ("arm64: mte: Fix/clarify the PG_mte_tagged semantics")
a8e5e5146ad0 ("arm64: mte: Avoid setting PG_mte_tagged if no tags cleared or restored")
20794545c146 ("arm64: kasan: Revert "arm64: mte: reset the page tag in page->flags"")
70c248aca9e7 ("mm: kasan: Skip unpoisoning of user pages")
da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()")
b1d0ec3a9a25 ("mm/shmem: convert shmem_getpage_gfp to use a folio")
72827e5c2bcb ("mm/shmem: convert shmem_alloc_and_acct_page to use a folio")
069d849cde3a ("mm/shmem: turn shmem_should_replace_page into shmem_should_replace_folio")
b7dd44a12cf2 ("mm/shmem: convert shmem_add_to_page_cache to take a folio")
dfe98499ef28 ("shmem: convert shmem_alloc_hugepage() to use vma_alloc_folio()")
e9d0ca922816 ("kasan, page_alloc: rework kasan_unpoison_pages call site")
7e3cbba65de2 ("kasan, page_alloc: move kernel_init_free_pages in post_alloc_hook")
89b271163328 ("kasan, page_alloc: move SetPageSkipKASanPoison in post_alloc_hook")
9294b1281d0a ("kasan, page_alloc: combine tag_clear_highpage calls in post_alloc_hook")
b42090ae6f3a ("kasan, page_alloc: merge kasan_alloc_pages into post_alloc_hook")
b8491b9052fe ("kasan, page_alloc: refactor init checks in post_alloc_hook")
1c0e5b24f117 ("kasan: only apply __GFP_ZEROTAGS when memory is zeroed")
7c13c163e036 ("kasan, page_alloc: merge kasan_free_pages into free_pages_prepare")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e74a68468062d7ebd8ce17069e12ccc64cc6a58c Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc(a)google.com>
Date: Tue, 14 Feb 2023 21:09:11 -0800
Subject: [PATCH] arm64: Reset KASAN tag in copy_highpage with HW tags only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
During page migration, the copy_highpage function is used to copy the
page data to the target page. If the source page is a userspace page
with MTE tags, the KASAN tag of the target page must have the match-all
tag in order to avoid tag check faults during subsequent accesses to the
page by the kernel. However, the target page may have been allocated in
a number of ways, some of which will use the KASAN allocator and will
therefore end up setting the KASAN tag to a non-match-all tag. Therefore,
update the target page's KASAN tag to match the source page.
We ended up unintentionally fixing this issue as a result of a bad
merge conflict resolution between commit e059853d14ca ("arm64: mte:
Fix/clarify the PG_mte_tagged semantics") and commit 20794545c146 ("arm64:
kasan: Revert "arm64: mte: reset the page tag in page->flags""), which
preserved a tag reset for PG_mte_tagged pages which was considered to be
unnecessary at the time. Because SW tags KASAN uses separate tag storage,
update the code to only reset the tags when HW tags KASAN is enabled.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Link: https://linux-review.googlesource.com/id/If303d8a709438d3ff5af5fd8570650583…
Reported-by: "Kuan-Ying Lee (李冠穎)" <Kuan-Ying.Lee(a)mediatek.com>
Cc: <stable(a)vger.kernel.org> # 6.1
Fixes: 20794545c146 ("arm64: kasan: Revert "arm64: mte: reset the page tag in page->flags"")
Reviewed-by: Andrey Konovalov <andreyknvl(a)gmail.com>
Link: https://lore.kernel.org/r/20230215050911.1433132-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 8dd5a8fe64b4..4aadcfb01754 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -22,7 +22,8 @@ void copy_highpage(struct page *to, struct page *from)
copy_page(kto, kfrom);
if (system_supports_mte() && page_mte_tagged(from)) {
- page_kasan_tag_reset(to);
+ if (kasan_hw_tags_enabled())
+ page_kasan_tag_reset(to);
/* It's a new page, shouldn't have been tagged yet */
WARN_ON_ONCE(!try_page_mte_tagging(to));
mte_copy_page_tags(kto, kfrom);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 33e2c595e2e4016991ead44933a29d1ef93d5f26
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781821949666(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
33e2c595e2e4 ("ARM: dts: exynos: correct TMU phandle in Exynos5250")
7e86ef5cc896 ("ARM: dts: exynos: Override thermal by label in Exynos5250")
be00300147ae ("ARM: dts: exynos: Move pmu and timer nodes out of soc")
670734f55810 ("ARM: dts: exynos: Add all CPUs in cooling maps")
eb9e16d8573e ("ARM: dts: exynos: Convert exynos5250.dtsi to opp-v2 bindings")
672f33198bee ("arm: dts: exynos: Add missing cooling device properties for CPUs")
c769eaf7a85d ("ARM: dts: exynos: Split Trats2 DTS in preparation for Midas boards")
cd6f55457eb4 ("ARM: dts: exynos: Remove "cooling-{min|max}-level" for CPU nodes")
73a901d09a21 ("ARM: dts: exynos: Add soc node to exynos4")
9097b4bd9fce ("ARM: dts: exynos: Use pmu label in exynos4412")
8ce5c46d0230 ("ARM: dts: exynos: Use labels instead of full paths in exynos4412-trats2")
e030be47ac48 ("ARM: dts: exynos: Use labels instead of full paths in exynos4210")
e58864515240 ("ARM: dts: exynos: Use pinctrl labels in exynos4412-pinctrl")
88c166cec136 ("ARM: dts: exynos: Use pinctrl labels in exynos4210-pinctrl")
3be1ecf291df ("ARM: dts: exynos: Use lower case hex addresses in node unit addresses")
6351fe9375f4 ("ARM: dts: exynos: Add G3D power domain to Exynos5250")
c0d40bb3ac71 ("ARM: dts: exynos: Add audio power domain to Exynos5250")
9fbb4c096323 ("ARM: dts: exynos: Fix power domain node names for Exynos5250")
528832d4c01a ("ARM: dts: exynos: Add audio power domain support to Exynos542x SoCs")
cdd745c8c76b ("ARM: dts: exynos: Remove duplicate definitions of SSS nodes for Exynos5")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e2c595e2e4016991ead44933a29d1ef93d5f26 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:38 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos5250
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle.
Cc: <stable(a)vger.kernel.org>
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Link: https://lore.kernel.org/r/20230209105841.779596-3-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 4708dcd575a7..01751706ff96 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -1107,7 +1107,7 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
+ thermal-sensors = <&tmu>;
cooling-maps {
map0 {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 33e2c595e2e4016991ead44933a29d1ef93d5f26
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182193224134(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
33e2c595e2e4 ("ARM: dts: exynos: correct TMU phandle in Exynos5250")
7e86ef5cc896 ("ARM: dts: exynos: Override thermal by label in Exynos5250")
be00300147ae ("ARM: dts: exynos: Move pmu and timer nodes out of soc")
670734f55810 ("ARM: dts: exynos: Add all CPUs in cooling maps")
eb9e16d8573e ("ARM: dts: exynos: Convert exynos5250.dtsi to opp-v2 bindings")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e2c595e2e4016991ead44933a29d1ef93d5f26 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:38 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos5250
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle.
Cc: <stable(a)vger.kernel.org>
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Link: https://lore.kernel.org/r/20230209105841.779596-3-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 4708dcd575a7..01751706ff96 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -1107,7 +1107,7 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
+ thermal-sensors = <&tmu>;
cooling-maps {
map0 {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 33e2c595e2e4016991ead44933a29d1ef93d5f26
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818219214813(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
33e2c595e2e4 ("ARM: dts: exynos: correct TMU phandle in Exynos5250")
7e86ef5cc896 ("ARM: dts: exynos: Override thermal by label in Exynos5250")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e2c595e2e4016991ead44933a29d1ef93d5f26 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:38 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos5250
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle.
Cc: <stable(a)vger.kernel.org>
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Link: https://lore.kernel.org/r/20230209105841.779596-3-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 4708dcd575a7..01751706ff96 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -1107,7 +1107,7 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
+ thermal-sensors = <&tmu>;
cooling-maps {
map0 {
The interconnect framework currently expects that providers are only
removed when there are no users and after all nodes have been removed.
There is currently nothing that guarantees this to be the case and the
framework does not do any reference counting, but refusing to remove the
provider is never correct as that would leave a dangling pointer to a
resource that is about to be released in the global provider list (e.g.
accessible through debugfs).
Replace the current sanity checks with WARN_ON() so that the provider is
always removed.
Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API")
Cc: stable(a)vger.kernel.org # 5.1: 680f8666baf6: interconnect: Make icc_provider_del() return void
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/interconnect/core.c | 14 ++------------
1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 5217f449eeec..cabb6f5df83e 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -1065,18 +1065,8 @@ EXPORT_SYMBOL_GPL(icc_provider_add);
void icc_provider_del(struct icc_provider *provider)
{
mutex_lock(&icc_lock);
- if (provider->users) {
- pr_warn("interconnect provider still has %d users\n",
- provider->users);
- mutex_unlock(&icc_lock);
- return;
- }
-
- if (!list_empty(&provider->nodes)) {
- pr_warn("interconnect provider still has nodes\n");
- mutex_unlock(&icc_lock);
- return;
- }
+ WARN_ON(provider->users);
+ WARN_ON(!list_empty(&provider->nodes));
list_del(&provider->provider_list);
mutex_unlock(&icc_lock);
--
2.39.2
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 408ab6786dbf6dd696488054c9559681112ef994
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182153823(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
408ab6786dbf ("ARM: dts: exynos: correct TMU phandle in Exynos4210")
1708f56081e2 ("ARM: dts: exynos: Override thermal by label in Exynos4210")
c31b11c3eb4d ("ARM: dts: exynos: Fix language typo and indentation")
9a8665ab920b ("ARM: dts: exynos: Add soc node to exynos4210")
e030be47ac48 ("ARM: dts: exynos: Use labels instead of full paths in exynos4210")
88c166cec136 ("ARM: dts: exynos: Use pinctrl labels in exynos4210-pinctrl")
3be1ecf291df ("ARM: dts: exynos: Use lower case hex addresses in node unit addresses")
6351fe9375f4 ("ARM: dts: exynos: Add G3D power domain to Exynos5250")
c0d40bb3ac71 ("ARM: dts: exynos: Add audio power domain to Exynos5250")
9fbb4c096323 ("ARM: dts: exynos: Fix power domain node names for Exynos5250")
528832d4c01a ("ARM: dts: exynos: Add audio power domain support to Exynos542x SoCs")
cdd745c8c76b ("ARM: dts: exynos: Remove duplicate definitions of SSS nodes for Exynos5")
8dccafaa281a ("arm: dts: fix unit-address leading 0s")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 408ab6786dbf6dd696488054c9559681112ef994 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:37 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos4210
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle. Since thermal-sensors property is
already defined in included exynos4-cpu-thermal.dtsi, drop it from
exynos4210.dtsi to fix the error and remoev redundancy.
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20230209105841.779596-2-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 2c25cc37934e..f8c6c5d1906a 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -393,7 +393,6 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
};
&gic {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 408ab6786dbf6dd696488054c9559681112ef994
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182152118124(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
408ab6786dbf ("ARM: dts: exynos: correct TMU phandle in Exynos4210")
1708f56081e2 ("ARM: dts: exynos: Override thermal by label in Exynos4210")
c31b11c3eb4d ("ARM: dts: exynos: Fix language typo and indentation")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 408ab6786dbf6dd696488054c9559681112ef994 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:37 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos4210
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle. Since thermal-sensors property is
already defined in included exynos4-cpu-thermal.dtsi, drop it from
exynos4210.dtsi to fix the error and remoev redundancy.
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20230209105841.779596-2-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 2c25cc37934e..f8c6c5d1906a 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -393,7 +393,6 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
};
&gic {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 408ab6786dbf6dd696488054c9559681112ef994
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781821519722(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
408ab6786dbf ("ARM: dts: exynos: correct TMU phandle in Exynos4210")
1708f56081e2 ("ARM: dts: exynos: Override thermal by label in Exynos4210")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 408ab6786dbf6dd696488054c9559681112ef994 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:37 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos4210
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle. Since thermal-sensors property is
already defined in included exynos4-cpu-thermal.dtsi, drop it from
exynos4210.dtsi to fix the error and remoev redundancy.
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20230209105841.779596-2-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 2c25cc37934e..f8c6c5d1906a 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -393,7 +393,6 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
};
&gic {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f0f0cfdc3a024e21161714f2e05f0df3b84d42ad
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678181773130199(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f0f0cfdc3a02 ("mtd: spi-nor: Fix shift-out-of-bounds in spi_nor_set_erase_type")
cb481b92d10f ("mtd: spi-nor: Move SFDP logic out of the core")
a0900d0195d2 ("mtd: spi-nor: Prepare core / manufacturer code split")
69228a0224c5 ("Merge tag 'mtk-mtd-spi-move' into spi-nor/next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f0f0cfdc3a024e21161714f2e05f0df3b84d42ad Mon Sep 17 00:00:00 2001
From: Louis Rannou <lrannou(a)baylibre.com>
Date: Fri, 3 Feb 2023 09:07:54 +0200
Subject: [PATCH] mtd: spi-nor: Fix shift-out-of-bounds in
spi_nor_set_erase_type
spi_nor_set_erase_type() was used either to set or to mask out an erase
type. When we used it to mask out an erase type a shift-out-of-bounds
was hit:
UBSAN: shift-out-of-bounds in drivers/mtd/spi-nor/core.c:2237:24
shift exponent 4294967295 is too large for 32-bit type 'int'
The setting of the size_{shift, mask} and of the opcode are unnecessary
when the erase size is zero, as throughout the code just the erase size
is considered to determine whether an erase type is supported or not.
Setting the opcode to 0xFF was wrong too as nobody guarantees that 0xFF
is an unused opcode. Thus when masking out an erase type, just set the
erase size to zero. This will fix the shift-out-of-bounds.
Fixes: 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories")
Cc: stable(a)vger.kernel.org
Reported-by: Alexander Stein <Alexander.Stein(a)tq-group.com>
Signed-off-by: Louis Rannou <lrannou(a)baylibre.com>
Tested-by: Alexander Stein <Alexander.Stein(a)tq-group.com>
Link: https://lore.kernel.org/r/20230203070754.50677-1-tudor.ambarus@linaro.org
[ta: refine changes, new commit message, fix compilation error]
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)linaro.org>
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index b500655f7937..3012758bbafa 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2026,6 +2026,15 @@ void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
erase->size_mask = (1 << erase->size_shift) - 1;
}
+/**
+ * spi_nor_mask_erase_type() - mask out a SPI NOR erase type
+ * @erase: pointer to a structure that describes a SPI NOR erase type
+ */
+void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase)
+{
+ erase->size = 0;
+}
+
/**
* spi_nor_init_uniform_erase_map() - Initialize uniform erase map
* @map: the erase map of the SPI NOR
diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
index f6d012e1f681..25423225c29d 100644
--- a/drivers/mtd/spi-nor/core.h
+++ b/drivers/mtd/spi-nor/core.h
@@ -681,6 +681,7 @@ void spi_nor_set_pp_settings(struct spi_nor_pp_command *pp, u8 opcode,
void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
u8 opcode);
+void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase);
struct spi_nor_erase_region *
spi_nor_region_next(struct spi_nor_erase_region *region);
void spi_nor_init_uniform_erase_map(struct spi_nor_erase_map *map,
diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c
index 3acc01c3a900..526c5d57b905 100644
--- a/drivers/mtd/spi-nor/sfdp.c
+++ b/drivers/mtd/spi-nor/sfdp.c
@@ -875,7 +875,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
*/
for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++)
if (!(regions_erase_type & BIT(erase[i].idx)))
- spi_nor_set_erase_type(&erase[i], 0, 0xFF);
+ spi_nor_mask_erase_type(&erase[i]);
return 0;
}
@@ -1089,7 +1089,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor,
erase_type[i].opcode = (dwords[SFDP_DWORD(2)] >>
erase_type[i].idx * 8) & 0xFF;
else
- spi_nor_set_erase_type(&erase_type[i], 0u, 0xFF);
+ spi_nor_mask_erase_type(&erase_type[i]);
}
/*
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b5d3ae202fbfe055aa2a8ae8524531ee1dcab717
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16780996985892(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
b5d3ae202fbf ("io_uring: handle TIF_NOTIFY_RESUME when checking for task_work")
7cfe7a09489c ("io_uring: clear TIF_NOTIFY_SIGNAL if set and task_work not available")
46a525e199e4 ("io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL")
c0e0d6ba25f1 ("io_uring: add IORING_SETUP_DEFER_TASKRUN")
b4c98d59a787 ("io_uring: introduce io_has_work")
78a861b94959 ("io_uring: add sync cancelation API through io_uring_register()")
c34398a8c018 ("io_uring: remove __io_req_task_work_add")
ed5ccb3beeba ("io_uring: remove priority tw list optimisation")
625d38b3fd34 ("io_uring: improve io_run_task_work()")
4a0fef62788b ("io_uring: optimize io_uring_task layout")
253993210bd8 ("io_uring: introduce locking helpers for CQE posting")
305bef988708 ("io_uring: hide eventfd assumptions in eventfd paths")
affa87db9010 ("io_uring: fix multi ctx cancellation")
d9dee4302a7c ("io_uring: remove ->flush_cqes optimisation")
a830ffd28780 ("io_uring: move io_eventfd_signal()")
9046c6415be6 ("io_uring: reshuffle io_uring/io_uring.h")
d142c3ec8d16 ("io_uring: remove extra io_commit_cqring()")
ab1c84d855cf ("io_uring: make io_uring_types.h public")
68494a65d0e2 ("io_uring: introduce io_req_cqe_overflow()")
faf88dde060f ("io_uring: don't inline __io_get_cqe()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5d3ae202fbfe055aa2a8ae8524531ee1dcab717 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Tue, 24 Jan 2023 08:24:25 -0700
Subject: [PATCH] io_uring: handle TIF_NOTIFY_RESUME when checking for
task_work
If TIF_NOTIFY_RESUME is set, then we need to call resume_user_mode_work()
for PF_IO_WORKER threads. They never return to usermode, hence never get
a chance to process any items that are marked by this flag. Most notably
this includes the final put of files, but also any throttling markers set
by block cgroups.
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index c68edf9872a5..d58cfe062da9 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -3,6 +3,7 @@
#include <linux/errno.h>
#include <linux/lockdep.h>
+#include <linux/resume_user_mode.h>
#include <linux/io_uring_types.h>
#include <uapi/linux/eventpoll.h>
#include "io-wq.h"
@@ -274,6 +275,13 @@ static inline int io_run_task_work(void)
*/
if (test_thread_flag(TIF_NOTIFY_SIGNAL))
clear_notify_signal();
+ /*
+ * PF_IO_WORKER never returns to userspace, so check here if we have
+ * notify work that needs processing.
+ */
+ if (current->flags & PF_IO_WORKER &&
+ test_thread_flag(TIF_NOTIFY_RESUME))
+ resume_user_mode_work(NULL);
if (task_work_pending(current)) {
__set_current_state(TASK_RUNNING);
task_work_run();
When ucsi_init() fails, ucsi->connector is NULL, yet in case of
ucsi_acpi we may still get events which cause the ucs_acpi code to call
ucsi_connector_change(), which then derefs the NULL ucsi->connector
pointer.
Fix this by not setting ucsi->ntfy inside ucsi_init() until ucsi_init()
has succeeded, so that ucsi_connector_change() ignores the events
because UCSI_ENABLE_NTFY_CONNECTOR_CHANGE is not set in the ntfy mask.
Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes in v2:
-Delay setting ucsi->ntfy in ucsi_init() instead of adding a NULL pointer
check to ucsi_connector_change()
---
drivers/usb/typec/ucsi/ucsi.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 1cf8947c6d66..8cbbb002fefe 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -1205,7 +1205,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
static int ucsi_init(struct ucsi *ucsi)
{
struct ucsi_connector *con;
- u64 command;
+ u64 command, ntfy;
int ret;
int i;
@@ -1217,8 +1217,8 @@ static int ucsi_init(struct ucsi *ucsi)
}
/* Enable basic notifications */
- ucsi->ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR;
- command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
+ ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR;
+ command = UCSI_SET_NOTIFICATION_ENABLE | ntfy;
ret = ucsi_send_command(ucsi, command, NULL, 0);
if (ret < 0)
goto err_reset;
@@ -1250,12 +1250,13 @@ static int ucsi_init(struct ucsi *ucsi)
}
/* Enable all notifications */
- ucsi->ntfy = UCSI_ENABLE_NTFY_ALL;
- command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
+ ntfy = UCSI_ENABLE_NTFY_ALL;
+ command = UCSI_SET_NOTIFICATION_ENABLE | ntfy;
ret = ucsi_send_command(ucsi, command, NULL, 0);
if (ret < 0)
goto err_unregister;
+ ucsi->ntfy = ntfy;
return 0;
err_unregister:
--
2.39.1
Commit 130a96d698d7 ("usb: typec: ucsi: acpi: Increase command
completion timeout value") increased the timeout from 5 seconds
to 60 seconds due to issues related to alternate mode discovery.
After the alternate mode discovery switch to polled mode
the timeout was reduced, but instead of being set back to
5 seconds it was reduced to 1 second.
This is causing problems when using a Lenovo ThinkPad X1 yoga gen7
connected over Type-C to a LG 27UL850-W (charging DP over Type-C).
When the monitor is already connected at boot the following error
is logged: "PPM init failed (-110)", /sys/class/typec is empty and
on unplugging the NULL pointer deref fixed earlier in this series
happens.
When the monitor is connected after boot the following error
is logged instead: "GET_CONNECTOR_STATUS failed (-110)".
Setting the timeout back to 5 seconds fixes both cases.
Fixes: e08065069fc7 ("usb: typec: ucsi: acpi: Reduce the command completion timeout")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/usb/typec/ucsi/ucsi_acpi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index ce0c8ef80c04..62206a6b8ea7 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -78,7 +78,7 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
if (ret)
goto out_clear_bit;
- if (!wait_for_completion_timeout(&ua->complete, HZ))
+ if (!wait_for_completion_timeout(&ua->complete, 5 * HZ))
ret = -ETIMEDOUT;
out_clear_bit:
--
2.39.1
ucsi_init() which runs from a workqueue sets ucsi->connector and
on an error will clear it again.
ucsi->connector gets dereferenced by ucsi_resume(), this checks for
ucsi->connector being NULL in case ucsi_init() has not finished yet;
or in case ucsi_init() has failed.
ucsi_init() setting ucsi->connector and then clearing it again on
an error creates a race where the check in ucsi_resume() may pass,
only to have ucsi->connector free-ed underneath it when ucsi_init()
hits an error.
Fix this race by making ucsi_init() store the connector array in
a local variable and only assign it to ucsi->connector on success.
Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/usb/typec/ucsi/ucsi.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 8cbbb002fefe..15a2c91581a8 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -1039,9 +1039,8 @@ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con)
return NULL;
}
-static int ucsi_register_port(struct ucsi *ucsi, int index)
+static int ucsi_register_port(struct ucsi *ucsi, int index, struct ucsi_connector *con)
{
- struct ucsi_connector *con = &ucsi->connector[index];
struct typec_capability *cap = &con->typec_cap;
enum typec_accessory *accessory = cap->accessory;
enum usb_role u_role = USB_ROLE_NONE;
@@ -1204,7 +1203,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
*/
static int ucsi_init(struct ucsi *ucsi)
{
- struct ucsi_connector *con;
+ struct ucsi_connector *con, *connector;
u64 command, ntfy;
int ret;
int i;
@@ -1235,16 +1234,15 @@ static int ucsi_init(struct ucsi *ucsi)
}
/* Allocate the connectors. Released in ucsi_unregister() */
- ucsi->connector = kcalloc(ucsi->cap.num_connectors + 1,
- sizeof(*ucsi->connector), GFP_KERNEL);
- if (!ucsi->connector) {
+ connector = kcalloc(ucsi->cap.num_connectors + 1, sizeof(*connector), GFP_KERNEL);
+ if (!connector) {
ret = -ENOMEM;
goto err_reset;
}
/* Register all connectors */
for (i = 0; i < ucsi->cap.num_connectors; i++) {
- ret = ucsi_register_port(ucsi, i);
+ ret = ucsi_register_port(ucsi, i, &connector[i]);
if (ret)
goto err_unregister;
}
@@ -1256,11 +1254,12 @@ static int ucsi_init(struct ucsi *ucsi)
if (ret < 0)
goto err_unregister;
+ ucsi->connector = connector;
ucsi->ntfy = ntfy;
return 0;
err_unregister:
- for (con = ucsi->connector; con->port; con++) {
+ for (con = connector; con->port; con++) {
ucsi_unregister_partner(con);
ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON);
ucsi_unregister_port_psy(con);
@@ -1269,10 +1268,7 @@ static int ucsi_init(struct ucsi *ucsi)
typec_unregister_port(con->port);
con->port = NULL;
}
-
- kfree(ucsi->connector);
- ucsi->connector = NULL;
-
+ kfree(connector);
err_reset:
memset(&ucsi->cap, 0, sizeof(ucsi->cap));
ucsi_reset_ppm(ucsi);
--
2.39.1
If the controller is not marked as cache coherent, then kernel will
try to ensure coherency during dma-ops and that may cause data corruption.
So, mark the PCIe node as dma-coherent as the devices on PCIe bus are
cache coherent.
Cc: stable(a)vger.kernel.org
Fixes: 92e0ee9f83b3 ("arm64: dts: qcom: sc7280: Add PCIe and PHY related node")
Signed-off-by: Krishna chaitanya chundru <quic_krichai(a)quicinc.com>
---
changes since v1:
- Updated the commit text.
---
---
arch/arm64/boot/dts/qcom/sc7280.dtsi | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index bdcb749..8f4ab6b 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -2131,6 +2131,8 @@
pinctrl-names = "default";
pinctrl-0 = <&pcie1_clkreq_n>;
+ dma-coherent;
+
iommus = <&apps_smmu 0x1c80 0x1>;
iommu-map = <0x0 &apps_smmu 0x1c80 0x1>,
--
2.7.4
The slice IDs for CVPFW, CPUSS1 and CPUWHT currently overflow the 32bit
LLCC config registers, which means it is writing beyond the upper limit
of the ATTR0_CFGn and ATTR1_CFGn range of registers. But the most obvious
impact is the fact that the mentioned slices do not get configured at all,
which will result in reduced performance. Fix that by using the slice ID
values taken from the latest LLCC SC table.
Fixes: ec69dfbdc426 ("soc: qcom: llcc: Add sc8180x and sc8280xp configurations")
Cc: stable(a)vger.kernel.org # 5.19+
Signed-off-by: Abel Vesa <abel.vesa(a)linaro.org>
Tested-by: Juerg Haefliger <juerg.haefliger(a)canonical.com>
Reviewed-by: Sai Prakash Ranjan <quic_saipraka(a)quicinc.com>
Acked-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Reviewed-by: Johan Hovold <johan+linaro(a)kernel.org>
---
The v3 is here:
https://lore.kernel.org/all/20230219165701.2557446-1-abel.vesa@linaro.org/
Changes since v3:
* explicitly mentioned in the commit message the fact that some random
registers might get written and the fact that there is an impact
on performance.
* Added Johan's R-b tag
Changes since v2:
* specifically mentioned the 3 slice IDs that are being fixed and
what is happening without this patch
* added stabke Cc line
* added Juerg's T-b tag
* added Sai's R-b tag
* added Konrad's A-b tag
Changes since v1:
* dropped the LLCC_GPU and LLCC_WRCACHE max_cap changes
* took the new values from documentatio this time rather than
downstream kernel
drivers/soc/qcom/llcc-qcom.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
index 23ce2f78c4ed..26efe12012a0 100644
--- a/drivers/soc/qcom/llcc-qcom.c
+++ b/drivers/soc/qcom/llcc-qcom.c
@@ -191,9 +191,9 @@ static const struct llcc_slice_config sc8280xp_data[] = {
{ LLCC_CVP, 28, 512, 3, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
{ LLCC_APTCM, 30, 1024, 3, 1, 0x0, 0x1, 1, 0, 0, 1, 0, 0 },
{ LLCC_WRCACHE, 31, 1024, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 },
- { LLCC_CVPFW, 32, 512, 1, 0, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
- { LLCC_CPUSS1, 33, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
- { LLCC_CPUHWT, 36, 512, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 },
+ { LLCC_CVPFW, 17, 512, 1, 0, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
+ { LLCC_CPUSS1, 3, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
+ { LLCC_CPUHWT, 5, 512, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 },
};
static const struct llcc_slice_config sdm845_data[] = {
--
2.34.1
From: "andrew.yang" <andrew.yang(a)mediatek.com>
commit 3f98c9a62c338bbe06a215c9491e6166ea39bf82 upstream.
damon_get_folio() would always increase folio _refcount and
folio_isolate_lru() would increase folio _refcount if the folio's lru flag
is set.
If an unevictable folio isolated successfully, there will be two more
_refcount. The one from folio_isolate_lru() will be decreased in
folio_puback_lru(), but the other one from damon_get_folio() will be left
behind. This causes a pin page.
Whatever the case, the _refcount from damon_get_folio() should be
decreased.
Link: https://lkml.kernel.org/r/20230222064223.6735-1-andrew.yang@mediatek.com
Fixes: 57223ac29584 ("mm/damon/paddr: support the pageout scheme")
Signed-off-by: andrew.yang <andrew.yang(a)mediatek.com>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [5.16.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
This is a backport of the mainline patch for v6.1.y and v6.2.y.
mm/damon/paddr.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index e1a4315c4be6..402d30b37aba 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -219,12 +219,11 @@ static unsigned long damon_pa_pageout(struct damon_region *r)
put_page(page);
continue;
}
- if (PageUnevictable(page)) {
+ if (PageUnevictable(page))
putback_lru_page(page);
- } else {
+ else
list_add(&page->lru, &page_list);
- put_page(page);
- }
+ put_page(page);
}
applied = reclaim_pages(&page_list);
cond_resched();
--
2.25.1
HELLO,
AM PASCAL DONALD,A CITIZEN OF ENGLAND.I WOULD LIKE TO APPLY THROUGH
THIS MEDIUM FOR YOUR MAXIMUM COOPERATION TO SECURE AN OPPORTUNITY TO
TRANSFER A SUBSTANTIAL FUND FOR AN INVESTMENT IN YOUR COUNTRY.
I WILL BE LOOKING FORWARD TO HEAR FROM YOU URGENTLY.
PLEASE SEND EVERY REPLY TO MY DIRECT EMAIL AS STATED BELOW:
REGARDS,
MR.PASCAL DONALD.(ESQ)
dr.pascaldonald(a)outlook.com
The patch titled
Subject: mm/damon/paddr: fix folio_nr_pages() after folio_put() in damon_pa_mark_accessed_or_deactivate()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-paddr-fix-folio_nr_pages-after-folio_put-in-damon_pa_mark_accessed_or_deactivate.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/paddr: fix folio_nr_pages() after folio_put() in damon_pa_mark_accessed_or_deactivate()
Date: Sat, 4 Mar 2023 19:39:49 +0000
damon_pa_mark_accessed_or_deactivate() is accessing a folio via
folio_nr_pages() after folio_put() for the folio has invoked. Fix it.
Link: https://lkml.kernel.org/r/20230304193949.296391-3-sj@kernel.org
Fixes: f70da5ee8fe1 ("mm/damon: convert damon_pa_mark_accessed_or_deactivate() to use folios")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [6.2.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/damon/paddr.c~mm-damon-paddr-fix-folio_nr_pages-after-folio_put-in-damon_pa_mark_accessed_or_deactivate
+++ a/mm/damon/paddr.c
@@ -280,8 +280,8 @@ static inline unsigned long damon_pa_mar
folio_mark_accessed(folio);
else
folio_deactivate(folio);
- folio_put(folio);
applied += folio_nr_pages(folio);
+ folio_put(folio);
}
return applied * PAGE_SIZE;
}
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-paddr-fix-folio_size-call-after-folio_put-in-damon_pa_young.patch
mm-damon-paddr-fix-folio_nr_pages-after-folio_put-in-damon_pa_mark_accessed_or_deactivate.patch
This patch introduce a new internal flag per lkb value to handle
internal flags which are handled not on wire. The current lkb internal
flags stored as lkb->lkb_flags are split in upper and lower bits, the
lower bits are used to share internal flags over wire for other cluster
wide lkb copies on other nodes.
In commit 61bed0baa4db ("fs: dlm: use a non-static queue for callbacks")
we introduced a new internal flag for pending callbacks for the dlm
callback queue. This flag is protected by the lkb->lkb_cb_lock lock.
This patch overlooked that on dlm receive path and the mentioned upper
and lower bits, that dlm will read the flags, mask it and write it
back. As example receive_flags() in fs/dlm/lock.c. This flag
manipulation is not done atomically and is not protected by
lkb->lkb_cb_lock. This has unknown side effects of the current callback
handling.
In future we should move to set/clear/test bit functionality and avoid
read, mask and writing back flag values. In later patches we will move
the upper parts to the new introduced internal lkb flags which are not
shared between other cluster nodes to the new non shared internal flag
field to avoid similar issues.
Cc: stable(a)vger.kernel.org
Fixes: 61bed0baa4db ("fs: dlm: use a non-static queue for callbacks")
Reported-by: Bob Peterson <rpeterso(a)redhat.com>
Signed-off-by: Alexander Aring <aahringo(a)redhat.com>
---
changes since v2:
- change the name of DLM_IFL_CB_PENDING to DLM_IFL_CB_PENDING_BIT so
the user of this flag knows it has a BIT meaning.
fs/dlm/ast.c | 9 ++++-----
fs/dlm/dlm_internal.h | 5 ++++-
fs/dlm/user.c | 2 +-
3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 26fef9945cc9..39805aea3336 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -45,7 +45,7 @@ void dlm_purge_lkb_callbacks(struct dlm_lkb *lkb)
kref_put(&cb->ref, dlm_release_callback);
}
- lkb->lkb_flags &= ~DLM_IFL_CB_PENDING;
+ clear_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags);
/* invalidate */
dlm_callback_set_last_ptr(&lkb->lkb_last_cast, NULL);
@@ -103,10 +103,9 @@ int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
cb->sb_status = status;
cb->sb_flags = (sbflags & 0x000000FF);
kref_init(&cb->ref);
- if (!(lkb->lkb_flags & DLM_IFL_CB_PENDING)) {
- lkb->lkb_flags |= DLM_IFL_CB_PENDING;
+ if (!test_and_set_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags))
rv = DLM_ENQUEUE_CALLBACK_NEED_SCHED;
- }
+
list_add_tail(&cb->list, &lkb->lkb_callbacks);
if (flags & DLM_CB_CAST)
@@ -209,7 +208,7 @@ void dlm_callback_work(struct work_struct *work)
spin_lock(&lkb->lkb_cb_lock);
rv = dlm_dequeue_lkb_callback(lkb, &cb);
if (rv == DLM_DEQUEUE_CALLBACK_EMPTY) {
- lkb->lkb_flags &= ~DLM_IFL_CB_PENDING;
+ clear_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags);
spin_unlock(&lkb->lkb_cb_lock);
break;
}
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index ab1a55337a6e..9bf70962bc49 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -211,7 +211,9 @@ struct dlm_args {
#endif
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
-#define DLM_IFL_CB_PENDING 0x04000000
+
+#define DLM_IFL_CB_PENDING_BIT 0
+
/* least significant 2 bytes are message changed, they are full transmitted
* but at receive side only the 2 bytes LSB will be set.
*
@@ -246,6 +248,7 @@ struct dlm_lkb {
uint32_t lkb_exflags; /* external flags from caller */
uint32_t lkb_sbflags; /* lksb flags */
uint32_t lkb_flags; /* internal flags */
+ unsigned long lkb_iflags; /* internal flags */
uint32_t lkb_lvbseq; /* lvb sequence number */
int8_t lkb_status; /* granted, waiting, convert */
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 35129505ddda..688a480879e4 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -884,7 +884,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
goto try_another;
case DLM_DEQUEUE_CALLBACK_LAST:
list_del_init(&lkb->lkb_cb_list);
- lkb->lkb_flags &= ~DLM_IFL_CB_PENDING;
+ clear_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags);
break;
case DLM_DEQUEUE_CALLBACK_SUCCESS:
break;
--
2.31.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f58680085478dd292435727210122960d38e8014
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678099737150217(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
f58680085478 ("io_uring: add reschedule point to handle_tw_list()")
c6dd763c2460 ("io_uring: trace task_work_run")
3a0c037b0e16 ("io_uring: batch task_work")
f88262e60bb9 ("io_uring: lockless task list")
ed5ccb3beeba ("io_uring: remove priority tw list optimisation")
4a0fef62788b ("io_uring: optimize io_uring_task layout")
253993210bd8 ("io_uring: introduce locking helpers for CQE posting")
305bef988708 ("io_uring: hide eventfd assumptions in eventfd paths")
d9dee4302a7c ("io_uring: remove ->flush_cqes optimisation")
a830ffd28780 ("io_uring: move io_eventfd_signal()")
9046c6415be6 ("io_uring: reshuffle io_uring/io_uring.h")
d142c3ec8d16 ("io_uring: remove extra io_commit_cqring()")
68494a65d0e2 ("io_uring: introduce io_req_cqe_overflow()")
faf88dde060f ("io_uring: don't inline __io_get_cqe()")
d245bca6375b ("io_uring: don't expose io_fill_cqe_aux()")
6a02e4be8187 ("io_uring: inline ->registered_rings")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f58680085478dd292435727210122960d38e8014 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Fri, 27 Jan 2023 09:50:31 -0700
Subject: [PATCH] io_uring: add reschedule point to handle_tw_list()
If CONFIG_PREEMPT_NONE is set and the task_work chains are long, we
could be running into issues blocking others for too long. Add a
reschedule check in handle_tw_list(), and flush the ctx if we need to
reschedule.
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index fab581a31dc1..acf6d9680d76 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1179,10 +1179,16 @@ static unsigned int handle_tw_list(struct llist_node *node,
/* if not contended, grab and improve batching */
*locked = mutex_trylock(&(*ctx)->uring_lock);
percpu_ref_get(&(*ctx)->refs);
- }
+ } else if (!*locked)
+ *locked = mutex_trylock(&(*ctx)->uring_lock);
req->io_task_work.func(req, locked);
node = next;
count++;
+ if (unlikely(need_resched())) {
+ ctx_flush_and_put(*ctx, locked);
+ *ctx = NULL;
+ cond_resched();
+ }
}
return count;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 8932c32c3053accd50702b36e944ac2016cd103c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678128552113160(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
8932c32c3053 ("irqdomain: Fix domain registration race")
20c36ce2164f ("irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent")
3a1d24ca9573 ("irq/irqdomain: Fix comment typo")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8932c32c3053accd50702b36e944ac2016cd103c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:49 +0100
Subject: [PATCH] irqdomain: Fix domain registration race
Hierarchical domains created using irq_domain_create_hierarchy() are
currently added to the domain list before having been fully initialised.
This specifically means that a racing allocation request might fail to
allocate irq data for the inner domains of a hierarchy in case the
parent domain pointer has not yet been set up.
Note that this is not really any issue for irqchip drivers that are
registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
but could potentially cause trouble with drivers that are registered
later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
gpiochip drivers, etc.).
Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
Cc: stable(a)vger.kernel.org # 3.19
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
[ johan: add commit message ]
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index df0cbad1b0d7..a6d1b108b8f7 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -126,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+ unsigned int size,
+ irq_hw_number_t hwirq_max,
+ int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irqchip_fwid *fwid;
struct irq_domain *domain;
@@ -230,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ * direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *domain;
+
+ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+ ops, host_data);
+ if (domain)
+ __irq_domain_publish(domain);
+
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -1138,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain;
if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
+ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
+ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
+
if (domain) {
domain->parent = parent;
domain->flags |= flags;
+
+ __irq_domain_publish(domain);
}
return domain;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8932c32c3053accd50702b36e944ac2016cd103c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678128551129114(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8932c32c3053 ("irqdomain: Fix domain registration race")
20c36ce2164f ("irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent")
3a1d24ca9573 ("irq/irqdomain: Fix comment typo")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8932c32c3053accd50702b36e944ac2016cd103c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:49 +0100
Subject: [PATCH] irqdomain: Fix domain registration race
Hierarchical domains created using irq_domain_create_hierarchy() are
currently added to the domain list before having been fully initialised.
This specifically means that a racing allocation request might fail to
allocate irq data for the inner domains of a hierarchy in case the
parent domain pointer has not yet been set up.
Note that this is not really any issue for irqchip drivers that are
registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
but could potentially cause trouble with drivers that are registered
later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
gpiochip drivers, etc.).
Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
Cc: stable(a)vger.kernel.org # 3.19
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
[ johan: add commit message ]
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index df0cbad1b0d7..a6d1b108b8f7 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -126,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+ unsigned int size,
+ irq_hw_number_t hwirq_max,
+ int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irqchip_fwid *fwid;
struct irq_domain *domain;
@@ -230,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ * direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *domain;
+
+ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+ ops, host_data);
+ if (domain)
+ __irq_domain_publish(domain);
+
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -1138,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain;
if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
+ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
+ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
+
if (domain) {
domain->parent = parent;
domain->flags |= flags;
+
+ __irq_domain_publish(domain);
}
return domain;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 8932c32c3053accd50702b36e944ac2016cd103c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781285504764(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
8932c32c3053 ("irqdomain: Fix domain registration race")
20c36ce2164f ("irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8932c32c3053accd50702b36e944ac2016cd103c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:49 +0100
Subject: [PATCH] irqdomain: Fix domain registration race
Hierarchical domains created using irq_domain_create_hierarchy() are
currently added to the domain list before having been fully initialised.
This specifically means that a racing allocation request might fail to
allocate irq data for the inner domains of a hierarchy in case the
parent domain pointer has not yet been set up.
Note that this is not really any issue for irqchip drivers that are
registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
but could potentially cause trouble with drivers that are registered
later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
gpiochip drivers, etc.).
Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
Cc: stable(a)vger.kernel.org # 3.19
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
[ johan: add commit message ]
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index df0cbad1b0d7..a6d1b108b8f7 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -126,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+ unsigned int size,
+ irq_hw_number_t hwirq_max,
+ int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irqchip_fwid *fwid;
struct irq_domain *domain;
@@ -230,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ * direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *domain;
+
+ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+ ops, host_data);
+ if (domain)
+ __irq_domain_publish(domain);
+
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -1138,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain;
if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
+ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
+ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
+
if (domain) {
domain->parent = parent;
domain->flags |= flags;
+
+ __irq_domain_publish(domain);
}
return domain;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 8932c32c3053accd50702b36e944ac2016cd103c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781285492232(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
8932c32c3053 ("irqdomain: Fix domain registration race")
20c36ce2164f ("irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8932c32c3053accd50702b36e944ac2016cd103c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:49 +0100
Subject: [PATCH] irqdomain: Fix domain registration race
Hierarchical domains created using irq_domain_create_hierarchy() are
currently added to the domain list before having been fully initialised.
This specifically means that a racing allocation request might fail to
allocate irq data for the inner domains of a hierarchy in case the
parent domain pointer has not yet been set up.
Note that this is not really any issue for irqchip drivers that are
registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
but could potentially cause trouble with drivers that are registered
later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
gpiochip drivers, etc.).
Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
Cc: stable(a)vger.kernel.org # 3.19
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
[ johan: add commit message ]
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index df0cbad1b0d7..a6d1b108b8f7 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -126,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+ unsigned int size,
+ irq_hw_number_t hwirq_max,
+ int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irqchip_fwid *fwid;
struct irq_domain *domain;
@@ -230,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ * direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *domain;
+
+ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+ ops, host_data);
+ if (domain)
+ __irq_domain_publish(domain);
+
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -1138,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain;
if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
+ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
+ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
+
if (domain) {
domain->parent = parent;
domain->flags |= flags;
+
+ __irq_domain_publish(domain);
}
return domain;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x d55f7f4c58c07beb5050a834bf57ae2ede599c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812851896254(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
d55f7f4c58c0 ("irqdomain: Refactor __irq_domain_alloc_irqs()")
55567976629e ("genirq/irqdomain: Allow partial trimming of irq_data hierarchy")
87f2d1c662fa ("genirq/irqdomain: Check pointer in irq_domain_alloc_irqs_hierarchy()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d55f7f4c58c07beb5050a834bf57ae2ede599c7e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:47 +0100
Subject: [PATCH] irqdomain: Refactor __irq_domain_alloc_irqs()
Refactor __irq_domain_alloc_irqs() so that it can be called internally
while holding the irq_domain_mutex.
This will be used to fix a shared-interrupt mapping race, hence the
Fixes tag.
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: stable(a)vger.kernel.org # 4.8
Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org>
Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f95047e4bc7..78fb4800c0d2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1441,40 +1441,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program the hardware with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1493,24 +1465,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1520,6 +1486,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs);
return ret;
}
+
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&irq_domain_mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
/* The irq_data was moved, fix the revmap to refer to the new location */
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x d55f7f4c58c07beb5050a834bf57ae2ede599c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678128518177232(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
d55f7f4c58c0 ("irqdomain: Refactor __irq_domain_alloc_irqs()")
55567976629e ("genirq/irqdomain: Allow partial trimming of irq_data hierarchy")
87f2d1c662fa ("genirq/irqdomain: Check pointer in irq_domain_alloc_irqs_hierarchy()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d55f7f4c58c07beb5050a834bf57ae2ede599c7e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:47 +0100
Subject: [PATCH] irqdomain: Refactor __irq_domain_alloc_irqs()
Refactor __irq_domain_alloc_irqs() so that it can be called internally
while holding the irq_domain_mutex.
This will be used to fix a shared-interrupt mapping race, hence the
Fixes tag.
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: stable(a)vger.kernel.org # 4.8
Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org>
Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f95047e4bc7..78fb4800c0d2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1441,40 +1441,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program the hardware with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1493,24 +1465,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1520,6 +1486,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs);
return ret;
}
+
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&irq_domain_mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
/* The irq_data was moved, fix the revmap to refer to the new location */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x d55f7f4c58c07beb5050a834bf57ae2ede599c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812851762200(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
d55f7f4c58c0 ("irqdomain: Refactor __irq_domain_alloc_irqs()")
55567976629e ("genirq/irqdomain: Allow partial trimming of irq_data hierarchy")
87f2d1c662fa ("genirq/irqdomain: Check pointer in irq_domain_alloc_irqs_hierarchy()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d55f7f4c58c07beb5050a834bf57ae2ede599c7e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:47 +0100
Subject: [PATCH] irqdomain: Refactor __irq_domain_alloc_irqs()
Refactor __irq_domain_alloc_irqs() so that it can be called internally
while holding the irq_domain_mutex.
This will be used to fix a shared-interrupt mapping race, hence the
Fixes tag.
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: stable(a)vger.kernel.org # 4.8
Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org>
Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f95047e4bc7..78fb4800c0d2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1441,40 +1441,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program the hardware with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1493,24 +1465,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1520,6 +1486,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs);
return ret;
}
+
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&irq_domain_mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
/* The irq_data was moved, fix the revmap to refer to the new location */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d55f7f4c58c07beb5050a834bf57ae2ede599c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781285161731(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
d55f7f4c58c0 ("irqdomain: Refactor __irq_domain_alloc_irqs()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d55f7f4c58c07beb5050a834bf57ae2ede599c7e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:47 +0100
Subject: [PATCH] irqdomain: Refactor __irq_domain_alloc_irqs()
Refactor __irq_domain_alloc_irqs() so that it can be called internally
while holding the irq_domain_mutex.
This will be used to fix a shared-interrupt mapping race, hence the
Fixes tag.
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: stable(a)vger.kernel.org # 4.8
Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org>
Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f95047e4bc7..78fb4800c0d2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1441,40 +1441,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program the hardware with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1493,24 +1465,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1520,6 +1486,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs);
return ret;
}
+
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&irq_domain_mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
/* The irq_data was moved, fix the revmap to refer to the new location */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 6ded703c56c21bfb259725d4f1831a5feb563e9b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812503219822(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
6ded703c56c2 ("brd: check for REQ_NOWAIT and set correct page allocation mask")
db0ccc44a20b ("brd: return 0/-error from brd_insert_page()")
ba91fd01aad2 ("block/brd: Use the enum req_op type")
3e08773c3841 ("block: switch polling to be bio based")
6ce913fe3eee ("block: rename REQ_HIPRI to REQ_POLLED")
d729cf9acb93 ("io_uring: don't sleep when polling for I/O")
ef99b2d37666 ("block: replace the spin argument to blk_iopoll with a flags argument")
28a1ae6b9dab ("blk-mq: remove blk_qc_t_valid")
efbabbe121f9 ("blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal")
c6699d6fe0ff ("blk-mq: factor out a "classic" poll helper")
f70299f0d58e ("blk-mq: factor out a blk_qc_to_hctx helper")
71fc3f5e2c00 ("block: don't try to poll multi-bio I/Os in __blkdev_direct_IO")
0f38d7664615 ("blk-mq: cleanup blk_mq_submit_bio")
47c122e35d7e ("block: pre-allocate requests if plug is started and is a batch")
24b83deb29b7 ("block: move struct request to blk-mq.h")
badf7f643787 ("block: move a few merge helpers out of <linux/blkdev.h>")
2e9bc3465ac5 ("block: move elevator.h to block/")
9778ac77c202 ("block: remove the struct blk_queue_ctx forward declaration")
90138237a562 ("block: remove the unused blk_queue_state enum")
cc9c884dd7f4 ("block: call submit_bio_checks under q_usage_counter")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6ded703c56c21bfb259725d4f1831a5feb563e9b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Thu, 16 Feb 2023 08:01:08 -0700
Subject: [PATCH] brd: check for REQ_NOWAIT and set correct page allocation
mask
If REQ_NOWAIT is set, then do a non-blocking allocation if the operation
is a write and we need to insert a new page. Currently REQ_NOWAIT cannot
be set as the queue isn't marked as supporting nowait, this change is in
preparation for allowing that.
radix_tree_preload() warns on attempting to call it with an allocation
mask that doesn't allow blocking. While that warning could arguably
be removed, we need to handle radix insertion failures anyway as they
are more likely if we cannot block to get memory.
Remove legacy BUG_ON()'s and turn them into proper errors instead, one
for the allocation failure and one for finding a page that doesn't
match the correct index.
Cc: stable(a)vger.kernel.org # 5.10+
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 15a148d5aad9..00f3c5b51a01 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -80,26 +80,21 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
/*
* Insert a new page for a given sector, if one does not already exist.
*/
-static int brd_insert_page(struct brd_device *brd, sector_t sector)
+static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
pgoff_t idx;
struct page *page;
- gfp_t gfp_flags;
+ int ret = 0;
page = brd_lookup_page(brd, sector);
if (page)
return 0;
- /*
- * Must use NOIO because we don't want to recurse back into the
- * block or filesystem layers from page reclaim.
- */
- gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
- page = alloc_page(gfp_flags);
+ page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
if (!page)
return -ENOMEM;
- if (radix_tree_preload(GFP_NOIO)) {
+ if (gfpflags_allow_blocking(gfp) && radix_tree_preload(gfp)) {
__free_page(page);
return -ENOMEM;
}
@@ -110,15 +105,17 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector)
if (radix_tree_insert(&brd->brd_pages, idx, page)) {
__free_page(page);
page = radix_tree_lookup(&brd->brd_pages, idx);
- BUG_ON(!page);
- BUG_ON(page->index != idx);
+ if (!page)
+ ret = -ENOMEM;
+ else if (page->index != idx)
+ ret = -EIO;
} else {
brd->brd_nr_pages++;
}
spin_unlock(&brd->brd_lock);
radix_tree_preload_end();
- return 0;
+ return ret;
}
/*
@@ -167,19 +164,20 @@ static void brd_free_pages(struct brd_device *brd)
/*
* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
*/
-static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
+static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
+ gfp_t gfp)
{
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
size_t copy;
int ret;
copy = min_t(size_t, n, PAGE_SIZE - offset);
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
if (ret)
return ret;
if (copy < n) {
sector += copy >> SECTOR_SHIFT;
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
}
return ret;
}
@@ -254,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
* Process a single bvec of a bio.
*/
static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, enum req_op op,
+ unsigned int len, unsigned int off, blk_opf_t opf,
sector_t sector)
{
void *mem;
int err = 0;
- if (op_is_write(op)) {
- err = copy_to_brd_setup(brd, sector, len);
+ if (op_is_write(opf)) {
+ /*
+ * Must use NOIO because we don't want to recurse back into the
+ * block or filesystem layers from page reclaim.
+ */
+ gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
+
+ err = copy_to_brd_setup(brd, sector, len, gfp);
if (err)
goto out;
}
mem = kmap_atomic(page);
- if (!op_is_write(op)) {
+ if (!op_is_write(opf)) {
copy_from_brd(mem + off, brd, sector, len);
flush_dcache_page(page);
} else {
@@ -296,8 +300,12 @@ static void brd_submit_bio(struct bio *bio)
(len & (SECTOR_SIZE - 1)));
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio_op(bio), sector);
+ bio->bi_opf, sector);
if (err) {
+ if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
+ bio_wouldblock_error(bio);
+ return;
+ }
bio_io_error(bio);
return;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6ded703c56c21bfb259725d4f1831a5feb563e9b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678125031200144(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
6ded703c56c2 ("brd: check for REQ_NOWAIT and set correct page allocation mask")
db0ccc44a20b ("brd: return 0/-error from brd_insert_page()")
ba91fd01aad2 ("block/brd: Use the enum req_op type")
3e08773c3841 ("block: switch polling to be bio based")
6ce913fe3eee ("block: rename REQ_HIPRI to REQ_POLLED")
d729cf9acb93 ("io_uring: don't sleep when polling for I/O")
ef99b2d37666 ("block: replace the spin argument to blk_iopoll with a flags argument")
28a1ae6b9dab ("blk-mq: remove blk_qc_t_valid")
efbabbe121f9 ("blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal")
c6699d6fe0ff ("blk-mq: factor out a "classic" poll helper")
f70299f0d58e ("blk-mq: factor out a blk_qc_to_hctx helper")
71fc3f5e2c00 ("block: don't try to poll multi-bio I/Os in __blkdev_direct_IO")
0f38d7664615 ("blk-mq: cleanup blk_mq_submit_bio")
47c122e35d7e ("block: pre-allocate requests if plug is started and is a batch")
24b83deb29b7 ("block: move struct request to blk-mq.h")
badf7f643787 ("block: move a few merge helpers out of <linux/blkdev.h>")
2e9bc3465ac5 ("block: move elevator.h to block/")
9778ac77c202 ("block: remove the struct blk_queue_ctx forward declaration")
90138237a562 ("block: remove the unused blk_queue_state enum")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6ded703c56c21bfb259725d4f1831a5feb563e9b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Thu, 16 Feb 2023 08:01:08 -0700
Subject: [PATCH] brd: check for REQ_NOWAIT and set correct page allocation
mask
If REQ_NOWAIT is set, then do a non-blocking allocation if the operation
is a write and we need to insert a new page. Currently REQ_NOWAIT cannot
be set as the queue isn't marked as supporting nowait, this change is in
preparation for allowing that.
radix_tree_preload() warns on attempting to call it with an allocation
mask that doesn't allow blocking. While that warning could arguably
be removed, we need to handle radix insertion failures anyway as they
are more likely if we cannot block to get memory.
Remove legacy BUG_ON()'s and turn them into proper errors instead, one
for the allocation failure and one for finding a page that doesn't
match the correct index.
Cc: stable(a)vger.kernel.org # 5.10+
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 15a148d5aad9..00f3c5b51a01 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -80,26 +80,21 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
/*
* Insert a new page for a given sector, if one does not already exist.
*/
-static int brd_insert_page(struct brd_device *brd, sector_t sector)
+static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
pgoff_t idx;
struct page *page;
- gfp_t gfp_flags;
+ int ret = 0;
page = brd_lookup_page(brd, sector);
if (page)
return 0;
- /*
- * Must use NOIO because we don't want to recurse back into the
- * block or filesystem layers from page reclaim.
- */
- gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
- page = alloc_page(gfp_flags);
+ page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
if (!page)
return -ENOMEM;
- if (radix_tree_preload(GFP_NOIO)) {
+ if (gfpflags_allow_blocking(gfp) && radix_tree_preload(gfp)) {
__free_page(page);
return -ENOMEM;
}
@@ -110,15 +105,17 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector)
if (radix_tree_insert(&brd->brd_pages, idx, page)) {
__free_page(page);
page = radix_tree_lookup(&brd->brd_pages, idx);
- BUG_ON(!page);
- BUG_ON(page->index != idx);
+ if (!page)
+ ret = -ENOMEM;
+ else if (page->index != idx)
+ ret = -EIO;
} else {
brd->brd_nr_pages++;
}
spin_unlock(&brd->brd_lock);
radix_tree_preload_end();
- return 0;
+ return ret;
}
/*
@@ -167,19 +164,20 @@ static void brd_free_pages(struct brd_device *brd)
/*
* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
*/
-static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
+static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
+ gfp_t gfp)
{
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
size_t copy;
int ret;
copy = min_t(size_t, n, PAGE_SIZE - offset);
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
if (ret)
return ret;
if (copy < n) {
sector += copy >> SECTOR_SHIFT;
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
}
return ret;
}
@@ -254,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
* Process a single bvec of a bio.
*/
static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, enum req_op op,
+ unsigned int len, unsigned int off, blk_opf_t opf,
sector_t sector)
{
void *mem;
int err = 0;
- if (op_is_write(op)) {
- err = copy_to_brd_setup(brd, sector, len);
+ if (op_is_write(opf)) {
+ /*
+ * Must use NOIO because we don't want to recurse back into the
+ * block or filesystem layers from page reclaim.
+ */
+ gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
+
+ err = copy_to_brd_setup(brd, sector, len, gfp);
if (err)
goto out;
}
mem = kmap_atomic(page);
- if (!op_is_write(op)) {
+ if (!op_is_write(opf)) {
copy_from_brd(mem + off, brd, sector, len);
flush_dcache_page(page);
} else {
@@ -296,8 +300,12 @@ static void brd_submit_bio(struct bio *bio)
(len & (SECTOR_SIZE - 1)));
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio_op(bio), sector);
+ bio->bi_opf, sector);
if (err) {
+ if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
+ bio_wouldblock_error(bio);
+ return;
+ }
bio_io_error(bio);
return;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 67205f80be9910207481406c47f7d85e703fb2e9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812500026148(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
67205f80be99 ("brd: mark as nowait compatible")
e1528830bd4e ("block/brd: add error handling support for add_disk()")
f7bf35862477 ("brd: reduce the brd_devices_mutex scope")
7f9b348cb5e9 ("brd: convert to blk_alloc_disk/blk_cleanup_disk")
f4be591f1436 ("brd: expose number of allocated pages in debugfs")
7cc178a6b994 ("brd: use __register_blkdev to allocate devices on demand")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67205f80be9910207481406c47f7d85e703fb2e9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Wed, 15 Feb 2023 16:43:47 -0700
Subject: [PATCH] brd: mark as nowait compatible
By default, non-mq drivers do not support nowait. This causes io_uring
to use a slower path as the driver cannot be trust not to block. brd
can safely set the nowait flag, as worst case all it does is a NOIO
allocation.
For io_uring, this makes a substantial difference. Before:
submitter=0, tid=453, file=/dev/ram0, node=-1
polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128
Engine=io_uring, sq_ring=128, cq_ring=128
IOPS=440.03K, BW=1718MiB/s, IOS/call=32/31
IOPS=428.96K, BW=1675MiB/s, IOS/call=32/32
IOPS=442.59K, BW=1728MiB/s, IOS/call=32/31
IOPS=419.65K, BW=1639MiB/s, IOS/call=32/32
IOPS=426.82K, BW=1667MiB/s, IOS/call=32/31
and after:
submitter=0, tid=354, file=/dev/ram0, node=-1
polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128
Engine=io_uring, sq_ring=128, cq_ring=128
IOPS=3.37M, BW=13.15GiB/s, IOS/call=32/31
IOPS=3.45M, BW=13.46GiB/s, IOS/call=32/31
IOPS=3.43M, BW=13.42GiB/s, IOS/call=32/32
IOPS=3.43M, BW=13.39GiB/s, IOS/call=32/31
IOPS=3.43M, BW=13.38GiB/s, IOS/call=32/31
or about an 8x in difference. Now that brd is prepared to deal with
REQ_NOWAIT reads/writes, mark it as supporting that.
Cc: stable(a)vger.kernel.org # 5.10+
Link: https://lore.kernel.org/linux-block/20230203103005.31290-1-p.raghav@samsung…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 00f3c5b51a01..740631dcdd0e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,6 +418,7 @@ static int brd_alloc(int i)
/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 67205f80be9910207481406c47f7d85e703fb2e9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812499941176(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
67205f80be99 ("brd: mark as nowait compatible")
e1528830bd4e ("block/brd: add error handling support for add_disk()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67205f80be9910207481406c47f7d85e703fb2e9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Wed, 15 Feb 2023 16:43:47 -0700
Subject: [PATCH] brd: mark as nowait compatible
By default, non-mq drivers do not support nowait. This causes io_uring
to use a slower path as the driver cannot be trust not to block. brd
can safely set the nowait flag, as worst case all it does is a NOIO
allocation.
For io_uring, this makes a substantial difference. Before:
submitter=0, tid=453, file=/dev/ram0, node=-1
polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128
Engine=io_uring, sq_ring=128, cq_ring=128
IOPS=440.03K, BW=1718MiB/s, IOS/call=32/31
IOPS=428.96K, BW=1675MiB/s, IOS/call=32/32
IOPS=442.59K, BW=1728MiB/s, IOS/call=32/31
IOPS=419.65K, BW=1639MiB/s, IOS/call=32/32
IOPS=426.82K, BW=1667MiB/s, IOS/call=32/31
and after:
submitter=0, tid=354, file=/dev/ram0, node=-1
polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128
Engine=io_uring, sq_ring=128, cq_ring=128
IOPS=3.37M, BW=13.15GiB/s, IOS/call=32/31
IOPS=3.45M, BW=13.46GiB/s, IOS/call=32/31
IOPS=3.43M, BW=13.42GiB/s, IOS/call=32/32
IOPS=3.43M, BW=13.39GiB/s, IOS/call=32/31
IOPS=3.43M, BW=13.38GiB/s, IOS/call=32/31
or about an 8x in difference. Now that brd is prepared to deal with
REQ_NOWAIT reads/writes, mark it as supporting that.
Cc: stable(a)vger.kernel.org # 5.10+
Link: https://lore.kernel.org/linux-block/20230203103005.31290-1-p.raghav@samsung…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 00f3c5b51a01..740631dcdd0e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,6 +418,7 @@ static int brd_alloc(int i)
/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
Hi -stable team,
please backport the commit f1aa2eb5ea05 ("sysctl: fix proc_dobool() usability")
to the stable kernels containing commit 83efeeeb3d04 ("tty: Allow TIOCSTI to be disabled").
(Which seems only to be the 6.2 branch only at the moment)
Without this backport the sysctl dev.net.legacy_tiocsti to enable
ioctl(TIOCSTI) is not functional. So on kernels that don't enable
CONFIG_LEGACY_TIOCSTI, ioctl(TIOCSTI) is not usable at all.
This ioctl is used for the copy-and-paste functionality of the
screenreader "fenrir".
( https://github.com/chrys87/fenrir )
Reported-by: Storm Dragon <stormdragon2976(a)gmail.com>
Link: https://lore.kernel.org/lkml/ZAOi9hDBTYqoAZuI@hotmail.com/
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 26044aff37a5455b19a91785086914fd33053ef4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781236386157(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
26044aff37a5 ("x86/crash: Disable virt in core NMI crash handler to avoid double shootdown")
ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
4d1d0977a215 ("x86: Fix a handful of typos")
22ca7ee933a3 ("x86/apic: Provide and use helper for send_IPI_allbutself()")
6a1cb5f5c641 ("x86/apic: Add static key to Control IPI shorthands")
bdda3b93e660 ("x86/apic: Move no_ipi_broadcast() out of 32bit")
9c92374b631d ("x86/cpu: Move arch_smt_update() to a neutral place")
c94f0718fb1c ("x86/apic: Consolidate the apic local headers")
ba77b2a02e00 ("x86/apic: Move apic_flat_64 header into apic directory")
8b542da37287 ("x86/apic: Move ipi header into apic directory")
521b82fee98c ("x86/apic: Cleanup the include maze")
cdc86c9d1f82 ("x86/apic: Move IPI inlines into ipi.c")
2591bc4e8d70 ("x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI")
747d5a1bf293 ("x86/reboot: Always use NMI fallback when shutdown via reboot vector IPI fails")
7e300dabb7e7 ("treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 223")
fa4bff165070 ("Merge branch 'x86-mds-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 26044aff37a5455b19a91785086914fd33053ef4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:36:47 +0000
Subject: [PATCH] x86/crash: Disable virt in core NMI crash handler to avoid
double shootdown
Disable virtualization in crash_nmi_callback() and rework the
emergency_vmx_disable_all() path to do an NMI shootdown if and only if a
shootdown has not already occurred. NMI crash shootdown fundamentally
can't support multiple invocations as responding CPUs are deliberately
put into halt state without unblocking NMIs. But, the emergency reboot
path doesn't have any work of its own, it simply cares about disabling
virtualization, i.e. so long as a shootdown occurred, emergency reboot
doesn't care who initiated the shootdown, or when.
If "crash_kexec_post_notifiers" is specified on the kernel command line,
panic() will invoke crash_smp_send_stop() and result in a second call to
nmi_shootdown_cpus() during native_machine_emergency_restart().
Invoke the callback _before_ disabling virtualization, as the current
VMCS needs to be cleared before doing VMXOFF. Note, this results in a
subtle change in ordering between disabling virtualization and stopping
Intel PT on the responding CPUs. While VMX and Intel PT do interact,
VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one
another, which is all that matters when panicking.
Harden nmi_shootdown_cpus() against multiple invocations to try and
capture any such kernel bugs via a WARN instead of hanging the system
during a crash/dump, e.g. prior to the recent hardening of
register_nmi_handler(), re-registering the NMI handler would trigger a
double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y.
list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800.
WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70
Call Trace:
__register_nmi_handler+0xcf/0x130
nmi_shootdown_cpus+0x39/0x90
native_machine_emergency_restart+0x1c9/0x1d0
panic+0x237/0x29b
Extract the disabling logic to a common helper to deduplicate code, and
to prepare for doing the shootdown in the emergency reboot path if SVM
is supported.
Note, prior to commit ed72736183c4 ("x86/reboot: Force all cpus to exit
VMX root if VMX is supported"), nmi_shootdown_cpus() was subtly protected
against a second invocation by a cpu_vmx_enabled() check as the kdump
handler would disable VMX if it ran first.
Fixes: ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 04c17be9b5fd..bc5b4d788c08 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
+void cpu_emergency_disable_virtualization(void);
+
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_panic_self_stop(struct pt_regs *regs);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 305514431f26..cdd92ab43cda 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -37,7 +37,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
/*
* Disable Intel PT to stop its logging
*/
@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c3636ea4aa71..374c14b3a9bf 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -528,10 +528,7 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
- cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
static void emergency_vmx_disable_all(void)
@@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(void)
__cpu_emergency_vmxoff();
/* Halt and exit VMX root operation on the other CPUs. */
- nmi_shootdown_cpus(vmxoff_nmi);
+ nmi_shootdown_cpus_on_restart();
}
}
@@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+}
+
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
- shootdown_callback(cpu, regs);
+ if (shootdown_callback)
+ shootdown_callback(cpu, regs);
+
+ /*
+ * Prepare the CPU for reboot _after_ invoking the callback so that the
+ * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ */
+ cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
}
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback: Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
*
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
+
local_irq_disable();
+ /*
+ * Avoid certain doom if a shootdown already occurred; re-registering
+ * the NMI handler will cause list corruption, modifying the callback
+ * will do who knows what, etc...
+ */
+ if (WARN_ON_ONCE(crash_ipi_issued))
+ return;
+
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
- /* Leave the nmi callback set */
+ /*
+ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
+ * the callback could result in a NULL pointer dereference if a CPU
+ * (finally) responds after the timeout expires.
+ */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+ if (!crash_ipi_issued)
+ nmi_shootdown_cpus(NULL);
}
/*
@@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
void run_crash_ipi_callback(struct pt_regs *regs)
{
}