These stable fixes were not correctly noted as fixes when
originally submitted for 5.2-rc1. We are addressing the internal
gap that led to this miss.
Please consider these patches for all stable kernels older than
5.2.0, I tried on 4.19 and 3 out of 4 apply cleanly with a cherry
pick from Linus' tree, but one of them I had to rebase, so I'm
just sending the whole series.
If you'd rather I send one at a time in the format specified at
option 2) of the stable documentation, please just let me know.
Patch 4 depends on patch 1.
I tried to follow the stable commit format for each of the
individual patches, referencing the upstream commit ID. I also
added a "Fixes" to each, trying to assist the automation in
knowing how far back to backport.
Shortlog:
Grzegorz Siwik (1):
i40e: Wrong truncation from u16 to u8
Martyna Szapar (2):
i40e: Fix of memory leak and integer truncation in i40e_virtchnl.c
i40e: Memory leak in i40e_config_iwarp_qvlist
Sergey Nemov (1):
i40e: add num_vectors checker in iwarp handler
.../ethernet/intel/i40e/i40e_virtchnl_pf.c | 51 +++++++++++++------
1 file changed, 36 insertions(+), 15 deletions(-)
--
2.25.4
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 41ea93cf7ba4e0f0cc46ebfdda8b6ff27c67bc91 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Date: Thu, 2 Jul 2020 11:52:03 +0000
Subject: [PATCH] powerpc/kasan: Fix shadow pages allocation failure
Doing kasan pages allocation in MMU_init is too early, kernel doesn't
have access yet to the entire memory space and memblock_alloc() fails
when the kernel is a bit big.
Do it from kasan_init() instead.
Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support")
Fixes: d2a91cef9bbd ("powerpc/kasan: Fix shadow pages allocation failure")
Cc: stable(a)vger.kernel.org
Reported-by: Erhard F. <erhard_f(a)mailbox.org>
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208181
Link: https://lore.kernel.org/r/63048fcea8a1c02f75429ba3152f80f7853f87fc.15936907…
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 4813c6d50889..019b0c0bbbf3 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -120,11 +120,24 @@ static void __init kasan_unmap_early_shadow_vmalloc(void)
void __init kasan_mmu_init(void)
{
int ret;
+
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
+ IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+}
+
+void __init kasan_init(void)
+{
struct memblock_region *reg;
for_each_memblock(memory, reg) {
phys_addr_t base = reg->base;
phys_addr_t top = min(base + reg->size, total_lowmem);
+ int ret;
if (base >= top)
continue;
@@ -134,18 +147,6 @@ void __init kasan_mmu_init(void)
panic("kasan: kasan_init_region() failed");
}
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
- IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
-
- if (ret)
- panic("kasan: kasan_init_shadow_page_tables() failed");
- }
-
-}
-
-void __init kasan_init(void)
-{
kasan_remap_early_shadow_ro();
clear_page(kasan_early_shadow_page);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 41ea93cf7ba4e0f0cc46ebfdda8b6ff27c67bc91 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Date: Thu, 2 Jul 2020 11:52:03 +0000
Subject: [PATCH] powerpc/kasan: Fix shadow pages allocation failure
Doing kasan pages allocation in MMU_init is too early, kernel doesn't
have access yet to the entire memory space and memblock_alloc() fails
when the kernel is a bit big.
Do it from kasan_init() instead.
Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support")
Fixes: d2a91cef9bbd ("powerpc/kasan: Fix shadow pages allocation failure")
Cc: stable(a)vger.kernel.org
Reported-by: Erhard F. <erhard_f(a)mailbox.org>
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208181
Link: https://lore.kernel.org/r/63048fcea8a1c02f75429ba3152f80f7853f87fc.15936907…
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 4813c6d50889..019b0c0bbbf3 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -120,11 +120,24 @@ static void __init kasan_unmap_early_shadow_vmalloc(void)
void __init kasan_mmu_init(void)
{
int ret;
+
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
+ IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+}
+
+void __init kasan_init(void)
+{
struct memblock_region *reg;
for_each_memblock(memory, reg) {
phys_addr_t base = reg->base;
phys_addr_t top = min(base + reg->size, total_lowmem);
+ int ret;
if (base >= top)
continue;
@@ -134,18 +147,6 @@ void __init kasan_mmu_init(void)
panic("kasan: kasan_init_region() failed");
}
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
- IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
-
- if (ret)
- panic("kasan: kasan_init_shadow_page_tables() failed");
- }
-
-}
-
-void __init kasan_init(void)
-{
kasan_remap_early_shadow_ro();
clear_page(kasan_early_shadow_page);
commit 4b836a1426cb0f1ef2a6e211d7e553221594f8fc upstream.
Binder is designed such that a binder_proc never has references to
itself. If this rule is violated, memory corruption can occur when a
process sends a transaction to itself; see e.g.
<https://syzkaller.appspot.com/bug?extid=09e05aba06723a94d43d>.
There is a remaining edgecase through which such a transaction-to-self
can still occur from the context of a task with BINDER_SET_CONTEXT_MGR
access:
- task A opens /dev/binder twice, creating binder_proc instances P1
and P2
- P1 becomes context manager
- P2 calls ACQUIRE on the magic handle 0, allocating index 0 in its
handle table
- P1 dies (by closing the /dev/binder fd and waiting a bit)
- P2 becomes context manager
- P2 calls ACQUIRE on the magic handle 0, allocating index 1 in its
handle table
[this triggers a warning: "binder: 1974:1974 tried to acquire
reference to desc 0, got 1 instead"]
- task B opens /dev/binder once, creating binder_proc instance P3
- P3 calls P2 (via magic handle 0) with (void*)1 as argument (two-way
transaction)
- P2 receives the handle and uses it to call P3 (two-way transaction)
- P3 calls P2 (via magic handle 0) (two-way transaction)
- P2 calls P2 (via handle 1) (two-way transaction)
And then, if P2 does *NOT* accept the incoming transaction work, but
instead closes the binder fd, we get a crash.
Solve it by preventing the context manager from using ACQUIRE on ref 0.
There shouldn't be any legitimate reason for the context manager to do
that.
Additionally, print a warning if someone manages to find another way to
trigger a transaction-to-self bug in the future.
Cc: stable(a)vger.kernel.org
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver")
Acked-by: Todd Kjos <tkjos(a)google.com>
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Martijn Coenen <maco(a)android.com>
Link: https://lore.kernel.org/r/20200727120424.1627555-1-jannh@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[manual backport: remove fine-grained locking and error reporting that
don't exist in <=4.9]
Signed-off-by: Jann Horn <jannh(a)google.com>
---
drivers/android/binder.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index e12288c245b5..f4c0b6295945 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1427,6 +1427,10 @@ static void binder_transaction(struct binder_proc *proc,
return_error = BR_DEAD_REPLY;
goto err_dead_binder;
}
+ if (WARN_ON(proc == target_proc)) {
+ return_error = BR_FAILED_REPLY;
+ goto err_invalid_target_handle;
+ }
if (security_binder_transaction(proc->tsk,
target_proc->tsk) < 0) {
return_error = BR_FAILED_REPLY;
@@ -1830,6 +1834,11 @@ static int binder_thread_write(struct binder_proc *proc,
ptr += sizeof(uint32_t);
if (target == 0 && binder_context_mgr_node &&
(cmd == BC_INCREFS || cmd == BC_ACQUIRE)) {
+ if (binder_context_mgr_node->proc == proc) {
+ binder_user_error("%d:%d context manager tried to acquire desc 0\n",
+ proc->pid, thread->pid);
+ return -EINVAL;
+ }
ref = binder_get_ref_for_node(proc,
binder_context_mgr_node);
if (ref->desc != target) {
base-commit: 8d6b541290cb9293bd2a7bb00c1d58d01abe183b
--
2.28.0.236.gb10cc79966-goog
Further investigation of the L-R swap problem on the MS2109 reveals that
the problem isn't that the channels are swapped, but rather that they
are swapped and also out of phase by one sample. In other words, the
issue is actually that the very first frame that comes from the hardware
is a half-frame containing only the right channel, and after that
everything becomes offset.
So introduce a new quirk field to drop the very first 2 bytes that come
in after the format is configured and a capture stream starts. This puts
the channels in phase and in the correct order.
Cc: stable(a)vger.kernel.org
Signed-off-by: Hector Martin <marcan(a)marcan.st>
---
sound/usb/card.h | 1 +
sound/usb/pcm.c | 6 ++++++
sound/usb/quirks.c | 3 +++
sound/usb/stream.c | 1 +
4 files changed, 11 insertions(+)
diff --git a/sound/usb/card.h b/sound/usb/card.h
index de43267b9c8a..5351d7183b1b 100644
--- a/sound/usb/card.h
+++ b/sound/usb/card.h
@@ -137,6 +137,7 @@ struct snd_usb_substream {
unsigned int tx_length_quirk:1; /* add length specifier to transfers */
unsigned int fmt_type; /* USB audio format type (1-3) */
unsigned int pkt_offset_adj; /* Bytes to drop from beginning of packets (for non-compliant devices) */
+ unsigned int stream_offset_adj; /* Bytes to drop from beginning of stream (for non-compliant devices) */
unsigned int running: 1; /* running status */
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 415bfec49a01..5600751803cf 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -1420,6 +1420,12 @@ static void retire_capture_urb(struct snd_usb_substream *subs,
// continue;
}
bytes = urb->iso_frame_desc[i].actual_length;
+ if (subs->stream_offset_adj > 0) {
+ unsigned int adj = min(subs->stream_offset_adj, bytes);
+ cp += adj;
+ bytes -= adj;
+ subs->stream_offset_adj -= adj;
+ }
frames = bytes / stride;
if (!subs->txfr_quirk)
bytes = frames * stride;
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index c551141f337e..abf99b814a0f 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1495,6 +1495,9 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs,
case USB_ID(0x2b73, 0x000a): /* Pioneer DJ DJM-900NXS2 */
pioneer_djm_set_format_quirk(subs);
break;
+ case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */
+ subs->stream_offset_adj = 2;
+ break;
}
}
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index 4d1e6579e54d..ca76ba5b5c0b 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -94,6 +94,7 @@ static void snd_usb_init_substream(struct snd_usb_stream *as,
subs->tx_length_quirk = as->chip->tx_length_quirk;
subs->speed = snd_usb_get_speed(subs->dev);
subs->pkt_offset_adj = 0;
+ subs->stream_offset_adj = 0;
snd_usb_set_pcm_ops(as->pcm, stream);
--
2.27.0
Note2myself: Check MAINTAINERS > STABLE BRANCH.
- Sedat -
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/MAI…
On Mon, Aug 10, 2020 at 11:52 AM Sedat Dilek <sedat.dilek(a)gmail.com> wrote:
>
> [ Hope I have the correct CC for linux-stable ML ]
>
> Hi Greg and Sasha,
>
> The base for <linux-stable-rc.git#queue/5.8> is Linux v5.7.14 where it
> should be Linux v5.8.
>
> Can you please look at this?
>
> Thanks.
>
> Regards,
> - Sedat -
The patch titled
Subject: khugepaged: khugepaged_test_exit() check mmget_still_valid()
has been removed from the -mm tree. Its filename was
khugepaged-khugepaged_test_exit-check-mmget_still_valid.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: khugepaged_test_exit() check mmget_still_valid()
Move collapse_huge_page()'s mmget_still_valid() check into
khugepaged_test_exit() itself. collapse_huge_page() is used for anon THP
only, and earned its mmget_still_valid() check because it inserts a huge
pmd entry in place of the page table's pmd entry; whereas
collapse_file()'s retract_page_tables() or collapse_pte_mapped_thp()
merely clears the page table's pmd entry. But core dumping without mmap
lock must have been as open to mistaking a racily cleared pmd entry for a
page table at physical page 0, as exit_mmap() was. And we certainly have
no interest in mapping as a THP once dumping core.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021217020.27773@eggly.anvils
Fixes: 59ea6d06cfa9 ("coredump: fix race condition between collapse_huge_page() and core dumping")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/mm/khugepaged.c~khugepaged-khugepaged_test_exit-check-mmget_still_valid
+++ a/mm/khugepaged.c
@@ -431,7 +431,7 @@ static void insert_to_mm_slots_hash(stru
static inline int khugepaged_test_exit(struct mm_struct *mm)
{
- return atomic_read(&mm->mm_users) == 0;
+ return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
}
static bool hugepage_vma_check(struct vm_area_struct *vma,
@@ -1100,9 +1100,6 @@ static void collapse_huge_page(struct mm
* handled by the anon_vma lock + PG_lock.
*/
mmap_write_lock(mm);
- result = SCAN_ANY_PROCESS;
- if (!mmget_still_valid(mm))
- goto out;
result = hugepage_vma_revalidate(mm, address, &vma);
if (result)
goto out;
_
Patches currently in -mm which might be from hughd(a)google.com are
The patch titled
Subject: khugepaged: retract_page_tables() remember to test exit
has been removed from the -mm tree. Its filename was
khugepaged-retract_page_tables-remember-to-test-exit.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: retract_page_tables() remember to test exit
Only once have I seen this scenario (and forgot even to notice what forced
the eventual crash): a sequence of "BUG: Bad page map" alerts from
vm_normal_page(), from zap_pte_range() servicing exit_mmap();
pmd:00000000, pte values corresponding to data in physical page 0.
The pte mappings being zapped in this case were supposed to be from a huge
page of ext4 text (but could as well have been shmem): my belief is that
it was racing with collapse_file()'s retract_page_tables(), found *pmd
pointing to a page table, locked it, but *pmd had become 0 by the time
start_pte was decided.
In most cases, that possibility is excluded by holding mmap lock; but
exit_mmap() proceeds without mmap lock. Most of what's run by khugepaged
checks khugepaged_test_exit() after acquiring mmap lock:
khugepaged_collapse_pte_mapped_thps() and hugepage_vma_revalidate() do so,
for example. But retract_page_tables() did not: fix that.
The fix is for retract_page_tables() to check khugepaged_test_exit(),
after acquiring mmap lock, before doing anything to the page table.
Getting the mmap lock serializes with __mmput(), which briefly takes and
drops it in __khugepaged_exit(); then the khugepaged_test_exit() check on
mm_users makes sure we don't touch the page table once exit_mmap() might
reach it, since exit_mmap() will be proceeding without mmap lock, not
expecting anyone to be racing with it.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021215400.27773@eggly.anvils
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
--- a/mm/khugepaged.c~khugepaged-retract_page_tables-remember-to-test-exit
+++ a/mm/khugepaged.c
@@ -1532,6 +1532,7 @@ out:
static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
{
struct vm_area_struct *vma;
+ struct mm_struct *mm;
unsigned long addr;
pmd_t *pmd, _pmd;
@@ -1560,7 +1561,8 @@ static void retract_page_tables(struct a
continue;
if (vma->vm_end < addr + HPAGE_PMD_SIZE)
continue;
- pmd = mm_find_pmd(vma->vm_mm, addr);
+ mm = vma->vm_mm;
+ pmd = mm_find_pmd(mm, addr);
if (!pmd)
continue;
/*
@@ -1570,17 +1572,19 @@ static void retract_page_tables(struct a
* mmap_lock while holding page lock. Fault path does it in
* reverse order. Trylock is a way to avoid deadlock.
*/
- if (mmap_write_trylock(vma->vm_mm)) {
- spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
- /* assume page table is clear */
- _pmd = pmdp_collapse_flush(vma, addr, pmd);
- spin_unlock(ptl);
- mmap_write_unlock(vma->vm_mm);
- mm_dec_nr_ptes(vma->vm_mm);
- pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+ if (mmap_write_trylock(mm)) {
+ if (!khugepaged_test_exit(mm)) {
+ spinlock_t *ptl = pmd_lock(mm, pmd);
+ /* assume page table is clear */
+ _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
+ pte_free(mm, pmd_pgtable(_pmd));
+ }
+ mmap_write_unlock(mm);
} else {
/* Try again later */
- khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
+ khugepaged_add_pte_mapped_thp(mm, addr);
}
}
i_mmap_unlock_write(mapping);
_
Patches currently in -mm which might be from hughd(a)google.com are
The patch titled
Subject: khugepaged: collapse_pte_mapped_thp() protect the pmd lock
has been removed from the -mm tree. Its filename was
khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: collapse_pte_mapped_thp() protect the pmd lock
When retract_page_tables() removes a page table to make way for a huge
pmd, it holds huge page lock, i_mmap_lock_write, mmap_write_trylock and
pmd lock; but when collapse_pte_mapped_thp() does the same (to handle the
case when the original mmap_write_trylock had failed), only
mmap_write_trylock and pmd lock are held.
That's not enough. One machine has twice crashed under load, with "BUG:
spinlock bad magic" and GPF on 6b6b6b6b6b6b6b6b. Examining the second
crash, page_vma_mapped_walk_done()'s spin_unlock of pvmw->ptl (serving
page_referenced() on a file THP, that had found a page table at *pmd)
discovers that the page table page and its lock have already been freed by
the time it comes to unlock.
Follow the example of retract_page_tables(), but we only need one of huge
page lock or i_mmap_lock_write to secure against this: because it's the
narrower lock, and because it simplifies collapse_pte_mapped_thp() to know
the hpage earlier, choose to rely on huge page lock here.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021213070.27773@eggly.anvils
Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 44 +++++++++++++++++++-------------------------
1 file changed, 19 insertions(+), 25 deletions(-)
--- a/mm/khugepaged.c~khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock
+++ a/mm/khugepaged.c
@@ -1412,7 +1412,7 @@ void collapse_pte_mapped_thp(struct mm_s
{
unsigned long haddr = addr & HPAGE_PMD_MASK;
struct vm_area_struct *vma = find_vma(mm, haddr);
- struct page *hpage = NULL;
+ struct page *hpage;
pte_t *start_pte, *pte;
pmd_t *pmd, _pmd;
spinlock_t *ptl;
@@ -1432,9 +1432,17 @@ void collapse_pte_mapped_thp(struct mm_s
if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
return;
+ hpage = find_lock_page(vma->vm_file->f_mapping,
+ linear_page_index(vma, haddr));
+ if (!hpage)
+ return;
+
+ if (!PageHead(hpage))
+ goto drop_hpage;
+
pmd = mm_find_pmd(mm, haddr);
if (!pmd)
- return;
+ goto drop_hpage;
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
@@ -1453,30 +1461,11 @@ void collapse_pte_mapped_thp(struct mm_s
page = vm_normal_page(vma, addr, *pte);
- if (!page || !PageCompound(page))
- goto abort;
-
- if (!hpage) {
- hpage = compound_head(page);
- /*
- * The mapping of the THP should not change.
- *
- * Note that uprobe, debugger, or MAP_PRIVATE may
- * change the page table, but the new page will
- * not pass PageCompound() check.
- */
- if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
- goto abort;
- }
-
/*
- * Confirm the page maps to the correct subpage.
- *
- * Note that uprobe, debugger, or MAP_PRIVATE may change
- * the page table, but the new page will not pass
- * PageCompound() check.
+ * Note that uprobe, debugger, or MAP_PRIVATE may change the
+ * page table, but the new page will not be a subpage of hpage.
*/
- if (WARN_ON(hpage + i != page))
+ if (hpage + i != page)
goto abort;
count++;
}
@@ -1495,7 +1484,7 @@ void collapse_pte_mapped_thp(struct mm_s
pte_unmap_unlock(start_pte, ptl);
/* step 3: set proper refcount and mm_counters. */
- if (hpage) {
+ if (count) {
page_ref_sub(hpage, count);
add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
}
@@ -1506,10 +1495,15 @@ void collapse_pte_mapped_thp(struct mm_s
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
+
+drop_hpage:
+ unlock_page(hpage);
+ put_page(hpage);
return;
abort:
pte_unmap_unlock(start_pte, ptl);
+ goto drop_hpage;
}
static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
_
Patches currently in -mm which might be from hughd(a)google.com are
The patch titled
Subject: khugepaged: collapse_pte_mapped_thp() flush the right range
has been removed from the -mm tree. Its filename was
khugepaged-collapse_pte_mapped_thp-flush-the-right-range.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: collapse_pte_mapped_thp() flush the right range
pmdp_collapse_flush() should be given the start address at which the huge
page is mapped, haddr: it was given addr, which at that point has been
used as a local variable, incremented to the end address of the extent.
Found by source inspection while chasing a hugepage locking bug, which I
then could not explain by this. At first I thought this was very bad;
then saw that all of the page translations that were not flushed would
actually still point to the right pages afterwards, so harmless; then
realized that I know nothing of how different architectures and models
cache intermediate paging structures, so maybe it matters after all -
particularly since the page table concerned is immediately freed.
Much easier to fix than to think about.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021204390.27773@eggly.anvils
Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/khugepaged.c~khugepaged-collapse_pte_mapped_thp-flush-the-right-range
+++ a/mm/khugepaged.c
@@ -1502,7 +1502,7 @@ void collapse_pte_mapped_thp(struct mm_s
/* step 4: collapse pmd */
ptl = pmd_lock(vma->vm_mm, pmd);
- _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ _pmd = pmdp_collapse_flush(vma, haddr, pmd);
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
_
Patches currently in -mm which might be from hughd(a)google.com are
The patch titled
Subject: mm/hugetlb: fix calculation of adjust_range_if_pmd_sharing_possible
has been removed from the -mm tree. Its filename was
mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/hugetlb: fix calculation of adjust_range_if_pmd_sharing_possible
This is found by code observation only.
Firstly, the worst case scenario should assume the whole range was covered
by pmd sharing. The old algorithm might not work as expected for ranges
like (1g-2m, 1g+2m), where the adjusted range should be (0, 1g+2m) but the
expected range should be (0, 2g).
Since at it, remove the loop since it should not be required. With that,
the new code should be faster too when the invalidating range is huge.
Mike said:
: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
: adjust to (0, 1g+2m) which is incorrect.
:
: We should cc stable. The original reason for adjusting the range was to
: prevent data corruption (getting wrong page). Since the range is not
: always adjusted correctly, the potential for corruption still exists.
:
: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
: is only gong to be called in two cases:
:
: 1) for a single page
: 2) for range == entire vma
:
: In those cases, the current code should produce the correct results.
:
: To be safe, let's just cc stable.
Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible
+++ a/mm/hugetlb.c
@@ -5314,25 +5314,21 @@ static bool vma_shareable(struct vm_area
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
- unsigned long check_addr;
+ unsigned long a_start, a_end;
if (!(vma->vm_flags & VM_MAYSHARE))
return;
- for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
- unsigned long a_start = check_addr & PUD_MASK;
- unsigned long a_end = a_start + PUD_SIZE;
+ /* Extend the range to be PUD aligned for a worst case scenario */
+ a_start = ALIGN_DOWN(*start, PUD_SIZE);
+ a_end = ALIGN(*end, PUD_SIZE);
- /*
- * If sharing is possible, adjust start/end if necessary.
- */
- if (range_in_vma(vma, a_start, a_end)) {
- if (a_start < *start)
- *start = a_start;
- if (a_end > *end)
- *end = a_end;
- }
- }
+ /*
+ * Intersect the range with the vma range, since pmd sharing won't be
+ * across vma after all
+ */
+ *start = max(vma->vm_start, a_start);
+ *end = min(vma->vm_end, a_end);
}
/*
_
Patches currently in -mm which might be from peterx(a)redhat.com are
mm-do-page-fault-accounting-in-handle_mm_fault.patch
mm-alpha-use-general-page-fault-accounting.patch
mm-arc-use-general-page-fault-accounting.patch
mm-arm-use-general-page-fault-accounting.patch
mm-arm64-use-general-page-fault-accounting.patch
mm-csky-use-general-page-fault-accounting.patch
mm-hexagon-use-general-page-fault-accounting.patch
mm-ia64-use-general-page-fault-accounting.patch
mm-m68k-use-general-page-fault-accounting.patch
mm-microblaze-use-general-page-fault-accounting.patch
mm-mips-use-general-page-fault-accounting.patch
mm-nds32-use-general-page-fault-accounting.patch
mm-nios2-use-general-page-fault-accounting.patch
mm-openrisc-use-general-page-fault-accounting.patch
mm-parisc-use-general-page-fault-accounting.patch
mm-powerpc-use-general-page-fault-accounting.patch
mm-riscv-use-general-page-fault-accounting.patch
mm-s390-use-general-page-fault-accounting.patch
mm-sh-use-general-page-fault-accounting.patch
mm-sparc32-use-general-page-fault-accounting.patch
mm-sparc64-use-general-page-fault-accounting.patch
mm-x86-use-general-page-fault-accounting.patch
mm-xtensa-use-general-page-fault-accounting.patch
mm-clean-up-the-last-pieces-of-page-fault-accountings.patch
mm-gup-remove-task_struct-pointer-for-all-gup-code.patch
The patch titled
Subject: mm/page_counter.c: fix protection usage propagation
has been removed from the -mm tree. Its filename was
mm-fix-protection-usage-propagation.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Michal Koutný <mkoutny(a)suse.com>
Subject: mm/page_counter.c: fix protection usage propagation
When workload runs in cgroups that aren't directly below root cgroup and
their parent specifies reclaim protection, it may end up ineffective.
The reason is that propagate_protected_usage() is not called in all
hierarchy up. All the protected usage is incorrectly accumulated in the
workload's parent. This means that siblings_low_usage is overestimated
and effective protection underestimated. Even though it is transitional
phenomenon (uncharge path does correct propagation and fixes the wrong
children_low_usage), it can undermine the intended protection
unexpectedly.
We have noticed this problem while seeing a swap out in a descendant of a
protected memcg (intermediate node) while the parent was conveniently
under its protection limit and the memory pressure was external to that
hierarchy. Michal has pinpointed this down to the wrong
siblings_low_usage which led to the unwanted reclaim.
The fix is simply updating children_low_usage in respective ancestors also
in the charging path.
Link: http://lkml.kernel.org/r/20200803153231.15477-1-mhocko@kernel.org
Fixes: 230671533d64 ("mm: memory.low hierarchical behavior")
Signed-off-by: Michal Koutný <mkoutny(a)suse.com>
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Roman Gushchin <guro(a)fb.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [4.18+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_counter.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/page_counter.c~mm-fix-protection-usage-propagation
+++ a/mm/page_counter.c
@@ -72,7 +72,7 @@ void page_counter_charge(struct page_cou
long new;
new = atomic_long_add_return(nr_pages, &c->usage);
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* This is indeed racy, but we can live with some
* inaccuracy in the watermark.
@@ -116,7 +116,7 @@ bool page_counter_try_charge(struct page
new = atomic_long_add_return(nr_pages, &c->usage);
if (new > c->max) {
atomic_long_sub(nr_pages, &c->usage);
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* This is racy, but we can live with some
* inaccuracy in the failcnt.
@@ -125,7 +125,7 @@ bool page_counter_try_charge(struct page
*fail = c;
goto failed;
}
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* Just like with failcnt, we can live with some
* inaccuracy in the watermark.
_
Patches currently in -mm which might be from mkoutny(a)suse.com are
proc-pid-smaps-consistent-whitespace-output-format.patch
The patch titled
Subject: ocfs2: change slot number type s16 to u16
has been removed from the -mm tree. Its filename was
ocfs2-change-slot-number-type-s16-to-u16.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Junxiao Bi <junxiao.bi(a)oracle.com>
Subject: ocfs2: change slot number type s16 to u16
Dan Carpenter reported the following static checker warning.
fs/ocfs2/super.c:1269 ocfs2_parse_options() warn: '(-1)' 65535 can't fit into 32767 'mopt->slot'
fs/ocfs2/suballoc.c:859 ocfs2_init_inode_steal_slot() warn: '(-1)' 65535 can't fit into 32767 'osb->s_inode_steal_slot'
fs/ocfs2/suballoc.c:867 ocfs2_init_meta_steal_slot() warn: '(-1)' 65535 can't fit into 32767 'osb->s_meta_steal_slot'
That's because OCFS2_INVALID_SLOT is (u16)-1. Slot number in ocfs2 can be
never negative, so change s16 to u16.
Link: http://lkml.kernel.org/r/20200627001259.19757-1-junxiao.bi@oracle.com
Fixes: 9277f8334ffc ("ocfs2: fix value of OCFS2_INVALID_SLOT")
Signed-off-by: Junxiao Bi <junxiao.bi(a)oracle.com>
Reported-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Reviewed-by: Gang He <ghe(a)suse.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/ocfs2.h | 4 ++--
fs/ocfs2/suballoc.c | 4 ++--
fs/ocfs2/super.c | 4 ++--
3 files changed, 6 insertions(+), 6 deletions(-)
--- a/fs/ocfs2/ocfs2.h~ocfs2-change-slot-number-type-s16-to-u16
+++ a/fs/ocfs2/ocfs2.h
@@ -327,8 +327,8 @@ struct ocfs2_super
spinlock_t osb_lock;
u32 s_next_generation;
unsigned long osb_flags;
- s16 s_inode_steal_slot;
- s16 s_meta_steal_slot;
+ u16 s_inode_steal_slot;
+ u16 s_meta_steal_slot;
atomic_t s_num_inodes_stolen;
atomic_t s_num_meta_stolen;
--- a/fs/ocfs2/suballoc.c~ocfs2-change-slot-number-type-s16-to-u16
+++ a/fs/ocfs2/suballoc.c
@@ -879,9 +879,9 @@ static void __ocfs2_set_steal_slot(struc
{
spin_lock(&osb->osb_lock);
if (type == INODE_ALLOC_SYSTEM_INODE)
- osb->s_inode_steal_slot = slot;
+ osb->s_inode_steal_slot = (u16)slot;
else if (type == EXTENT_ALLOC_SYSTEM_INODE)
- osb->s_meta_steal_slot = slot;
+ osb->s_meta_steal_slot = (u16)slot;
spin_unlock(&osb->osb_lock);
}
--- a/fs/ocfs2/super.c~ocfs2-change-slot-number-type-s16-to-u16
+++ a/fs/ocfs2/super.c
@@ -78,7 +78,7 @@ struct mount_options
unsigned long commit_interval;
unsigned long mount_opt;
unsigned int atime_quantum;
- signed short slot;
+ unsigned short slot;
int localalloc_opt;
unsigned int resv_level;
int dir_resv_level;
@@ -1349,7 +1349,7 @@ static int ocfs2_parse_options(struct su
goto bail;
}
if (option)
- mopt->slot = (s16)option;
+ mopt->slot = (u16)option;
break;
case Opt_commit:
if (match_int(&args[0], &option)) {
_
Patches currently in -mm which might be from junxiao.bi(a)oracle.com are
The patch titled
Subject: mm: fix kthread_use_mm() vs TLB invalidate
has been removed from the -mm tree. Its filename was
mm-fix-kthread_use_mm-vs-tlb-invalidate.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Peter Zijlstra <peterz(a)infradead.org>
Subject: mm: fix kthread_use_mm() vs TLB invalidate
For SMP systems using IPI based TLB invalidation, looking at
current->active_mm is entirely reasonable. This then presents the
following race condition:
CPU0 CPU1
flush_tlb_mm(mm) use_mm(mm)
<send-IPI>
tsk->active_mm = mm;
<IPI>
if (tsk->active_mm == mm)
// flush TLBs
</IPI>
switch_mm(old_mm,mm,tsk);
Where it is possible the IPI flushed the TLBs for @old_mm, not @mm,
because the IPI lands before we actually switched.
Avoid this by disabling IRQs across changing ->active_mm and
switch_mm().
Of the (SMP) architectures that have IPI based TLB invalidate:
Alpha - checks active_mm
ARC - ASID specific
IA64 - checks active_mm
MIPS - ASID specific flush
OpenRISC - shoots down world
PARISC - shoots down world
SH - ASID specific
SPARC - ASID specific
x86 - N/A
xtensa - checks active_mm
So at the very least Alpha, IA64 and Xtensa are suspect.
On top of this, for scheduler consistency we need at least preemption
disabled across changing tsk->mm and doing switch_mm(), which is
currently provided by task_lock(), but that's not sufficient for
PREEMPT_RT.
[akpm(a)linux-foundation.org: add comment]
Link: http://lkml.kernel.org/r/20200721154106.GE10769@hirez.programming.kicks-ass…
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Reported-by: Andy Lutomirski <luto(a)amacapital.net>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Jann Horn <jannh(a)google.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kthread.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/kernel/kthread.c~mm-fix-kthread_use_mm-vs-tlb-invalidate
+++ a/kernel/kthread.c
@@ -1241,13 +1241,16 @@ void kthread_use_mm(struct mm_struct *mm
WARN_ON_ONCE(tsk->mm);
task_lock(tsk);
+ /* Hold off tlb flush IPIs while switching mm's */
+ local_irq_disable();
active_mm = tsk->active_mm;
if (active_mm != mm) {
mmgrab(mm);
tsk->active_mm = mm;
}
tsk->mm = mm;
- switch_mm(active_mm, mm, tsk);
+ switch_mm_irqs_off(active_mm, mm, tsk);
+ local_irq_enable();
task_unlock(tsk);
#ifdef finish_arch_post_lock_switch
finish_arch_post_lock_switch();
@@ -1276,9 +1279,11 @@ void kthread_unuse_mm(struct mm_struct *
task_lock(tsk);
sync_mm_rss(mm);
+ local_irq_disable();
tsk->mm = NULL;
/* active_mm is still 'mm' */
enter_lazy_tlb(mm, tsk);
+ local_irq_enable();
task_unlock(tsk);
}
EXPORT_SYMBOL_GPL(kthread_unuse_mm);
_
Patches currently in -mm which might be from peterz(a)infradead.org are
The patch titled
Subject: mm/shuffle: don't move pages between zones and don't read garbage memmaps
has been removed from the -mm tree. Its filename was
mm-shuffle-dont-move-pages-between-zones-and-dont-read-garbage-memmaps.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/shuffle: don't move pages between zones and don't read garbage memmaps
Especially with memory hotplug, we can have offline sections (with a
garbage memmap) and overlapping zones. We have to make sure to only touch
initialized memmaps (online sections managed by the buddy) and that the
zone matches, to not move pages between zones.
To test if this can actually happen, I added a simple
BUG_ON(page_zone(page_i) != page_zone(page_j));
right before the swap. When hotplugging a 256M DIMM to a 4G x86-64 VM and
onlining the first memory block "online_movable" and the second memory
block "online_kernel", it will trigger the BUG, as both zones (NORMAL and
MOVABLE) overlap.
This might result in all kinds of weird situations (e.g., double
allocations, list corruptions, unmovable allocations ending up in the
movable zone).
Link: http://lkml.kernel.org/r/20200624094741.9918-2-david@redhat.com
Fixes: e900a918b098 ("mm: shuffle initial free memory to improve memory-side-cache utilization")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Wei Yang <richard.weiyang(a)linux.alibaba.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Wei Yang <richard.weiyang(a)gmail.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: <stable(a)vger.kernel.org> [5.2+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/shuffle.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
--- a/mm/shuffle.c~mm-shuffle-dont-move-pages-between-zones-and-dont-read-garbage-memmaps
+++ a/mm/shuffle.c
@@ -58,25 +58,25 @@ module_param_call(shuffle, shuffle_store
* For two pages to be swapped in the shuffle, they must be free (on a
* 'free_area' lru), have the same order, and have the same migratetype.
*/
-static struct page * __meminit shuffle_valid_page(unsigned long pfn, int order)
+static struct page * __meminit shuffle_valid_page(struct zone *zone,
+ unsigned long pfn, int order)
{
- struct page *page;
+ struct page *page = pfn_to_online_page(pfn);
/*
* Given we're dealing with randomly selected pfns in a zone we
* need to ask questions like...
*/
- /* ...is the pfn even in the memmap? */
- if (!pfn_valid_within(pfn))
+ /* ... is the page managed by the buddy? */
+ if (!page)
return NULL;
- /* ...is the pfn in a present section or a hole? */
- if (!pfn_in_present_section(pfn))
+ /* ... is the page assigned to the same zone? */
+ if (page_zone(page) != zone)
return NULL;
/* ...is the page free and currently on a free_area list? */
- page = pfn_to_page(pfn);
if (!PageBuddy(page))
return NULL;
@@ -123,7 +123,7 @@ void __meminit __shuffle_zone(struct zon
* page_j randomly selected in the span @zone_start_pfn to
* @spanned_pages.
*/
- page_i = shuffle_valid_page(i, order);
+ page_i = shuffle_valid_page(z, i, order);
if (!page_i)
continue;
@@ -137,7 +137,7 @@ void __meminit __shuffle_zone(struct zon
j = z->zone_start_pfn +
ALIGN_DOWN(get_random_long() % z->spanned_pages,
order_pages);
- page_j = shuffle_valid_page(j, order);
+ page_j = shuffle_valid_page(z, j, order);
if (page_j && page_j != page_i)
break;
}
_
Patches currently in -mm which might be from david(a)redhat.com are
From: Chengming Zhou <zhouchengming(a)bytedance.com>
When module loaded and enabled, we will use __ftrace_replace_code
for module if any ftrace_ops referenced it found. But we will get
wrong ftrace_addr for module rec in ftrace_get_addr_new, because
rec->flags has not been setup correctly. It can cause the callback
function of a ftrace_ops has FTRACE_OPS_FL_SAVE_REGS to be called
with pt_regs set to NULL.
So setup correct FTRACE_FL_REGS flags for rec when we call
referenced_filters to find ftrace_ops references it.
Link: https://lkml.kernel.org/r/20200728180554.65203-1-zhouchengming@bytedance.com
Cc: stable(a)vger.kernel.org
Fixes: 8c4f3c3fa9681 ("ftrace: Check module functions being traced on reload")
Signed-off-by: Chengming Zhou <zhouchengming(a)bytedance.com>
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c141d347f71a..d052f856f1cf 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6198,8 +6198,11 @@ static int referenced_filters(struct dyn_ftrace *rec)
int cnt = 0;
for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
- if (ops_references_rec(ops, rec))
- cnt++;
+ if (ops_references_rec(ops, rec)) {
+ cnt++;
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+ rec->flags |= FTRACE_FL_REGS;
+ }
}
return cnt;
@@ -6378,8 +6381,8 @@ void ftrace_module_enable(struct module *mod)
if (ftrace_start_up)
cnt += referenced_filters(rec);
- /* This clears FTRACE_FL_DISABLED */
- rec->flags = cnt;
+ rec->flags &= ~FTRACE_FL_DISABLED;
+ rec->flags += cnt;
if (ftrace_start_up && cnt) {
int failed = __ftrace_replace_code(rec, 1);
--
2.26.2
From: Qiushi Wu <wu000273(a)umn.edu>
[ Upstream commit 17ed808ad243192fb923e4e653c1338d3ba06207 ]
When kobject_init_and_add() returns an error, it should be handled
because kobject_init_and_add() takes a reference even when it fails. If
this function returns an error, kobject_put() must be called to properly
clean up the memory associated with the object.
Therefore, replace calling kfree() and call kobject_put() and add a
missing kobject_put() in the edac_device_register_sysfs_main_kobj()
error path.
[ bp: Massage and merge into a single patch. ]
Fixes: b2ed215a3338 ("Kobject: change drivers/edac to use kobject_init_and_add")
Signed-off-by: Qiushi Wu <wu000273(a)umn.edu>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Link: https://lkml.kernel.org/r/20200528202238.18078-1-wu000273@umn.edu
Link: https://lkml.kernel.org/r/20200528203526.20908-1-wu000273@umn.edu
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/edac/edac_device_sysfs.c | 1 +
drivers/edac/edac_pci_sysfs.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index fb68a06ad6837..18991cfec2af4 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -280,6 +280,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
/* Error exit stack */
err_kobj_reg:
+ kobject_put(&edac_dev->kobj);
module_put(edac_dev->owner);
err_mod_get:
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 24d877f6e5775..c56128402bc67 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -394,7 +394,7 @@ static int edac_pci_main_kobj_setup(void)
/* Error unwind statck */
kobject_init_and_add_fail:
- kfree(edac_pci_top_main_kobj);
+ kobject_put(edac_pci_top_main_kobj);
kzalloc_fail:
module_put(THIS_MODULE);
--
2.25.1
From: Qiushi Wu <wu000273(a)umn.edu>
[ Upstream commit 17ed808ad243192fb923e4e653c1338d3ba06207 ]
When kobject_init_and_add() returns an error, it should be handled
because kobject_init_and_add() takes a reference even when it fails. If
this function returns an error, kobject_put() must be called to properly
clean up the memory associated with the object.
Therefore, replace calling kfree() and call kobject_put() and add a
missing kobject_put() in the edac_device_register_sysfs_main_kobj()
error path.
[ bp: Massage and merge into a single patch. ]
Fixes: b2ed215a3338 ("Kobject: change drivers/edac to use kobject_init_and_add")
Signed-off-by: Qiushi Wu <wu000273(a)umn.edu>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Link: https://lkml.kernel.org/r/20200528202238.18078-1-wu000273@umn.edu
Link: https://lkml.kernel.org/r/20200528203526.20908-1-wu000273@umn.edu
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/edac/edac_device_sysfs.c | 1 +
drivers/edac/edac_pci_sysfs.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 93da1a45c7161..470b02fc2de96 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -275,6 +275,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
/* Error exit stack */
err_kobj_reg:
+ kobject_put(&edac_dev->kobj);
module_put(edac_dev->owner);
err_out:
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 6e3428ba400f3..622d117e25335 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -386,7 +386,7 @@ static int edac_pci_main_kobj_setup(void)
/* Error unwind statck */
kobject_init_and_add_fail:
- kfree(edac_pci_top_main_kobj);
+ kobject_put(edac_pci_top_main_kobj);
kzalloc_fail:
module_put(THIS_MODULE);
--
2.25.1
From: Jakub Kicinski <kuba(a)kernel.org>
When ur_load_imm_any() is inlined into jeq_imm(), it's possible for the
compiler to deduce a case where _val can only have the value of -1 at
compile time. Specifically,
/* struct bpf_insn: _s32 imm */
u64 imm = insn->imm; /* sign extend */
if (imm >> 32) { /* non-zero only if insn->imm is negative */
/* inlined from ur_load_imm_any */
u32 __imm = imm >> 32; /* therefore, always 0xffffffff */
if (__builtin_constant_p(__imm) && __imm > 255)
compiletime_assert_XXX()
This can result in tripping a BUILD_BUG_ON() in __BF_FIELD_CHECK() that
checks that a given value is representable in one byte (interpreted as
unsigned).
FIELD_FIT() should return true or false at runtime for whether a value
can fit for not. Don't break the build over a value that's too large for
the mask. We'd prefer to keep the inlining and compiler optimizations
though we know this case will always return false.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/kernel-hardening/CAK7LNASvb0UDJ0U5wkYYRzTAdnEs64HjX…
Reported-by: Masahiro Yamada <masahiroy(a)kernel.org>
Debugged-by: Sami Tolvanen <samitolvanen(a)google.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
Acked-by: Alex Elder <elder(a)linaro.org>
---
Note: resent patch 1/2 as per Jakub on
https://lore.kernel.org/netdev/20200708230402.1644819-1-ndesaulniers@google…
include/linux/bitfield.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index 48ea093ff04c..4e035aca6f7e 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -77,7 +77,7 @@
*/
#define FIELD_FIT(_mask, _val) \
({ \
- __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: "); \
+ __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \
!((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \
})
--
2.27.0.383.g050319c2ae-goog
Hi Marc,
On Tue, Aug 04, 2020 at 05:52:36PM -0700, Marc Plumb wrote:
> Seeding two PRNGs with the same entropy causes two problems. The minor one
> is that you're double counting entropy. The major one is that anyone who can
> determine the state of one PRNG can determine the state of the other.
>
> The net_rand_state PRNG is effectively a 113 bit LFSR, so anyone who can see
> any 113 bits of output can determine the complete internal state.
>
> The output of the net_rand_state PRNG is used to determine how data is sent
> to the network, so the output is effectively broadcast to anyone watching
> network traffic. Therefore anyone watching the network traffic can determine
> the seed data being fed to the net_rand_state PRNG.
The problem this patch is trying to work around is that the reporter
(Amit) was able to determine the entire net_rand_state after observing
a certain number of packets due to this trivial LFSR and the fact that
its internal state between two reseedings only depends on the number
of calls to read it. (please note that regarding this point I'll
propose a patch to replace that PRNG to stop directly exposing the
internal state to the network).
If you look closer at the patch, you'll see that in one interrupt
the patch only uses any 32 out of the 128 bits of fast_pool to
update only 32 bits of the net_rand_state. As such, the sequence
observed on the network also depends on the remaining bits of
net_rand_state, while the 96 other bits of the fast_pool are not
exposed there.
> Since this is the same
> seed data being fed to get_random_bytes, it allows an attacker to determine
> the state and there output of /dev/random. I sincerely hope that this was
> not the intended goal. :)
Not only was this obviously not the goal, but I'd be particularly
interested in seeing this reality demonstrated, considering that
the whole 128 bits of fast_pool together count as a single bit of
entropy, and that as such, even if you were able to figure the
value of the 32 bits leaked to net_rand_state, you'd still have to
guess the 96 other bits for each single entropy bit :-/
Regards,
Willy
When FTRIM is issued on a group, ext4 marks it as trimmed so another FTRIM
on the same group has no effect. Ext4 marks group as trimmed if at least
one block is trimmed, therefore it is possible that a group is marked as
trimmed even if there are blocks in that group left untrimmed.
This patch marks group as trimmed only if there are no more blocks
in that group to be trimmed.
Fixes: 3d56b8d2c74cc3f375ce332b3ac3519e009d79ee
Tested-by: Lazar Beloica <lazar.beloica(a)nutanix.com>
Signed-off-by: Lazar Beloica <lazar.beloica(a)nutanix.com>
---
fs/ext4/mballoc.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c0a331e..130936b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -5346,6 +5346,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
{
void *bitmap;
ext4_grpblk_t next, count = 0, free_count = 0;
+ ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
struct ext4_buddy e4b;
int ret = 0;
@@ -5401,7 +5402,9 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
if (!ret) {
ret = count;
- EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+ next = mb_find_next_bit(bitmap, max_blks, max + 1);
+ if (next == max_blks)
+ EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
}
out:
ext4_unlock_group(sb, group);
--
1.8.3.1
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: a95883ce5505 - USB: serial: qcserial: add EM7305 QDL product ID
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 3:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 5:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ ACPI table test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: SCSI VPD
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ ACPI table test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: SCSI VPD
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Host 5:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ ACPI table test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: SCSI VPD
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
On Fri, Aug 7, 2020 at 10:55 AM Andy Lutomirski <luto(a)amacapital.net> wrote:
>
> I think the real random.c can run plenty fast. It’s ChaCha20 plus ludicrous overhead right now.
I doubt it.
I tried something very much like that in user space to just see how
many cycles it ended up being.
I made a "just raw ChaCha20", and it was already much too slow for
what some of the networking people claim to want.
And maybe they are asking for too much, but if they think it's too
slow, they'll not use it, and then we're back to square one.
Now, what *might* be acceptable is to not do ChaCha20, but simply do a
single double-round of it.
So after doing 10 prandom_u32() calls, you'd have done a full
ChaCha20. I didn't actually try that, but from looking at the costs
from trying the full thing, I think it might be in the right ballpark.
How does that sound to people?
Linus
The patch titled
Subject: mm/memory.c: avoid access flag update TLB flush for retried page fault
has been removed from the -mm tree. Its filename was
mm-avoid-access-flag-update-tlb-flush-for-retried-page-fault.patch
This patch was dropped because it was nacked
------------------------------------------------------
From: Yang Shi <yang.shi(a)linux.alibaba.com>
Subject: mm/memory.c: avoid access flag update TLB flush for retried page fault
Recently we found regression when running will_it_scale/page_fault3 test
on ARM64. Over 70% down for the multi processes cases and over 20% down
for the multi threads cases. It turns out the regression is caused by
commit 89b15332af7c0312a41e50846819ca6613b58b4c ("mm: drop mmap_sem before
calling balance_dirty_pages() in write fault").
The test mmaps a memory size file then write to the mapping, this would
make all memory dirty and trigger dirty pages throttle, that upstream
commit would release mmap_sem then retry the page fault. The retried page
fault would see correct PTEs installed by the first try then update dirty
bit and clear read-only bit and flush TLBs for ARM. The regression is
caused by the excessive TLB flush. It is fine on x86 since x86 doesn't
clear read-only bit so there is no need to flush TLB for this case.
The page fault would be retried due to:
1. Waiting for page readahead
2. Waiting for page swapped in
3. Waiting for dirty pages throttling
The first two cases don't have PTEs set up at all, so the retried page
fault would install the PTEs, so they don't reach there. But the #3 case
usually has PTEs installed, the retried page fault would reach the dirty
bit and read-only bit update. But it seems not necessary to modify those
bits again for #3 since they should be already set by the first page fault
try.
Of course the parallel page fault may set up PTEs, but we just need care
about write fault. If the parallel page fault setup a writable and dirty
PTE then the retried fault doesn't need do anything extra. If the
parallel page fault setup a clean read-only PTE, the retried fault should
just call do_wp_page() then return as the below code snippet shows:
if (vmf->flags & FAULT_FLAG_WRITE) {
if (!pte_write(entry))
return do_wp_page(vmf);
}
With this fix the test result get back to normal.
[yang.shi(a)linux.alibaba.com: incorporate comment from Will Deacon, update commit log per discussion]
Link: http://lkml.kernel.org/r/1594848990-55657-1-git-send-email-yang.shi@linux.a…
Link: http://lkml.kernel.org/r/1594148072-91273-1-git-send-email-yang.shi@linux.a…
Signed-off-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Reported-by: Xu Yu <xuyu(a)linux.alibaba.com>
Debugged-by: Xu Yu <xuyu(a)linux.alibaba.com>
Tested-by: Xu Yu <xuyu(a)linux.alibaba.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Josef Bacik <josef(a)toxicpanda.com>
Cc: Hillf Danton <hdanton(a)sina.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
--- a/mm/memory.c~mm-avoid-access-flag-update-tlb-flush-for-retried-page-fault
+++ a/mm/memory.c
@@ -4241,8 +4241,14 @@ static vm_fault_t handle_pte_fault(struc
if (vmf->flags & FAULT_FLAG_WRITE) {
if (!pte_write(entry))
return do_wp_page(vmf);
- entry = pte_mkdirty(entry);
}
+
+ if (vmf->flags & FAULT_FLAG_TRIED)
+ goto unlock;
+
+ if (vmf->flags & FAULT_FLAG_WRITE)
+ entry = pte_mkdirty(entry);
+
entry = pte_mkyoung(entry);
if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
vmf->flags & FAULT_FLAG_WRITE)) {
_
Patches currently in -mm which might be from yang.shi(a)linux.alibaba.com are
mm-filemap-clear-idle-flag-for-writes.patch
mm-filemap-add-missing-fgp_-flags-in-kerneldoc-comment-for-pagecache_get_page.patch
mm-thp-remove-debug_cow-switch.patch
If the link register was zeroed out, do not attempt to use it for
address calculations for which there are currently no fixup handlers,
which can lead to a panic during unwind. Since panicking triggers
another unwind, this can lead to an infinite loop. If this occurs
during start_kernel(), this can prevent a kernel from booting.
commit 59b6359dd92d ("ARM: 8702/1: head-common.S: Clear lr before jumping to start_kernel()")
intentionally zeros out the link register in __mmap_switched which tail
calls into start kernel. Test for this condition so that we can stop
unwinding when initiated within start_kernel() correctly.
Cc: stable(a)vger.kernel.org
Fixes: commit 6dc5fd93b2f1 ("ARM: 8900/1: UNWINDER_FRAME_POINTER implementation for Clang")
Reported-by: Miles Chen <miles.chen(a)mediatek.com>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
arch/arm/lib/backtrace-clang.S | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S
index 6174c45f53a5..5388ac664c12 100644
--- a/arch/arm/lib/backtrace-clang.S
+++ b/arch/arm/lib/backtrace-clang.S
@@ -144,6 +144,8 @@ for_each_frame: tst frame, mask @ Check for address exceptions
*/
1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from next frame
+ tst sv_lr, #0 @ If there's no previous lr,
+ beq finished_setup @ we're done.
ldr r0, [sv_lr, #-4] @ get call instruction
ldr r3, .Lopcode+4
and r2, r3, r0 @ is this a bl call
--
2.28.0.163.g6104cc2f0b6-goog
Hi Andy,
On Fri, Aug 07, 2020 at 10:55:11AM -0700, Andy Lutomirski wrote:
> >> This is still another non-cryptographic PRNG.
> >
> > Absolutely. During some discussions regarding the possibility of using
> > CSPRNGs, orders around hundreds of CPU cycles were mentioned for them,
> > which can definitely be a huge waste of precious resources for some
> > workloads, possibly causing the addition of a few percent extra machines
> > in certain environments just to keep the average load under a certain
> > threshold.
>
> I think the real random.c can run plenty fast. It's ChaCha20 plus ludicrous
> overhead right now. I'm working (slowly) on making the overhead go away. I'm
> hoping to have something testable in a few days. As it stands, there is a
> ton of indirection, a pile of locks, multiple time comparisons, per-node and
> percpu buffers (why both?), wasted bits due to alignment, and probably other
> things that can be cleaned up. I'm trying to come up with something that is
> fast and has easy-to-understand semantics.
>
> You can follow along at:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/log/?h=rando…
Thanks, we'll see. I developed a quick test tool that's meant to be easy
to use to measure the performance impact on connect/accept. I have not
yet run it on a modified PRNG to verify if it works. I'll send it once
I've tested. I'd definitely would like to see no measurable performance
drop, and ideally even a small performance increase (as Tausworthe isn't
the lightest thing around either so we do have some little margin).
Willy
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d1719f70d0a5b83b12786a7dbc5b9fe396469016 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Thu, 30 Jul 2020 13:43:53 -0600
Subject: [PATCH] io_uring: don't touch 'ctx' after installing file descriptor
As soon as we install the file descriptor, we have to assume that it
can get arbitrarily closed. We currently account memory (and note that
we did) after installing the ring fd, which means that it could be a
potential use-after-free condition if the fd is closed right after
being installed, but before we fiddle with the ctx.
In fact, syzbot reported this exact scenario:
BUG: KASAN: use-after-free in io_account_mem fs/io_uring.c:7397 [inline]
BUG: KASAN: use-after-free in io_uring_create fs/io_uring.c:8369 [inline]
BUG: KASAN: use-after-free in io_uring_setup+0x2797/0x2910 fs/io_uring.c:8400
Read of size 1 at addr ffff888087a41044 by task syz-executor.5/18145
CPU: 0 PID: 18145 Comm: syz-executor.5 Not tainted 5.8.0-rc7-next-20200729-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x18f/0x20d lib/dump_stack.c:118
print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383
__kasan_report mm/kasan/report.c:513 [inline]
kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
io_account_mem fs/io_uring.c:7397 [inline]
io_uring_create fs/io_uring.c:8369 [inline]
io_uring_setup+0x2797/0x2910 fs/io_uring.c:8400
do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45c429
Code: 8d b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 5b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f8f121d0c78 EFLAGS: 00000246 ORIG_RAX: 00000000000001a9
RAX: ffffffffffffffda RBX: 0000000000008540 RCX: 000000000045c429
RDX: 0000000000000000 RSI: 0000000020000040 RDI: 0000000000000196
RBP: 000000000078bf38 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000078bf0c
R13: 00007fff86698cff R14: 00007f8f121d19c0 R15: 000000000078bf0c
Move the accounting of the ring used locked memory before we get and
install the ring file descriptor.
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+9d46305e76057f30c74e(a)syzkaller.appspotmail.com
Fixes: 309758254ea6 ("io_uring: report pinned memory usage")
Reviewed-by: Stefano Garzarella <sgarzare(a)redhat.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index fabf0b692384..33702f3b5af8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8329,6 +8329,15 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
ret = -EFAULT;
goto err;
}
+
+ /*
+ * Account memory _before_ installing the file descriptor. Once
+ * the descriptor is installed, it can get closed at any time.
+ */
+ io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries),
+ ACCT_LOCKED);
+ ctx->limit_mem = limit_mem;
+
/*
* Install ring fd as the very last thing, so we don't risk someone
* having closed it before we finish setup
@@ -8338,9 +8347,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
goto err;
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
- io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries),
- ACCT_LOCKED);
- ctx->limit_mem = limit_mem;
return ret;
err:
io_ring_ctx_wait_and_kill(ctx);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4b836a1426cb0f1ef2a6e211d7e553221594f8fc Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Mon, 27 Jul 2020 14:04:24 +0200
Subject: [PATCH] binder: Prevent context manager from incrementing ref 0
Binder is designed such that a binder_proc never has references to
itself. If this rule is violated, memory corruption can occur when a
process sends a transaction to itself; see e.g.
<https://syzkaller.appspot.com/bug?extid=09e05aba06723a94d43d>.
There is a remaining edgecase through which such a transaction-to-self
can still occur from the context of a task with BINDER_SET_CONTEXT_MGR
access:
- task A opens /dev/binder twice, creating binder_proc instances P1
and P2
- P1 becomes context manager
- P2 calls ACQUIRE on the magic handle 0, allocating index 0 in its
handle table
- P1 dies (by closing the /dev/binder fd and waiting a bit)
- P2 becomes context manager
- P2 calls ACQUIRE on the magic handle 0, allocating index 1 in its
handle table
[this triggers a warning: "binder: 1974:1974 tried to acquire
reference to desc 0, got 1 instead"]
- task B opens /dev/binder once, creating binder_proc instance P3
- P3 calls P2 (via magic handle 0) with (void*)1 as argument (two-way
transaction)
- P2 receives the handle and uses it to call P3 (two-way transaction)
- P3 calls P2 (via magic handle 0) (two-way transaction)
- P2 calls P2 (via handle 1) (two-way transaction)
And then, if P2 does *NOT* accept the incoming transaction work, but
instead closes the binder fd, we get a crash.
Solve it by preventing the context manager from using ACQUIRE on ref 0.
There shouldn't be any legitimate reason for the context manager to do
that.
Additionally, print a warning if someone manages to find another way to
trigger a transaction-to-self bug in the future.
Cc: stable(a)vger.kernel.org
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver")
Acked-by: Todd Kjos <tkjos(a)google.com>
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Martijn Coenen <maco(a)android.com>
Link: https://lore.kernel.org/r/20200727120424.1627555-1-jannh@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index f50c5f182bb5..5b310eea9e52 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2982,6 +2982,12 @@ static void binder_transaction(struct binder_proc *proc,
goto err_dead_binder;
}
e->to_node = target_node->debug_id;
+ if (WARN_ON(proc == target_proc)) {
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_invalid_target_handle;
+ }
if (security_binder_transaction(proc->tsk,
target_proc->tsk) < 0) {
return_error = BR_FAILED_REPLY;
@@ -3635,10 +3641,17 @@ static int binder_thread_write(struct binder_proc *proc,
struct binder_node *ctx_mgr_node;
mutex_lock(&context->context_mgr_node_lock);
ctx_mgr_node = context->binder_context_mgr_node;
- if (ctx_mgr_node)
+ if (ctx_mgr_node) {
+ if (ctx_mgr_node->proc == proc) {
+ binder_user_error("%d:%d context manager tried to acquire desc 0\n",
+ proc->pid, thread->pid);
+ mutex_unlock(&context->context_mgr_node_lock);
+ return -EINVAL;
+ }
ret = binder_inc_ref_for_node(
proc, ctx_mgr_node,
strong, NULL, &rdata);
+ }
mutex_unlock(&context->context_mgr_node_lock);
}
if (ret)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4b836a1426cb0f1ef2a6e211d7e553221594f8fc Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Mon, 27 Jul 2020 14:04:24 +0200
Subject: [PATCH] binder: Prevent context manager from incrementing ref 0
Binder is designed such that a binder_proc never has references to
itself. If this rule is violated, memory corruption can occur when a
process sends a transaction to itself; see e.g.
<https://syzkaller.appspot.com/bug?extid=09e05aba06723a94d43d>.
There is a remaining edgecase through which such a transaction-to-self
can still occur from the context of a task with BINDER_SET_CONTEXT_MGR
access:
- task A opens /dev/binder twice, creating binder_proc instances P1
and P2
- P1 becomes context manager
- P2 calls ACQUIRE on the magic handle 0, allocating index 0 in its
handle table
- P1 dies (by closing the /dev/binder fd and waiting a bit)
- P2 becomes context manager
- P2 calls ACQUIRE on the magic handle 0, allocating index 1 in its
handle table
[this triggers a warning: "binder: 1974:1974 tried to acquire
reference to desc 0, got 1 instead"]
- task B opens /dev/binder once, creating binder_proc instance P3
- P3 calls P2 (via magic handle 0) with (void*)1 as argument (two-way
transaction)
- P2 receives the handle and uses it to call P3 (two-way transaction)
- P3 calls P2 (via magic handle 0) (two-way transaction)
- P2 calls P2 (via handle 1) (two-way transaction)
And then, if P2 does *NOT* accept the incoming transaction work, but
instead closes the binder fd, we get a crash.
Solve it by preventing the context manager from using ACQUIRE on ref 0.
There shouldn't be any legitimate reason for the context manager to do
that.
Additionally, print a warning if someone manages to find another way to
trigger a transaction-to-self bug in the future.
Cc: stable(a)vger.kernel.org
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver")
Acked-by: Todd Kjos <tkjos(a)google.com>
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Martijn Coenen <maco(a)android.com>
Link: https://lore.kernel.org/r/20200727120424.1627555-1-jannh@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index f50c5f182bb5..5b310eea9e52 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2982,6 +2982,12 @@ static void binder_transaction(struct binder_proc *proc,
goto err_dead_binder;
}
e->to_node = target_node->debug_id;
+ if (WARN_ON(proc == target_proc)) {
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_invalid_target_handle;
+ }
if (security_binder_transaction(proc->tsk,
target_proc->tsk) < 0) {
return_error = BR_FAILED_REPLY;
@@ -3635,10 +3641,17 @@ static int binder_thread_write(struct binder_proc *proc,
struct binder_node *ctx_mgr_node;
mutex_lock(&context->context_mgr_node_lock);
ctx_mgr_node = context->binder_context_mgr_node;
- if (ctx_mgr_node)
+ if (ctx_mgr_node) {
+ if (ctx_mgr_node->proc == proc) {
+ binder_user_error("%d:%d context manager tried to acquire desc 0\n",
+ proc->pid, thread->pid);
+ mutex_unlock(&context->context_mgr_node_lock);
+ return -EINVAL;
+ }
ret = binder_inc_ref_for_node(
proc, ctx_mgr_node,
strong, NULL, &rdata);
+ }
mutex_unlock(&context->context_mgr_node_lock);
}
if (ret)
The only-root-readable /sys/module/$module/sections/$section files
did not truncate their output to the available buffer size. While most
paths into the kernfs read handlers end up using PAGE_SIZE buffers,
it's possible to get there through other paths (e.g. splice, sendfile).
Actually limit the output to the "count" passed into the read function,
and report it back correctly. *sigh*
Reported-by: kernel test robot <lkp(a)intel.com>
Link: https://lore.kernel.org/lkml/20200805002015.GE23458@shao2-debian
Fixes: ed66f991bb19 ("module: Refactor section attr into bin attribute")
Cc: stable(a)vger.kernel.org
Cc: Jessica Yu <jeyu(a)kernel.org>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
---
kernel/module.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/kernel/module.c b/kernel/module.c
index aa183c9ac0a2..08c46084d8cc 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1520,18 +1520,34 @@ struct module_sect_attrs {
struct module_sect_attr attrs[];
};
+#define MODULE_SECT_READ_SIZE (3 /* "0x", "\n" */ + (BITS_PER_LONG / 4))
static ssize_t module_sect_read(struct file *file, struct kobject *kobj,
struct bin_attribute *battr,
char *buf, loff_t pos, size_t count)
{
struct module_sect_attr *sattr =
container_of(battr, struct module_sect_attr, battr);
+ char bounce[MODULE_SECT_READ_SIZE + 1];
+ size_t wrote;
if (pos != 0)
return -EINVAL;
- return sprintf(buf, "0x%px\n",
- kallsyms_show_value(file->f_cred) ? (void *)sattr->address : NULL);
+ /*
+ * Since we're a binary read handler, we must account for the
+ * trailing NUL byte that sprintf will write: if "buf" is
+ * too small to hold the NUL, or the NUL is exactly the last
+ * byte, the read will look like it got truncated by one byte.
+ * Since there is no way to ask sprintf nicely to not write
+ * the NUL, we have to use a bounce buffer.
+ */
+ wrote = scnprintf(bounce, sizeof(bounce), "0x%px\n",
+ kallsyms_show_value(file->f_cred)
+ ? (void *)sattr->address : NULL);
+ count = min(count, wrote);
+ memcpy(buf, bounce, count);
+
+ return count;
}
static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
@@ -1580,7 +1596,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info)
goto out;
sect_attrs->nsections++;
sattr->battr.read = module_sect_read;
- sattr->battr.size = 3 /* "0x", "\n" */ + (BITS_PER_LONG / 4);
+ sattr->battr.size = MODULE_SECT_READ_SIZE;
sattr->battr.attr.mode = 0400;
*(gattr++) = &(sattr++)->battr;
}
--
2.25.1
This patch adds check to ensure that the struct net_device::ml_priv is
allocated, as it is used later by the j1939 stack.
The allocation is done by all mainline CAN network drivers, but when using
bond or team devices this is not the case.
Bail out if no ml_priv is allocated.
Reported-by: syzbot+f03d384f3455d28833eb(a)syzkaller.appspotmail.com
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Cc: linux-stable <stable(a)vger.kernel.org> # >= v5.4
Signed-off-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
---
net/can/j1939/socket.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index b9a17c2ee16f..27542de233c7 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -467,6 +467,14 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
goto out_release_sock;
}
+ if (!ndev->ml_priv) {
+ netdev_warn_once(ndev,
+ "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
+ dev_put(ndev);
+ ret = -ENODEV;
+ goto out_release_sock;
+ }
+
priv = j1939_netdev_start(ndev);
dev_put(ndev);
if (IS_ERR(priv)) {
--
2.28.0
The current stack implementation do not support ECTS requests of not
aligned TP sized blocks.
If ECTS will request a block with size and offset spanning two TP
blocks, this will cause memcpy() to read beyond the queued skb (which
does only contain one TP sized block).
Sometimes KASAN will detect this read if the memory region beyond the
skb was previously allocated and freed. In other situations it will stay
undetected. The ETP transfer in any case will be corrupted.
This patch adds a sanity check to avoid this kind of read and abort the
session with error J1939_XTP_ABORT_ECTS_TOO_BIG.
Reported-by: syzbot+5322482fe520b02aea30(a)syzkaller.appspotmail.com
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Cc: linux-stable <stable(a)vger.kernel.org> # >= v5.4
Signed-off-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
---
net/can/j1939/transport.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index b135c5e2a86e..30957c9a8eb7 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -787,6 +787,18 @@ static int j1939_session_tx_dat(struct j1939_session *session)
if (len > 7)
len = 7;
+ if (offset + len > se_skb->len) {
+ netdev_err_once(priv->ndev,
+ "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
+ __func__, session, skcb->offset, se_skb->len , session->pkt.tx);
+ return -EOVERFLOW;
+ }
+
+ if (!len) {
+ ret = -ENOBUFS;
+ break;
+ }
+
memcpy(&dat[1], &tpdat[offset], len);
ret = j1939_tp_tx_dat(session, dat, len + 1);
if (ret < 0) {
@@ -1120,6 +1132,9 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
* cleanup including propagation of the error to user space.
*/
break;
+ case -EOVERFLOW:
+ j1939_session_cancel(session, J1939_XTP_ABORT_ECTS_TOO_BIG);
+ break;
case 0:
session->tx_retry = 0;
break;
--
2.28.0
I'm announcing the release of the 5.7.14 kernel.
All users of the 5.7 kernel series must upgrade.
The updated 5.7.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.7.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2
arch/arm/include/asm/percpu.h | 2
arch/arm64/include/asm/archrandom.h | 1
arch/arm64/include/asm/pointer_auth.h | 8 +++
arch/arm64/kernel/kaslr.c | 2
drivers/char/random.c | 1
include/linux/prandom.h | 78 ++++++++++++++++++++++++++++++++++
include/linux/random.h | 63 +--------------------------
kernel/time/timer.c | 8 +++
lib/random32.c | 2
10 files changed, 103 insertions(+), 64 deletions(-)
Greg Kroah-Hartman (1):
Linux 5.7.14
Grygorii Strashko (1):
ARM: percpu.h: fix build error
Linus Torvalds (3):
random32: remove net_rand_state from the latent entropy gcc plugin
random32: move the pseudo-random 32-bit definitions to prandom.h
random: random.h should include archrandom.h, not the other way around
Marc Zyngier (1):
arm64: Workaround circular dependency in pointer_auth.h
Willy Tarreau (2):
random32: update the net random state on interrupt and activity
random: fix circular include dependency on arm64 after addition of percpu.h
I'm announcing the release of the 5.4.57 kernel.
All users of the 5.4 kernel series must upgrade.
The updated 5.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.4.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2
arch/arm/include/asm/percpu.h | 2
arch/arm64/include/asm/pointer_auth.h | 8 ++-
drivers/char/random.c | 1
fs/ext4/inode.c | 5 ++
include/linux/bpf.h | 13 ++++-
include/linux/prandom.h | 78 ++++++++++++++++++++++++++++++++
include/linux/random.h | 63 +------------------------
include/linux/skmsg.h | 13 +++++
kernel/bpf/syscall.c | 4 -
kernel/time/timer.c | 8 +++
lib/random32.c | 2
net/core/sock_map.c | 50 ++++++++++++++++++--
tools/testing/selftests/bpf/test_maps.c | 12 ++--
14 files changed, 184 insertions(+), 77 deletions(-)
Greg Kroah-Hartman (1):
Linux 5.4.57
Grygorii Strashko (1):
ARM: percpu.h: fix build error
Jiang Ying (1):
ext4: fix direct I/O read error
Linus Torvalds (2):
random32: remove net_rand_state from the latent entropy gcc plugin
random32: move the pseudo-random 32-bit definitions to prandom.h
Lorenz Bauer (2):
selftests: bpf: Fix detach from sockmap tests
bpf: sockmap: Require attach_bpf_fd when detaching a program
Marc Zyngier (1):
arm64: Workaround circular dependency in pointer_auth.h
Willy Tarreau (2):
random32: update the net random state on interrupt and activity
random: fix circular include dependency on arm64 after addition of percpu.h
I'm announcing the release of the 4.19.138 kernel.
All users of the 4.19 kernel series must upgrade.
The updated 4.19.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.19.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 -
arch/arm/include/asm/percpu.h | 2 +
drivers/char/random.c | 1
fs/ext4/inode.c | 5 ++
include/linux/prandom.h | 78 ++++++++++++++++++++++++++++++++++++++++++
include/linux/random.h | 63 ++-------------------------------
kernel/time/timer.c | 8 ++++
lib/random32.c | 2 -
8 files changed, 100 insertions(+), 61 deletions(-)
Greg Kroah-Hartman (1):
Linux 4.19.138
Grygorii Strashko (1):
ARM: percpu.h: fix build error
Jiang Ying (1):
ext4: fix direct I/O read error
Linus Torvalds (2):
random32: remove net_rand_state from the latent entropy gcc plugin
random32: move the pseudo-random 32-bit definitions to prandom.h
Willy Tarreau (2):
random32: update the net random state on interrupt and activity
random: fix circular include dependency on arm64 after addition of percpu.h
I'm announcing the release of the 4.14.193 kernel.
All users of the 4.14 kernel series must upgrade.
The updated 4.14.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.14.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2
arch/arm/include/asm/percpu.h | 2
arch/arm/kernel/head-common.S | 1
drivers/char/random.c | 1
drivers/scsi/libsas/sas_ata.c | 1
drivers/scsi/libsas/sas_discover.c | 32 ++++++---------
drivers/scsi/libsas/sas_expander.c | 8 ++-
drivers/scsi/libsas/sas_internal.h | 1
drivers/scsi/libsas/sas_port.c | 3 -
fs/ext4/inode.c | 5 ++
include/linux/prandom.h | 78 +++++++++++++++++++++++++++++++++++++
include/linux/random.h | 63 +----------------------------
include/scsi/libsas.h | 3 -
include/scsi/scsi_transport_sas.h | 1
kernel/time/timer.c | 8 +++
lib/random32.c | 2
16 files changed, 123 insertions(+), 88 deletions(-)
Geert Uytterhoeven (1):
ARM: 8702/1: head-common.S: Clear lr before jumping to start_kernel()
Greg Kroah-Hartman (2):
Revert "scsi: libsas: direct call probe and destruct"
Linux 4.14.193
Grygorii Strashko (1):
ARM: percpu.h: fix build error
Jiang Ying (1):
ext4: fix direct I/O read error
Linus Torvalds (2):
random32: remove net_rand_state from the latent entropy gcc plugin
random32: move the pseudo-random 32-bit definitions to prandom.h
Willy Tarreau (2):
random32: update the net random state on interrupt and activity
random: fix circular include dependency on arm64 after addition of percpu.h
This is the start of the stable review cycle for the 5.7.14 release.
There are 7 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 07 Aug 2020 19:59:06 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.7.14-rc2…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.7.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.7.14-rc2
Linus Torvalds <torvalds(a)linux-foundation.org>
random: random.h should include archrandom.h, not the other way around
Marc Zyngier <maz(a)kernel.org>
arm64: Workaround circular dependency in pointer_auth.h
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: move the pseudo-random 32-bit definitions to prandom.h
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: remove net_rand_state from the latent entropy gcc plugin
Willy Tarreau <w(a)1wt.eu>
random: fix circular include dependency on arm64 after addition of percpu.h
Grygorii Strashko <grygorii.strashko(a)ti.com>
ARM: percpu.h: fix build error
Willy Tarreau <w(a)1wt.eu>
random32: update the net random state on interrupt and activity
-------------
Diffstat:
Makefile | 4 +-
arch/arm/include/asm/percpu.h | 2 +
arch/arm64/include/asm/archrandom.h | 1 -
arch/arm64/include/asm/pointer_auth.h | 8 +++-
arch/arm64/kernel/kaslr.c | 2 +-
drivers/char/random.c | 1 +
include/linux/prandom.h | 78 +++++++++++++++++++++++++++++++++++
include/linux/random.h | 63 ++--------------------------
kernel/time/timer.c | 8 ++++
lib/random32.c | 2 +-
10 files changed, 104 insertions(+), 65 deletions(-)
This is the start of the stable review cycle for the 4.19.138 release.
There are 6 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 07 Aug 2020 15:34:53 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.138-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.138-rc1
Jiang Ying <jiangying8582(a)126.com>
ext4: fix direct I/O read error
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: move the pseudo-random 32-bit definitions to prandom.h
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: remove net_rand_state from the latent entropy gcc plugin
Willy Tarreau <w(a)1wt.eu>
random: fix circular include dependency on arm64 after addition of percpu.h
Grygorii Strashko <grygorii.strashko(a)ti.com>
ARM: percpu.h: fix build error
Willy Tarreau <w(a)1wt.eu>
random32: update the net random state on interrupt and activity
-------------
Diffstat:
Makefile | 4 +--
arch/arm/include/asm/percpu.h | 2 ++
drivers/char/random.c | 1 +
fs/ext4/inode.c | 5 +++
include/linux/prandom.h | 78 +++++++++++++++++++++++++++++++++++++++++++
include/linux/random.h | 63 +++-------------------------------
kernel/time/timer.c | 8 +++++
lib/random32.c | 2 +-
8 files changed, 101 insertions(+), 62 deletions(-)
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: khugepaged_test_exit() check mmget_still_valid()
Move collapse_huge_page()'s mmget_still_valid() check into
khugepaged_test_exit() itself. collapse_huge_page() is used for anon THP
only, and earned its mmget_still_valid() check because it inserts a huge
pmd entry in place of the page table's pmd entry; whereas
collapse_file()'s retract_page_tables() or collapse_pte_mapped_thp()
merely clears the page table's pmd entry. But core dumping without mmap
lock must have been as open to mistaking a racily cleared pmd entry for a
page table at physical page 0, as exit_mmap() was. And we certainly have
no interest in mapping as a THP once dumping core.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021217020.27773@eggly.anvils
Fixes: 59ea6d06cfa9 ("coredump: fix race condition between collapse_huge_page() and core dumping")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/mm/khugepaged.c~khugepaged-khugepaged_test_exit-check-mmget_still_valid
+++ a/mm/khugepaged.c
@@ -431,7 +431,7 @@ static void insert_to_mm_slots_hash(stru
static inline int khugepaged_test_exit(struct mm_struct *mm)
{
- return atomic_read(&mm->mm_users) == 0;
+ return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
}
static bool hugepage_vma_check(struct vm_area_struct *vma,
@@ -1100,9 +1100,6 @@ static void collapse_huge_page(struct mm
* handled by the anon_vma lock + PG_lock.
*/
mmap_write_lock(mm);
- result = SCAN_ANY_PROCESS;
- if (!mmget_still_valid(mm))
- goto out;
result = hugepage_vma_revalidate(mm, address, &vma);
if (result)
goto out;
_
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: retract_page_tables() remember to test exit
Only once have I seen this scenario (and forgot even to notice what forced
the eventual crash): a sequence of "BUG: Bad page map" alerts from
vm_normal_page(), from zap_pte_range() servicing exit_mmap();
pmd:00000000, pte values corresponding to data in physical page 0.
The pte mappings being zapped in this case were supposed to be from a huge
page of ext4 text (but could as well have been shmem): my belief is that
it was racing with collapse_file()'s retract_page_tables(), found *pmd
pointing to a page table, locked it, but *pmd had become 0 by the time
start_pte was decided.
In most cases, that possibility is excluded by holding mmap lock; but
exit_mmap() proceeds without mmap lock. Most of what's run by khugepaged
checks khugepaged_test_exit() after acquiring mmap lock:
khugepaged_collapse_pte_mapped_thps() and hugepage_vma_revalidate() do so,
for example. But retract_page_tables() did not: fix that.
The fix is for retract_page_tables() to check khugepaged_test_exit(),
after acquiring mmap lock, before doing anything to the page table.
Getting the mmap lock serializes with __mmput(), which briefly takes and
drops it in __khugepaged_exit(); then the khugepaged_test_exit() check on
mm_users makes sure we don't touch the page table once exit_mmap() might
reach it, since exit_mmap() will be proceeding without mmap lock, not
expecting anyone to be racing with it.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021215400.27773@eggly.anvils
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
--- a/mm/khugepaged.c~khugepaged-retract_page_tables-remember-to-test-exit
+++ a/mm/khugepaged.c
@@ -1532,6 +1532,7 @@ out:
static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
{
struct vm_area_struct *vma;
+ struct mm_struct *mm;
unsigned long addr;
pmd_t *pmd, _pmd;
@@ -1560,7 +1561,8 @@ static void retract_page_tables(struct a
continue;
if (vma->vm_end < addr + HPAGE_PMD_SIZE)
continue;
- pmd = mm_find_pmd(vma->vm_mm, addr);
+ mm = vma->vm_mm;
+ pmd = mm_find_pmd(mm, addr);
if (!pmd)
continue;
/*
@@ -1570,17 +1572,19 @@ static void retract_page_tables(struct a
* mmap_lock while holding page lock. Fault path does it in
* reverse order. Trylock is a way to avoid deadlock.
*/
- if (mmap_write_trylock(vma->vm_mm)) {
- spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
- /* assume page table is clear */
- _pmd = pmdp_collapse_flush(vma, addr, pmd);
- spin_unlock(ptl);
- mmap_write_unlock(vma->vm_mm);
- mm_dec_nr_ptes(vma->vm_mm);
- pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+ if (mmap_write_trylock(mm)) {
+ if (!khugepaged_test_exit(mm)) {
+ spinlock_t *ptl = pmd_lock(mm, pmd);
+ /* assume page table is clear */
+ _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
+ pte_free(mm, pmd_pgtable(_pmd));
+ }
+ mmap_write_unlock(mm);
} else {
/* Try again later */
- khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
+ khugepaged_add_pte_mapped_thp(mm, addr);
}
}
i_mmap_unlock_write(mapping);
_
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: collapse_pte_mapped_thp() protect the pmd lock
When retract_page_tables() removes a page table to make way for a huge
pmd, it holds huge page lock, i_mmap_lock_write, mmap_write_trylock and
pmd lock; but when collapse_pte_mapped_thp() does the same (to handle the
case when the original mmap_write_trylock had failed), only
mmap_write_trylock and pmd lock are held.
That's not enough. One machine has twice crashed under load, with "BUG:
spinlock bad magic" and GPF on 6b6b6b6b6b6b6b6b. Examining the second
crash, page_vma_mapped_walk_done()'s spin_unlock of pvmw->ptl (serving
page_referenced() on a file THP, that had found a page table at *pmd)
discovers that the page table page and its lock have already been freed by
the time it comes to unlock.
Follow the example of retract_page_tables(), but we only need one of huge
page lock or i_mmap_lock_write to secure against this: because it's the
narrower lock, and because it simplifies collapse_pte_mapped_thp() to know
the hpage earlier, choose to rely on huge page lock here.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021213070.27773@eggly.anvils
Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 44 +++++++++++++++++++-------------------------
1 file changed, 19 insertions(+), 25 deletions(-)
--- a/mm/khugepaged.c~khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock
+++ a/mm/khugepaged.c
@@ -1412,7 +1412,7 @@ void collapse_pte_mapped_thp(struct mm_s
{
unsigned long haddr = addr & HPAGE_PMD_MASK;
struct vm_area_struct *vma = find_vma(mm, haddr);
- struct page *hpage = NULL;
+ struct page *hpage;
pte_t *start_pte, *pte;
pmd_t *pmd, _pmd;
spinlock_t *ptl;
@@ -1432,9 +1432,17 @@ void collapse_pte_mapped_thp(struct mm_s
if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
return;
+ hpage = find_lock_page(vma->vm_file->f_mapping,
+ linear_page_index(vma, haddr));
+ if (!hpage)
+ return;
+
+ if (!PageHead(hpage))
+ goto drop_hpage;
+
pmd = mm_find_pmd(mm, haddr);
if (!pmd)
- return;
+ goto drop_hpage;
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
@@ -1453,30 +1461,11 @@ void collapse_pte_mapped_thp(struct mm_s
page = vm_normal_page(vma, addr, *pte);
- if (!page || !PageCompound(page))
- goto abort;
-
- if (!hpage) {
- hpage = compound_head(page);
- /*
- * The mapping of the THP should not change.
- *
- * Note that uprobe, debugger, or MAP_PRIVATE may
- * change the page table, but the new page will
- * not pass PageCompound() check.
- */
- if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
- goto abort;
- }
-
/*
- * Confirm the page maps to the correct subpage.
- *
- * Note that uprobe, debugger, or MAP_PRIVATE may change
- * the page table, but the new page will not pass
- * PageCompound() check.
+ * Note that uprobe, debugger, or MAP_PRIVATE may change the
+ * page table, but the new page will not be a subpage of hpage.
*/
- if (WARN_ON(hpage + i != page))
+ if (hpage + i != page)
goto abort;
count++;
}
@@ -1495,7 +1484,7 @@ void collapse_pte_mapped_thp(struct mm_s
pte_unmap_unlock(start_pte, ptl);
/* step 3: set proper refcount and mm_counters. */
- if (hpage) {
+ if (count) {
page_ref_sub(hpage, count);
add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
}
@@ -1506,10 +1495,15 @@ void collapse_pte_mapped_thp(struct mm_s
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
+
+drop_hpage:
+ unlock_page(hpage);
+ put_page(hpage);
return;
abort:
pte_unmap_unlock(start_pte, ptl);
+ goto drop_hpage;
}
static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
_
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: collapse_pte_mapped_thp() flush the right range
pmdp_collapse_flush() should be given the start address at which the huge
page is mapped, haddr: it was given addr, which at that point has been
used as a local variable, incremented to the end address of the extent.
Found by source inspection while chasing a hugepage locking bug, which I
then could not explain by this. At first I thought this was very bad;
then saw that all of the page translations that were not flushed would
actually still point to the right pages afterwards, so harmless; then
realized that I know nothing of how different architectures and models
cache intermediate paging structures, so maybe it matters after all -
particularly since the page table concerned is immediately freed.
Much easier to fix than to think about.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021204390.27773@eggly.anvils
Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/khugepaged.c~khugepaged-collapse_pte_mapped_thp-flush-the-right-range
+++ a/mm/khugepaged.c
@@ -1502,7 +1502,7 @@ void collapse_pte_mapped_thp(struct mm_s
/* step 4: collapse pmd */
ptl = pmd_lock(vma->vm_mm, pmd);
- _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ _pmd = pmdp_collapse_flush(vma, haddr, pmd);
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
_
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/hugetlb: fix calculation of adjust_range_if_pmd_sharing_possible
This is found by code observation only.
Firstly, the worst case scenario should assume the whole range was covered
by pmd sharing. The old algorithm might not work as expected for ranges
like (1g-2m, 1g+2m), where the adjusted range should be (0, 1g+2m) but the
expected range should be (0, 2g).
Since at it, remove the loop since it should not be required. With that,
the new code should be faster too when the invalidating range is huge.
Mike said:
: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
: adjust to (0, 1g+2m) which is incorrect.
:
: We should cc stable. The original reason for adjusting the range was to
: prevent data corruption (getting wrong page). Since the range is not
: always adjusted correctly, the potential for corruption still exists.
:
: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
: is only gong to be called in two cases:
:
: 1) for a single page
: 2) for range == entire vma
:
: In those cases, the current code should produce the correct results.
:
: To be safe, let's just cc stable.
Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible
+++ a/mm/hugetlb.c
@@ -5314,25 +5314,21 @@ static bool vma_shareable(struct vm_area
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
- unsigned long check_addr;
+ unsigned long a_start, a_end;
if (!(vma->vm_flags & VM_MAYSHARE))
return;
- for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
- unsigned long a_start = check_addr & PUD_MASK;
- unsigned long a_end = a_start + PUD_SIZE;
+ /* Extend the range to be PUD aligned for a worst case scenario */
+ a_start = ALIGN_DOWN(*start, PUD_SIZE);
+ a_end = ALIGN(*end, PUD_SIZE);
- /*
- * If sharing is possible, adjust start/end if necessary.
- */
- if (range_in_vma(vma, a_start, a_end)) {
- if (a_start < *start)
- *start = a_start;
- if (a_end > *end)
- *end = a_end;
- }
- }
+ /*
+ * Intersect the range with the vma range, since pmd sharing won't be
+ * across vma after all
+ */
+ *start = max(vma->vm_start, a_start);
+ *end = min(vma->vm_end, a_end);
}
/*
_
From: Michal Koutný <mkoutny(a)suse.com>
Subject: mm/page_counter.c: fix protection usage propagation
When workload runs in cgroups that aren't directly below root cgroup and
their parent specifies reclaim protection, it may end up ineffective.
The reason is that propagate_protected_usage() is not called in all
hierarchy up. All the protected usage is incorrectly accumulated in the
workload's parent. This means that siblings_low_usage is overestimated
and effective protection underestimated. Even though it is transitional
phenomenon (uncharge path does correct propagation and fixes the wrong
children_low_usage), it can undermine the intended protection
unexpectedly.
We have noticed this problem while seeing a swap out in a descendant of a
protected memcg (intermediate node) while the parent was conveniently
under its protection limit and the memory pressure was external to that
hierarchy. Michal has pinpointed this down to the wrong
siblings_low_usage which led to the unwanted reclaim.
The fix is simply updating children_low_usage in respective ancestors also
in the charging path.
Link: http://lkml.kernel.org/r/20200803153231.15477-1-mhocko@kernel.org
Fixes: 230671533d64 ("mm: memory.low hierarchical behavior")
Signed-off-by: Michal Koutný <mkoutny(a)suse.com>
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Roman Gushchin <guro(a)fb.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [4.18+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_counter.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/page_counter.c~mm-fix-protection-usage-propagation
+++ a/mm/page_counter.c
@@ -72,7 +72,7 @@ void page_counter_charge(struct page_cou
long new;
new = atomic_long_add_return(nr_pages, &c->usage);
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* This is indeed racy, but we can live with some
* inaccuracy in the watermark.
@@ -116,7 +116,7 @@ bool page_counter_try_charge(struct page
new = atomic_long_add_return(nr_pages, &c->usage);
if (new > c->max) {
atomic_long_sub(nr_pages, &c->usage);
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* This is racy, but we can live with some
* inaccuracy in the failcnt.
@@ -125,7 +125,7 @@ bool page_counter_try_charge(struct page
*fail = c;
goto failed;
}
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* Just like with failcnt, we can live with some
* inaccuracy in the watermark.
_
From: Peter Zijlstra <peterz(a)infradead.org>
Subject: mm: fix kthread_use_mm() vs TLB invalidate
For SMP systems using IPI based TLB invalidation, looking at
current->active_mm is entirely reasonable. This then presents the
following race condition:
CPU0 CPU1
flush_tlb_mm(mm) use_mm(mm)
<send-IPI>
tsk->active_mm = mm;
<IPI>
if (tsk->active_mm == mm)
// flush TLBs
</IPI>
switch_mm(old_mm,mm,tsk);
Where it is possible the IPI flushed the TLBs for @old_mm, not @mm,
because the IPI lands before we actually switched.
Avoid this by disabling IRQs across changing ->active_mm and
switch_mm().
Of the (SMP) architectures that have IPI based TLB invalidate:
Alpha - checks active_mm
ARC - ASID specific
IA64 - checks active_mm
MIPS - ASID specific flush
OpenRISC - shoots down world
PARISC - shoots down world
SH - ASID specific
SPARC - ASID specific
x86 - N/A
xtensa - checks active_mm
So at the very least Alpha, IA64 and Xtensa are suspect.
On top of this, for scheduler consistency we need at least preemption
disabled across changing tsk->mm and doing switch_mm(), which is
currently provided by task_lock(), but that's not sufficient for
PREEMPT_RT.
[akpm(a)linux-foundation.org: add comment]
Link: http://lkml.kernel.org/r/20200721154106.GE10769@hirez.programming.kicks-ass…
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Reported-by: Andy Lutomirski <luto(a)amacapital.net>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Jann Horn <jannh(a)google.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kthread.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/kernel/kthread.c~mm-fix-kthread_use_mm-vs-tlb-invalidate
+++ a/kernel/kthread.c
@@ -1241,13 +1241,16 @@ void kthread_use_mm(struct mm_struct *mm
WARN_ON_ONCE(tsk->mm);
task_lock(tsk);
+ /* Hold off tlb flush IPIs while switching mm's */
+ local_irq_disable();
active_mm = tsk->active_mm;
if (active_mm != mm) {
mmgrab(mm);
tsk->active_mm = mm;
}
tsk->mm = mm;
- switch_mm(active_mm, mm, tsk);
+ switch_mm_irqs_off(active_mm, mm, tsk);
+ local_irq_enable();
task_unlock(tsk);
#ifdef finish_arch_post_lock_switch
finish_arch_post_lock_switch();
@@ -1276,9 +1279,11 @@ void kthread_unuse_mm(struct mm_struct *
task_lock(tsk);
sync_mm_rss(mm);
+ local_irq_disable();
tsk->mm = NULL;
/* active_mm is still 'mm' */
enter_lazy_tlb(mm, tsk);
+ local_irq_enable();
task_unlock(tsk);
}
EXPORT_SYMBOL_GPL(kthread_unuse_mm);
_
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/shuffle: don't move pages between zones and don't read garbage memmaps
Especially with memory hotplug, we can have offline sections (with a
garbage memmap) and overlapping zones. We have to make sure to only touch
initialized memmaps (online sections managed by the buddy) and that the
zone matches, to not move pages between zones.
To test if this can actually happen, I added a simple
BUG_ON(page_zone(page_i) != page_zone(page_j));
right before the swap. When hotplugging a 256M DIMM to a 4G x86-64 VM and
onlining the first memory block "online_movable" and the second memory
block "online_kernel", it will trigger the BUG, as both zones (NORMAL and
MOVABLE) overlap.
This might result in all kinds of weird situations (e.g., double
allocations, list corruptions, unmovable allocations ending up in the
movable zone).
Link: http://lkml.kernel.org/r/20200624094741.9918-2-david@redhat.com
Fixes: e900a918b098 ("mm: shuffle initial free memory to improve memory-side-cache utilization")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Wei Yang <richard.weiyang(a)linux.alibaba.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Wei Yang <richard.weiyang(a)gmail.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: <stable(a)vger.kernel.org> [5.2+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/shuffle.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
--- a/mm/shuffle.c~mm-shuffle-dont-move-pages-between-zones-and-dont-read-garbage-memmaps
+++ a/mm/shuffle.c
@@ -58,25 +58,25 @@ module_param_call(shuffle, shuffle_store
* For two pages to be swapped in the shuffle, they must be free (on a
* 'free_area' lru), have the same order, and have the same migratetype.
*/
-static struct page * __meminit shuffle_valid_page(unsigned long pfn, int order)
+static struct page * __meminit shuffle_valid_page(struct zone *zone,
+ unsigned long pfn, int order)
{
- struct page *page;
+ struct page *page = pfn_to_online_page(pfn);
/*
* Given we're dealing with randomly selected pfns in a zone we
* need to ask questions like...
*/
- /* ...is the pfn even in the memmap? */
- if (!pfn_valid_within(pfn))
+ /* ... is the page managed by the buddy? */
+ if (!page)
return NULL;
- /* ...is the pfn in a present section or a hole? */
- if (!pfn_in_present_section(pfn))
+ /* ... is the page assigned to the same zone? */
+ if (page_zone(page) != zone)
return NULL;
/* ...is the page free and currently on a free_area list? */
- page = pfn_to_page(pfn);
if (!PageBuddy(page))
return NULL;
@@ -123,7 +123,7 @@ void __meminit __shuffle_zone(struct zon
* page_j randomly selected in the span @zone_start_pfn to
* @spanned_pages.
*/
- page_i = shuffle_valid_page(i, order);
+ page_i = shuffle_valid_page(z, i, order);
if (!page_i)
continue;
@@ -137,7 +137,7 @@ void __meminit __shuffle_zone(struct zon
j = z->zone_start_pfn +
ALIGN_DOWN(get_random_long() % z->spanned_pages,
order_pages);
- page_j = shuffle_valid_page(j, order);
+ page_j = shuffle_valid_page(z, j, order);
if (page_j && page_j != page_i)
break;
}
_
On 2020-03-20 12:58, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the perf/core branch of tip:
>
> Commit-ID: 90c91dfb86d0ff545bd329d3ddd72c147e2ae198
> Gitweb: https://git.kernel.org/tip/90c91dfb86d0ff545bd329d3ddd72c147e2ae198
> Author: Peter Zijlstra <peterz(a)infradead.org>
> AuthorDate: Thu, 05 Mar 2020 13:38:51 +01:00
> Committer: Peter Zijlstra <peterz(a)infradead.org>
> CommitterDate: Fri, 20 Mar 2020 13:06:22 +01:00
>
> perf/core: Fix endless multiplex timer
>
> Kan and Andi reported that we fail to kill rotation when the flexible
> events go empty, but the context does not. XXX moar
>
> Fixes: fd7d55172d1e ("perf/cgroups: Don't rotate events for cgroups unnecessarily")
Can this patch (commit 90c91dfb86d0 ("perf/core: Fix endless multiplex
timer") upstream) be applied to stable please? For PMU drivers built as
modules, the bug can actually kill the system, since the runaway hrtimer
loop keeps calling pmu->{enable,disable} after all the events have been
closed and dropped their references to pmu->module. Thus legitimately
unloading the module once things have got into this state quickly
results in a crash when those callbacks disappear.
(FWIW I spent about two days fighting with this while testing a new
driver as a module against the 5.3 kernel installed on someone else's
machine, assuming it was a bug in my code...)
Robin.
> Reported-by: Andi Kleen <ak(a)linux.intel.com>
> Reported-by: Kan Liang <kan.liang(a)linux.intel.com>
> Tested-by: Kan Liang <kan.liang(a)linux.intel.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
> Link: https://lkml.kernel.org/r/20200305123851.GX2596@hirez.programming.kicks-ass…
> ---
> kernel/events/core.c | 20 ++++++++++++++------
> 1 file changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index ccf8d4f..b5a68d2 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -2291,6 +2291,7 @@ __perf_remove_from_context(struct perf_event *event,
>
> if (!ctx->nr_events && ctx->is_active) {
> ctx->is_active = 0;
> + ctx->rotate_necessary = 0;
> if (ctx->task) {
> WARN_ON_ONCE(cpuctx->task_ctx != ctx);
> cpuctx->task_ctx = NULL;
> @@ -3188,12 +3189,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
> if (!ctx->nr_active || !(is_active & EVENT_ALL))
> return;
>
> - /*
> - * If we had been multiplexing, no rotations are necessary, now no events
> - * are active.
> - */
> - ctx->rotate_necessary = 0;
> -
> perf_pmu_disable(ctx->pmu);
> if (is_active & EVENT_PINNED) {
> list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list)
> @@ -3203,6 +3198,13 @@ static void ctx_sched_out(struct perf_event_context *ctx,
> if (is_active & EVENT_FLEXIBLE) {
> list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
> group_sched_out(event, cpuctx, ctx);
> +
> + /*
> + * Since we cleared EVENT_FLEXIBLE, also clear
> + * rotate_necessary, is will be reset by
> + * ctx_flexible_sched_in() when needed.
> + */
> + ctx->rotate_necessary = 0;
> }
> perf_pmu_enable(ctx->pmu);
> }
> @@ -3985,6 +3987,12 @@ ctx_event_to_rotate(struct perf_event_context *ctx)
> typeof(*event), group_node);
> }
>
> + /*
> + * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in()
> + * finds there are unschedulable events, it will set it again.
> + */
> + ctx->rotate_necessary = 0;
> +
> return event;
> }
>
>
When running `make coccicheck` in report mode using the
add_namespace.cocci file, it will fail for files that contain
MODULE_LICENSE. Those match the replacement precondition, but spatch
errors out as virtual.ns is not set.
In order to fix that, add the virtual rule nsdeps and only do search and
replace if that rule has been explicitly requested.
In order to make spatch happy in report mode, we also need a dummy rule,
as otherwise it errors out with "No rules apply". Using a script:python
rule appears unrelated and odd, but this is the shortest I could come up
with.
Adjust scripts/nsdeps accordingly to set the nsdeps rule when run trough
`make nsdeps`.
Suggested-by: Julia Lawall <julia.lawall(a)inria.fr>
Fixes: c7c4e29fb5a4 ("scripts: add_namespace: Fix coccicheck failed")
Cc: YueHaibing <yuehaibing(a)huawei.com>
Cc: jeyu(a)kernel.org
Cc: cocci(a)systeme.lip6.fr
Cc: stable(a)vger.kernel.org
Signed-off-by: Matthias Maennich <maennich(a)google.com>
---
scripts/coccinelle/misc/add_namespace.cocci | 8 +++++++-
scripts/nsdeps | 2 +-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/scripts/coccinelle/misc/add_namespace.cocci b/scripts/coccinelle/misc/add_namespace.cocci
index 99e93a6c2e24..cbf1614163cb 100644
--- a/scripts/coccinelle/misc/add_namespace.cocci
+++ b/scripts/coccinelle/misc/add_namespace.cocci
@@ -6,6 +6,7 @@
/// add a missing namespace tag to a module source file.
///
+virtual nsdeps
virtual report
@has_ns_import@
@@ -16,10 +17,15 @@ MODULE_IMPORT_NS(ns);
// Add missing imports, but only adjacent to a MODULE_LICENSE statement.
// That ensures we are adding it only to the main module source file.
-@do_import depends on !has_ns_import@
+@do_import depends on !has_ns_import && nsdeps@
declarer name MODULE_LICENSE;
expression license;
identifier virtual.ns;
@@
MODULE_LICENSE(license);
+ MODULE_IMPORT_NS(ns);
+
+// Dummy rule for report mode that would otherwise be empty and make spatch
+// fail ("No rules apply.")
+@script:python depends on report@
+@@
diff --git a/scripts/nsdeps b/scripts/nsdeps
index 03a8e7cbe6c7..dab4c1a0e27d 100644
--- a/scripts/nsdeps
+++ b/scripts/nsdeps
@@ -29,7 +29,7 @@ fi
generate_deps_for_ns() {
$SPATCH --very-quiet --in-place --sp-file \
- $srctree/scripts/coccinelle/misc/add_namespace.cocci -D ns=$1 $2
+ $srctree/scripts/coccinelle/misc/add_namespace.cocci -D nsdeps -D ns=$1 $2
}
generate_deps() {
--
2.27.0.rc2.251.g90737beb825-goog
This is the start of the stable review cycle for the 4.14.193 release.
There are 8 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 07 Aug 2020 15:34:53 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.193-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.14.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.14.193-rc1
Geert Uytterhoeven <geert(a)linux-m68k.org>
ARM: 8702/1: head-common.S: Clear lr before jumping to start_kernel()
Jiang Ying <jiangying8582(a)126.com>
ext4: fix direct I/O read error
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: move the pseudo-random 32-bit definitions to prandom.h
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: remove net_rand_state from the latent entropy gcc plugin
Willy Tarreau <w(a)1wt.eu>
random: fix circular include dependency on arm64 after addition of percpu.h
Grygorii Strashko <grygorii.strashko(a)ti.com>
ARM: percpu.h: fix build error
Willy Tarreau <w(a)1wt.eu>
random32: update the net random state on interrupt and activity
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "scsi: libsas: direct call probe and destruct"
-------------
Diffstat:
Makefile | 4 +-
arch/arm/include/asm/percpu.h | 2 +
arch/arm/kernel/head-common.S | 1 +
drivers/char/random.c | 1 +
drivers/scsi/libsas/sas_ata.c | 1 +
drivers/scsi/libsas/sas_discover.c | 32 +++++++---------
drivers/scsi/libsas/sas_expander.c | 8 ++--
drivers/scsi/libsas/sas_internal.h | 1 -
drivers/scsi/libsas/sas_port.c | 3 --
fs/ext4/inode.c | 5 +++
include/linux/prandom.h | 78 ++++++++++++++++++++++++++++++++++++++
include/linux/random.h | 63 ++----------------------------
include/scsi/libsas.h | 3 +-
include/scsi/scsi_transport_sas.h | 1 -
kernel/time/timer.c | 8 ++++
lib/random32.c | 2 +-
16 files changed, 124 insertions(+), 89 deletions(-)
This is the start of the stable review cycle for the 5.4.57 release.
There are 9 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 07 Aug 2020 15:34:53 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.57-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.57-rc1
Lorenz Bauer <lmb(a)cloudflare.com>
bpf: sockmap: Require attach_bpf_fd when detaching a program
Lorenz Bauer <lmb(a)cloudflare.com>
selftests: bpf: Fix detach from sockmap tests
Jiang Ying <jiangying8582(a)126.com>
ext4: fix direct I/O read error
Marc Zyngier <maz(a)kernel.org>
arm64: Workaround circular dependency in pointer_auth.h
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: move the pseudo-random 32-bit definitions to prandom.h
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: remove net_rand_state from the latent entropy gcc plugin
Willy Tarreau <w(a)1wt.eu>
random: fix circular include dependency on arm64 after addition of percpu.h
Grygorii Strashko <grygorii.strashko(a)ti.com>
ARM: percpu.h: fix build error
Willy Tarreau <w(a)1wt.eu>
random32: update the net random state on interrupt and activity
-------------
Diffstat:
Makefile | 4 +-
arch/arm/include/asm/percpu.h | 2 +
arch/arm64/include/asm/pointer_auth.h | 8 +++-
drivers/char/random.c | 1 +
fs/ext4/inode.c | 5 +++
include/linux/bpf.h | 13 +++++-
include/linux/prandom.h | 78 +++++++++++++++++++++++++++++++++
include/linux/random.h | 63 ++------------------------
include/linux/skmsg.h | 13 ++++++
kernel/bpf/syscall.c | 4 +-
kernel/time/timer.c | 8 ++++
lib/random32.c | 2 +-
net/core/sock_map.c | 50 ++++++++++++++++++---
tools/testing/selftests/bpf/test_maps.c | 12 ++---
14 files changed, 185 insertions(+), 78 deletions(-)
If we hit an earlier error path in io_uring_create(), then we will have
accounted memory, but not set ctx->{sq,cq}_entries yet. Then when the
ring is torn down in error, we use those values to unaccount the memory.
Ensure we set the ctx entries before we're able to hit a potential error
path.
Cc: stable(a)vger.kernel.org
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
---
fs/io_uring.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8f96566603f3..0d857f7ca507 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8193,6 +8193,10 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
struct io_rings *rings;
size_t size, sq_array_offset;
+ /* make sure these are sane, as we already accounted them */
+ ctx->sq_entries = p->sq_entries;
+ ctx->cq_entries = p->cq_entries;
+
size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
if (size == SIZE_MAX)
return -EOVERFLOW;
@@ -8209,8 +8213,6 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
rings->cq_ring_entries = p->cq_entries;
ctx->sq_mask = rings->sq_ring_mask;
ctx->cq_mask = rings->cq_ring_mask;
- ctx->sq_entries = rings->sq_ring_entries;
- ctx->cq_entries = rings->cq_ring_entries;
size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
if (size == SIZE_MAX) {
--
2.28.0
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
On exit, if a process is preempted after the trace_sched_process_exit()
tracepoint but before the process is done exiting, then when it gets
scheduled in, the function tracers will not filter it properly against the
function tracing pid filters.
That is because the function tracing pid filters hooks to the
sched_process_exit() tracepoint to remove the exiting task's pid from the
filter list. Because the filtering happens at the sched_switch tracepoint,
when the exiting task schedules back in to finish up the exit, it will no
longer be in the function pid filtering tables.
This was noticeable in the notrace self tests on a preemptable kernel, as
the tests would fail as it exits and preempted after being taken off the
notrace filter table and on scheduling back in it would not be in the
notrace list, and then the ending of the exit function would trace. The test
detected this and would fail.
Cc: stable(a)vger.kernel.org
Cc: Namhyung Kim <namhyung(a)kernel.org>
Fixes: 1e10486ffee0a ("ftrace: Add 'function-fork' trace option")
Fixes: c37775d57830a ("tracing: Add infrastructure to allow set_event_pid to follow children"
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 4 ++--
kernel/trace/trace_events.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4e3a5d79c078..76f2dd6fd414 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6985,12 +6985,12 @@ void ftrace_pid_follow_fork(struct trace_array *tr, bool enable)
if (enable) {
register_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork,
tr);
- register_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit,
+ register_trace_sched_process_free(ftrace_pid_follow_sched_process_exit,
tr);
} else {
unregister_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork,
tr);
- unregister_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit,
+ unregister_trace_sched_process_free(ftrace_pid_follow_sched_process_exit,
tr);
}
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f6f55682d3e2..a85effb2373b 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -538,12 +538,12 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
if (enable) {
register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
tr, INT_MIN);
- register_trace_prio_sched_process_exit(event_filter_pid_sched_process_exit,
+ register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
tr, INT_MAX);
} else {
unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
tr);
- unregister_trace_sched_process_exit(event_filter_pid_sched_process_exit,
+ unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
tr);
}
}
--
2.26.2
From: Masami Hiramatsu <mhiramat(a)kernel.org>
Since the parse_args() stops parsing at '--', bootconfig_params()
will never get the '--' as param and initargs_found never be true.
In the result, if we pass some init arguments via the bootconfig,
those are always appended to the kernel command line with '--'
even if the kernel command line already has '--'.
To fix this correctly, check the return value of parse_args()
and set initargs_found true if the return value is not an error
but a valid address.
Link: https://lkml.kernel.org/r/159650953285.270383.14822353843556363851.stgit@de…
Fixes: f61872bb58a1 ("bootconfig: Use parse_args() to find bootconfig and '--'")
Cc: stable(a)vger.kernel.org
Reported-by: Arvind Sankar <nivedita(a)alum.mit.edu>
Suggested-by: Arvind Sankar <nivedita(a)alum.mit.edu>
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
init/main.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/init/main.c b/init/main.c
index 0ead83e86b5a..883ded3638e5 100644
--- a/init/main.c
+++ b/init/main.c
@@ -387,8 +387,6 @@ static int __init bootconfig_params(char *param, char *val,
{
if (strcmp(param, "bootconfig") == 0) {
bootconfig_found = true;
- } else if (strcmp(param, "--") == 0) {
- initargs_found = true;
}
return 0;
}
@@ -399,19 +397,23 @@ static void __init setup_boot_config(const char *cmdline)
const char *msg;
int pos;
u32 size, csum;
- char *data, *copy;
+ char *data, *copy, *err;
int ret;
/* Cut out the bootconfig data even if we have no bootconfig option */
data = get_boot_config_from_initrd(&size, &csum);
strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
- parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
- bootconfig_params);
+ err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
+ bootconfig_params);
- if (!bootconfig_found)
+ if (IS_ERR(err) || !bootconfig_found)
return;
+ /* parse_args() stops at '--' and returns an address */
+ if (err)
+ initargs_found = true;
+
if (!data) {
pr_err("'bootconfig' found on command line, but no bootconfig found\n");
return;
--
2.26.2
On Thu, Aug 06, 2020 at 08:04:07PM +0800, 姜迎 wrote:
>
>
>
> Hi all,
> This patch is used to fix checkpatch error on kernel stable rc 4.9.
> I have built pass and tested pass, thanks!
Now queued up, thanks!
greg k-h
This patch is used to fix ext4 direct I/O read error when
the read size is not aligned with block size.
Then, I will use a test to explain the error.
(1) Make a file that is not aligned with block size:
$dd if=/dev/zero of=./test.jar bs=1000 count=3
(2) I wrote a source file named "direct_io_read_file.c" as following:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#define BUF_SIZE 1024
int main()
{
int fd;
int ret;
unsigned char *buf;
ret = posix_memalign((void **)&buf, 512, BUF_SIZE);
if (ret) {
perror("posix_memalign failed");
exit(1);
}
fd = open("./test.jar", O_RDONLY | O_DIRECT, 0755);
if (fd < 0){
perror("open ./test.jar failed");
exit(1);
}
do {
ret = read(fd, buf, BUF_SIZE);
printf("ret=%d\n",ret);
if (ret < 0) {
perror("write test.jar failed");
}
} while (ret > 0);
free(buf);
close(fd);
}
(3) Compile the source file:
$gcc direct_io_read_file.c -D_GNU_SOURCE
(4) Run the test program:
$./a.out
The result is as following:
ret=1024
ret=1024
ret=952
ret=-1
write test.jar failed: Invalid argument.
I have tested this program on XFS filesystem, XFS does not have
this problem, because XFS use iomap_dio_rw() to do direct I/O
read. And the comparing between read offset and file size is done
in iomap_dio_rw(), the code is as following:
if (pos < size) {
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
}
...
}
...only when "pos < size", direct I/O can be done, or 0 will be return.
I have tested the fix patch on Ext4, it is up to the mustard of
EINVAL in man2(read) as following:
#include <unistd.h>
ssize_t read(int fd, void *buf, size_t count);
EINVAL
fd is attached to an object which is unsuitable for reading;
or the file was opened with the O_DIRECT flag, and either the
address specified in buf, the value specified in count, or the
current file offset is not suitably aligned.
So I think this patch can be applied to fix ext4 direct I/O error.
However Ext4 introduces direct I/O read using iomap infrastructure
on kernel 5.5, the patch is commit <b1b4705d54ab>
("ext4: introduce direct I/O read using iomap infrastructure"),
then Ext4 will be the same as XFS, they all use iomap_dio_rw() to do direct
I/O read. So this problem does not exist on kernel 5.5 for Ext4.
>From above description, we can see this problem exists on all the kernel
versions between kernel 3.14 and kernel 5.4. It will cause the Applications
to fail to read. For example, when the search service downloads a new full
index file, the search engine is loading the previous index file and is
processing the search request, it can not use buffer io that may squeeze
the previous index file in use from pagecache, so the serch service must
use direct I/O read.
Please apply this patch on these kernel versions, or please use the method
on kernel 5.5 to fix this problem.
Fixes: 9fe55eea7e4b ("Fix race when checking i_size on direct i/o read")
Reviewed-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Wang Long <wanglong19(a)meituan.com>
Signed-off-by: Jiang Ying <jiangying8582(a)126.com>
Changes since V5:
Fix checkpatch error on kernel stable rc 4.9 based V3.
Use "reviewed-by" instead of "Co-developed-by" to fix
checkpatch error.
Changes since V4:
Fix build error on kernel stable rc 4.4 based V3.
This patch only for kernel 4.4.
Changes since V3:
Add the info: this bug could break some application that use the
stable kernel releases.
Changes since V2:
Optimize the description of the commit message and make a variation for
the patch, e.g. with:
Before:
loff_t size;
size = i_size_read(inode);
After:
loff_t size = i_size_read(inode);
Changes since V1:
Signed-off use real name and add "Fixes:" flag
---
fs/ext4/inode.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d8780e0..ccce89d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3575,6 +3575,11 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
+ loff_t offset = iocb->ki_pos;
+ loff_t size = i_size_read(inode);
+
+ if (offset >= size)
+ return 0;
/*
* Shared inode_lock is enough for us - it protects against concurrent
--
1.8.3.1
This patch is used to fix ext4 direct I/O read error when
the read size is not aligned with block size.
Then, I will use a test to explain the error.
(1) Make a file that is not aligned with block size:
$dd if=/dev/zero of=./test.jar bs=1000 count=3
(2) I wrote a source file named "direct_io_read_file.c" as following:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#define BUF_SIZE 1024
int main()
{
int fd;
int ret;
unsigned char *buf;
ret = posix_memalign((void **)&buf, 512, BUF_SIZE);
if (ret) {
perror("posix_memalign failed");
exit(1);
}
fd = open("./test.jar", O_RDONLY | O_DIRECT, 0755);
if (fd < 0){
perror("open ./test.jar failed");
exit(1);
}
do {
ret = read(fd, buf, BUF_SIZE);
printf("ret=%d\n",ret);
if (ret < 0) {
perror("write test.jar failed");
}
} while (ret > 0);
free(buf);
close(fd);
}
(3) Compile the source file:
$gcc direct_io_read_file.c -D_GNU_SOURCE
(4) Run the test program:
$./a.out
The result is as following:
ret=1024
ret=1024
ret=952
ret=-1
write test.jar failed: Invalid argument.
I have tested this program on XFS filesystem, XFS does not have
this problem, because XFS use iomap_dio_rw() to do direct I/O
read. And the comparing between read offset and file size is done
in iomap_dio_rw(), the code is as following:
if (pos < size) {
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
}
...
}
...only when "pos < size", direct I/O can be done, or 0 will be return.
I have tested the fix patch on Ext4, it is up to the mustard of
EINVAL in man2(read) as following:
#include <unistd.h>
ssize_t read(int fd, void *buf, size_t count);
EINVAL
fd is attached to an object which is unsuitable for reading;
or the file was opened with the O_DIRECT flag, and either the
address specified in buf, the value specified in count, or the
current file offset is not suitably aligned.
So I think this patch can be applied to fix ext4 direct I/O error.
However Ext4 introduces direct I/O read using iomap infrastructure
on kernel 5.5, the patch is commit <b1b4705d54ab>
("ext4: introduce direct I/O read using iomap infrastructure"),
then Ext4 will be the same as XFS, they all use iomap_dio_rw() to do direct
I/O read. So this problem does not exist on kernel 5.5 for Ext4.
>From above description, we can see this problem exists on all the kernel
versions between kernel 3.14 and kernel 5.4. It will cause the Applications
to fail to read. For example, when the search service downloads a new full
index file, the search engine is loading the previous index file and is
processing the search request, it can not use buffer io that may squeeze
the previous index file in use from pagecache, so the serch service must
use direct I/O read.
Please apply this patch on these kernel versions, or please use the method
on kernel 5.5 to fix this problem.
Fixes: 9fe55eea7e4b ("Fix race when checking i_size on direct i/o read")
Reviewed-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Wang Long <wanglong19(a)meituan.com>
Signed-off-by: Jiang Ying <jiangying8582(a)126.com>
Changes since V4:
Fix build error on kernel stable rc 4.4.
This patch only for kernel 4.4.
Changes since V3:
Add the info: this bug could break some application that use the
stable kernel releases.
Changes since V2:
Optimize the description of the commit message and make a variation for
the patch, e.g. with:
Before:
loff_t size;
size = i_size_read(inode);
After:
loff_t size = i_size_read(inode);
Changes since V1:
Signed-off use real name and add "Fixes:" flag
---
fs/ext4/inode.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8e79970..8816016 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3353,6 +3353,13 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
ssize_t ret;
+ if (iov_iter_rw(iter) == READ) {
+ loff_t size = i_size_read(inode);
+
+ if (offset >= size)
+ return 0;
+ }
+
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
--
1.8.3.1
stable rc 4.4 build breaks on arm64, arm, x86_64 and i386.
Here are the build log failures on arm64.
git_repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
target_arch: arm64
toolchain: gcc-9
git_short_log: 0b3898baf614 (\Linux 4.4.233-rc1\)
git_sha: 0b3898baf61459e1f963dcf893b4683174668975
git_describe: v4.4.232-33-g0b3898baf614
kernel_version: 4.4.233-rc1
make -sk KBUILD_BUILD_USER=TuxBuild -C/linux -j16 ARCH=arm64
CROSS_COMPILE=aarch64-linux-gnu- HOSTCC=gcc CC="sccache
aarch64-linux-gnu-gcc" O=build Image
#
../arch/arm64/kernel/hw_breakpoint.c: In function ‘arch_bp_generic_fields’:
../arch/arm64/kernel/hw_breakpoint.c:348:5: note: parameter passing
for argument of type ‘struct arch_hw_breakpoint_ctrl’ changed in GCC
9.1
348 | int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
| ^~~~~~~~~~~~~~~~~~~~~~
../fs/ext4/inode.c: In function ‘ext4_direct_IO’:
../fs/ext4/inode.c:3355:9: error: ‘offset’ redeclared as different
kind of symbol
3355 | loff_t offset = iocb->ki_pos;
| ^~~~~~
../fs/ext4/inode.c:3349:17: note: previous definition of ‘offset’ was here
3349 | loff_t offset)
| ~~~~~~~^~~~~~
make[3]: *** [../scripts/Makefile.build:277: fs/ext4/inode.o] Error 1
make[3]: Target '__build' not remade because of errors.
make[2]: *** [../scripts/Makefile.build:484: fs/ext4] Error 2
../drivers/net/ethernet/apm/xgene/xgene_enet_main.c:32:36: warning:
array ‘xgene_enet_acpi_match’ assumed to have one element
32 | static const struct acpi_device_id xgene_enet_acpi_match[];
| ^~~~~~~~~~~~~~~~~~~~~
make[2]: Target '__build' not remade because of errors.
make[1]: *** [/linux/Makefile:1006: fs] Error 2
make[1]: Target 'Image' not remade because of errors.
make: *** [Makefile:152: sub-make] Error 2
make: Target 'Image' not remade because of errors.
--
Linaro LKFT
https://lkft.linaro.org
From: Mike Rapoport <rppt(a)linux.ibm.com>
When a configuration has NUMA disabled and SGI_IP27 enabled, the build
fails:
CC kernel/bounds.s
CC arch/mips/kernel/asm-offsets.s
In file included from arch/mips/include/asm/topology.h:11,
from include/linux/topology.h:36,
from include/linux/gfp.h:9,
from include/linux/slab.h:15,
from include/linux/crypto.h:19,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/mips/kernel/asm-offsets.c:12:
include/linux/topology.h: In function 'numa_node_id':
arch/mips/include/asm/mach-ip27/topology.h:16:27: error: implicit declaration of function 'cputonasid'; did you mean 'cpu_vpe_id'? [-Werror=implicit-function-declaration]
#define cpu_to_node(cpu) (cputonasid(cpu))
^~~~~~~~~~
include/linux/topology.h:119:9: note: in expansion of macro 'cpu_to_node'
return cpu_to_node(raw_smp_processor_id());
^~~~~~~~~~~
include/linux/topology.h: In function 'cpu_cpu_mask':
arch/mips/include/asm/mach-ip27/topology.h:19:7: error: implicit declaration of function 'hub_data' [-Werror=implicit-function-declaration]
&hub_data(node)->h_cpus)
^~~~~~~~
include/linux/topology.h:210:9: note: in expansion of macro 'cpumask_of_node'
return cpumask_of_node(cpu_to_node(cpu));
^~~~~~~~~~~~~~~
arch/mips/include/asm/mach-ip27/topology.h:19:21: error: invalid type argument of '->' (have 'int')
&hub_data(node)->h_cpus)
^~
include/linux/topology.h:210:9: note: in expansion of macro 'cpumask_of_node'
return cpumask_of_node(cpu_to_node(cpu));
^~~~~~~~~~~~~~~
Before switch from discontigmem to sparsemem, there always was
CONFIG_NEED_MULTIPLE_NODES=y because it was selected by DISCONTIGMEM.
Without DISCONTIGMEM it is possible to have SPARSEMEM without NUMA for
SGI_IP27 and as many things there rely on custom node definition, the
build breaks.
As Thomas noted "... there are right now too many places in IP27 code,
which assumes NUMA enabled", the simplest solution would be to always
enable NUMA for SGI-IP27 builds.
Reported-by: kernel test robot <lkp(a)intel.com>
Fixes: 397dc00e249e ("mips: sgi-ip27: switch from DISCONTIGMEM to SPARSEMEM")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mike Rapoport <rppt(a)linux.ibm.com>
---
arch/mips/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 6fee1a133e9d..a7e40bb1e5bc 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -678,6 +678,7 @@ config SGI_IP27
select SYS_SUPPORTS_NUMA
select SYS_SUPPORTS_SMP
select MIPS_L1_CACHE_SHIFT_7
+ select NUMA
help
This are the SGI Origin 200, Origin 2000 and Onyx 2 Graphics
workstations. To compile a Linux kernel that runs on these, say Y
--
2.26.2
stable rc 4.9 build breaks on arm64, arm, x86_64 and i386.
Here are the build log failures on arm64.
git_repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
target_arch: arm64
toolchain: gcc-9
git_short_log: 1f47445197d2 (\Linux 4.9.233-rc1\)
git_sha: 1f47445197d2c8eecafa2b996f635aa89851c123
git_describe: v4.9.232-51-g1f47445197d2
kernel_version: 4.9.233-rc1
make -sk KBUILD_BUILD_USER=TuxBuild -C/linux -j16 ARCH=arm64
CROSS_COMPILE=aarch64-linux-gnu- HOSTCC=gcc CC="sccache
aarch64-linux-gnu-gcc" O=build Image
#
../arch/arm64/kernel/hw_breakpoint.c: In function ‘arch_bp_generic_fields’:
../arch/arm64/kernel/hw_breakpoint.c:352:5: note: parameter passing
for argument of type ‘struct arch_hw_breakpoint_ctrl’ changed in GCC
9.1
352 | int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
| ^~~~~~~~~~~~~~~~~~~~~~
../fs/ext4/inode.c: In function ‘ext4_direct_IO’:
../fs/ext4/inode.c:3610:9: error: redefinition of ‘offset’
3610 | loff_t offset = iocb->ki_pos;
| ^~~~~~
../fs/ext4/inode.c:3608:9: note: previous definition of ‘offset’ was here
3608 | loff_t offset = iocb->ki_pos;
| ^~~~~~
make[3]: *** [../scripts/Makefile.build:304: fs/ext4/inode.o] Error 1
make[3]: Target '__build' not remade because of errors.
make[2]: *** [../scripts/Makefile.build:555: fs/ext4] Error 2
../lib/vsprintf.c: In function ‘number’:
../lib/vsprintf.c:399:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
399 | char *number(char *buf, char *end, unsigned long long num,
| ^~~~~~
../lib/vsprintf.c: In function ‘widen_string’:
../lib/vsprintf.c:562:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
562 | char *widen_string(char *buf, int n, char *end, struct printf_spec spec)
| ^~~~~~~~~~~~
../lib/vsprintf.c: In function ‘string’:
../lib/vsprintf.c:583:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
583 | char *string(char *buf, char *end, const char *s, struct
printf_spec spec)
| ^~~~~~
../lib/vsprintf.c: In function ‘hex_string’:
../lib/vsprintf.c:803:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
803 | char *hex_string(char *buf, char *end, u8 *addr, struct
printf_spec spec,
| ^~~~~~~~~~
../lib/vsprintf.c: In function ‘mac_address_string’:
../lib/vsprintf.c:936:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
936 | char *mac_address_string(char *buf, char *end, u8 *addr,
| ^~~~~~~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘ip4_addr_string’:
../lib/vsprintf.c:1137:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
1137 | char *ip4_addr_string(char *buf, char *end, const u8 *addr,
| ^~~~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘uuid_string’:
../lib/vsprintf.c:1305:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
1305 | char *uuid_string(char *buf, char *end, const u8 *addr,
| ^~~~~~~~~~~
../lib/vsprintf.c: In function ‘symbol_string’:
../lib/vsprintf.c:668:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
668 | char *symbol_string(char *buf, char *end, void *ptr,
| ^~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘resource_string’:
../lib/vsprintf.c:695:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
695 | char *resource_string(char *buf, char *end, struct resource *res,
| ^~~~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘ip6_addr_string’:
../lib/vsprintf.c:1123:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
1123 | char *ip6_addr_string(char *buf, char *end, const u8 *addr,
| ^~~~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘ip4_addr_string_sa’:
../lib/vsprintf.c:1210:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
1210 | char *ip4_addr_string_sa(char *buf, char *end, const struct
sockaddr_in *sa,
| ^~~~~~~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘ip6_addr_string_sa’:
../lib/vsprintf.c:1148:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
1148 | char *ip6_addr_string_sa(char *buf, char *end, const struct
sockaddr_in6 *sa,
| ^~~~~~~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘escaped_string’:
../lib/vsprintf.c:1245:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
1245 | char *escaped_string(char *buf, char *end, u8 *addr, struct
printf_spec spec,
| ^~~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘dentry_name’:
../lib/vsprintf.c:604:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
604 | char *dentry_name(char *buf, char *end, const struct dentry
*d, struct printf_spec spec,
| ^~~~~~~~~~~
../lib/vsprintf.c: In function ‘clock.isra.0’:
../lib/vsprintf.c:1387:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
1387 | char *clock(char *buf, char *end, struct clk *clk, struct
printf_spec spec,
| ^~~~~
../lib/vsprintf.c: In function ‘bitmap_list_string.isra.0’:
../lib/vsprintf.c:896:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
896 | char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap,
| ^~~~~~~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘bitmap_string.isra.0’:
../lib/vsprintf.c:855:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
855 | char *bitmap_string(char *buf, char *end, unsigned long *bitmap,
| ^~~~~~~~~~~~~
../lib/vsprintf.c: In function ‘bdev_name.isra.0’:
../lib/vsprintf.c:649:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
649 | char *bdev_name(char *buf, char *end, struct block_device *bdev,
| ^~~~~~~~~
../lib/vsprintf.c: In function ‘pointer’:
../lib/vsprintf.c:1571:7: note: parameter passing for argument of type
‘struct printf_spec’ changed in GCC 9.1
1571 | char *pointer(const char *fmt, char *buf, char *end, void *ptr,
| ^~~~~~~
make[2]: Target '__build' not remade because of errors.
make[1]: *** [/linux/Makefile:1036: fs] Error 2
make[1]: Target 'Image' not remade because of errors.
make: *** [Makefile:152: sub-make] Error 2
make: Target 'Image' not remade because of errors.
--
Linaro LKFT
https://lkft.linaro.org
There are a few issues in DWC3 driver when preparing for TRB.
The driver needs to account the following:
* MPS alignment for ZLP OUT direction
* Extra TRBs when checking for available TRBs
* SG entries size > request length
Along with these fixes, there are some cleanup/refactoring patches in this
series .
Thinh Nguyen (7):
usb: dwc3: gadget: Don't setup more than requested
usb: dwc3: gadget: Fix handling ZLP
usb: dwc3: gadget: Handle ZLP for sg requests
usb: dwc3: gadget: Refactor preparing TRBs
usb: dwc3: gadget: Account for extra TRB
usb: dwc3: gadget: Rename misleading function names
usb: dwc3: ep0: Skip ZLP setup for OUT
drivers/usb/dwc3/ep0.c | 2 +-
drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++----------------
2 files changed, 137 insertions(+), 97 deletions(-)
base-commit: e3ee0e740c3887d2293e8d54a8707218d70d86ca
--
2.28.0
This patch is used to fix ext4 direct I/O read error when
the read size is not aligned with block size.
Then, I will use a test to explain the error.
(1) Make a file that is not aligned with block size:
$dd if=/dev/zero of=./test.jar bs=1000 count=3
(2) I wrote a source file named "direct_io_read_file.c" as following:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#define BUF_SIZE 1024
int main()
{
int fd;
int ret;
unsigned char *buf;
ret = posix_memalign((void **)&buf, 512, BUF_SIZE);
if (ret) {
perror("posix_memalign failed");
exit(1);
}
fd = open("./test.jar", O_RDONLY | O_DIRECT, 0755);
if (fd < 0){
perror("open ./test.jar failed");
exit(1);
}
do {
ret = read(fd, buf, BUF_SIZE);
printf("ret=%d\n",ret);
if (ret < 0) {
perror("write test.jar failed");
}
} while (ret > 0);
free(buf);
close(fd);
}
(3) Compile the source file:
$gcc direct_io_read_file.c -D_GNU_SOURCE
(4) Run the test program:
$./a.out
The result is as following:
ret=1024
ret=1024
ret=952
ret=-1
write test.jar failed: Invalid argument.
I have tested this program on XFS filesystem, XFS does not have
this problem, because XFS use iomap_dio_rw() to do direct I/O
read. And the comparing between read offset and file size is done
in iomap_dio_rw(), the code is as following:
if (pos < size) {
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
}
...
}
...only when "pos < size", direct I/O can be done, or 0 will be return.
I have tested the fix patch on Ext4, it is up to the mustard of
EINVAL in man2(read) as following:
#include <unistd.h>
ssize_t read(int fd, void *buf, size_t count);
EINVAL
fd is attached to an object which is unsuitable for reading;
or the file was opened with the O_DIRECT flag, and either the
address specified in buf, the value specified in count, or the
current file offset is not suitably aligned.
So I think this patch can be applied to fix ext4 direct I/O error.
However Ext4 introduces direct I/O read using iomap infrastructure
on kernel 5.5, the patch is commit <b1b4705d54ab>
("ext4: introduce direct I/O read using iomap infrastructure"),
then Ext4 will be the same as XFS, they all use iomap_dio_rw() to do direct
I/O read. So this problem does not exist on kernel 5.5 for Ext4.
>From above description, we can see this problem exists on all the kernel
versions between kernel 3.14 and kernel 5.4. It will cause the Applications
to fail to read. For example, when the search service downloads a new full
index file, the search engine is loading the previous index file and is
processing the search request, it can not use buffer io that may squeeze
the previous index file in use from pagecache, so the serch service must
use direct I/O read.
Please apply this patch on these kernel versions, or please use the method
on kernel 5.5 to fix this problem.
Fixes: 9fe55eea7e4b ("Fix race when checking i_size on direct i/o read")
Reviewed-by: Jan Kara <jack(a)suse.cz>
Co-developed-by: Wang Long <wanglong19(a)meituan.com>
Signed-off-by: Wang Long <wanglong19(a)meituan.com>
Signed-off-by: Jiang Ying <jiangying8582(a)126.com>
Changes since V3:
Add the info: this bug could break some application that use the
stable kernel releases.
Changes since V2:
Optimize the description of the commit message and make a variation for
the patch, e.g. with:
Before:
loff_t size;
size = i_size_read(inode);
After:
loff_t size = i_size_read(inode);
Changes since V1:
Signed-off use real name and add "Fixes:" flag
---
fs/ext4/inode.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 516faa2..a66b0ac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3821,6 +3821,11 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = mapping->host;
size_t count = iov_iter_count(iter);
ssize_t ret;
+ loff_t offset = iocb->ki_pos;
+ loff_t size = i_size_read(inode);
+
+ if (offset >= size)
+ return 0;
/*
* Shared inode_lock is enough for us - it protects against concurrent
--
1.8.3.1
Hi
[This is an automated email]
This commit has been processed because it contains a "Fixes:" tag
fixing commit: e9df09428996 ("KVM: SVM: Add sev module_param").
The bot has tested the following trees: v5.7.11, v5.4.54, v4.19.135.
v5.7.11: Failed to apply! Possible dependencies:
3bae0459bcd5 ("KVM: x86/mmu: Drop KVM's hugepage enums in favor of the kernel's enums")
b2f432f872d9 ("KVM: x86/mmu: Tweak PSE hugepage handling to avoid 2M vs 4M conundrum")
e662ec3e0705 ("KVM: x86/mmu: Move max hugepage level to a separate #define")
v5.4.54: Failed to apply! Possible dependencies:
106ee47dc633 ("docs: kvm: Convert api.txt to ReST format")
213e0e1f500b ("KVM: SVM: Refactor logging of NPT enabled/disabled")
3bae0459bcd5 ("KVM: x86/mmu: Drop KVM's hugepage enums in favor of the kernel's enums")
3c9bd4006bfc ("KVM: x86: enable dirty log gradually in small chunks")
80b10aa92448 ("Documentation: kvm: Fix mention to number of ioctls classes")
c726200dd106 ("KVM: arm/arm64: Allow reporting non-ISV data aborts to userspace")
cb9b88c66939 ("KVM: x86/mmu: Refactor handling of cache consistency with TDP")
da345174ceca ("KVM: arm/arm64: Allow user injection of external data aborts")
e662ec3e0705 ("KVM: x86/mmu: Move max hugepage level to a separate #define")
v4.19.135: Failed to apply! Possible dependencies:
213e0e1f500b ("KVM: SVM: Refactor logging of NPT enabled/disabled")
3bae0459bcd5 ("KVM: x86/mmu: Drop KVM's hugepage enums in favor of the kernel's enums")
44dd3ffa7bb3 ("x86/kvm/mmu: make vcpu->mmu a pointer to the current MMU")
4fef0f491347 ("KVM: x86: move definition PT_MAX_HUGEPAGE_LEVEL and KVM_NR_PAGE_SIZES together")
91e86d225ef3 ("kvm: x86: Add payload operands to kvm_multiple_exception")
c851436a34ca ("kvm: x86: Add has_payload and payload to kvm_queued_exception")
cb9b88c66939 ("KVM: x86/mmu: Refactor handling of cache consistency with TDP")
d647eb63e671 ("KVM: svm: add nrips module parameter")
da998b46d244 ("kvm: x86: Defer setting of CR2 until #PF delivery")
e662ec3e0705 ("KVM: x86/mmu: Move max hugepage level to a separate #define")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks
Sasha
Hi
[This is an automated email]
This commit has been processed because it contains a "Fixes:" tag
fixing commit: 6dc5fd93b2f1 ("ARM: 8900/1: UNWINDER_FRAME_POINTER implementation for Clang").
The bot has tested the following trees: v5.7.11, v5.4.54.
v5.7.11: Failed to apply! Possible dependencies:
5489ab50c227 ("arm/asm: add loglvl to c_backtrace()")
637ce97e7c24 ("ARM: backtrace-clang: check for NULL lr")
7c8ef99a0b04 ("ARM: backtrace-clang: add fixup for lr dereference")
v5.4.54: Failed to apply! Possible dependencies:
40ff1ddb5570 ("ARM: 8948/1: Prevent OOB access in stacktrace")
5489ab50c227 ("arm/asm: add loglvl to c_backtrace()")
637ce97e7c24 ("ARM: backtrace-clang: check for NULL lr")
7c8ef99a0b04 ("ARM: backtrace-clang: add fixup for lr dereference")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks
Sasha
This is the start of the stable review cycle for the 5.7.14 release.
There are 6 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 07 Aug 2020 15:34:53 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.7.14-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.7.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.7.14-rc1
Marc Zyngier <maz(a)kernel.org>
arm64: Workaround circular dependency in pointer_auth.h
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: move the pseudo-random 32-bit definitions to prandom.h
Linus Torvalds <torvalds(a)linux-foundation.org>
random32: remove net_rand_state from the latent entropy gcc plugin
Willy Tarreau <w(a)1wt.eu>
random: fix circular include dependency on arm64 after addition of percpu.h
Grygorii Strashko <grygorii.strashko(a)ti.com>
ARM: percpu.h: fix build error
Willy Tarreau <w(a)1wt.eu>
random32: update the net random state on interrupt and activity
-------------
Diffstat:
Makefile | 4 +-
arch/arm/include/asm/percpu.h | 2 +
arch/arm64/include/asm/pointer_auth.h | 8 +++-
drivers/char/random.c | 1 +
include/linux/prandom.h | 78 +++++++++++++++++++++++++++++++++++
include/linux/random.h | 63 ++--------------------------
kernel/time/timer.c | 8 ++++
lib/random32.c | 2 +-
8 files changed, 103 insertions(+), 63 deletions(-)
Hi,
all x86 and x86_64 images fail to boot in v4.4.y-queue (v4.4.232-30-g52247eb).
Bisect results below. Reverting both 3bc53626ab45 and d1c993b94751
fixes the problem.
Guenter
---
# bad: [52247eb98ebec43288b5da7033c5b757a6fbd1d0] Linux 4.4.233-rc1
# good: [e164d5f7b274f422f9cd4fa6a6638ea07c4969f1] Linux 4.4.232
git bisect start 'HEAD' 'v4.4.232'
# bad: [3bc53626ab45d3886eef382b83973969dc6fc429] x86, vmlinux.lds: Page-align end of ..page_aligned sections
git bisect bad 3bc53626ab45d3886eef382b83973969dc6fc429
# good: [34fda3ae46a68f53e4b18d9b5b560a9cecabb072] scsi: libsas: direct call probe and destruct
git bisect good 34fda3ae46a68f53e4b18d9b5b560a9cecabb072
# good: [994de7ca7e88d4c5e8893bd695dadf9af4751bc6] f2fs: check memory boundary by insane namelen
git bisect good 994de7ca7e88d4c5e8893bd695dadf9af4751bc6
# good: [93aa53738c81fc0e286b8cb37533e9697ae7ea6f] ARM: 8986/1: hw_breakpoint: Don't invoke overflow handler on uaccess watchpoints
git bisect good 93aa53738c81fc0e286b8cb37533e9697ae7ea6f
# bad: [d1c993b94751c4a84604711b378906ab91fb16ad] x86/build/lto: Fix truncated .bss with -fdata-sections
git bisect bad d1c993b94751c4a84604711b378906ab91fb16ad
# first bad commit: [d1c993b94751c4a84604711b378906ab91fb16ad] x86/build/lto: Fix truncated .bss with -fdata-sections
The node distance is hardcoded to 0, which causes a trouble
for some user-level applications. In particular, "libnuma"
expects the distance of a node to itself as LOCAL_DISTANCE.
This update removes the offending node distance override.
Cc: <stable(a)vger.kernel.org> # v5.6+
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Fixes: 701dc81e7412 ("s390/mm: remove fake numa support")
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
---
arch/s390/include/asm/topology.h | 6 ------
1 file changed, 6 deletions(-)
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fbb5075..3a0ac0c 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define node_distance(a, b) __node_distance(a, b)
-static inline int __node_distance(int a, int b)
-{
- return 0;
-}
-
#else /* !CONFIG_NUMA */
#define numa_node_id numa_node_id
--
1.8.3.1
This patch is used to fix ext4 direct I/O read error when
the read size is not aligned with block size.
Then, I will use a test to explain the error.
(1) Make a file that is not aligned with block size:
$dd if=/dev/zero of=./test.jar bs=1000 count=3
(2) I wrote a source file named "direct_io_read_file.c" as following:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#define BUF_SIZE 1024
int main()
{
int fd;
int ret;
unsigned char *buf;
ret = posix_memalign((void **)&buf, 512, BUF_SIZE);
if (ret) {
perror("posix_memalign failed");
exit(1);
}
fd = open("./test.jar", O_RDONLY | O_DIRECT, 0755);
if (fd < 0){
perror("open ./test.jar failed");
exit(1);
}
do {
ret = read(fd, buf, BUF_SIZE);
printf("ret=%d\n",ret);
if (ret < 0) {
perror("write test.jar failed");
}
} while (ret > 0);
free(buf);
close(fd);
}
(3) Compile the source file:
$gcc direct_io_read_file.c -D_GNU_SOURCE
(4) Run the test program:
$./a.out
The result is as following:
ret=1024
ret=1024
ret=952
ret=-1
write test.jar failed: Invalid argument.
I have tested this program on XFS filesystem, XFS does not have
this problem, because XFS use iomap_dio_rw() to do direct I/O
read. And the comparing between read offset and file size is done
in iomap_dio_rw(), the code is as following:
if (pos < size) {
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
}
...
}
...only when "pos < size", direct I/O can be done, or 0 will be return.
I have tested the fix patch on Ext4, it is up to the mustard of
EINVAL in man2(read) as following:
#include <unistd.h>
ssize_t read(int fd, void *buf, size_t count);
EINVAL
fd is attached to an object which is unsuitable for reading;
or the file was opened with the O_DIRECT flag, and either the
address specified in buf, the value specified in count, or the
current file offset is not suitably aligned.
So I think this patch can be applied to fix ext4 direct I/O error.
However Ext4 introduces direct I/O read using iomap infrastructure
on kernel 5.5, the patch is commit <b1b4705d54ab>
("ext4: introduce direct I/O read using iomap infrastructure"),
then Ext4 will be the same as XFS, they all use iomap_dio_rw() to do direct
I/O read. So this problem does not exist on kernel 5.5 for Ext4.
>From above description, we can see this problem exists on all the kernel
versions between kernel 3.14 and kernel 5.4. It will cause the Applications
to fail to read. For example, when the search service downloads a new full
index file, the search engine is loading the previous index file and is
processing the search request, it can not use buffer io that may squeeze
the previous index file in use from pagecache, so the serch service must
use direct I/O read.
Please apply this patch on these kernel versions, or please use the method
on kernel 5.5 to fix this problem.
Fixes: 9fe55eea7e4b ("Fix race when checking i_size on direct i/o read")
Reviewed-by: Jan Kara <jack(a)suse.cz>
Co-developed-by: Wang Long <wanglong19(a)meituan.com>
Signed-off-by: Wang Long <wanglong19(a)meituan.com>
Signed-off-by: Jiang Ying <jiangying8582(a)126.com>
---
fs/ext4/inode.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 516faa2..a66b0ac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3821,6 +3821,11 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = mapping->host;
size_t count = iov_iter_count(iter);
ssize_t ret;
+ loff_t offset = iocb->ki_pos;
+ loff_t size = i_size_read(inode);
+
+ if (offset >= size)
+ return 0;
/*
* Shared inode_lock is enough for us - it protects against concurrent
--
1.8.3.1
commit df58fae72428b "smb3: Incorrect size for netname negotiate
context" (patch was added in 5.4) turns out to be more important than
we realized (fixing a feature added in 5.3 by commit 96d3cca1241d6
which sends the "netname context" during protocol negotiations).
commit df58fae72428b should be cc:stable for 5.3
--
Thanks,
Steve
This patch set is aim to update the old IP_TOS_MASK to new IP_DSCP_MASK
as tos value has been obsoleted for a long time. But to make sure we don't
break any existing behaviour, we can't just replease all IP_TOS_MASK
to new IP_DSCP_MASK.
So let's update it case by case. The first issue we will fix is that vxlan
is unable to take the first 3 bits from DSCP field before xmit. Use the
new RT_DSCP() would resolve this.
v2: Remove IP_DSCP() definition as it's duplicated with RT_DSCP().
Post the patch to net instead of net-next as we need fix the vxlan issue
Hangbin Liu (2):
net: add IP_DSCP_MASK
vxlan: fix getting tos value from DSCP field
drivers/net/vxlan.c | 4 ++--
include/uapi/linux/in_route.h | 1 +
include/uapi/linux/ip.h | 1 +
3 files changed, 4 insertions(+), 2 deletions(-)
--
2.25.4
On Tue, Aug 4, 2020 at 5:52 PM Marc Plumb <lkml.mplumb(a)gmail.com> wrote:
>
> TL;DR This change takes the seed data from get_random_bytes and broadcasts it to the network, thereby destroying the security of dev/random. This change needs to be reverted and redesigned.
This was discussed.,
It's theoretical, not practical.
The patch improves real security, and the fake "but in theory" kind is
meaningless and people should stop that kind of behavior.
Linus
Willy and Ted,
This commit has serious security flaws
f227e3ec3b5cad859ad15666874405e8c1bbc1d4
TL;DR This change takes the seed data from get_random_bytes and
broadcasts it to the network, thereby destroying the security of
dev/random. This change needs to be reverted and redesigned.
It is inefficient:
This function is called from an interrupt context, so there is no chance
of a CPU switch, therefore the this_cpu_add function should be
__this_cpu_add. This is a sign that the patch may have been rushed and
may not be suitable for a stable release.
It is fixing the wrong problem:
The net_rand_state PRNG is a weak PRNG for the purpose of avoiding
collisions, not to be unguessable to an attacker. The network PRNG does
not need secure seeding. If you need a secure PRNG then you shouldn't be
using the net_rand_state PRNG. Please reconsider why you think that this
change is necessary.
It dramatically weakens dev/random:
Seeding two PRNGs with the same entropy causes two problems. The minor
one is that you're double counting entropy. The major one is that anyone
who can determine the state of one PRNG can determine the state of the
other.
The net_rand_state PRNG is effectively a 113 bit LFSR, so anyone who can
see any 113 bits of output can determine the complete internal state.
The output of the net_rand_state PRNG is used to determine how data is
sent to the network, so the output is effectively broadcast to anyone
watching network traffic. Therefore anyone watching the network traffic
can determine the seed data being fed to the net_rand_state PRNG. Since
this is the same seed data being fed to get_random_bytes, it allows an
attacker to determine the state and there output of /dev/random. I
sincerely hope that this was not the intended goal. :)
Thank you
Marc
From: Muchun Song <songmuchun(a)bytedance.com>
We found a case of kernel panic on our server. The stack trace is as
follows(omit some irrelevant information):
BUG: kernel NULL pointer dereference, address: 0000000000000080
RIP: 0010:kprobe_ftrace_handler+0x5e/0xe0
RSP: 0018:ffffb512c6550998 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff8e9d16eea018 RCX: 0000000000000000
RDX: ffffffffbe1179c0 RSI: ffffffffc0535564 RDI: ffffffffc0534ec0
RBP: ffffffffc0534ec1 R08: ffff8e9d1bbb0f00 R09: 0000000000000004
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: ffff8e9d1f797060 R14: 000000000000bacc R15: ffff8e9ce13eca00
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000080 CR3: 00000008453d0005 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<IRQ>
ftrace_ops_assist_func+0x56/0xe0
ftrace_call+0x5/0x34
tcpa_statistic_send+0x5/0x130 [ttcp_engine]
The tcpa_statistic_send is the function being kprobed. After analysis,
the root cause is that the fourth parameter regs of kprobe_ftrace_handler
is NULL. Why regs is NULL? We use the crash tool to analyze the kdump.
crash> dis tcpa_statistic_send -r
<tcpa_statistic_send>: callq 0xffffffffbd8018c0 <ftrace_caller>
The tcpa_statistic_send calls ftrace_caller instead of ftrace_regs_caller.
So it is reasonable that the fourth parameter regs of kprobe_ftrace_handler
is NULL. In theory, we should call the ftrace_regs_caller instead of the
ftrace_caller. After in-depth analysis, we found a reproducible path.
Writing a simple kernel module which starts a periodic timer. The
timer's handler is named 'kprobe_test_timer_handler'. The module
name is kprobe_test.ko.
1) insmod kprobe_test.ko
2) bpftrace -e 'kretprobe:kprobe_test_timer_handler {}'
3) echo 0 > /proc/sys/kernel/ftrace_enabled
4) rmmod kprobe_test
5) stop step 2) kprobe
6) insmod kprobe_test.ko
7) bpftrace -e 'kretprobe:kprobe_test_timer_handler {}'
We mark the kprobe as GONE but not disarm the kprobe in the step 4).
The step 5) also do not disarm the kprobe when unregister kprobe. So
we do not remove the ip from the filter. In this case, when the module
loads again in the step 6), we will replace the code to ftrace_caller
via the ftrace_module_enable(). When we register kprobe again, we will
not replace ftrace_caller to ftrace_regs_caller because the ftrace is
disabled in the step 3). So the step 7) will trigger kernel panic. Fix
this problem by disarming the kprobe when the module is going away.
Link: https://lkml.kernel.org/r/20200728064536.24405-1-songmuchun@bytedance.com
Cc: stable(a)vger.kernel.org
Fixes: ae6aa16fdc16 ("kprobes: introduce ftrace based optimization")
Acked-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Co-developed-by: Chengming Zhou <zhouchengming(a)bytedance.com>
Signed-off-by: Chengming Zhou <zhouchengming(a)bytedance.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/kprobes.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4a904cc56d68..07bf03fcf574 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2113,6 +2113,13 @@ static void kill_kprobe(struct kprobe *p)
* the original probed function (which will be freed soon) any more.
*/
arch_remove_kprobe(p);
+
+ /*
+ * The module is going away. We should disarm the kprobe which
+ * is using ftrace.
+ */
+ if (kprobe_ftrace(p))
+ disarm_kprobe_ftrace(p);
}
/* Disable one kprobe */
--
2.26.2
From: Nick Desaulniers <ndesaulniers(a)google.com>
__tracepoint_string's have their string data stored in .rodata, and an
address to that data stored in the "__tracepoint_str" section. Functions
that refer to those strings refer to the symbol of the address. Compiler
optimization can replace those address references with references
directly to the string data. If the address doesn't appear to have other
uses, then it appears dead to the compiler and is removed. This can
break the /tracing/printk_formats sysfs node which iterates the
addresses stored in the "__tracepoint_str" section.
Like other strings stored in custom sections in this header, mark these
__used to inform the compiler that there are other non-obvious users of
the address, so they should still be emitted.
Link: https://lkml.kernel.org/r/20200730224555.2142154-2-ndesaulniers@google.com
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Miguel Ojeda <miguel.ojeda.sandonis(a)gmail.com>
Cc: stable(a)vger.kernel.org
Fixes: 102c9323c35a8 ("tracing: Add __tracepoint_string() to export string pointers")
Reported-by: Tim Murray <timmurray(a)google.com>
Reported-by: Simon MacMullen <simonmacm(a)google.com>
Suggested-by: Greg Hackmann <ghackmann(a)google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
include/linux/tracepoint.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a1fecf311621..3a5b717d92e8 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -361,7 +361,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
static const char *___tp_str __tracepoint_string = str; \
___tp_str; \
})
-#define __tracepoint_string __attribute__((section("__tracepoint_str")))
+#define __tracepoint_string __attribute__((section("__tracepoint_str"), used))
#else
/*
* tracepoint_string() is used to save the string address for userspace
--
2.26.2
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 3ff3d4f43856 - x86/i8259: Use printk_deferred() to prevent deadlock
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 3:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
s390x:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ❌ Storage blktests
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 3:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
The patch titled
Subject: hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem
has been added to the -mm tree. Its filename is
hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/hugetlbfs-remove-call-to-huge_pte_…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/hugetlbfs-remove-call-to-huge_pte_…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem
Commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing
synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem
in at least read mode. This is because the explicit locking in
huge_pmd_share (called by huge_pte_alloc) was removed. When restructuring
the code, the call to huge_pte_alloc in the else block at the beginning of
hugetlb_fault was missed.
Unfortunately, that else clause is exercised when there is no page table
entry. This will likely lead to a call to huge_pmd_share. If
huge_pmd_share thinks pmd sharing is possible, it will traverse the
mapping tree (i_mmap) without holding i_mmap_rwsem. If someone else is
modifying the tree, bad things such as addressing exceptions or worse
could happen.
Simply remove the else clause. It should have been removed previously.
The code following the else will call huge_pte_alloc with the appropriate
locking.
To prevent this type of issue in the future, add routines to assert that
i_mmap_rwsem is held, and call these routines in huge pmd sharing
routines.
Link: http://lkml.kernel.org/r/e670f327-5cf9-1959-96e4-6dc7cc30d3d5@oracle.com
Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Suggested-by: Matthew Wilcox <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: "Kirill A.Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Prakash Sangappa <prakash.sangappa(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/fs.h | 10 ++++++++++
include/linux/hugetlb.h | 8 +++++---
mm/hugetlb.c | 15 +++++++--------
mm/rmap.c | 2 +-
4 files changed, 23 insertions(+), 12 deletions(-)
--- a/include/linux/fs.h~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/include/linux/fs.h
@@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(st
up_read(&mapping->i_mmap_rwsem);
}
+static inline void i_mmap_assert_locked(struct address_space *mapping)
+{
+ lockdep_assert_held(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_assert_write_locked(struct address_space *mapping)
+{
+ lockdep_assert_held_write(&mapping->i_mmap_rwsem);
+}
+
/*
* Might pages of this file be mapped into userspace?
*/
--- a/include/linux/hugetlb.h~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/include/linux/hugetlb.h
@@ -164,7 +164,8 @@ pte_t *huge_pte_alloc(struct mm_struct *
unsigned long addr, unsigned long sz);
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -203,8 +204,9 @@ static inline struct address_space *huge
return NULL;
}
-static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
- pte_t *ptep)
+static inline int huge_pmd_unshare(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
return 0;
}
--- a/mm/hugetlb.c~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/mm/hugetlb.c
@@ -3953,7 +3953,7 @@ void __unmap_hugepage_range(struct mmu_g
continue;
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, &address, ptep)) {
+ if (huge_pmd_unshare(mm, vma, &address, ptep)) {
spin_unlock(ptl);
/*
* We just unmapped a page of PMDs by clearing a PUD.
@@ -4540,10 +4540,6 @@ vm_fault_t hugetlb_fault(struct mm_struc
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
- } else {
- ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
- if (!ptep)
- return VM_FAULT_OOM;
}
/*
@@ -5020,7 +5016,7 @@ unsigned long hugetlb_change_protection(
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, &address, ptep)) {
+ if (huge_pmd_unshare(mm, vma, &address, ptep)) {
pages++;
spin_unlock(ptl);
shared_pmd = true;
@@ -5401,12 +5397,14 @@ out:
* returns: 1 successfully unmapped a shared pte page
* 0 the underlying pte page is not shared, or it is the last user
*/
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
pgd_t *pgd = pgd_offset(mm, *addr);
p4d_t *p4d = p4d_offset(pgd, *addr);
pud_t *pud = pud_offset(p4d, *addr);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
BUG_ON(page_count(virt_to_page(ptep)) == 0);
if (page_count(virt_to_page(ptep)) == 1)
return 0;
@@ -5424,7 +5422,8 @@ pte_t *huge_pmd_share(struct mm_struct *
return NULL;
}
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
return 0;
}
--- a/mm/rmap.c~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/mm/rmap.c
@@ -1469,7 +1469,7 @@ static bool try_to_unmap_one(struct page
* do this outside rmap routines.
*/
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
- if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
/*
* huge_pmd_unshare unmapped an entire PMD
* page. There is no way of knowing exactly
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlbfs-prevent-filesystem-stacking-of-hugetlbfs.patch
hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem.patch
cma-dont-quit-at-first-error-when-activating-reserved-areas.patch
Commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing
synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem
in at least read mode. This is because the explicit locking in
huge_pmd_share (called by huge_pte_alloc) was removed. When restructuring
the code, the call to huge_pte_alloc in the else block at the beginning
of hugetlb_fault was missed.
Unfortunately, that else clause is exercised when there is no page table
entry. This will likely lead to a call to huge_pmd_share. If
huge_pmd_share thinks pmd sharing is possible, it will traverse the mapping
tree (i_mmap) without holding i_mmap_rwsem. If someone else is modifying
the tree, bad things such as addressing exceptions or worse could happen.
Simply remove the else clause. It should have been removed previously.
The code following the else will call huge_pte_alloc with the appropriate
locking.
Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
---
mm/hugetlb.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 590111ea6975..0f6716422a53 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4539,10 +4539,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
- } else {
- ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
- if (!ptep)
- return VM_FAULT_OOM;
}
/*
--
2.25.4
The patch titled
Subject: khugepaged: khugepaged_test_exit() check mmget_still_valid()
has been added to the -mm tree. Its filename is
khugepaged-khugepaged_test_exit-check-mmget_still_valid.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/khugepaged-khugepaged_test_exit-ch…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/khugepaged-khugepaged_test_exit-ch…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: khugepaged_test_exit() check mmget_still_valid()
Move collapse_huge_page()'s mmget_still_valid() check into
khugepaged_test_exit() itself. collapse_huge_page() is used for anon THP
only, and earned its mmget_still_valid() check because it inserts a huge
pmd entry in place of the page table's pmd entry; whereas
collapse_file()'s retract_page_tables() or collapse_pte_mapped_thp()
merely clears the page table's pmd entry. But core dumping without mmap
lock must have been as open to mistaking a racily cleared pmd entry for a
page table at physical page 0, as exit_mmap() was. And we certainly have
no interest in mapping as a THP once dumping core.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021217020.27773@eggly.anvils
Fixes: 59ea6d06cfa9 ("coredump: fix race condition between collapse_huge_page() and core dumping")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/mm/khugepaged.c~khugepaged-khugepaged_test_exit-check-mmget_still_valid
+++ a/mm/khugepaged.c
@@ -431,7 +431,7 @@ static void insert_to_mm_slots_hash(stru
static inline int khugepaged_test_exit(struct mm_struct *mm)
{
- return atomic_read(&mm->mm_users) == 0;
+ return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
}
static bool hugepage_vma_check(struct vm_area_struct *vma,
@@ -1100,9 +1100,6 @@ static void collapse_huge_page(struct mm
* handled by the anon_vma lock + PG_lock.
*/
mmap_write_lock(mm);
- result = SCAN_ANY_PROCESS;
- if (!mmget_still_valid(mm))
- goto out;
result = hugepage_vma_revalidate(mm, address, &vma);
if (result)
goto out;
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-memcontrol-decouple-reference-counting-from-page-accounting-fix.patch
khugepaged-collapse_pte_mapped_thp-flush-the-right-range.patch
khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock.patch
khugepaged-retract_page_tables-remember-to-test-exit.patch
khugepaged-khugepaged_test_exit-check-mmget_still_valid.patch
The patch titled
Subject: khugepaged: retract_page_tables() remember to test exit
has been added to the -mm tree. Its filename is
khugepaged-retract_page_tables-remember-to-test-exit.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/khugepaged-retract_page_tables-rem…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/khugepaged-retract_page_tables-rem…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: retract_page_tables() remember to test exit
Only once have I seen this scenario (and forgot even to notice what forced
the eventual crash): a sequence of "BUG: Bad page map" alerts from
vm_normal_page(), from zap_pte_range() servicing exit_mmap();
pmd:00000000, pte values corresponding to data in physical page 0.
The pte mappings being zapped in this case were supposed to be from a huge
page of ext4 text (but could as well have been shmem): my belief is that
it was racing with collapse_file()'s retract_page_tables(), found *pmd
pointing to a page table, locked it, but *pmd had become 0 by the time
start_pte was decided.
In most cases, that possibility is excluded by holding mmap lock; but
exit_mmap() proceeds without mmap lock. Most of what's run by khugepaged
checks khugepaged_test_exit() after acquiring mmap lock:
khugepaged_collapse_pte_mapped_thps() and hugepage_vma_revalidate() do so,
for example. But retract_page_tables() did not: fix that.
The fix is for retract_page_tables() to check khugepaged_test_exit(),
after acquiring mmap lock, before doing anything to the page table.
Getting the mmap lock serializes with __mmput(), which briefly takes and
drops it in __khugepaged_exit(); then the khugepaged_test_exit() check on
mm_users makes sure we don't touch the page table once exit_mmap() might
reach it, since exit_mmap() will be proceeding without mmap lock, not
expecting anyone to be racing with it.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021215400.27773@eggly.anvils
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
--- a/mm/khugepaged.c~khugepaged-retract_page_tables-remember-to-test-exit
+++ a/mm/khugepaged.c
@@ -1532,6 +1532,7 @@ out:
static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
{
struct vm_area_struct *vma;
+ struct mm_struct *mm;
unsigned long addr;
pmd_t *pmd, _pmd;
@@ -1560,7 +1561,8 @@ static void retract_page_tables(struct a
continue;
if (vma->vm_end < addr + HPAGE_PMD_SIZE)
continue;
- pmd = mm_find_pmd(vma->vm_mm, addr);
+ mm = vma->vm_mm;
+ pmd = mm_find_pmd(mm, addr);
if (!pmd)
continue;
/*
@@ -1570,17 +1572,19 @@ static void retract_page_tables(struct a
* mmap_lock while holding page lock. Fault path does it in
* reverse order. Trylock is a way to avoid deadlock.
*/
- if (mmap_write_trylock(vma->vm_mm)) {
- spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
- /* assume page table is clear */
- _pmd = pmdp_collapse_flush(vma, addr, pmd);
- spin_unlock(ptl);
- mmap_write_unlock(vma->vm_mm);
- mm_dec_nr_ptes(vma->vm_mm);
- pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+ if (mmap_write_trylock(mm)) {
+ if (!khugepaged_test_exit(mm)) {
+ spinlock_t *ptl = pmd_lock(mm, pmd);
+ /* assume page table is clear */
+ _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
+ pte_free(mm, pmd_pgtable(_pmd));
+ }
+ mmap_write_unlock(mm);
} else {
/* Try again later */
- khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
+ khugepaged_add_pte_mapped_thp(mm, addr);
}
}
i_mmap_unlock_write(mapping);
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-memcontrol-decouple-reference-counting-from-page-accounting-fix.patch
khugepaged-collapse_pte_mapped_thp-flush-the-right-range.patch
khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock.patch
khugepaged-retract_page_tables-remember-to-test-exit.patch
khugepaged-khugepaged_test_exit-check-mmget_still_valid.patch
The patch titled
Subject: khugepaged: collapse_pte_mapped_thp() protect the pmd lock
has been added to the -mm tree. Its filename is
khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/khugepaged-collapse_pte_mapped_thp…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/khugepaged-collapse_pte_mapped_thp…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: collapse_pte_mapped_thp() protect the pmd lock
When retract_page_tables() removes a page table to make way for a huge
pmd, it holds huge page lock, i_mmap_lock_write, mmap_write_trylock and
pmd lock; but when collapse_pte_mapped_thp() does the same (to handle the
case when the original mmap_write_trylock had failed), only
mmap_write_trylock and pmd lock are held.
That's not enough. One machine has twice crashed under load, with "BUG:
spinlock bad magic" and GPF on 6b6b6b6b6b6b6b6b. Examining the second
crash, page_vma_mapped_walk_done()'s spin_unlock of pvmw->ptl (serving
page_referenced() on a file THP, that had found a page table at *pmd)
discovers that the page table page and its lock have already been freed by
the time it comes to unlock.
Follow the example of retract_page_tables(), but we only need one of huge
page lock or i_mmap_lock_write to secure against this: because it's the
narrower lock, and because it simplifies collapse_pte_mapped_thp() to know
the hpage earlier, choose to rely on huge page lock here.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021213070.27773@eggly.anvils
Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 44 +++++++++++++++++++-------------------------
1 file changed, 19 insertions(+), 25 deletions(-)
--- a/mm/khugepaged.c~khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock
+++ a/mm/khugepaged.c
@@ -1412,7 +1412,7 @@ void collapse_pte_mapped_thp(struct mm_s
{
unsigned long haddr = addr & HPAGE_PMD_MASK;
struct vm_area_struct *vma = find_vma(mm, haddr);
- struct page *hpage = NULL;
+ struct page *hpage;
pte_t *start_pte, *pte;
pmd_t *pmd, _pmd;
spinlock_t *ptl;
@@ -1432,9 +1432,17 @@ void collapse_pte_mapped_thp(struct mm_s
if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
return;
+ hpage = find_lock_page(vma->vm_file->f_mapping,
+ linear_page_index(vma, haddr));
+ if (!hpage)
+ return;
+
+ if (!PageHead(hpage))
+ goto drop_hpage;
+
pmd = mm_find_pmd(mm, haddr);
if (!pmd)
- return;
+ goto drop_hpage;
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
@@ -1453,30 +1461,11 @@ void collapse_pte_mapped_thp(struct mm_s
page = vm_normal_page(vma, addr, *pte);
- if (!page || !PageCompound(page))
- goto abort;
-
- if (!hpage) {
- hpage = compound_head(page);
- /*
- * The mapping of the THP should not change.
- *
- * Note that uprobe, debugger, or MAP_PRIVATE may
- * change the page table, but the new page will
- * not pass PageCompound() check.
- */
- if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
- goto abort;
- }
-
/*
- * Confirm the page maps to the correct subpage.
- *
- * Note that uprobe, debugger, or MAP_PRIVATE may change
- * the page table, but the new page will not pass
- * PageCompound() check.
+ * Note that uprobe, debugger, or MAP_PRIVATE may change the
+ * page table, but the new page will not be a subpage of hpage.
*/
- if (WARN_ON(hpage + i != page))
+ if (hpage + i != page)
goto abort;
count++;
}
@@ -1495,7 +1484,7 @@ void collapse_pte_mapped_thp(struct mm_s
pte_unmap_unlock(start_pte, ptl);
/* step 3: set proper refcount and mm_counters. */
- if (hpage) {
+ if (count) {
page_ref_sub(hpage, count);
add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
}
@@ -1506,10 +1495,15 @@ void collapse_pte_mapped_thp(struct mm_s
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
+
+drop_hpage:
+ unlock_page(hpage);
+ put_page(hpage);
return;
abort:
pte_unmap_unlock(start_pte, ptl);
+ goto drop_hpage;
}
static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-memcontrol-decouple-reference-counting-from-page-accounting-fix.patch
khugepaged-collapse_pte_mapped_thp-flush-the-right-range.patch
khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock.patch
khugepaged-retract_page_tables-remember-to-test-exit.patch
khugepaged-khugepaged_test_exit-check-mmget_still_valid.patch
The patch titled
Subject: khugepaged: collapse_pte_mapped_thp() flush the right range
has been added to the -mm tree. Its filename is
khugepaged-collapse_pte_mapped_thp-flush-the-right-range.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/khugepaged-collapse_pte_mapped_thp…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/khugepaged-collapse_pte_mapped_thp…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: collapse_pte_mapped_thp() flush the right range
pmdp_collapse_flush() should be given the start address at which the huge
page is mapped, haddr: it was given addr, which at that point has been
used as a local variable, incremented to the end address of the extent.
Found by source inspection while chasing a hugepage locking bug, which I
then could not explain by this. At first I thought this was very bad;
then saw that all of the page translations that were not flushed would
actually still point to the right pages afterwards, so harmless; then
realized that I know nothing of how different architectures and models
cache intermediate paging structures, so maybe it matters after all -
particularly since the page table concerned is immediately freed.
Much easier to fix than to think about.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021204390.27773@eggly.anvils
Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/khugepaged.c~khugepaged-collapse_pte_mapped_thp-flush-the-right-range
+++ a/mm/khugepaged.c
@@ -1502,7 +1502,7 @@ void collapse_pte_mapped_thp(struct mm_s
/* step 4: collapse pmd */
ptl = pmd_lock(vma->vm_mm, pmd);
- _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ _pmd = pmdp_collapse_flush(vma, haddr, pmd);
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-memcontrol-decouple-reference-counting-from-page-accounting-fix.patch
khugepaged-collapse_pte_mapped_thp-flush-the-right-range.patch
khugepaged-collapse_pte_mapped_thp-protect-the-pmd-lock.patch
khugepaged-retract_page_tables-remember-to-test-exit.patch
khugepaged-khugepaged_test_exit-check-mmget_still_valid.patch
This reverts commit 9a6418487b56 ("ALSA: hda: call runtime_allow()
for all hda controllers").
The reverted patch already introduced some regressions on some
machines:
- on gemini-lake machines, the error of "azx_get_response timeout"
happens in the hda driver.
- on the machines with alc662 codec, the audio jack detection doesn't
work anymore.
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208511
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang(a)canonical.com>
---
sound/pci/hda/hda_intel.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index e699873c8293..e34a4d5d047c 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2352,7 +2352,6 @@ static int azx_probe_continue(struct azx *chip)
if (azx_has_pm_runtime(chip)) {
pm_runtime_use_autosuspend(&pci->dev);
- pm_runtime_allow(&pci->dev);
pm_runtime_put_autosuspend(&pci->dev);
}
--
2.17.1
The patch titled
Subject: mm/page_counter.c: fix protection usage propagation
has been added to the -mm tree. Its filename is
mm-fix-protection-usage-propagation.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-fix-protection-usage-propagatio…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-fix-protection-usage-propagatio…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Michal Koutný <mkoutny(a)suse.com>
Subject: mm/page_counter.c: fix protection usage propagation
When workload runs in cgroups that aren't directly below root cgroup and
their parent specifies reclaim protection, it may end up ineffective.
The reason is that propagate_protected_usage() is not called in all
hierarchy up. All the protected usage is incorrectly accumulated in the
workload's parent. This means that siblings_low_usage is overestimated
and effective protection underestimated. Even though it is transitional
phenomenon (uncharge path does correct propagation and fixes the wrong
children_low_usage), it can undermine the indended protection
unexpectedly.
We have noticed this problem while seeing a swap out in a descendant of a
protected memcg (intermediate node) while the parent was conveniently
under its protection limit and the memory pressure was external to that
hierarchy. Michal has pinpointed this down to the wrong
siblings_low_usage which led to the unwanted reclaim.
The fix is simply updating children_low_usage in respective ancestors also
in the charging path.
Link: http://lkml.kernel.org/r/20200803153231.15477-1-mhocko@kernel.org
Fixes: 230671533d64 ("mm: memory.low hierarchical behavior")
Signed-off-by: Michal Koutný <mkoutny(a)suse.com>
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Roman Gushchin <guro(a)fb.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [4.18+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_counter.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/page_counter.c~mm-fix-protection-usage-propagation
+++ a/mm/page_counter.c
@@ -72,7 +72,7 @@ void page_counter_charge(struct page_cou
long new;
new = atomic_long_add_return(nr_pages, &c->usage);
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* This is indeed racy, but we can live with some
* inaccuracy in the watermark.
@@ -116,7 +116,7 @@ bool page_counter_try_charge(struct page
new = atomic_long_add_return(nr_pages, &c->usage);
if (new > c->max) {
atomic_long_sub(nr_pages, &c->usage);
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* This is racy, but we can live with some
* inaccuracy in the failcnt.
@@ -125,7 +125,7 @@ bool page_counter_try_charge(struct page
*fail = c;
goto failed;
}
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(c, new);
/*
* Just like with failcnt, we can live with some
* inaccuracy in the watermark.
_
Patches currently in -mm which might be from mkoutny(a)suse.com are
mm-fix-protection-usage-propagation.patch
proc-pid-smaps-consistent-whitespace-output-format.patch
From: Geert Uytterhoeven <geert(a)linux-m68k.org>
commit 59b6359dd92d18f5dc04b14a4c926fa08ab66f7c upstream.
If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few
hundred identical messages:
unwind: Unknown symbol address c0800300
unwind: Index not found c0800300
c0800300 is the return address from the last subroutine call (to
__memzero()) in __mmap_switched(). Apparently having this address in
the link register confuses the unwinder.
To fix this, reset the link register to zero before jumping to
start_kernel().
Fixes: 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup code")
Suggested-by: Ard Biesheuvel <ard.biesheuvel(a)linaro.org>
Signed-off-by: Geert Uytterhoeven <geert+renesas(a)glider.be>
Acked-by: Nicolas Pitre <nico(a)linaro.org>
Signed-off-by: Russell King <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
Looks like this first landed in v4.15-rc1. Without this, we can't tell
during an unwind initiated from start_kernel() when to stop unwinding,
which for the clang specific implementation of the arm frame pointer
unwinder leads to dereferencing a garbage value, triggering an exception
which has no fixup, triggering a panic, triggering an unwind, triggering
an infinite loop that prevents booting. I have more patches to send
upstream to make the unwinder more resilient, but it's ambiguous as to
when to stop unwinding without this patch.
arch/arm/kernel/head-common.S | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 7e662bdd5cb3..932b2244e709 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -101,6 +101,7 @@ __mmap_switched:
str r2, [r6] @ Save atags pointer
cmp r7, #0
strne r0, [r7] @ Save control register values
+ mov lr, #0
b start_kernel
ENDPROC(__mmap_switched)
--
2.28.0.rc0.142.g3c755180ce-goog
From: Mike Snitzer <snitzer(a)redhat.com>
Discontinue issuing writethrough write IO in series to the origin and
then cache.
Use bio_clone_fast() to create a new origin clone bio that will be
mapped to the origin device and then bio_chain() it to the bio that gets
remapped to the cache device. The origin clone bio does _not_ have a
copy of the per_bio_data -- as such check_if_tick_bio_needed() will not
be called.
The cache bio (parent bio) will not complete until the origin bio has
completed -- this fulfills bio_clone_fast()'s requirements as well as
the requirement to not complete the original IO until the write IO has
completed to both the origin and cache device.
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
(cherry picked from commit 2df3bae9a6543e90042291707b8db0cbfbae9ee9)
Fixes: 4ec34f2196d125ff781170ddc6c3058c08ec5e73 (dm bio record:
save/restore bi_end_io and bi_integrity )
4ec34f21 introduced a mkfs.ext4 hang on a LVM device that has been
modified with lvconvert --cachemode=writethrough.
CC:stable@vger.kernel.org for 4.14.y
Signed-off-by: John Donnelly <john.p.donnelly(a)oracle.com>
Reviewed-by: Somasundaram Krishnasamy <somasundaram.krishnasamy(a)oracle.com>
conflicts:
drivers/md/dm-cache-target.c. - Corrected usage of
writethrough_mode(&cache->feature) that was caught by
compiler, and removed unused static functions : writethrough_endio(),
defer_writethrough_bio(), wake_deferred_writethrough_worker()
that generated warnings.
---
drivers/md/dm-cache-target.c | 92
++++++++++++++++++--------------------------
1 file changed, 37 insertions(+), 55 deletions(-)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 69cdb29ef6be..2732d1df05fa 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -450,6 +450,7 @@ struct cache {
struct work_struct migration_worker;
struct delayed_work waker;
struct dm_bio_prison_v2 *prison;
+ struct bio_set *bs;
mempool_t *migration_pool;
@@ -537,11 +538,6 @@ static void wake_deferred_bio_worker(struct cache
*cache)
queue_work(cache->wq, &cache->deferred_bio_worker);
}
-static void wake_deferred_writethrough_worker(struct cache *cache)
-{
- queue_work(cache->wq, &cache->deferred_writethrough_worker);
-}
-
static void wake_migration_worker(struct cache *cache)
{
if (passthrough_mode(&cache->features))
@@ -868,16 +864,23 @@ static void check_if_tick_bio_needed(struct cache
*cache, struct bio *bio)
spin_unlock_irqrestore(&cache->lock, flags);
}
-static void remap_to_origin_clear_discard(struct cache *cache, struct
bio *bio,
- dm_oblock_t oblock)
+static void __remap_to_origin_clear_discard(struct cache *cache, struct
bio *bio,
+ dm_oblock_t oblock, bool bio_has_pbd)
{
- // FIXME: this is called way too much.
- check_if_tick_bio_needed(cache, bio);
+ if (bio_has_pbd)
+ check_if_tick_bio_needed(cache, bio);
remap_to_origin(cache, bio);
if (bio_data_dir(bio) == WRITE)
clear_discard(cache, oblock_to_dblock(cache, oblock));
}
+static void remap_to_origin_clear_discard(struct cache *cache, struct
bio *bio,
+ dm_oblock_t oblock)
+{
+ // FIXME: check_if_tick_bio_needed() is called way too much through
this interface
+ __remap_to_origin_clear_discard(cache, bio, oblock, true);
+}
+
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock)
{
@@ -937,57 +940,26 @@ static void issue_op(struct bio *bio, void *context)
accounted_request(cache, bio);
}
-static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
- bio_list_add(&cache->deferred_writethrough_bios, bio);
- spin_unlock_irqrestore(&cache->lock, flags);
-
- wake_deferred_writethrough_worker(cache);
-}
-
-static void writethrough_endio(struct bio *bio)
-{
- struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
-
- dm_unhook_bio(&pb->hook_info, bio);
-
- if (bio->bi_status) {
- bio_endio(bio);
- return;
- }
-
- dm_bio_restore(&pb->bio_details, bio);
- remap_to_cache(pb->cache, bio, pb->cblock);
-
- /*
- * We can't issue this bio directly, since we're in interrupt
- * context. So it gets put on a bio list for processing by the
- * worker thread.
- */
- defer_writethrough_bio(pb->cache, bio);
-}
-
/*
- * FIXME: send in parallel, huge latency as is.
* When running in writethrough mode we need to send writes to clean
blocks
- * to both the cache and origin devices. In future we'd like to clone the
- * bio and send them in parallel, but for now we're doing them in
- * series as this is easier.
+ * to both the cache and origin devices. Clone the bio and send them
in parallel.
*/
-static void remap_to_origin_then_cache(struct cache *cache, struct bio
*bio,
- dm_oblock_t oblock, dm_cblock_t cblock)
+static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
+ dm_oblock_t oblock, dm_cblock_t cblock)
{
- struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
+ struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs);
+
+ BUG_ON(!origin_bio);
- pb->cache = cache;
- pb->cblock = cblock;
- dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
- dm_bio_record(&pb->bio_details, bio);
+ bio_chain(origin_bio, bio);
+ /*
+ * Passing false to __remap_to_origin_clear_discard() skips
+ * all code that might use per_bio_data (since clone doesn't have it)
+ */
+ __remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
+ submit_bio(origin_bio);
- remap_to_origin_clear_discard(pb->cache, bio, oblock);
+ remap_to_cache(cache, bio, cblock);
}
/*----------------------------------------------------------------
@@ -1873,7 +1845,7 @@ static int map_bio(struct cache *cache, struct bio
*bio, dm_oblock_t block,
} else {
if (bio_data_dir(bio) == WRITE &&
writethrough_mode(&cache->features) &&
!is_dirty(cache, cblock)) {
- remap_to_origin_then_cache(cache, bio, block, cblock);
+ remap_to_origin_and_cache(cache, bio, block, cblock);
accounted_begin(cache, bio);
} else
remap_to_cache_dirty(cache, bio, block, cblock);
@@ -2132,6 +2104,9 @@ static void destroy(struct cache *cache)
kfree(cache->ctr_args[i]);
kfree(cache->ctr_args);
+ if (cache->bs)
+ bioset_free(cache->bs);
+
kfree(cache);
}
@@ -2589,6 +2564,13 @@ static int cache_create(struct cache_args *ca,
struct cache **result)
cache->features = ca->features;
ti->per_io_data_size = get_per_bio_data_size(cache);
+ if (writethrough_mode(&cache->features)) {
+ /* Create bioset for writethrough bios issued to origin */
+ cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!cache->bs)
+ goto bad;
+ }
+
cache->callbacks.congested_fn = cache_is_congested;
dm_table_add_target_callbacks(ti->table, &cache->callbacks);
-- 1.8.3.1
From: Pi-Hsun Shih <pihsun(a)chromium.org>
commit 6989310f5d4327e8595664954edd40a7f99ddd0d upstream.
Use offsetof to calculate offset of a field to take advantage of
compiler built-in version when possible, and avoid UBSAN warning when
compiling with Clang:
==================================================================
UBSAN: Undefined behaviour in net/wireless/wext-core.c:525:14
member access within null pointer of type 'struct iw_point'
CPU: 3 PID: 165 Comm: kworker/u16:3 Tainted: G S W 4.19.23 #43
Workqueue: cfg80211 __cfg80211_scan_done [cfg80211]
Call trace:
dump_backtrace+0x0/0x194
show_stack+0x20/0x2c
__dump_stack+0x20/0x28
dump_stack+0x70/0x94
ubsan_epilogue+0x14/0x44
ubsan_type_mismatch_common+0xf4/0xfc
__ubsan_handle_type_mismatch_v1+0x34/0x54
wireless_send_event+0x3cc/0x470
___cfg80211_scan_done+0x13c/0x220 [cfg80211]
__cfg80211_scan_done+0x28/0x34 [cfg80211]
process_one_work+0x170/0x35c
worker_thread+0x254/0x380
kthread+0x13c/0x158
ret_from_fork+0x10/0x18
===================================================================
Signed-off-by: Pi-Hsun Shih <pihsun(a)chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers(a)google.com>
Link: https://lore.kernel.org/r/20191204081307.138765-1-pihsun@chromium.org
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
Fix landed in v5.7-rc1. Thanks to James Hsu of MediaTek for the report.
include/uapi/linux/wireless.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h
index 86eca3208b6b..a2c006a364e0 100644
--- a/include/uapi/linux/wireless.h
+++ b/include/uapi/linux/wireless.h
@@ -74,6 +74,8 @@
#include <linux/socket.h> /* for "struct sockaddr" et al */
#include <linux/if.h> /* for IFNAMSIZ and co... */
+#include <stddef.h> /* for offsetof */
+
/***************************** VERSION *****************************/
/*
* This constant is used to know the availability of the wireless
@@ -1090,8 +1092,7 @@ struct iw_event {
/* iw_point events are special. First, the payload (extra data) come at
* the end of the event, so they are bigger than IW_EV_POINT_LEN. Second,
* we omit the pointer, so start at an offset. */
-#define IW_EV_POINT_OFF (((char *) &(((struct iw_point *) NULL)->length)) - \
- (char *) NULL)
+#define IW_EV_POINT_OFF offsetof(struct iw_point, length)
#define IW_EV_POINT_LEN (IW_EV_LCP_LEN + sizeof(struct iw_point) - \
IW_EV_POINT_OFF)
--
2.28.0.163.g6104cc2f0b6-goog
Recently we found an issue about the suspend and resume. If dmic is
recording the sound, and we run suspend and resume, after the resume,
the dmic can't work well anymore. we need to close the app and reopen
the app, then the dmic could record the sound again.
For example, we run "arecord -D hw:CARD=acp,DEV=0 -f S32_LE -c 2
-r 48000 test.wav", then suspend and resume, after the system resume
back, we speak to the dmic. then stop the arecord, use aplay to play
the test.wav, we could hear the sound recorded after resume is weird,
it is not what we speak to the dmic.
I found two registers are set in the dai_hw_params(), if the two
registers are set in the resume() too, this issue could be fixed.
Cc: stable(a)vger.kernel.org
Signed-off-by: Hui Wang <hui.wang(a)canonical.com>
---
sound/soc/amd/renoir/acp3x-pdm-dma.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/sound/soc/amd/renoir/acp3x-pdm-dma.c b/sound/soc/amd/renoir/acp3x-pdm-dma.c
index 623dfd3ea705..8acb0315a169 100644
--- a/sound/soc/amd/renoir/acp3x-pdm-dma.c
+++ b/sound/soc/amd/renoir/acp3x-pdm-dma.c
@@ -474,6 +474,11 @@ static int acp_pdm_resume(struct device *dev)
rtd = runtime->private_data;
period_bytes = frames_to_bytes(runtime, runtime->period_size);
buffer_len = frames_to_bytes(runtime, runtime->buffer_size);
+ if (runtime->channels == TWO_CH) {
+ rn_writel(0x0 , rtd->acp_base + ACP_WOV_PDM_NO_OF_CHANNELS);
+ rn_writel(PDM_DECIMATION_FACTOR, rtd->acp_base +
+ ACP_WOV_PDM_DECIMATION_FACTOR);
+ }
config_acp_dma(rtd, SNDRV_PCM_STREAM_CAPTURE);
init_pdm_ring_buffer(MEM_WINDOW_START, buffer_len, period_bytes,
adata->acp_base);
--
2.17.1
Recently we found an issue about the suspend and resume. If dmic is
recording the sound, and we run suspend and resume, after the resume,
the dmic can't work well anymore. we need to close the app and reopen
the app, then the dmic could record the sound again.
For example, we run "arecord -D hw:CARD=acp,DEV=0 -f S32_LE -c 2
-r 48000 test.wav", then suspend and resume, after the system resume
back, we speak to the dmic. then stop the arecord, use aplay to play
the test.wav, we could hear the sound recorded after resume is weird,
it is not what we speak to the dmic.
I found two registers are set in the dai_hw_params(), if the two
registers are set during the resume, this issue could be fixed.
Move the code of the dai_hw_params() into the pdm_dai_trigger(), then
these two registers will be set during resume since pdm_dai_trigger()
will be called during resume. And delete the empty function
dai_hw_params().
Cc: stable(a)vger.kernel.org
Signed-off-by: Hui Wang <hui.wang(a)canonical.com>
---
sound/soc/amd/renoir/acp3x-pdm-dma.c | 29 +++++++++-------------------
1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/sound/soc/amd/renoir/acp3x-pdm-dma.c b/sound/soc/amd/renoir/acp3x-pdm-dma.c
index 623dfd3ea705..7b14d9a81b97 100644
--- a/sound/soc/amd/renoir/acp3x-pdm-dma.c
+++ b/sound/soc/amd/renoir/acp3x-pdm-dma.c
@@ -314,40 +314,30 @@ static int acp_pdm_dma_close(struct snd_soc_component *component,
return 0;
}
-static int acp_pdm_dai_hw_params(struct snd_pcm_substream *substream,
- struct snd_pcm_hw_params *params,
- struct snd_soc_dai *dai)
+static int acp_pdm_dai_trigger(struct snd_pcm_substream *substream,
+ int cmd, struct snd_soc_dai *dai)
{
struct pdm_stream_instance *rtd;
+ int ret;
+ bool pdm_status;
unsigned int ch_mask;
rtd = substream->runtime->private_data;
- switch (params_channels(params)) {
+ ret = 0;
+ switch (substream->runtime->channels) {
case TWO_CH:
ch_mask = 0x00;
break;
default:
return -EINVAL;
}
- rn_writel(ch_mask, rtd->acp_base + ACP_WOV_PDM_NO_OF_CHANNELS);
- rn_writel(PDM_DECIMATION_FACTOR, rtd->acp_base +
- ACP_WOV_PDM_DECIMATION_FACTOR);
- return 0;
-}
-
-static int acp_pdm_dai_trigger(struct snd_pcm_substream *substream,
- int cmd, struct snd_soc_dai *dai)
-{
- struct pdm_stream_instance *rtd;
- int ret;
- bool pdm_status;
-
- rtd = substream->runtime->private_data;
- ret = 0;
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+ rn_writel(ch_mask, rtd->acp_base + ACP_WOV_PDM_NO_OF_CHANNELS);
+ rn_writel(PDM_DECIMATION_FACTOR, rtd->acp_base +
+ ACP_WOV_PDM_DECIMATION_FACTOR);
rtd->bytescount = acp_pdm_get_byte_count(rtd,
substream->stream);
pdm_status = check_pdm_dma_status(rtd->acp_base);
@@ -369,7 +359,6 @@ static int acp_pdm_dai_trigger(struct snd_pcm_substream *substream,
}
static struct snd_soc_dai_ops acp_pdm_dai_ops = {
- .hw_params = acp_pdm_dai_hw_params,
.trigger = acp_pdm_dai_trigger,
};
--
2.17.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 830f01b089b12bbe93bd55f2d62837253012a30e Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli(a)tencent.com>
Date: Fri, 31 Jul 2020 11:12:21 +0800
Subject: [PATCH] KVM: SVM: Fix disable pause loop exit/pause filtering
capability on SVM
'Commit 8566ac8b8e7c ("KVM: SVM: Implement pause loop exit logic in SVM")'
drops disable pause loop exit/pause filtering capability completely, I
guess it is a merge fault by Radim since disable vmexits capabilities and
pause loop exit for SVM patchsets are merged at the same time. This patch
reintroduces the disable pause loop exit/pause filtering capability support.
Reported-by: Haiwei Li <lihaiwei(a)tencent.com>
Tested-by: Haiwei Li <lihaiwei(a)tencent.com>
Fixes: 8566ac8b ("KVM: SVM: Implement pause loop exit logic in SVM")
Signed-off-by: Wanpeng Li <wanpengli(a)tencent.com>
Message-Id: <1596165141-28874-3-git-send-email-wanpengli(a)tencent.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c0da4dd78ac5..5bbf76189afa 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1090,7 +1090,7 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->nested.vmcb = 0;
svm->vcpu.arch.hflags = 0;
- if (pause_filter_count) {
+ if (!kvm_pause_in_guest(svm->vcpu.kvm)) {
control->pause_filter_count = pause_filter_count;
if (pause_filter_thresh)
control->pause_filter_thresh = pause_filter_thresh;
@@ -2693,7 +2693,7 @@ static int pause_interception(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
bool in_kernel = (svm_get_cpl(vcpu) == 0);
- if (pause_filter_thresh)
+ if (!kvm_pause_in_guest(vcpu->kvm))
grow_ple_window(vcpu);
kvm_vcpu_on_spin(vcpu, in_kernel);
@@ -3780,7 +3780,7 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
- if (pause_filter_thresh)
+ if (!kvm_pause_in_guest(vcpu->kvm))
shrink_ple_window(vcpu);
}
@@ -3958,6 +3958,9 @@ static void svm_vm_destroy(struct kvm *kvm)
static int svm_vm_init(struct kvm *kvm)
{
+ if (!pause_filter_count || !pause_filter_thresh)
+ kvm->arch.pause_in_guest = true;
+
if (avic) {
int ret = avic_vm_init(kvm);
if (ret)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 830f01b089b12bbe93bd55f2d62837253012a30e Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli(a)tencent.com>
Date: Fri, 31 Jul 2020 11:12:21 +0800
Subject: [PATCH] KVM: SVM: Fix disable pause loop exit/pause filtering
capability on SVM
'Commit 8566ac8b8e7c ("KVM: SVM: Implement pause loop exit logic in SVM")'
drops disable pause loop exit/pause filtering capability completely, I
guess it is a merge fault by Radim since disable vmexits capabilities and
pause loop exit for SVM patchsets are merged at the same time. This patch
reintroduces the disable pause loop exit/pause filtering capability support.
Reported-by: Haiwei Li <lihaiwei(a)tencent.com>
Tested-by: Haiwei Li <lihaiwei(a)tencent.com>
Fixes: 8566ac8b ("KVM: SVM: Implement pause loop exit logic in SVM")
Signed-off-by: Wanpeng Li <wanpengli(a)tencent.com>
Message-Id: <1596165141-28874-3-git-send-email-wanpengli(a)tencent.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c0da4dd78ac5..5bbf76189afa 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1090,7 +1090,7 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->nested.vmcb = 0;
svm->vcpu.arch.hflags = 0;
- if (pause_filter_count) {
+ if (!kvm_pause_in_guest(svm->vcpu.kvm)) {
control->pause_filter_count = pause_filter_count;
if (pause_filter_thresh)
control->pause_filter_thresh = pause_filter_thresh;
@@ -2693,7 +2693,7 @@ static int pause_interception(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
bool in_kernel = (svm_get_cpl(vcpu) == 0);
- if (pause_filter_thresh)
+ if (!kvm_pause_in_guest(vcpu->kvm))
grow_ple_window(vcpu);
kvm_vcpu_on_spin(vcpu, in_kernel);
@@ -3780,7 +3780,7 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
- if (pause_filter_thresh)
+ if (!kvm_pause_in_guest(vcpu->kvm))
shrink_ple_window(vcpu);
}
@@ -3958,6 +3958,9 @@ static void svm_vm_destroy(struct kvm *kvm)
static int svm_vm_init(struct kvm *kvm)
{
+ if (!pause_filter_count || !pause_filter_thresh)
+ kvm->arch.pause_in_guest = true;
+
if (avic) {
int ret = avic_vm_init(kvm);
if (ret)
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: a51347edfd15 - riscv: Parse all memory blocks to remove unusable memory
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ storage: software RAID testing
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
🚧 ⚡⚡⚡ kdump - file-load
Host 2:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ❌ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
target_unpopulated is incremented with nr_pages at the start of the
function, but the call to free_xenballooned_pages will only subtract
pgno number of pages, and thus the rest need to be subtracted before
returning or else accounting will be skewed.
Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Cc: stable(a)vger.kernel.org
---
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Stefano Stabellini <sstabellini(a)kernel.org>
Cc: xen-devel(a)lists.xenproject.org
---
drivers/xen/balloon.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 77c57568e5d7..3cb10ed32557 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -630,6 +630,12 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
out_undo:
mutex_unlock(&balloon_mutex);
free_xenballooned_pages(pgno, pages);
+ /*
+ * NB: free_xenballooned_pages will only subtract pgno pages, but since
+ * target_unpopulated is incremented with nr_pages at the start we need
+ * to remove the remaining ones also, or accounting will be screwed.
+ */
+ balloon_stats.target_unpopulated -= nr_pages - pgno;
return ret;
}
EXPORT_SYMBOL(alloc_xenballooned_pages);
--
2.27.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: ded40d83aead - libtraceevent: Fix build with binutils 2.35
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
Host 4:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 1ec3f9f6cf05 - libtraceevent: Fix build with binutils 2.35
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ storage: software RAID testing
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
Host 4:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
s390x:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
From: Rik van Riel <riel(a)surriel.com>
commit cdea5459ce263fbc963657a7736762ae897a8ae6 upstream
The code in xlog_wait uses the spinlock to make adding the task to
the wait queue, and setting the task state to UNINTERRUPTIBLE atomic
with respect to the waker.
Doing the wakeup after releasing the spinlock opens up the following
race condition:
Task 1 task 2
add task to wait queue
wake up task
set task state to UNINTERRUPTIBLE
This issue was found through code inspection as a result of kworkers
being observed stuck in UNINTERRUPTIBLE state with an empty
wait queue. It is rare and largely unreproducable.
Simply moving the spin_unlock to after the wake_up_all results
in the waker not being able to see a task on the waitqueue before
it has set its state to UNINTERRUPTIBLE.
This bug dates back to the conversion of this code to generic
waitqueue infrastructure from a counting semaphore back in 2008
which didn't place the wakeups consistently w.r.t. to the relevant
spin locks.
[dchinner: Also fix a similar issue in the shutdown path on
xc_commit_wait. Update commit log with more details of the issue.]
Fixes: d748c62367eb ("[XFS] Convert l_flushsema to a sv_t")
Reported-by: Chris Mason <clm(a)fb.com>
Signed-off-by: Rik van Riel <riel(a)surriel.com>
Signed-off-by: Dave Chinner <dchinner(a)redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong(a)oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong(a)oracle.com>
Cc: stable(a)vger.kernel.org # 4.9.x-4.19.x
[modified for contextual change near xlog_state_do_callback()]
Signed-off-by: Samuel Mendoza-Jonas <samjonas(a)amazon.com>
Reviewed-by: Frank van der Linden <fllinden(a)amazon.com>
Reviewed-by: Suraj Jitindar Singh <surajjs(a)amazon.com>
Reviewed-by: Benjamin Herrenschmidt <benh(a)amazon.com>
Reviewed-by: Anchal Agarwal <anchalag(a)amazon.com>
---
This issue was fixed in v5.4 but didn't appear to make it to stable. The
fixed commit goes back to v2.6, so backport this to stable kernels
before v5.4. The only difference is a contextual change at
xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
Which in v5.4 is
xlog_state_do_callback(log, true, NULL);
fs/xfs/xfs_log.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 360e32220f93..0c0f70e6c7d9 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2684,7 +2684,6 @@ xlog_state_do_callback(
int funcdidcallbacks; /* flag: function did callbacks */
int repeats; /* for issuing console warnings if
* looping too many times */
- int wake = 0;
spin_lock(&log->l_icloglock);
first_iclog = iclog = log->l_iclog;
@@ -2886,11 +2885,9 @@ xlog_state_do_callback(
#endif
if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
- wake = 1;
- spin_unlock(&log->l_icloglock);
-
- if (wake)
wake_up_all(&log->l_flush_wait);
+
+ spin_unlock(&log->l_icloglock);
}
@@ -4052,7 +4049,9 @@ xfs_log_force_umount(
* item committed callback functions will do this again under lock to
* avoid races.
*/
+ spin_lock(&log->l_cilp->xc_push_lock);
wake_up_all(&log->l_cilp->xc_commit_wait);
+ spin_unlock(&log->l_cilp->xc_push_lock);
xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
#ifdef XFSERRORDEBUG
--
2.17.1
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: d3c05bf9fdd4 - PCI/ASPM: Disable ASPM on ASMedia ASM1083/1085 PCIe-to-PCI bridge
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 631a03ad0b39 - revert: 1320a4052ea1 ("audit: trigger accompanying records when no rules present")
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
s390x:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 3:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
This code attempts to set the HCI_UART_RESET_ON_INIT flag. However,
it sets the bit in the wrong flag word: HCI_UART_RESET_ON_INIT goes in
hu->hdev_flags, not hu->flags. So it is actually setting
HCI_UART_REGISTERED, which is bit 1 in hu->flags.
Since commit cba736465e5c ("Bluetooth: hci_serdev: Remove setting of
HCI_QUIRK_RESET_ON_CLOSE."), this flag is ignored for hci_serdev users,
so instead of fixing which flag is set, let's remove the flag entirely.
Cc: stable(a)vger.kernel.org
Fixes: ce945552fde4 ("Bluetooth: hci_h5: Add support for serdev enumerated devices")
Signed-off-by: Samuel Holland <samuel(a)sholland.org>
---
drivers/bluetooth/hci_h5.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index e60b2e0773db..981d96cc7695 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -793,8 +793,6 @@ static int h5_serdev_probe(struct serdev_device *serdev)
if (!h5)
return -ENOMEM;
- set_bit(HCI_UART_RESET_ON_INIT, &h5->serdev_hu.flags);
-
h5->hu = &h5->serdev_hu;
h5->serdev_hu.serdev = serdev;
serdev_device_set_drvdata(serdev, h5);
--
2.26.2
From: Huacai Chen <chenhc(a)lemote.com>
commit e02e07e3127d8aec1f4bcdfb2fc52a2d99b4859e upstream.
On the Loongson-2G/2H/3A/3B there is a hardware flaw that ll/sc and
lld/scd is very weak ordering. We should add sync instructions "before
each ll/lld" and "at the branch-target between ll/sc" to workaround.
Otherwise, this flaw will cause deadlock occasionally (e.g. when doing
heavy load test with LTP).
Below is the explaination of CPU designer:
"For Loongson 3 family, when a memory access instruction (load, store,
or prefetch)'s executing occurs between the execution of LL and SC, the
success or failure of SC is not predictable. Although programmer would
not insert memory access instructions between LL and SC, the memory
instructions before LL in program-order, may dynamically executed
between the execution of LL/SC, so a memory fence (SYNC) is needed
before LL/LLD to avoid this situation.
Since Loongson-3A R2 (3A2000), we have improved our hardware design to
handle this case. But we later deduce a rarely circumstance that some
speculatively executed memory instructions due to branch misprediction
between LL/SC still fall into the above case, so a memory fence (SYNC)
at branch-target (if its target is not between LL/SC) is needed for
Loongson 3A1000, 3B1500, 3A2000 and 3A3000.
Our processor is continually evolving and we aim to to remove all these
workaround-SYNCs around LL/SC for new-come processor."
Here is an example:
Both cpu1 and cpu2 simutaneously run atomic_add by 1 on same atomic var,
this bug cause both 'sc' run by two cpus (in atomic_add) succeed at same
time('sc' return 1), and the variable is only *added by 1*, sometimes,
which is wrong and unacceptable(it should be added by 2).
Why disable fix-loongson3-llsc in compiler?
Because compiler fix will cause problems in kernel's __ex_table section.
This patch fix all the cases in kernel, but:
+. the fix at the end of futex_atomic_cmpxchg_inatomic is for branch-target
of 'bne', there other cases which smp_mb__before_llsc() and smp_llsc_mb() fix
the ll and branch-target coincidently such as atomic_sub_if_positive/
cmpxchg/xchg, just like this one.
+. Loongson 3 does support CONFIG_EDAC_ATOMIC_SCRUB, so no need to touch
edac.h
+. local_ops and cmpxchg_local should not be affected by this bug since
only the owner can write.
+. mips_atomic_set for syscall.c is deprecated and rarely used, just let
it go
Signed-off-by: Huacai Chen <chenhc(a)lemote.com>
Signed-off-by: Huang Pei <huangpei(a)loongson.cn>
[paul.burton(a)mips.com:
- Simplify the addition of -mno-fix-loongson3-llsc to cflags, and add
a comment describing why it's there.
- Make loongson_llsc_mb() a no-op when
CONFIG_CPU_LOONGSON3_WORKAROUNDS=n, rather than a compiler memory
barrier.
- Add a comment describing the bug & how loongson_llsc_mb() helps
in asm/barrier.h.]
Signed-off-by: Paul Burton <paul.burton(a)mips.com>
Signed-off-by: Jiaxun Yang <jiaxun.yang(a)flygoat.com>
Cc: Ralf Baechle <ralf(a)linux-mips.org>
Cc: ambrosehua(a)gmail.com
Cc: Steven J . Hill <Steven.Hill(a)cavium.com>
Cc: linux-mips(a)linux-mips.org
Cc: Fuxin Zhang <zhangfx(a)lemote.com>
Cc: Zhangjin Wu <wuzhangjin(a)gmail.com>
Cc: Li Xuefeng <lixuefeng(a)loongson.cn>
Cc: Xu Chenghua <xuchenghua(a)loongson.cn>
Cc: stable(a)vger.kernel.org # 4.19
---
Backport to stable according to request from Debian downstream.
---
arch/mips/Kconfig | 15 ++++++++++++++
arch/mips/include/asm/atomic.h | 6 ++++++
arch/mips/include/asm/barrier.h | 36 +++++++++++++++++++++++++++++++++
arch/mips/include/asm/bitops.h | 5 +++++
arch/mips/include/asm/futex.h | 3 +++
arch/mips/include/asm/pgtable.h | 2 ++
arch/mips/loongson64/Platform | 23 +++++++++++++++++++++
arch/mips/mm/tlbex.c | 10 +++++++++
8 files changed, 100 insertions(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a830a9701e50..864a2d2fd054 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1398,6 +1398,21 @@ config LOONGSON3_ENHANCEMENT
please say 'N' here. If you want a high-performance kernel to run on
new Loongson 3 machines only, please say 'Y' here.
+config CPU_LOONGSON3_WORKAROUNDS
+ bool "Old Loongson 3 LLSC Workarounds"
+ default y if SMP
+ depends on CPU_LOONGSON3
+ help
+ Loongson 3 processors have the llsc issues which require workarounds.
+ Without workarounds the system may hang unexpectedly.
+
+ Newer Loongson 3 will fix these issues and no workarounds are needed.
+ The workarounds have no significant side effect on them but may
+ decrease the performance of the system so this option should be
+ disabled unless the kernel is intended to be run on old systems.
+
+ If unsure, please say Y.
+
config CPU_LOONGSON2E
bool "Loongson 2E"
depends on SYS_HAS_CPU_LOONGSON2E
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 9e805317847d..1fc6f04e85a1 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
if (kernel_uses_llsc) { \
int temp; \
\
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
"1: ll %0, %1 # atomic_" #op " \n" \
@@ -84,6 +85,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
if (kernel_uses_llsc) { \
int temp; \
\
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
"1: ll %1, %2 # atomic_" #op "_return \n" \
@@ -116,6 +118,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \
if (kernel_uses_llsc) { \
int temp; \
\
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
"1: ll %1, %2 # atomic_fetch_" #op " \n" \
@@ -251,6 +254,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
if (kernel_uses_llsc) { \
long temp; \
\
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
"1: lld %0, %1 # atomic64_" #op " \n" \
@@ -277,6 +281,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
if (kernel_uses_llsc) { \
long temp; \
\
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
"1: lld %1, %2 # atomic64_" #op "_return\n" \
@@ -309,6 +314,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
if (kernel_uses_llsc) { \
long temp; \
\
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
"1: lld %1, %2 # atomic64_fetch_" #op "\n" \
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index a5eb1bb199a7..b7f6ac5e513c 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -222,6 +222,42 @@
#define __smp_mb__before_atomic() __smp_mb__before_llsc()
#define __smp_mb__after_atomic() smp_llsc_mb()
+/*
+ * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
+ * store or pref) in between an ll & sc can cause the sc instruction to
+ * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
+ * containing such sequences, this bug bites harder than we might otherwise
+ * expect due to reordering & speculation:
+ *
+ * 1) A memory access appearing prior to the ll in program order may actually
+ * be executed after the ll - this is the reordering case.
+ *
+ * In order to avoid this we need to place a memory barrier (ie. a sync
+ * instruction) prior to every ll instruction, in between it & any earlier
+ * memory access instructions. Many of these cases are already covered by
+ * smp_mb__before_llsc() but for the remaining cases, typically ones in
+ * which multiple CPUs may operate on a memory location but ordering is not
+ * usually guaranteed, we use loongson_llsc_mb() below.
+ *
+ * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
+ *
+ * 2) If a conditional branch exists between an ll & sc with a target outside
+ * of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
+ * or similar, then misprediction of the branch may allow speculative
+ * execution of memory accesses from outside of the ll-sc loop.
+ *
+ * In order to avoid this we need a memory barrier (ie. a sync instruction)
+ * at each affected branch target, for which we also use loongson_llsc_mb()
+ * defined below.
+ *
+ * This case affects all current Loongson 3 CPUs.
+ */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
+#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#else
+#define loongson_llsc_mb() do { } while (0)
+#endif
+
#include <asm-generic/barrier.h>
#endif /* __ASM_BARRIER_H */
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index da1b8718861e..2a40ecd69ac4 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -68,6 +68,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" " __LL "%0, %1 # set_bit \n"
@@ -78,6 +79,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
} while (unlikely(!temp));
#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
} else if (kernel_uses_llsc) {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set "MIPS_ISA_ARCH_LEVEL" \n"
@@ -120,6 +122,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
: "ir" (~(1UL << bit)));
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" " __LL "%0, %1 # clear_bit \n"
@@ -130,6 +133,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
} while (unlikely(!temp));
#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
} else if (kernel_uses_llsc) {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set "MIPS_ISA_ARCH_LEVEL" \n"
@@ -188,6 +192,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
unsigned long temp;
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set "MIPS_ISA_ARCH_LEVEL" \n"
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index a9e61ea54ca9..0a62a91b592d 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -50,6 +50,7 @@
"i" (-EFAULT) \
: "memory"); \
} else if (cpu_has_llsc) { \
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
@@ -162,6 +163,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
"i" (-EFAULT)
: "memory");
} else if (cpu_has_llsc) {
+ loongson_llsc_mb();
__asm__ __volatile__(
"# futex_atomic_cmpxchg_inatomic \n"
" .set push \n"
@@ -190,6 +192,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
"i" (-EFAULT)
: "memory");
+ loongson_llsc_mb();
} else
return -ENOSYS;
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 129e0328367f..6a35bbf46b93 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -229,6 +229,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
: [global] "r" (page_global));
} else if (kernel_uses_llsc) {
+ loongson_llsc_mb();
__asm__ __volatile__ (
" .set "MIPS_ISA_ARCH_LEVEL" \n"
" .set push \n"
@@ -244,6 +245,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
" .set mips0 \n"
: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
: [global] "r" (page_global));
+ loongson_llsc_mb();
}
#else /* !CONFIG_SMP */
if (pte_none(*buddy))
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 12abf14aed4a..9f79908f5063 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -23,6 +23,29 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
endif
cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+
+#
+# Some versions of binutils, not currently mainline as of 2019/02/04, support
+# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction
+# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a
+# description).
+#
+# We disable this in order to prevent the assembler meddling with the
+# instruction that labels refer to, ie. if we label an ll instruction:
+#
+# 1: ll v0, 0(a0)
+#
+# ...then with the assembler fix applied the label may actually point at a sync
+# instruction inserted by the assembler, and if we were using the label in an
+# exception table the table would no longer contain the address of the ll
+# instruction.
+#
+# Avoid this by explicitly disabling that assembler behaviour. If upstream
+# binutils does not merge support for the flag then we can revisit & remove
+# this later - for now it ensures vendor toolchains don't cause problems.
+#
+cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
+
#
# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
# as MIPS64 R2; older versions as just R1. This leaves the possibility open
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 620abc968624..c99c53ba8338 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -943,6 +943,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
* to mimic that here by taking a load/istream page
* fault.
*/
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(p, 0);
UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
uasm_i_jr(p, ptr);
@@ -1666,6 +1668,8 @@ static void
iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
{
#ifdef CONFIG_SMP
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(p, 0);
# ifdef CONFIG_PHYS_ADDR_T_64BIT
if (cpu_has_64bits)
uasm_i_lld(p, pte, 0, ptr);
@@ -2279,6 +2283,8 @@ static void build_r4000_tlb_load_handler(void)
#endif
uasm_l_nopage_tlbl(&l, p);
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(&p, 0);
build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
if ((unsigned long)tlb_do_page_fault_0 & 1) {
@@ -2333,6 +2339,8 @@ static void build_r4000_tlb_store_handler(void)
#endif
uasm_l_nopage_tlbs(&l, p);
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(&p, 0);
build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
if ((unsigned long)tlb_do_page_fault_1 & 1) {
@@ -2388,6 +2396,8 @@ static void build_r4000_tlb_modify_handler(void)
#endif
uasm_l_nopage_tlbm(&l, p);
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(&p, 0);
build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
if ((unsigned long)tlb_do_page_fault_1 & 1) {
--
2.28.0.rc1
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From eec13b42d41b0f3339dcf0c4da43734427c68620 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Thu, 18 Jun 2020 11:16:45 +0100
Subject: [PATCH] ARM: 8986/1: hw_breakpoint: Don't invoke overflow handler on
uaccess watchpoints
Unprivileged memory accesses generated by the so-called "translated"
instructions (e.g. LDRT) in kernel mode can cause user watchpoints to fire
unexpectedly. In such cases, the hw_breakpoint logic will invoke the user
overflow handler which will typically raise a SIGTRAP back to the current
task. This is futile when returning back to the kernel because (a) the
signal won't have been delivered and (b) userspace can't handle the thing
anyway.
Avoid invoking the user overflow handler for watchpoints triggered by
kernel uaccess routines, and instead single-step over the faulting
instruction as we would if no overflow handler had been installed.
Cc: <stable(a)vger.kernel.org>
Fixes: f81ef4a920c8 ("ARM: 6356/1: hw-breakpoint: add ARM backend for the hw-breakpoint framework")
Reported-by: Luis Machado <luis.machado(a)linaro.org>
Tested-by: Luis Machado <luis.machado(a)linaro.org>
Signed-off-by: Will Deacon <will(a)kernel.org>
Signed-off-by: Russell King <rmk+kernel(a)armlinux.org.uk>
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 02ca7adf5375..7fff88e61252 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -683,6 +683,12 @@ static void disable_single_step(struct perf_event *bp)
arch_install_hw_breakpoint(bp);
}
+static int watchpoint_fault_on_uaccess(struct pt_regs *regs,
+ struct arch_hw_breakpoint *info)
+{
+ return !user_mode(regs) && info->ctrl.privilege == ARM_BREAKPOINT_USER;
+}
+
static void watchpoint_handler(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
@@ -742,16 +748,27 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
}
pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
+
+ /*
+ * If we triggered a user watchpoint from a uaccess routine,
+ * then handle the stepping ourselves since userspace really
+ * can't help us with this.
+ */
+ if (watchpoint_fault_on_uaccess(regs, info))
+ goto step;
+
perf_bp_event(wp, regs);
/*
- * If no overflow handler is present, insert a temporary
- * mismatch breakpoint so we can single-step over the
- * watchpoint trigger.
+ * Defer stepping to the overflow handler if one is installed.
+ * Otherwise, insert a temporary mismatch breakpoint so that
+ * we can single-step over the watchpoint trigger.
*/
- if (is_default_overflow_handler(wp))
- enable_single_step(wp, instruction_pointer(regs));
+ if (!is_default_overflow_handler(wp))
+ goto unlock;
+step:
+ enable_single_step(wp, instruction_pointer(regs));
unlock:
rcu_read_unlock();
}