Errors returned by crypto_shash_update() are not checked in
ima_calc_boot_aggregate_tfm() and thus can be overwritten at the next
iteration of the loop. This patch adds a check after calling
crypto_shash_update() and returns immediately if the result is not zero.
Cc: stable(a)vger.kernel.org
Fixes: 3323eec921efd ("integrity: IMA as an integrity service provider")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
security/integrity/ima/ima_crypto.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 220b14920c37..47897fbae6c6 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -829,6 +829,8 @@ static int ima_calc_boot_aggregate_tfm(char *digest, u16 alg_id,
/* now accumulate with current aggregate */
rc = crypto_shash_update(shash, d.digest,
crypto_shash_digestsize(tfm));
+ if (rc != 0)
+ return rc;
}
if (!rc)
crypto_shash_final(shash, digest);
--
2.17.1
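The failure mode is generic: a loop that reassigns rc on every pass silently
discards earlier errors. A standalone sketch of the pattern the fix enforces,
with a hypothetical update_digest() standing in for crypto_shash_update():

#include <stdio.h>

/* hypothetical stand-in for crypto_shash_update(): 0 on success, -errno on error */
static int update_digest(int i)
{
        return (i == 2) ? -22 : 0;  /* fail on the third chunk */
}

int main(void)
{
        int rc = 0;
        int i;

        for (i = 0; i < 8; i++) {
                rc = update_digest(i);
                if (rc != 0) {
                        /* without this check, the next pass would overwrite rc */
                        fprintf(stderr, "update %d failed: %d\n", i, rc);
                        return 1;
                }
        }
        printf("all updates succeeded\n");
        return 0;
}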
Upstream commits fdfe7cbd5880 ("KVM: Pass MMU notifier range flags to
kvm_unmap_hva_range()") and b5331379bc62 ("KVM: arm64: Only reschedule
if MMU_NOTIFIER_RANGE_BLOCKABLE is not set") fix a "sleeping from invalid
context" BUG caused by unmap_stage2_range() attempting to reschedule when
called on the OOM path.
Unfortunately, these patches rely on the MMU notifier callback being
passed knowledge about whether or not blocking is permitted, which was
introduced in 4.19. Rather than backport this considerable amount of
infrastructure just for KVM on arm, simply remove the conditional
reschedule.
Cc: <stable(a)vger.kernel.org> # v4.14 only
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
---
virt/kvm/arm/mmu.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 3814cdad643a..7fe673248e98 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -307,12 +307,6 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
next = stage2_pgd_addr_end(addr, end);
if (!stage2_pgd_none(*pgd))
unmap_stage2_puds(kvm, pgd, addr, next);
- /*
- * If the range is too large, release the kvm->mmu_lock
- * to prevent starvation and lockup detector warnings.
- */
- if (next != end)
- cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
--
2.28.0.297.g1956fa8f8d-goog
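For readers unfamiliar with the primitive being removed: cond_resched_lock()
briefly drops the lock and yields the CPU, which is exactly what is forbidden
on the OOM (non-blockable) path. A rough userspace analogue, with a pthread
mutex standing in for the spinlock (a sketch, not kernel code):

#include <pthread.h>
#include <sched.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;

/* rough analogue of cond_resched_lock(): drop the lock, yield, retake it */
static void cond_resched_locked(void)
{
        pthread_mutex_unlock(&mmu_lock);
        sched_yield();  /* sleeping here is the bug when the caller is atomic */
        pthread_mutex_lock(&mmu_lock);
}

int main(void)
{
        pthread_mutex_lock(&mmu_lock);
        cond_resched_locked();  /* only legal from a blockable context */
        pthread_mutex_unlock(&mmu_lock);
        return 0;
}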
Upstream commits fdfe7cbd5880 ("KVM: Pass MMU notifier range flags to
kvm_unmap_hva_range()") and b5331379bc62 ("KVM: arm64: Only reschedule
if MMU_NOTIFIER_RANGE_BLOCKABLE is not set") fix a "sleeping from invalid
context" BUG caused by unmap_stage2_range() attempting to reschedule when
called on the OOM path.
Unfortunately, these patches rely on the MMU notifier callback being
passed knowledge about whether or not blocking is permitted, which was
introduced in 4.19. Rather than backport this considerable amount of
infrastructure just for KVM on arm, simply remove the conditional
reschedule.
Cc: <stable(a)vger.kernel.org> # v4.9 only
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
---
arch/arm/kvm/mmu.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index bb0d5e21d60b..b5ce1e81f945 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -298,12 +298,6 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
next = stage2_pgd_addr_end(addr, end);
if (!stage2_pgd_none(*pgd))
unmap_stage2_puds(kvm, pgd, addr, next);
- /*
- * If the range is too large, release the kvm->mmu_lock
- * to prevent starvation and lockup detector warnings.
- */
- if (next != end)
- cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
--
2.28.0.297.g1956fa8f8d-goog
From: Dirk Behme <dirk.behme(a)de.bosch.com>
The i2c-rcar driver utilizes the Generic Reset Controller kernel
feature, so select the RESET_CONTROLLER option when the I2C_RCAR
option is selected.
Fixes: 2b16fd63059ab9 ("i2c: rcar: handle RXDMA HW behaviour on Gen3")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Signed-off-by: Andy Lowe <andy_lowe(a)mentor.com>
Signed-off-by: Eugeniu Rosca <erosca(a)de.adit-jv.com>
---
drivers/i2c/busses/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 293e7a0760e7..bb80f0944d61 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -1181,6 +1181,7 @@ config I2C_RCAR
tristate "Renesas R-Car I2C Controller"
depends on ARCH_RENESAS || COMPILE_TEST
select I2C_SLAVE
+ select RESET_CONTROLLER
help
If you say yes to this option, support will be included for the
R-Car I2C controller.
--
2.28.0
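The dependency exists because the Gen3 RXDMA workaround pulses the module
reset through the generic reset framework; the consumer side looks roughly
like this (a sketch of typical reset-consumer usage, not the exact i2c-rcar
code):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/reset.h>

static int example_pulse_module_reset(struct device *dev)
{
        struct reset_control *rstc;

        /* the stubs return an error when CONFIG_RESET_CONTROLLER is off */
        rstc = devm_reset_control_get_exclusive(dev, NULL);
        if (IS_ERR(rstc))
                return PTR_ERR(rstc);

        /* assert and deassert the module's reset line */
        return reset_control_reset(rstc);
}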
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable(a)vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-3-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
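The part that matters for backports is how the notifier flags reach the
handler: handle_hva_to_gpa() forwards an opaque void *data that previously
was NULL. The same plumbing in plain, standalone C (hypothetical names;
RANGE_BLOCKABLE stands in for MMU_NOTIFIER_RANGE_BLOCKABLE):

#include <stdbool.h>
#include <stdio.h>

#define RANGE_BLOCKABLE 0x1  /* stand-in for MMU_NOTIFIER_RANGE_BLOCKABLE */

/* handlers get their context through an opaque pointer; before the fix,
 * NULL was passed and the blockable information was lost */
static int unmap_handler(void *data)
{
        unsigned int flags = *(unsigned int *)data;
        bool may_block = flags & RANGE_BLOCKABLE;

        printf("may_block=%d\n", may_block);
        return 0;
}

int main(void)
{
        unsigned int flags = RANGE_BLOCKABLE;

        return unmap_handler(&flags);  /* pass &flags instead of NULL */
}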
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fdfe7cbd58806522e799e2a50a15aee7f2cbb7b6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:24 +0100
Subject: [PATCH] KVM: Pass MMU notifier range flags to kvm_unmap_hva_range()
The 'flags' field of 'struct mmu_notifier_range' is used to indicate
whether invalidate_range_{start,end}() are permitted to block. In the
case of kvm_mmu_notifier_invalidate_range_start(), this field is not
forwarded on to the architecture-specific implementation of
kvm_unmap_hva_range() and therefore the backend cannot sensibly decide
whether or not to block.
Add an extra 'flags' parameter to kvm_unmap_hva_range() so that
architectures are aware as to whether or not they are permitted to block.
Cc: <stable(a)vger.kernel.org>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-2-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 65568b23868a..e52c927aade5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -473,7 +473,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, unsigned flags);
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 0121ef2c7c8d..dc351802ff18 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -2213,7 +2213,7 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat
}
int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end, unsigned flags)
{
if (!kvm->arch.mmu.pgd)
return 0;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index d35eaed1668f..825d337a505a 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -969,7 +969,7 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, unsigned flags);
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 87fa8d8a1031..28c366d307e7 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -486,7 +486,8 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
return 1;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e020d269416d..10ded83414de 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -58,7 +58,8 @@
#define KVM_ARCH_WANT_MMU_NOTIFIER
extern int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end,
+ unsigned flags);
extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 41fedec69ac3..49db50d1db04 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -834,7 +834,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
}
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index d6c1069e9954..ed0c9c43d0cf 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -734,7 +734,8 @@ static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
/* kvm_unmap_hva flushes everything anyways */
kvm_unmap_hva(kvm, start);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5ab3af7275d8..5303dbc5c9bc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1596,7 +1596,8 @@ asmlinkage void kvm_spurious_fault(void);
_ASM_EXTABLE(666b, 667b)
#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4e03841f053d..a5d0207e7189 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1916,7 +1916,8 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2c2c0254c2d8..4eaa4e46c7d0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -482,7 +482,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
- need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end);
+ need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
+ range->flags);
need_tlb_flush |= kvm->tlbs_dirty;
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
It looks like this GPU core triggers an abort when
reading VIVS_HI_CHIP_PRODUCT_ID and/or VIVS_HI_CHIP_CUSTOMER_ID.
I looked at different versions of Vivante's kernel driver and did
not find anything about this issue or a feature flag that could be
used. So take the simplest route and do not read these two registers
on the affected GPU core.
Signed-off-by: Christian Gmeiner <christian.gmeiner(a)gmail.com>
Reported-by: Josua Mayer <josua.mayer(a)jm0.eu>
Fixes: 815e45bbd4d3 ("drm/etnaviv: determine product, customer and eco id")
Cc: stable(a)vger.kernel.org
---
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index d5a4cd85a0f6..d3906688c2b3 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -337,10 +337,17 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
gpu->identity.revision = gpu_read(gpu, VIVS_HI_CHIP_REV);
- gpu->identity.product_id = gpu_read(gpu, VIVS_HI_CHIP_PRODUCT_ID);
- gpu->identity.customer_id = gpu_read(gpu, VIVS_HI_CHIP_CUSTOMER_ID);
gpu->identity.eco_id = gpu_read(gpu, VIVS_HI_CHIP_ECO_ID);
+ /*
+ * Reading these two registers on GC600 rev 0x19 result in a
+ * unhandled fault: external abort on non-linefetch
+ */
+ if (!etnaviv_is_model_rev(gpu, GC600, 0x19)) {
+ gpu->identity.product_id = gpu_read(gpu, VIVS_HI_CHIP_PRODUCT_ID);
+ gpu->identity.customer_id = gpu_read(gpu, VIVS_HI_CHIP_CUSTOMER_ID);
+ }
+
/*
* !!!! HACK ALERT !!!!
* Because people change device IDs without letting software
--
2.26.2
As the error capture will compress user buffers as directed by the
user, it can take an arbitrary amount of time and space. Break up the
compression loops with a call to cond_resched(), which will allow other
processes to schedule (avoiding soft lockups) and also serve as a
warning should we try to make this loop atomic in the future.
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6a3a2ce0b394..6551ff04d5a6 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -311,6 +311,8 @@ static int compress_page(struct i915_vma_compress *c,
if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK)
return -EIO;
+
+ cond_resched();
} while (zstream->avail_in);
/* Fallback to uncompressed if we increase size? */
@@ -397,6 +399,7 @@ static int compress_page(struct i915_vma_compress *c,
if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
memcpy(ptr, src, PAGE_SIZE);
dst->pages[dst->page_count++] = ptr;
+ cond_resched();
return 0;
}
--
2.20.1
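The shape of the fix, in a standalone userspace form: bound the work done per
deflate pass and yield between passes, with sched_yield() standing in for
cond_resched() (a sketch, not the driver code):

#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <zlib.h>

int main(void)
{
        static unsigned char in[1 << 20], out[1 << 20];
        z_stream zs = { 0 };

        memset(in, 'A', sizeof(in));
        deflateInit(&zs, Z_DEFAULT_COMPRESSION);
        zs.next_in = in;
        zs.avail_in = sizeof(in);

        /* bounded work per pass, with a yield point between passes */
        do {
                zs.next_out = out;
                zs.avail_out = 4096;  /* one page-sized chunk per pass */
                if (deflate(&zs, Z_NO_FLUSH) != Z_OK)
                        break;
                /* a real consumer would store 4096 - zs.avail_out bytes here */
                sched_yield();  /* userspace stand-in for cond_resched() */
        } while (zs.avail_in);

        zs.next_out = out;
        zs.avail_out = sizeof(out);
        deflate(&zs, Z_FINISH);
        printf("consumed %lu bytes, produced %lu bytes\n",
               (unsigned long)zs.total_in, (unsigned long)zs.total_out);
        deflateEnd(&zs);
        return 0;
}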
It looks like this GPU core triggers an abort when
reading VIVS_HI_CHIP_PRODUCT_ID and/or VIVS_HI_CHIP_ECO_ID.
I looked at different versions of Vivante's kernel driver and did
not find anything about this issue or a feature flag that could be
used. So take the simplest route and do not read these two registers
on the affected GPU core.
Signed-off-by: Christian Gmeiner <christian.gmeiner(a)gmail.com>
Reported-by: Josua Mayer <josua.mayer(a)jm0.eu>
Fixes: 815e45bbd4d3 ("drm/etnaviv: determine product, customer and eco id")
Cc: stable(a)vger.kernel.org
---
Changelog:
V2:
- use correct register for conditional reads.
---
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index d5a4cd85a0f6..c6404b8d067f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -337,9 +337,16 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
gpu->identity.revision = gpu_read(gpu, VIVS_HI_CHIP_REV);
- gpu->identity.product_id = gpu_read(gpu, VIVS_HI_CHIP_PRODUCT_ID);
gpu->identity.customer_id = gpu_read(gpu, VIVS_HI_CHIP_CUSTOMER_ID);
- gpu->identity.eco_id = gpu_read(gpu, VIVS_HI_CHIP_ECO_ID);
+
+ /*
+ * Reading these two registers on GC600 rev 0x19 result in a
+ * unhandled fault: external abort on non-linefetch
+ */
+ if (!etnaviv_is_model_rev(gpu, GC600, 0x19)) {
+ gpu->identity.product_id = gpu_read(gpu, VIVS_HI_CHIP_PRODUCT_ID);
+ gpu->identity.eco_id = gpu_read(gpu, VIVS_HI_CHIP_ECO_ID);
+ }
/*
* !!!! HACK ALERT !!!!
--
2.26.2
From: Ding Hui <dinghui(a)sangfor.com.cn>
Some device drivers call libusb_clear_halt when target ep queue
is not empty. (eg. spice client connected to qemu for usb redir)
Before commit f5249461b504 ("xhci: Clear the host side toggle
manually when endpoint is soft reset"), that worked well.
But now, we get the error log:
EP not empty, refuse reset
xhci_endpoint_reset failed and left ep_state's EP_SOFT_CLEAR_TOGGLE
bit still set
So all subsequent URB submissions to the ep will fail with the
warning:
Can't enqueue URB while manually clearing toggle
We need to clear the ep_state EP_SOFT_CLEAR_TOGGLE bit after
xhci_endpoint_reset(), even if it failed.
Fixes: f5249461b504 ("xhci: Clear the host side toggle manually when endpoint is soft reset")
Cc: stable <stable(a)vger.kernel.org> # v4.17+
Signed-off-by: Ding Hui <dinghui(a)sangfor.com.cn>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 3c41b14ecce7..e9884ae9c77d 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3236,10 +3236,11 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
wait_for_completion(cfg_cmd->completion);
- ep->ep_state &= ~EP_SOFT_CLEAR_TOGGLE;
xhci_free_command(xhci, cfg_cmd);
cleanup:
xhci_free_command(xhci, stop_cmd);
+ if (ep->ep_state & EP_SOFT_CLEAR_TOGGLE)
+ ep->ep_state &= ~EP_SOFT_CLEAR_TOGGLE;
}
static int xhci_check_streams_endpoint(struct xhci_hcd *xhci,
--
2.17.1
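Reduced to its essence, the fix moves the flag clear past the error exit so
both paths hit it. A standalone sketch with a hypothetical endpoint_reset()
and a stand-in flag bit:

#include <stdio.h>

#define EP_SOFT_CLEAR_TOGGLE 0x1  /* stand-in for the xhci flag bit */

/* hypothetical reset: the flag must not survive either exit path */
static int endpoint_reset(unsigned int *ep_state, int fail_early)
{
        int err = 0;

        *ep_state |= EP_SOFT_CLEAR_TOGGLE;
        if (fail_early) {
                err = -1;
                goto cleanup;  /* before the fix, this path skipped the clear */
        }
        /* ... issue the stop and config commands here ... */
cleanup:
        *ep_state &= ~EP_SOFT_CLEAR_TOGGLE;  /* cleared on success and failure */
        return err;
}

int main(void)
{
        unsigned int state = 0;

        endpoint_reset(&state, 1);
        printf("state after failed reset: %#x\n", state);
        return 0;
}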
This is the start of the stable review cycle for the 4.14.195 release.
There are 50 patches in this series, all of which will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 26 Aug 2020 08:23:34 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.195-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.14.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.14.195-rc1
Stephen Boyd <sboyd(a)kernel.org>
clk: Evict unregistered clks from parent caches
Juergen Gross <jgross(a)suse.com>
xen: don't reschedule in preemption off sections
Peter Xu <peterx(a)redhat.com>
mm/hugetlb: fix calculation of adjust_range_if_pmd_sharing_possible
Al Viro <viro(a)zeniv.linux.org.uk>
do_epoll_ctl(): clean the failure exits up a bit
Marc Zyngier <maz(a)kernel.org>
epoll: Keep a reference on files added to the check list
Vasant Hegde <hegdevasant(a)linux.vnet.ibm.com>
powerpc/pseries: Do not initiate shutdown when system is running on UPS
Tom Rix <trix(a)redhat.com>
net: dsa: b53: check for timeout
Haiyang Zhang <haiyangz(a)microsoft.com>
hv_netvsc: Fix the queue_mapping in netvsc_vf_xmit()
Jiri Wiesner <jwiesner(a)suse.com>
bonding: fix active-backup failover for current ARP slave
Alex Williamson <alex.williamson(a)redhat.com>
vfio/type1: Add proper error unwind for vfio_iommu_replay()
Dinghao Liu <dinghao.liu(a)zju.edu.cn>
ASoC: intel: Fix memleak in sst_media_open
Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
ASoC: msm8916-wcd-analog: fix register Interrupt offset
Cong Wang <xiyou.wangcong(a)gmail.com>
bonding: fix a potential double-unregister
Jarod Wilson <jarod(a)redhat.com>
bonding: show saner speed for broadcast mode
Fugang Duan <fugang.duan(a)nxp.com>
net: fec: correct the error path for regulator disable in probe
Grzegorz Szczurek <grzegorzx.szczurek(a)intel.com>
i40e: Fix crash during removing i40e driver
Przemyslaw Patynowski <przemyslawx.patynowski(a)intel.com>
i40e: Set RX_ONLY mode for unicast promiscuous on VLAN
Eric Sandeen <sandeen(a)redhat.com>
ext4: fix potential negative array index in do_split()
Luc Van Oostenryck <luc.vanoostenryck(a)gmail.com>
alpha: fix annotation of io{read,write}{16,32}be()
Eiichi Tsukata <devel(a)etsukata.com>
xfs: Fix UBSAN null-ptr-deref in xfs_sysfs_init
Mao Wenan <wenan.mao(a)linux.alibaba.com>
virtio_ring: Avoid loop when vq is broken in virtqueue_poll
Javed Hasan <jhasan(a)marvell.com>
scsi: libfc: Free skb in fc_disc_gpn_id_resp() for valid cases
Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
cpufreq: intel_pstate: Fix cpuinfo_max_freq when MSR_TURBO_RATIO_LIMIT is 0
Zhe Li <lizhe67(a)huawei.com>
jffs2: fix UAF problem
Darrick J. Wong <darrick.wong(a)oracle.com>
xfs: fix inode quota reservation checks
Greg Ungerer <gerg(a)linux-m68k.org>
m68knommu: fix overwriting of bits in ColdFire V3 cache control
Xiongfeng Wang <wangxiongfeng2(a)huawei.com>
Input: psmouse - add a newline when printing 'proto' by sysfs
Evgeny Novikov <novikov(a)ispras.ru>
media: vpss: clean up resources in init
Huacai Chen <chenhc(a)lemote.com>
rtc: goldfish: Enable interrupt in set_alarm() when necessary
Chuhong Yuan <hslester96(a)gmail.com>
media: budget-core: Improve exception handling in budget_register()
Stanley Chu <stanley.chu(a)mediatek.com>
scsi: ufs: Add DELAY_BEFORE_LPM quirk for Micron devices
Lukas Wunner <lukas(a)wunner.de>
spi: Prevent adding devices below an unregistering controller
Yang Shi <shy828301(a)gmail.com>
mm/memory.c: skip spurious TLB flush for retried page fault
zhangyi (F) <yi.zhang(a)huawei.com>
jbd2: add the missing unlock_buffer() in the error path of jbd2_write_superblock()
Jan Kara <jack(a)suse.cz>
ext4: fix checking of directory entry validity for inline directories
Charan Teja Reddy <charante(a)codeaurora.org>
mm, page_alloc: fix core hung in free_pcppages_bulk()
Doug Berger <opendmb(a)gmail.com>
mm: include CMA pages in lowmem_reserve at boot
Wei Yongjun <weiyongjun1(a)huawei.com>
kernel/relay.c: fix memleak on destroy relay channel
Jann Horn <jannh(a)google.com>
romfs: fix uninitialized memory leak in romfs_dev_read()
Josef Bacik <josef(a)toxicpanda.com>
btrfs: sysfs: use NOFS for device creation
Qu Wenruo <wqu(a)suse.com>
btrfs: inode: fix NULL pointer dereference if inode doesn't need compression
Nikolay Borisov <nborisov(a)suse.com>
btrfs: Move free_pages_out label in inline extent handling branch in compress_file_range
Josef Bacik <josef(a)toxicpanda.com>
btrfs: don't show full path of bind mounts in subvol=
Marcos Paulo de Souza <mpdesouza(a)suse.com>
btrfs: export helpers for subvolume name/id resolution
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc: Allow 4224 bytes of stack expansion for the signal frame
Christophe Leroy <christophe.leroy(a)c-s.fr>
powerpc/mm: Only read faulting instruction when necessary in do_page_fault()
Hugh Dickins <hughd(a)google.com>
khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()
Hugh Dickins <hughd(a)google.com>
khugepaged: khugepaged_test_exit() check mmget_still_valid()
Masami Hiramatsu <mhiramat(a)kernel.org>
perf probe: Fix memory leakage when the probe point is not found
Chris Wilson <chris(a)chris-wilson.co.uk>
drm/vgem: Replace opencoded version of drm_gem_dumb_map_offset()
-------------
Diffstat:
Makefile | 4 +-
arch/alpha/include/asm/io.h | 8 +--
arch/m68k/include/asm/m53xxacr.h | 6 +-
arch/powerpc/mm/fault.c | 55 ++++++++++++------
arch/powerpc/platforms/pseries/ras.c | 1 -
drivers/clk/clk.c | 52 +++++++++++++----
drivers/cpufreq/intel_pstate.c | 1 +
drivers/gpu/drm/vgem/vgem_drv.c | 27 ---------
drivers/input/mouse/psmouse-base.c | 2 +-
drivers/media/pci/ttpci/budget-core.c | 11 +++-
drivers/media/platform/davinci/vpss.c | 20 +++++--
drivers/net/bonding/bond_main.c | 42 ++++++++++++--
drivers/net/dsa/b53/b53_common.c | 2 +
drivers/net/ethernet/freescale/fec_main.c | 4 +-
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 2 +-
drivers/net/ethernet/intel/i40e/i40e_common.c | 35 ++++++++---
drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +
drivers/net/hyperv/netvsc_drv.c | 2 +-
drivers/rtc/rtc-goldfish.c | 1 +
drivers/scsi/libfc/fc_disc.c | 12 +++-
drivers/scsi/ufs/ufs_quirks.h | 1 +
drivers/scsi/ufs/ufshcd.c | 2 +
drivers/spi/Kconfig | 3 +
drivers/spi/spi.c | 21 ++++++-
drivers/vfio/vfio_iommu_type1.c | 71 +++++++++++++++++++++--
drivers/virtio/virtio_ring.c | 3 +
drivers/xen/preempt.c | 2 +-
fs/btrfs/ctree.h | 2 +
fs/btrfs/export.c | 8 +--
fs/btrfs/export.h | 5 ++
fs/btrfs/inode.c | 23 +++++---
fs/btrfs/super.c | 18 ++++--
fs/btrfs/sysfs.c | 4 ++
fs/eventpoll.c | 19 +++---
fs/ext4/namei.c | 22 +++++--
fs/jbd2/journal.c | 4 +-
fs/jffs2/dir.c | 6 +-
fs/romfs/storage.c | 4 +-
fs/xfs/xfs_sysfs.h | 6 +-
fs/xfs/xfs_trans_dquot.c | 2 +-
kernel/relay.c | 1 +
mm/hugetlb.c | 24 ++++----
mm/khugepaged.c | 7 +--
mm/memory.c | 3 +
mm/page_alloc.c | 7 ++-
sound/soc/codecs/msm8916-wcd-analog.c | 4 +-
sound/soc/intel/atom/sst-mfld-platform-pcm.c | 5 +-
tools/perf/util/probe-finder.c | 2 +-
48 files changed, 403 insertions(+), 166 deletions(-)
The PNY Pro Elite USB 3.1 Gen 2 device (SSD) doesn't respond to the ATA_12
pass-through command (i.e. it just hangs). If it doesn't support this
command, it should still respond properly to the host. Let's just add a
quirk so that we can move forward with other operations.
Cc: stable(a)vger.kernel.org
Signed-off-by: Thinh Nguyen <thinhn(a)synopsys.com>
---
drivers/usb/storage/unusual_uas.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 162b09d69f62..971f8a4354c8 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -80,6 +80,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_BROKEN_FUA),
+/* Reported-by: Thinh Nguyen <thinhn(a)synopsys.com> */
+UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999,
+ "PNY",
+ "Pro Elite SSD",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_NO_ATA_1X),
+
/* Reported-by: Hans de Goede <hdegoede(a)redhat.com> */
UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
"VIA",
base-commit: d5643d2249b279077427b2c2b2ffae9b70c95b0b
--
2.28.0
On 04/08/2020 16:13, Adam Ford wrote:
>
>
> On Thu, Jul 9, 2020 at 7:12 AM Adam Ford <aford173(a)gmail.com> wrote:
>
> There appears to be a timing issue where using a divider of 32 breaks
> the DSS for OMAP36xx despite the TRM stating 32 is a valid
> number. Through experimentation, it appears that 31 works.
>
> This same fix was issued for kernels 4.5+. However, between
> kernels 4.4 and 4.5, the directory structure was changed when the
> dss directory was moved inside the omapfb directory. That broke the
> patch on kernels older than 4.5, because it didn't permit the patch
> to apply cleanly for 4.4 and older.
>
> A similar patch was applied to the 3.16 kernel already, but not to 4.4.
> Commit 4b911101a5cd ("drm/omap: fix max fclk divider for omap36xx") is
> on the 3.16 stable branch with notes from Ben about the path change.
>
> Since this was applied for 3.16 already, this patch is for kernels
> 3.17 through 4.4 only.
>
> Fixes: f7018c213502 ("video: move fbdev to drivers/video/fbdev")
>
> Cc: <stable(a)vger.kernel.org> #3.17 - 4.4
> CC: <tomi.valkeinen(a)ti.com>
> Signed-off-by: Adam Ford <aford173(a)gmail.com>
>
>
> Tomi,
>
> Can you comment on this? The 4.4 branch is still waiting for this fix. The other branches are fixed.
Looks good to me.
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ti.com>
Tomi
--
Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
To support long-running hypercalls, xen_maybe_preempt_hcall() calls
cond_resched() in case a hypercall marked as preemptible has been
interrupted.
Normally this is no problem, as only hypercalls done via some ioctl()s
are marked to be preemptible. In rare cases when during such a
preemptible hypercall an interrupt occurs and any softirq action is
started from irq_exit(), a further hypercall issued by the softirq
handler will be regarded as preemptible, too. This might lead to
rescheduling in spite of the softirq handler potentially having set
preempt_disable(), leading to splats like:
BUG: sleeping function called from invalid context at drivers/xen/preempt.c:37
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 20775, name: xl
INFO: lockdep is turned off.
CPU: 1 PID: 20775 Comm: xl Tainted: G D W 5.4.46-1_prgmr_debug.el7.x86_64 #1
Call Trace:
<IRQ>
dump_stack+0x8f/0xd0
___might_sleep.cold.76+0xb2/0x103
xen_maybe_preempt_hcall+0x48/0x70
xen_do_hypervisor_callback+0x37/0x40
RIP: e030:xen_hypercall_xen_version+0xa/0x20
Code: ...
RSP: e02b:ffffc900400dcc30 EFLAGS: 00000246
RAX: 000000000004000d RBX: 0000000000000200 RCX: ffffffff8100122a
RDX: ffff88812e788000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffffffff83ee3ad0 R08: 0000000000000001 R09: 0000000000000001
R10: 0000000000000000 R11: 0000000000000246 R12: ffff8881824aa0b0
R13: 0000000865496000 R14: 0000000865496000 R15: ffff88815d040000
? xen_hypercall_xen_version+0xa/0x20
? xen_force_evtchn_callback+0x9/0x10
? check_events+0x12/0x20
? xen_restore_fl_direct+0x1f/0x20
? _raw_spin_unlock_irqrestore+0x53/0x60
? debug_dma_sync_single_for_cpu+0x91/0xc0
? _raw_spin_unlock_irqrestore+0x53/0x60
? xen_swiotlb_sync_single_for_cpu+0x3d/0x140
? mlx4_en_process_rx_cq+0x6b6/0x1110 [mlx4_en]
? mlx4_en_poll_rx_cq+0x64/0x100 [mlx4_en]
? net_rx_action+0x151/0x4a0
? __do_softirq+0xed/0x55b
? irq_exit+0xea/0x100
? xen_evtchn_do_upcall+0x2c/0x40
? xen_do_hypervisor_callback+0x29/0x40
</IRQ>
? xen_hypercall_domctl+0xa/0x20
? xen_hypercall_domctl+0x8/0x20
? privcmd_ioctl+0x221/0x990 [xen_privcmd]
? do_vfs_ioctl+0xa5/0x6f0
? ksys_ioctl+0x60/0x90
? trace_hardirqs_off_thunk+0x1a/0x20
? __x64_sys_ioctl+0x16/0x20
? do_syscall_64+0x62/0x250
? entry_SYSCALL_64_after_hwframe+0x49/0xbe
Fix that by testing preempt_count() before calling cond_resched().
In kernel 5.8 this can't happen any more due to the entry code rework
(more than 100 patches, so not a candidate for backporting).
The issue was introduced in kernel 4.3, so this patch should go into
all stable kernels in [4.3 ... 5.7].
Reported-by: Sarah Newman <srn(a)prgmr.com>
Fixes: 0fa2f5cb2b0ecd8 ("sched/preempt, xen: Use need_resched() instead of should_resched()")
Cc: Sarah Newman <srn(a)prgmr.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Juergen Gross <jgross(a)suse.com>
Tested-by: Chris Brannon <cmb(a)prgmr.com>
---
drivers/xen/preempt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
index 17240c5325a3..6ad87b5c95ed 100644
--- a/drivers/xen/preempt.c
+++ b/drivers/xen/preempt.c
@@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
asmlinkage __visible void xen_maybe_preempt_hcall(void)
{
if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
- && need_resched())) {
+ && need_resched() && !preempt_count())) {
/*
* Clear flag as we may be rescheduled on a different
* cpu.
--
2.26.2
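The guard is the whole fix: only reschedule when the current context holds no
preemption-disable nesting. A standalone analogue with a plain counter
standing in for preempt_count() (a sketch, not the Xen code):

#include <sched.h>
#include <stdio.h>

static int preempt_depth;  /* stand-in for preempt_count() */

/* yield only when the current context is allowed to block */
static void maybe_resched(int need_resched)
{
        if (need_resched && !preempt_depth)
                sched_yield();
}

int main(void)
{
        preempt_depth++;   /* as if a softirq handler disabled preemption */
        maybe_resched(1);  /* now a no-op instead of a sleep-in-atomic bug */
        preempt_depth--;
        printf("done\n");
        return 0;
}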
Dear stable kernel maintainers,
Please consider the attached mbox file, which contains 9 patches that
cherry-pick cleanly onto 5.4:
1. commit fcf1b6a35c16 ("Documentation/llvm: add documentation on
building w/ Clang/LLVM")
2. commit 0f44fbc162b7 ("Documentation/llvm: fix the name of llvm-size")
3. commit 63b903dfebde ("net: wan: wanxl: use allow to pass
CROSS_COMPILE_M68k for rebuilding firmware")
4. commit 734f3719d343 ("net: wan: wanxl: use $(M68KCC) instead of
$(M68KAS) for rebuilding firmware")
5. commit eefb8c124fd9 ("x86/boot: kbuild: allow readelf executable to
be specified")
6. commit 94f7345b7124 ("kbuild: remove PYTHON2 variable")
7. commit aa824e0c962b ("kbuild: remove AS variable")
8. commit 7e20e47c70f8 ("kbuild: replace AS=clang with LLVM_IAS=1")
9. commit a0d1c951ef08 ("kbuild: support LLVM=1 to switch the default
tools to Clang/LLVM")
This series improves/simplifies building kernels with Clang and LLVM
utilities; it will help the various CI systems testing kernels built
with Clang+LLVM utilities (in fact I will be pointing to this, if
accepted, next week at plumbers with those CI system maintainers), and
we will make immediate use of it in Android (see also:
https://android-review.googlesource.com/c/platform/prebuilts/clang/host/lin…).
We can always carry it out of tree in Android, but I think the series
is fairly tame, and would prefer not to.
I only particularly care about 5+8+9 (eefb8c124fd9, 7e20e47c70f8, and
a0d1c951ef08), but the rest are required for them to cherry-pick
cleanly. I don't mind separating those three out, though they won't
be clean cherry-picks at that point. It might be good to have
Masahiro review the series. If accepted, I plan to wire up test
coverage of these immediately in
https://github.com/ClangBuiltLinux/continuous-integration/issues/300.
Most of the above landed in v5.7-rc1, with 94f7345b7124 landing in
v5.6-rc1 and eefb8c124fd9 landing in v5.5-rc3.
--
Thanks,
~Nick Desaulniers
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 75802ca66354a39ab8e35822747cd08b3384a99a Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx(a)redhat.com>
Date: Thu, 6 Aug 2020 23:26:11 -0700
Subject: [PATCH] mm/hugetlb: fix calculation of
adjust_range_if_pmd_sharing_possible
This is found by code observation only.
Firstly, the worst-case scenario should assume the whole range was covered
by pmd sharing. The old algorithm might not work as expected for ranges
like (1g-2m, 1g+2m): it adjusts the range to (0, 1g+2m), while the
expected range is (0, 2g).
While at it, remove the loop, since it should not be required. With that,
the new code should also be faster when the invalidated range is huge.
Mike said:
: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
: adjust to (0, 1g+2m) which is incorrect.
:
: We should cc stable. The original reason for adjusting the range was to
: prevent data corruption (getting wrong page). Since the range is not
: always adjusted correctly, the potential for corruption still exists.
:
: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
: is only gong to be called in two cases:
:
: 1) for a single page
: 2) for range == entire vma
:
: In those cases, the current code should produce the correct results.
:
: To be safe, let's just cc stable.
Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 27556d4d49fe..e52c878940bb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5314,25 +5314,21 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
- unsigned long check_addr;
+ unsigned long a_start, a_end;
if (!(vma->vm_flags & VM_MAYSHARE))
return;
- for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
- unsigned long a_start = check_addr & PUD_MASK;
- unsigned long a_end = a_start + PUD_SIZE;
+ /* Extend the range to be PUD aligned for a worst case scenario */
+ a_start = ALIGN_DOWN(*start, PUD_SIZE);
+ a_end = ALIGN(*end, PUD_SIZE);
- /*
- * If sharing is possible, adjust start/end if necessary.
- */
- if (range_in_vma(vma, a_start, a_end)) {
- if (a_start < *start)
- *start = a_start;
- if (a_end > *end)
- *end = a_end;
- }
- }
+ /*
+ * Intersect the range with the vma range, since pmd sharing won't be
+ * across vma after all
+ */
+ *start = max(vma->vm_start, a_start);
+ *end = min(vma->vm_end, a_end);
}
/*
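Plugging the commit's (1g-2m, 1g+2m) example into the new arithmetic shows
the worst-case expansion. A standalone sketch, with the ALIGN helpers
re-declared here for illustration and PUD_SIZE assumed to be 1 GiB:

#include <stdio.h>

#define PUD_SIZE (1UL << 30)  /* 1 GiB, assuming the common configuration */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long vm_start = 0, vm_end = 2UL << 30;   /* vma (0, 2g) */
        unsigned long start = (1UL << 30) - (2UL << 20);  /* 1g - 2m */
        unsigned long end = (1UL << 30) + (2UL << 20);    /* 1g + 2m */

        /* new algorithm: extend to PUD alignment, then clamp to the vma */
        unsigned long a_start = ALIGN_DOWN(start, PUD_SIZE);
        unsigned long a_end = ALIGN_UP(end, PUD_SIZE);
        unsigned long s = a_start > vm_start ? a_start : vm_start;
        unsigned long e = a_end < vm_end ? a_end : vm_end;

        printf("adjusted range: [%#lx, %#lx)\n", s, e);  /* [0, 2g) as expected */
        return 0;
}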
Hi Greg,
Here's the backport for a couple of epoll fixes that don't cleanly
backport to anything older than 5.7. These backports cleanly apply
from 5.4 all the way to 4.4.
Thanks,
M.
Al Viro (1):
do_epoll_ctl(): clean the failure exits up a bit
Marc Zyngier (1):
epoll: Keep a reference on files added to the check list
fs/eventpoll.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
--
2.27.0
From: Michael Ellerman <mpe(a)ellerman.id.au>
commit 63dee5df43a31f3844efabc58972f0a206ca4534 upstream.
We have powerpc specific logic in our page fault handling to decide if
an access to an unmapped address below the stack pointer should expand
the stack VMA.
The code was originally added in 2004 "ported from 2.4". The rough
logic is that the stack is allowed to grow to 1MB with no extra
checking. Over 1MB the access must be within 2048 bytes of the stack
pointer, or be from a user instruction that updates the stack pointer.
The 2048 byte allowance below the stack pointer is there to cover the
288 byte "red zone" as well as the "about 1.5kB" needed by the signal
delivery code.
Unfortunately since then the signal frame has expanded, and is now
4224 bytes on 64-bit kernels with transactional memory enabled. This
means if a process has consumed more than 1MB of stack, and its stack
pointer lies less than 4224 bytes from the next page boundary, signal
delivery will fault when trying to expand the stack and the process
will see a SEGV.
The total size of the signal frame is the size of struct rt_sigframe
(which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on
64-bit).
The 2048 byte allowance was correct until 2008 as the signal frame
was:
struct rt_sigframe {
struct ucontext uc; /* 0 1440 */
/* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */
long unsigned int _unused[2]; /* 1440 16 */
unsigned int tramp[6]; /* 1456 24 */
struct siginfo * pinfo; /* 1480 8 */
void * puc; /* 1488 8 */
struct siginfo info; /* 1496 128 */
/* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */
char abigap[288]; /* 1624 288 */
/* size: 1920, cachelines: 15, members: 7 */
/* padding: 8 */
};
1920 + 128 = 2048
Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore,
ptrace and signal support") (Jul 2008) the signal frame expanded to
2304 bytes:
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */ <--
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
long unsigned int _unused[2]; /* 1696 16 */
unsigned int tramp[6]; /* 1712 24 */
struct siginfo * pinfo; /* 1736 8 */
void * puc; /* 1744 8 */
struct siginfo info; /* 1752 128 */
/* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */
char abigap[288]; /* 1880 288 */
/* size: 2176, cachelines: 17, members: 7 */
/* padding: 8 */
};
2176 + 128 = 2304
At this point we should have been exposed to the bug, though as far as
I know it was never reported. I no longer have a system old enough to
easily test on.
Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a
grow-down stack segment") caused our stack expansion code to never
trigger, as there was always a VMA found for a write up to PAGE_SIZE
below r1.
That meant the bug was hidden as we continued to expand the signal
frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory
state to the signal context") (Feb 2013):
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
struct ucontext uc_transact; /* 1696 1696 */ <--
/* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
long unsigned int _unused[2]; /* 3392 16 */
unsigned int tramp[6]; /* 3408 24 */
struct siginfo * pinfo; /* 3432 8 */
void * puc; /* 3440 8 */
struct siginfo info; /* 3448 128 */
/* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
char abigap[288]; /* 3576 288 */
/* size: 3872, cachelines: 31, members: 8 */
/* padding: 8 */
/* last cacheline: 32 bytes */
};
3872 + 128 = 4000
And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit
userspace to 512 bytes") (Feb 2014):
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
struct ucontext uc_transact; /* 1696 1696 */
/* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
long unsigned int _unused[2]; /* 3392 16 */
unsigned int tramp[6]; /* 3408 24 */
struct siginfo * pinfo; /* 3432 8 */
void * puc; /* 3440 8 */
struct siginfo info; /* 3448 128 */
/* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
char abigap[512]; /* 3576 512 */ <--
/* size: 4096, cachelines: 32, members: 8 */
/* padding: 8 */
};
4096 + 128 = 4224
Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard
gap, between vmas") exposed us to the existing bug, because it changed
the stack VMA to be the correct/real size, meaning our stack expansion
code is now triggered.
Fix it by increasing the allowance to 4224 bytes.
Hard-coding 4224 is obviously unsafe against future expansions of the
signal frame in the same way as the existing code. We can't easily use
sizeof() because the signal frame structure is not in a header. We
will either fix that, or rip out all the custom stack expansion
checking logic entirely.
Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support")
Cc: stable(a)vger.kernel.org # v2.6.27+
Reported-by: Tom Lane <tgl(a)sss.pgh.pa.us>
Tested-by: Daniel Axtens <dja(a)axtens.net>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20200724092528.1578671-2-mpe@ellerman.id.au
Signed-off-by: Daniel Axtens <dja(a)axtens.net>
---
arch/powerpc/mm/fault.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 2791f568bdb2..3e4fb430ae45 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -192,6 +192,9 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
return MM_FAULT_CONTINUE;
}
+// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
+#define SIGFRAME_MAX_SIZE (4096 + 128)
+
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
* for a data fault, SRR1 for an instruction fault. For 400-family processors
@@ -341,7 +344,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
/*
* N.B. The POWER/Open ABI allows programs to access up to
* 288 bytes below the stack pointer.
- * The kernel signal delivery code writes up to about 1.5kB
+ * The kernel signal delivery code writes up to about 4kB
* below the stack pointer (r1) before decrementing it.
* The exec code can write slightly over 640kB to the stack
* before setting the user r1. Thus we allow the stack to
@@ -365,7 +368,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* between the last mapped region and the stack will
* expand the stack rather than segfaulting.
*/
- if (address + 2048 < uregs->gpr[1] && !store_update_sp)
+ if (address + SIGFRAME_MAX_SIZE < uregs->gpr[1] && !store_update_sp)
goto bad_area;
}
if (expand_stack(vma, address))
--
2.25.1
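The arithmetic of the fix, in a standalone sketch (hypothetical register
values; the comparisons mirror the check in do_page_fault()):

#include <stdio.h>

#define SIGFRAME_MAX_SIZE (4096 + 128)  /* rt_sigframe + __SIGNAL_FRAMESIZE */

int main(void)
{
        unsigned long r1 = 0x7fff0000UL;    /* hypothetical stack pointer */
        unsigned long address = r1 - 4200;  /* a write inside the signal frame */

        /* old check: more than 2048 bytes below r1 was treated as a bad area */
        printf("old check rejects: %d\n", address + 2048 < r1);
        /* new check: allow the full 4224-byte signal frame */
        printf("new check rejects: %d\n", address + SIGFRAME_MAX_SIZE < r1);
        return 0;
}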
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 90a9b102eddf6a3f987d15f4454e26a2532c1c98 Mon Sep 17 00:00:00 2001
From: Vasant Hegde <hegdevasant(a)linux.vnet.ibm.com>
Date: Thu, 20 Aug 2020 11:48:44 +0530
Subject: [PATCH] powerpc/pseries: Do not initiate shutdown when system is
running on UPS
As per PAPR we have to look for both EPOW sensor value and event
modifier to identify the type of event and take appropriate action.
LoPAPR v1.1 section 10.2.2 includes table 136 "EPOW Action Codes":
SYSTEM_SHUTDOWN 3
The system must be shut down. An EPOW-aware OS logs the EPOW error
log information, then schedules the system to be shut down to begin
after an OS-defined delay interval (default is 10 minutes).
Then in section 10.3.2.2.8 there is table 146 "Platform Event Log
Format, Version 6, EPOW Section", which includes the "EPOW Event
Modifier":
For EPOW sensor value = 3
0x01 = Normal system shutdown with no additional delay
0x02 = Loss of utility power, system is running on UPS/Battery
0x03 = Loss of system critical functions, system should be shutdown
0x04 = Ambient temperature too high
All other values = reserved
We have a user space tool (rtas_errd) on LPAR to monitor for
EPOW_SHUTDOWN_ON_UPS. Once it gets an event it initiates shutdown
after a predefined time. It also starts monitoring for any new EPOW
events. If it receives a "Power restored" event before that time
it will cancel the shutdown. Otherwise, after the predefined time it
will shut down the system.
Commit 79872e35469b ("powerpc/pseries: All events of
EPOW_SYSTEM_SHUTDOWN must initiate shutdown") changed our handling of
the "on UPS/Battery" case, to immediately shutdown the system. This
breaks existing setups that rely on the userspace tool to delay
shutdown and let the system run on the UPS.
Fixes: 79872e35469b ("powerpc/pseries: All events of EPOW_SYSTEM_SHUTDOWN must initiate shutdown")
Cc: stable(a)vger.kernel.org # v4.0+
Signed-off-by: Vasant Hegde <hegdevasant(a)linux.vnet.ibm.com>
[mpe: Massage change log and add PAPR references]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20200820061844.306460-1-hegdevasant@linux.vnet.ib…
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index f3736fcd98fc..13c86a292c6d 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier)
case EPOW_SHUTDOWN_ON_UPS:
pr_emerg("Loss of system power detected. System is running on"
" UPS/battery. Check RTAS error log for details\n");
- orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9ed4a6560b8562b7e2e2bed9527e88001f7b682 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Wed, 19 Aug 2020 17:12:17 +0100
Subject: [PATCH] epoll: Keep a reference on files added to the check list
When adding a new fd to an epoll, and this new fd is itself an
epoll fd, we recursively scan the fds attached to it
to detect cycles, and add non-epoll files to a "check list"
that gets subsequently parsed.
However, this check list isn't completely safe when deletions
can happen concurrently. To sidestep the issue, make sure that
a struct file placed on the check list sees its f_count increased,
ensuring that a concurrent deletion won't result in the file
disappearing from under our feet.
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 12eebcdea9c8..196003d9242c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1994,9 +1994,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ get_file(epi->ffd.file);
list_add(&epi->ffd.file->f_tfile_llink,
&tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -2040,6 +2042,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -2204,13 +2207,17 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
clear_tfile_check_list();
goto error_tgt_fput;
}
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
if (error) {
out_del:
list_del(&tf.file->f_tfile_llink);
+ if (!is_file_epoll(tf.file))
+ fput(tf.file);
goto error_tgt_fput;
}
if (is_file_epoll(tf.file)) {
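The essential invariant: as long as the check list holds its own reference, a
concurrent fput() elsewhere cannot free the file. A standalone sketch with a
toy struct file (not the kernel's):

#include <stdio.h>

/* toy stand-ins for the kernel's struct file refcounting */
struct file { int f_count; };

static void get_file(struct file *f)
{
        f->f_count++;
}

static void fput(struct file *f)
{
        if (--f->f_count == 0)
                printf("file freed\n");
}

int main(void)
{
        struct file f = { .f_count = 1 };

        get_file(&f);  /* pin the file while it sits on the check list */
        fput(&f);      /* a concurrent close drops its reference... */
        fput(&f);      /* ...but the file lives until the list drops ours */
        return 0;
}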
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9ed4a6560b8562b7e2e2bed9527e88001f7b682 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Wed, 19 Aug 2020 17:12:17 +0100
Subject: [PATCH] epoll: Keep a reference on files added to the check list
When adding a new fd to an epoll, and this new fd is itself an
epoll fd, we recursively scan the fds attached to it
to detect cycles, and add non-epoll files to a "check list"
that gets subsequently parsed.
However, this check list isn't completely safe when deletions
can happen concurrently. To sidestep the issue, make sure that
a struct file placed on the check list sees its f_count increased,
ensuring that a concurrent deletion won't result in the file
disappearing from under our feet.
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 12eebcdea9c8..196003d9242c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1994,9 +1994,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ get_file(epi->ffd.file);
list_add(&epi->ffd.file->f_tfile_llink,
&tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -2040,6 +2042,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -2204,13 +2207,17 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
clear_tfile_check_list();
goto error_tgt_fput;
}
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
if (error) {
out_del:
list_del(&tf.file->f_tfile_llink);
+ if (!is_file_epoll(tf.file))
+ fput(tf.file);
goto error_tgt_fput;
}
if (is_file_epoll(tf.file)) {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9ed4a6560b8562b7e2e2bed9527e88001f7b682 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Wed, 19 Aug 2020 17:12:17 +0100
Subject: [PATCH] epoll: Keep a reference on files added to the check list
When adding a new fd to an epoll, and this new fd is itself an
epoll fd, we recursively scan the fds attached to it
to detect cycles, and add non-epoll files to a "check list"
that gets subsequently parsed.
However, this check list isn't completely safe when deletions
can happen concurrently. To sidestep the issue, make sure that
a struct file placed on the check list sees its f_count increased,
ensuring that a concurrent deletion won't result in the file
disappearing from under our feet.
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 12eebcdea9c8..196003d9242c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1994,9 +1994,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ get_file(epi->ffd.file);
list_add(&epi->ffd.file->f_tfile_llink,
&tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -2040,6 +2042,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -2204,13 +2207,17 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
clear_tfile_check_list();
goto error_tgt_fput;
}
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
if (error) {
out_del:
list_del(&tf.file->f_tfile_llink);
+ if (!is_file_epoll(tf.file))
+ fput(tf.file);
goto error_tgt_fput;
}
if (is_file_epoll(tf.file)) {
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9ed4a6560b8562b7e2e2bed9527e88001f7b682 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Wed, 19 Aug 2020 17:12:17 +0100
Subject: [PATCH] epoll: Keep a reference on files added to the check list
When adding a new fd to an epoll, and this new fd is itself an
epoll fd, we recursively scan the fds attached to it
to detect cycles, and add non-epoll files to a "check list"
that gets subsequently parsed.
However, this check list isn't completely safe when deletions
can happen concurrently. To sidestep the issue, make sure that
a struct file placed on the check list sees its f_count increased,
ensuring that a concurrent deletion won't result in the file
disappearing from under our feet.
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 12eebcdea9c8..196003d9242c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1994,9 +1994,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ get_file(epi->ffd.file);
list_add(&epi->ffd.file->f_tfile_llink,
&tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -2040,6 +2042,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -2204,13 +2207,17 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
clear_tfile_check_list();
goto error_tgt_fput;
}
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
if (error) {
out_del:
list_del(&tf.file->f_tfile_llink);
+ if (!is_file_epoll(tf.file))
+ fput(tf.file);
goto error_tgt_fput;
}
if (is_file_epoll(tf.file)) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9ed4a6560b8562b7e2e2bed9527e88001f7b682 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Wed, 19 Aug 2020 17:12:17 +0100
Subject: [PATCH] epoll: Keep a reference on files added to the check list
When adding a new fd to an epoll, and this new fd is itself an
epoll fd, we recursively scan the fds attached to it
to detect cycles, and add non-epoll files to a "check list"
that gets subsequently parsed.
However, this check list isn't completely safe when deletions
can happen concurrently. To sidestep the issue, make sure that
a struct file placed on the check list sees its f_count increased,
ensuring that a concurrent deletion won't result in the file
disappearing from under our feet.
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 12eebcdea9c8..196003d9242c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1994,9 +1994,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ get_file(epi->ffd.file);
list_add(&epi->ffd.file->f_tfile_llink,
&tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -2040,6 +2042,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -2204,13 +2207,17 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
clear_tfile_check_list();
goto error_tgt_fput;
}
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
if (error) {
out_del:
list_del(&tf.file->f_tfile_llink);
+ if (!is_file_epoll(tf.file))
+ fput(tf.file);
goto error_tgt_fput;
}
if (is_file_epoll(tf.file)) {
From: Michael Ellerman <mpe(a)ellerman.id.au>
commit 63dee5df43a31f3844efabc58972f0a206ca4534 upstream.
We have powerpc specific logic in our page fault handling to decide if
an access to an unmapped address below the stack pointer should expand
the stack VMA.
The code was originally added in 2004 "ported from 2.4". The rough
logic is that the stack is allowed to grow to 1MB with no extra
checking. Over 1MB the access must be within 2048 bytes of the stack
pointer, or be from a user instruction that updates the stack pointer.
The 2048 byte allowance below the stack pointer is there to cover the
288 byte "red zone" as well as the "about 1.5kB" needed by the signal
delivery code.
Unfortunately since then the signal frame has expanded, and is now
4224 bytes on 64-bit kernels with transactional memory enabled. This
means if a process has consumed more than 1MB of stack, and its stack
pointer lies less than 4224 bytes from the next page boundary, signal
delivery will fault when trying to expand the stack and the process
will see a SEGV.
The total size of the signal frame is the size of struct rt_sigframe
(which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on
64-bit).
The 2048 byte allowance was correct until 2008 as the signal frame
was:
struct rt_sigframe {
struct ucontext uc; /* 0 1440 */
/* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */
long unsigned int _unused[2]; /* 1440 16 */
unsigned int tramp[6]; /* 1456 24 */
struct siginfo * pinfo; /* 1480 8 */
void * puc; /* 1488 8 */
struct siginfo info; /* 1496 128 */
/* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */
char abigap[288]; /* 1624 288 */
/* size: 1920, cachelines: 15, members: 7 */
/* padding: 8 */
};
1920 + 128 = 2048
Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore,
ptrace and signal support") (Jul 2008) the signal frame expanded to
2304 bytes:
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */ <--
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
long unsigned int _unused[2]; /* 1696 16 */
unsigned int tramp[6]; /* 1712 24 */
struct siginfo * pinfo; /* 1736 8 */
void * puc; /* 1744 8 */
struct siginfo info; /* 1752 128 */
/* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */
char abigap[288]; /* 1880 288 */
/* size: 2176, cachelines: 17, members: 7 */
/* padding: 8 */
};
2176 + 128 = 2304
At this point we should have been exposed to the bug, though as far as
I know it was never reported. I no longer have a system old enough to
easily test on.
Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a
grow-down stack segment") caused our stack expansion code to never
trigger, as there was always a VMA found for a write up to PAGE_SIZE
below r1.
That meant the bug was hidden as we continued to expand the signal
frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory
state to the signal context") (Feb 2013):
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
struct ucontext uc_transact; /* 1696 1696 */ <--
/* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
long unsigned int _unused[2]; /* 3392 16 */
unsigned int tramp[6]; /* 3408 24 */
struct siginfo * pinfo; /* 3432 8 */
void * puc; /* 3440 8 */
struct siginfo info; /* 3448 128 */
/* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
char abigap[288]; /* 3576 288 */
/* size: 3872, cachelines: 31, members: 8 */
/* padding: 8 */
/* last cacheline: 32 bytes */
};
3872 + 128 = 4000
And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit
userspace to 512 bytes") (Feb 2014):
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
struct ucontext uc_transact; /* 1696 1696 */
/* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
long unsigned int _unused[2]; /* 3392 16 */
unsigned int tramp[6]; /* 3408 24 */
struct siginfo * pinfo; /* 3432 8 */
void * puc; /* 3440 8 */
struct siginfo info; /* 3448 128 */
/* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
char abigap[512]; /* 3576 512 */ <--
/* size: 4096, cachelines: 32, members: 8 */
/* padding: 8 */
};
4096 + 128 = 4224
Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard
gap, between vmas") exposed us to the existing bug, because it changed
the stack VMA to be the correct/real size, meaning our stack expansion
code is now triggered.
Fix it by increasing the allowance to 4224 bytes.
Hard-coding 4224 is obviously unsafe against future expansions of the
signal frame in the same way as the existing code. We can't easily use
sizeof() because the signal frame structure is not in a header. We
will either fix that, or rip out all the custom stack expansion
checking logic entirely.
Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support")
Cc: stable(a)vger.kernel.org # v2.6.27+
Reported-by: Tom Lane <tgl(a)sss.pgh.pa.us>
Tested-by: Daniel Axtens <dja(a)axtens.net>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20200724092528.1578671-2-mpe@ellerman.id.au
Signed-off-by: Daniel Axtens <dja(a)axtens.net>
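As a guard against the constant drifting again, a compile-time assertion
would do the job, but only once struct rt_sigframe is visible outside the
signal code; this is hypothetical today, since as noted above the structure
is not in a header:

#include <linux/build_bug.h>

/* Hypothetical check: fires at build time if the hard-coded allowance
 * ever falls behind the real signal frame size. */
static inline void sigframe_size_check(void)
{
        BUILD_BUG_ON(sizeof(struct rt_sigframe) + __SIGNAL_FRAMESIZE >
                     SIGFRAME_MAX_SIZE);
}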
---
arch/powerpc/mm/fault.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index d1f860ca03ad..101c202c813c 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -192,6 +192,9 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
return MM_FAULT_CONTINUE;
}
+// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
+#define SIGFRAME_MAX_SIZE (4096 + 128)
+
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
* for a data fault, SRR1 for an instruction fault. For 400-family processors
@@ -341,7 +344,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
/*
* N.B. The POWER/Open ABI allows programs to access up to
* 288 bytes below the stack pointer.
- * The kernel signal delivery code writes up to about 1.5kB
+ * The kernel signal delivery code writes up to about 4kB
* below the stack pointer (r1) before decrementing it.
* The exec code can write slightly over 640kB to the stack
* before setting the user r1. Thus we allow the stack to
@@ -365,7 +368,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
* between the last mapped region and the stack will
* expand the stack rather than segfaulting.
*/
- if (address + 2048 < uregs->gpr[1] && !store_update_sp)
+ if (address + SIGFRAME_MAX_SIZE < uregs->gpr[1] && !store_update_sp)
goto bad_area;
}
if (expand_stack(vma, address))
--
2.25.1
From: Michael Ellerman <mpe(a)ellerman.id.au>
commit 63dee5df43a31f3844efabc58972f0a206ca4534 upstream.
We have powerpc specific logic in our page fault handling to decide if
an access to an unmapped address below the stack pointer should expand
the stack VMA.
The code was originally added in 2004 "ported from 2.4". The rough
logic is that the stack is allowed to grow to 1MB with no extra
checking. Over 1MB the access must be within 2048 bytes of the stack
pointer, or be from a user instruction that updates the stack pointer.
The 2048 byte allowance below the stack pointer is there to cover the
288 byte "red zone" as well as the "about 1.5kB" needed by the signal
delivery code.
Unfortunately since then the signal frame has expanded, and is now
4224 bytes on 64-bit kernels with transactional memory enabled. This
means if a process has consumed more than 1MB of stack, and its stack
pointer lies less than 4224 bytes from the next page boundary, signal
delivery will fault when trying to expand the stack and the process
will see a SEGV.
The total size of the signal frame is the size of struct rt_sigframe
(which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on
64-bit).
The 2048 byte allowance was correct until 2008 as the signal frame
was:
struct rt_sigframe {
struct ucontext uc; /* 0 1440 */
/* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */
long unsigned int _unused[2]; /* 1440 16 */
unsigned int tramp[6]; /* 1456 24 */
struct siginfo * pinfo; /* 1480 8 */
void * puc; /* 1488 8 */
struct siginfo info; /* 1496 128 */
/* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */
char abigap[288]; /* 1624 288 */
/* size: 1920, cachelines: 15, members: 7 */
/* padding: 8 */
};
1920 + 128 = 2048
Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore,
ptrace and signal support") (Jul 2008) the signal frame expanded to
2304 bytes:
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */ <--
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
long unsigned int _unused[2]; /* 1696 16 */
unsigned int tramp[6]; /* 1712 24 */
struct siginfo * pinfo; /* 1736 8 */
void * puc; /* 1744 8 */
struct siginfo info; /* 1752 128 */
/* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */
char abigap[288]; /* 1880 288 */
/* size: 2176, cachelines: 17, members: 7 */
/* padding: 8 */
};
2176 + 128 = 2304
At this point we should have been exposed to the bug, though as far as
I know it was never reported. I no longer have a system old enough to
easily test on.
Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a
grow-down stack segment") caused our stack expansion code to never
trigger, as there was always a VMA found for a write up to PAGE_SIZE
below r1.
That meant the bug was hidden as we continued to expand the signal
frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory
state to the signal context") (Feb 2013):
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
struct ucontext uc_transact; /* 1696 1696 */ <--
/* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
long unsigned int _unused[2]; /* 3392 16 */
unsigned int tramp[6]; /* 3408 24 */
struct siginfo * pinfo; /* 3432 8 */
void * puc; /* 3440 8 */
struct siginfo info; /* 3448 128 */
/* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
char abigap[288]; /* 3576 288 */
/* size: 3872, cachelines: 31, members: 8 */
/* padding: 8 */
/* last cacheline: 32 bytes */
};
3872 + 128 = 4000
And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit
userspace to 512 bytes") (Feb 2014):
struct rt_sigframe {
struct ucontext uc; /* 0 1696 */
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
struct ucontext uc_transact; /* 1696 1696 */
/* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
long unsigned int _unused[2]; /* 3392 16 */
unsigned int tramp[6]; /* 3408 24 */
struct siginfo * pinfo; /* 3432 8 */
void * puc; /* 3440 8 */
struct siginfo info; /* 3448 128 */
/* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
char abigap[512]; /* 3576 512 */ <--
/* size: 4096, cachelines: 32, members: 8 */
/* padding: 8 */
};
4096 + 128 = 4224
Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard
gap, between vmas") exposed us to the existing bug, because it changed
the stack VMA to be the correct/real size, meaning our stack expansion
code is now triggered.
Fix it by increasing the allowance to 4224 bytes.
Hard-coding 4224 is obviously unsafe against future expansions of the
signal frame in the same way as the existing code. We can't easily use
sizeof() because the signal frame structure is not in a header. We
will either fix that, or rip out all the custom stack expansion
checking logic entirely.
Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support")
Cc: stable(a)vger.kernel.org # v2.6.27+
Reported-by: Tom Lane <tgl(a)sss.pgh.pa.us>
Tested-by: Daniel Axtens <dja(a)axtens.net>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20200724092528.1578671-2-mpe@ellerman.id.au
Signed-off-by: Daniel Axtens <dja(a)axtens.net>
---
arch/powerpc/mm/fault.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5fc8a010fdf0..b990415d4922 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -227,6 +227,9 @@ static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
return is_exec || (address >= TASK_SIZE);
}
+// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
+#define SIGFRAME_MAX_SIZE (4096 + 128)
+
static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
struct vm_area_struct *vma,
bool store_update_sp)
@@ -234,7 +237,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
/*
* N.B. The POWER/Open ABI allows programs to access up to
* 288 bytes below the stack pointer.
- * The kernel signal delivery code writes up to about 1.5kB
+ * The kernel signal delivery code writes a bit over 4KB
* below the stack pointer (r1) before decrementing it.
* The exec code can write slightly over 640kB to the stack
* before setting the user r1. Thus we allow the stack to
@@ -258,7 +261,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
* between the last mapped region and the stack will
* expand the stack rather than segfaulting.
*/
- if (address + 2048 < uregs->gpr[1] && !store_update_sp)
+ if (address + SIGFRAME_MAX_SIZE < uregs->gpr[1] && !store_update_sp)
return true;
}
return false;
--
2.25.1
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b711d4eaf0c408a811311ee3e94d6e9e5a230a9a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Sun, 16 Aug 2020 08:23:05 -0700
Subject: [PATCH] io_uring: find and cancel head link async work on files exit
Commit f254ac04c874 ("io_uring: enable lookup of links holding inflight files")
only handled two of the three head link cases we have; we also need to
look up and cancel work that is blocked in io-wq if that work has a link
that's holding a reference to the files structure.
Put the "cancel head links that hold this request pending" logic into
io_attempt_cancel(), which will go through the motions of finding and
canceling head links that hold the current in-flight request
pending.
Cc: stable(a)vger.kernel.org
Reported-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
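The cascade keys off the tri-state result of the io-wq cancellation
helpers: anything other than "not found" means the work was located and the
search can stop. For reference, the result type as defined in fs/io-wq.h at
the time:

enum io_wq_cancel {
        IO_WQ_CANCEL_OK,        /* cancelled before started */
        IO_WQ_CANCEL_RUNNING,   /* found, running, and attempted cancelled */
        IO_WQ_CANCEL_NOTFOUND,  /* work not found */
};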
diff --git a/fs/io_uring.c b/fs/io_uring.c
index dc506b75659c..346a3eb84785 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8063,6 +8063,33 @@ static bool io_timeout_remove_link(struct io_ring_ctx *ctx,
return found;
}
+static bool io_cancel_link_cb(struct io_wq_work *work, void *data)
+{
+ return io_match_link(container_of(work, struct io_kiocb, work), data);
+}
+
+static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+ enum io_wq_cancel cret;
+
+ /* cancel this particular work, if it's running */
+ cret = io_wq_cancel_work(ctx->io_wq, &req->work);
+ if (cret != IO_WQ_CANCEL_NOTFOUND)
+ return;
+
+ /* find links that hold this pending, cancel those */
+ cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true);
+ if (cret != IO_WQ_CANCEL_NOTFOUND)
+ return;
+
+ /* if we have a poll link holding this pending, cancel that */
+ if (io_poll_remove_link(ctx, req))
+ return;
+
+ /* final option, timeout link is holding this req pending */
+ io_timeout_remove_link(ctx, req);
+}
+
static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
@@ -8116,10 +8143,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
continue;
}
} else {
- io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
- /* could be a link, check and remove if it is */
- if (!io_poll_remove_link(ctx, cancel_req))
- io_timeout_remove_link(ctx, cancel_req);
+ /* cancel this request, or head link requests */
+ io_attempt_cancel(ctx, cancel_req);
io_put_req(cancel_req);
}
This is a note to let you know that I've just added the patch titled
xhci: Always restore EP_SOFT_CLEAR_TOGGLE even if ep reset failed
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From f1ec7ae6c9f8c016db320e204cb519a1da1581b8 Mon Sep 17 00:00:00 2001
From: Ding Hui <dinghui(a)sangfor.com.cn>
Date: Fri, 21 Aug 2020 12:15:49 +0300
Subject: xhci: Always restore EP_SOFT_CLEAR_TOGGLE even if ep reset failed
Some device drivers call libusb_clear_halt() when the target ep queue
is not empty (e.g. a spice client connected to qemu for USB redirection).
Before commit f5249461b504 ("xhci: Clear the host side toggle
manually when endpoint is soft reset") that worked well.
But now we get the error log:
EP not empty, refuse reset
xhci_endpoint_reset() fails and leaves the EP_SOFT_CLEAR_TOGGLE
bit in ep_state still set.
So all subsequent URB submissions to the ep fail with the
warning:
Can't enqueue URB while manually clearing toggle
We need to clear the EP_SOFT_CLEAR_TOGGLE bit in ep_state after
xhci_endpoint_reset(), even if it failed.
Fixes: f5249461b504 ("xhci: Clear the host side toggle manually when endpoint is soft reset")
Cc: stable <stable(a)vger.kernel.org> # v4.17+
Signed-off-by: Ding Hui <dinghui(a)sangfor.com.cn>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200821091549.20556-4-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
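A minimal reproduction of the failing sequence from userspace, using
libusb; the function name, device handle and endpoint address are
placeholders:

#include <libusb-1.0/libusb.h>

/* With URBs still queued on 'ep', this hits the kernel's "EP not empty,
 * refuse reset" path; before the fix, every later transfer on the
 * endpoint then failed with "Can't enqueue URB while manually clearing
 * toggle". */
static int clear_halt_sketch(libusb_device_handle *devh, unsigned char ep)
{
        return libusb_clear_halt(devh, ep);
}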
---
drivers/usb/host/xhci.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 3c41b14ecce7..e9884ae9c77d 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3236,10 +3236,11 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
wait_for_completion(cfg_cmd->completion);
- ep->ep_state &= ~EP_SOFT_CLEAR_TOGGLE;
xhci_free_command(xhci, cfg_cmd);
cleanup:
xhci_free_command(xhci, stop_cmd);
+ if (ep->ep_state & EP_SOFT_CLEAR_TOGGLE)
+ ep->ep_state &= ~EP_SOFT_CLEAR_TOGGLE;
}
static int xhci_check_streams_endpoint(struct xhci_hcd *xhci,
--
2.28.0
This is a note to let you know that I've just added the patch titled
xhci: Do warm-reset when both CAS and XDEV_RESUME are set
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 904df64a5f4d5ebd670801d869ca0a6d6a6e8df6 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Date: Fri, 21 Aug 2020 12:15:48 +0300
Subject: xhci: Do warm-reset when both CAS and XDEV_RESUME are set
Sometimes re-plugging a USB device during system sleep renders the device
useless:
[ 173.418345] xhci_hcd 0000:00:14.0: Get port status 2-4 read: 0x14203e2, return 0x10262
...
[ 176.496485] usb 2-4: Waited 2000ms for CONNECT
[ 176.496781] usb usb2-port4: status 0000.0262 after resume, -19
[ 176.497103] usb 2-4: can't resume, status -19
[ 176.497438] usb usb2-port4: logical disconnect
Because PLS equals XDEV_RESUME, the xHCI driver reports U3 to usbcore,
even though the CAS bit is flagged.
So prioritize CAS over XDEV_RESUME to let usbcore handle a warm reset
for the port.
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200821091549.20556-3-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
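A minimal sketch of the reordered check, assuming the xhci.h definitions
of PORT_PLS_MASK, PORT_CAS and XDEV_RESUME; the quirk handling of the real
function is elided:

static void usb3_link_state_sketch(u32 status_reg, u32 *status)
{
        u32 pls = status_reg & PORT_PLS_MASK;

        if (status_reg & PORT_CAS) {
                /* CAS wins: report a state that makes usbcore warm-reset
                 * the port */
                pls = USB_SS_PORT_LS_COMP_MOD;
        } else if (pls == XDEV_RESUME) {
                /* only with CAS clear is the internal resume state folded
                 * to U3, so usbcore knows the port is not ready */
                *status |= USB_SS_PORT_LS_U3;
                return;
        }

        *status |= pls;
}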
---
drivers/usb/host/xhci-hub.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index c3554e37e09f..4e14e164cb68 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -740,15 +740,6 @@ static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci,
{
u32 pls = status_reg & PORT_PLS_MASK;
- /* resume state is a xHCI internal state.
- * Do not report it to usb core, instead, pretend to be U3,
- * thus usb core knows it's not ready for transfer
- */
- if (pls == XDEV_RESUME) {
- *status |= USB_SS_PORT_LS_U3;
- return;
- }
-
/* When the CAS bit is set then warm reset
* should be performed on port
*/
@@ -770,6 +761,16 @@ static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci,
*/
pls |= USB_PORT_STAT_CONNECTION;
} else {
+ /*
+ * Resume state is an xHCI internal state. Do not report it to
+ * usb core, instead, pretend to be U3, thus usb core knows
+ * it's not ready for transfer.
+ */
+ if (pls == XDEV_RESUME) {
+ *status |= USB_SS_PORT_LS_U3;
+ return;
+ }
+
/*
* If CAS bit isn't set but the Port is already at
* Compliance Mode, fake a connection so the USB core
--
2.28.0
This is a note to let you know that I've just added the patch titled
usb: host: xhci: fix ep context print mismatch in debugfs
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 0077b1b2c8d9ad5f7a08b62fb8524cdb9938388f Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li(a)nxp.com>
Date: Fri, 21 Aug 2020 12:15:47 +0300
Subject: usb: host: xhci: fix ep context print mismatch in debugfs
dci is 0-based here, and xhci_get_ep_ctx() will do the ep index
increment itself to get the ep context.
[rename dci to ep_index -Mathias]
Cc: stable <stable(a)vger.kernel.org> # v4.15+
Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver")
Signed-off-by: Li Jun <jun.li(a)nxp.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200821091549.20556-2-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
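The mapping the fixed loop relies on: endpoint contexts occupy Device
Context Index (DCI) slots 1..31, while the driver's ep_index runs 0..30
and xhci_get_ep_ctx() adds the 1 internally, so the DMA offset has to add
it explicitly:

/* ep_index (0..30) maps to DCI (1..31); both derivations must stay in
 * step, which is exactly what the fixed loop does. */
ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
dma = dev->out_ctx->dma + (ep_index + 1) * CTX_SIZE(xhci->hcc_params);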
---
drivers/usb/host/xhci-debugfs.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
index 92e25a62fdb5..c88bffd68742 100644
--- a/drivers/usb/host/xhci-debugfs.c
+++ b/drivers/usb/host/xhci-debugfs.c
@@ -274,7 +274,7 @@ static int xhci_slot_context_show(struct seq_file *s, void *unused)
static int xhci_endpoint_context_show(struct seq_file *s, void *unused)
{
- int dci;
+ int ep_index;
dma_addr_t dma;
struct xhci_hcd *xhci;
struct xhci_ep_ctx *ep_ctx;
@@ -283,9 +283,9 @@ static int xhci_endpoint_context_show(struct seq_file *s, void *unused)
xhci = hcd_to_xhci(bus_to_hcd(dev->udev->bus));
- for (dci = 1; dci < 32; dci++) {
- ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, dci);
- dma = dev->out_ctx->dma + dci * CTX_SIZE(xhci->hcc_params);
+ for (ep_index = 0; ep_index < 31; ep_index++) {
+ ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
+ dma = dev->out_ctx->dma + (ep_index + 1) * CTX_SIZE(xhci->hcc_params);
seq_printf(s, "%pad: %s\n", &dma,
xhci_decode_ep_context(le32_to_cpu(ep_ctx->ep_info),
le32_to_cpu(ep_ctx->ep_info2),
--
2.28.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a4501bac0e553bed117b7e1b166d49731caf7260 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak(a)codeaurora.org>
Date: Mon, 10 Aug 2020 12:36:19 +0530
Subject: [PATCH] opp: Enable resources again if they were disabled earlier
dev_pm_opp_set_rate() can now be called with freq = 0 in order
to either drop performance or bandwidth votes or to disable
regulators on platforms which support them.
In such cases, a subsequent call to dev_pm_opp_set_rate() with
the same frequency ends up returning early because 'old_freq == freq'.
Instead, make it fall through and put back the dropped performance
and bandwidth votes and/or re-enable the regulators.
Cc: v5.3+ <stable(a)vger.kernel.org> # v5.3+
Fixes: cd7ea582866f ("opp: Make dev_pm_opp_set_rate() handle freq = 0 to drop performance votes")
Reported-by: Sajida Bhanu <sbhanu(a)codeaurora.org>
Reviewed-by: Sibi Sankar <sibis(a)codeaurora.org>
Reported-by: Matthias Kaehlcke <mka(a)chromium.org>
Tested-by: Matthias Kaehlcke <mka(a)chromium.org>
Reviewed-by: Stephen Boyd <sboyd(a)kernel.org>
Signed-off-by: Rajendra Nayak <rnayak(a)codeaurora.org>
[ Viresh: Don't skip clk_set_rate() and massaged changelog ]
Signed-off-by: Viresh Kumar <viresh.kumar(a)linaro.org>
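An illustrative call sequence that the fix makes work again; 'dev' and
'freq' stand in for a real consumer device and its operating frequency:

#include <linux/pm_opp.h>

static int opp_drop_and_restore_sketch(struct device *dev, unsigned long freq)
{
        int ret;

        /* freq = 0 drops the perf/bandwidth votes and disables regulators */
        ret = dev_pm_opp_set_rate(dev, 0);
        if (ret)
                return ret;

        /* same frequency as before the drop: previously a silent no-op
         * that left the resources off; with the fix they come back */
        return dev_pm_opp_set_rate(dev, freq);
}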
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index bdb028c7793d..9668ea04cc80 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -934,10 +934,13 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
/* Return early if nothing to do */
if (old_freq == freq) {
- dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
- __func__, freq);
- ret = 0;
- goto put_opp_table;
+ if (!opp_table->required_opp_tables && !opp_table->regulators &&
+ !opp_table->paths) {
+ dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
+ __func__, freq);
+ ret = 0;
+ goto put_opp_table;
+ }
}
/*
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a4501bac0e553bed117b7e1b166d49731caf7260 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak(a)codeaurora.org>
Date: Mon, 10 Aug 2020 12:36:19 +0530
Subject: [PATCH] opp: Enable resources again if they were disabled earlier
dev_pm_opp_set_rate() can now be called with freq = 0 in order
to either drop performance or bandwidth votes or to disable
regulators on platforms which support them.
In such cases, a subsequent call to dev_pm_opp_set_rate() with
the same frequency ends up returning early because 'old_freq == freq'.
Instead, make it fall through and put back the dropped performance
and bandwidth votes and/or re-enable the regulators.
Cc: v5.3+ <stable(a)vger.kernel.org> # v5.3+
Fixes: cd7ea582866f ("opp: Make dev_pm_opp_set_rate() handle freq = 0 to drop performance votes")
Reported-by: Sajida Bhanu <sbhanu(a)codeaurora.org>
Reviewed-by: Sibi Sankar <sibis(a)codeaurora.org>
Reported-by: Matthias Kaehlcke <mka(a)chromium.org>
Tested-by: Matthias Kaehlcke <mka(a)chromium.org>
Reviewed-by: Stephen Boyd <sboyd(a)kernel.org>
Signed-off-by: Rajendra Nayak <rnayak(a)codeaurora.org>
[ Viresh: Don't skip clk_set_rate() and massaged changelog ]
Signed-off-by: Viresh Kumar <viresh.kumar(a)linaro.org>
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index bdb028c7793d..9668ea04cc80 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -934,10 +934,13 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
/* Return early if nothing to do */
if (old_freq == freq) {
- dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
- __func__, freq);
- ret = 0;
- goto put_opp_table;
+ if (!opp_table->required_opp_tables && !opp_table->regulators &&
+ !opp_table->paths) {
+ dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
+ __func__, freq);
+ ret = 0;
+ goto put_opp_table;
+ }
}
/*
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From df3ab3cb7eae63c6eb7c9aebcc196a75d59f65dd Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Thu, 16 Jul 2020 10:46:43 +0100
Subject: [PATCH] drm/i915: Provide the perf pmu.module
Rather than manually implement our own module reference counting for perf
pmu events, finally realise that there is a module parameter to struct
pmu for this very purpose.
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200716094643.31410-1-chris@…
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
(cherry picked from commit 27e897beec1c59861f15d4d3562c39ad1143620f)
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 28bc5f13ae52..056994224c6b 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -445,8 +445,6 @@ static void i915_pmu_event_destroy(struct perf_event *event)
container_of(event->pmu, typeof(*i915), pmu.base);
drm_WARN_ON(&i915->drm, event->parent);
-
- module_put(THIS_MODULE);
}
static int
@@ -538,10 +536,8 @@ static int i915_pmu_event_init(struct perf_event *event)
if (ret)
return ret;
- if (!event->parent) {
- __module_get(THIS_MODULE);
+ if (!event->parent)
event->destroy = i915_pmu_event_destroy;
- }
return 0;
}
@@ -1130,6 +1126,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
if (!pmu->base.attr_groups)
goto err_attr;
+ pmu->base.module = THIS_MODULE;
pmu->base.task_ctx_nr = perf_invalid_context;
pmu->base.event_init = i915_pmu_event_init;
pmu->base.add = i915_pmu_event_add;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable(a)vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-3-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
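The pattern the backports need to reproduce, in sketch form: the
notifier's flags decide whether the walk may drop the lock and reschedule.
MMU_NOTIFIER_RANGE_BLOCKABLE and cond_resched_lock() are the real kernel
symbols; the walk body is elided:

#include <linux/kvm_host.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>

static void unmap_walk_sketch(struct kvm *kvm, unsigned int flags)
{
        bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;

        /* ... unmap one PGD's worth of stage-2 mappings ... */
        if (may_block)
                cond_resched_lock(&kvm->mmu_lock);
}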
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable(a)vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-3-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable(a)vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-3-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable(a)vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-3-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable(a)vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-3-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable(a)vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-3-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fdfe7cbd58806522e799e2a50a15aee7f2cbb7b6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Tue, 11 Aug 2020 11:27:24 +0100
Subject: [PATCH] KVM: Pass MMU notifier range flags to kvm_unmap_hva_range()
The 'flags' field of 'struct mmu_notifier_range' is used to indicate
whether invalidate_range_{start,end}() are permitted to block. In the
case of kvm_mmu_notifier_invalidate_range_start(), this field is not
forwarded on to the architecture-specific implementation of
kvm_unmap_hva_range() and therefore the backend cannot sensibly decide
whether or not to block.
Add an extra 'flags' parameter to kvm_unmap_hva_range() so that
architectures are aware as to whether or not they are permitted to block.
Cc: <stable(a)vger.kernel.org>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
Message-Id: <20200811102725.7121-2-will(a)kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 65568b23868a..e52c927aade5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -473,7 +473,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, unsigned flags);
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 0121ef2c7c8d..dc351802ff18 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -2213,7 +2213,7 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat
}
int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end, unsigned flags)
{
if (!kvm->arch.mmu.pgd)
return 0;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index d35eaed1668f..825d337a505a 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -969,7 +969,7 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, unsigned flags);
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 87fa8d8a1031..28c366d307e7 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -486,7 +486,8 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
return 1;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e020d269416d..10ded83414de 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -58,7 +58,8 @@
#define KVM_ARCH_WANT_MMU_NOTIFIER
extern int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end,
+ unsigned flags);
extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 41fedec69ac3..49db50d1db04 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -834,7 +834,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
}
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index d6c1069e9954..ed0c9c43d0cf 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -734,7 +734,8 @@ static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
/* kvm_unmap_hva flushes everything anyways */
kvm_unmap_hva(kvm, start);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5ab3af7275d8..5303dbc5c9bc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1596,7 +1596,8 @@ asmlinkage void kvm_spurious_fault(void);
_ASM_EXTABLE(666b, 667b)
#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4e03841f053d..a5d0207e7189 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1916,7 +1916,8 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2c2c0254c2d8..4eaa4e46c7d0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -482,7 +482,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
- need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end);
+ need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
+ range->flags);
need_tlb_flush |= kvm->tlbs_dirty;
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ef3f5830b859604eda8723c26d90ab23edc027a4 Mon Sep 17 00:00:00 2001
From: "zhangyi (F)" <yi.zhang(a)huawei.com>
Date: Sat, 20 Jun 2020 14:19:48 +0800
Subject: [PATCH] jbd2: add the missing unlock_buffer() in the error path of
jbd2_write_superblock()
jbd2_write_superblock() holds the buffer lock of the journal superblock
before ending that superblock write, so add the missing unlock_buffer()
in the error path before submitting the buffer.
Fixes: 742b06b5628f ("jbd2: check superblock mapped prior to committing")
Signed-off-by: zhangyi (F) <yi.zhang(a)huawei.com>
Reviewed-by: Ritesh Harjani <riteshh(a)linux.ibm.com>
Cc: stable(a)kernel.org
Link: https://lore.kernel.org/r/20200620061948.2049579-1-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e4944436e733..5493a0da23dd 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1367,8 +1367,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
int ret;
/* Buffer got discarded which means block device got invalidated */
- if (!buffer_mapped(bh))
+ if (!buffer_mapped(bh)) {
+ unlock_buffer(bh);
return -EIO;
+ }
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7303cb5bfe845f7d43cd9b2dbd37dbb266efda9b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Fri, 31 Jul 2020 18:21:35 +0200
Subject: [PATCH] ext4: fix checking of directory entry validity for inline
directories
ext4_search_dir() and ext4_generic_delete_entry() can be called both for
standard directory blocks and for inline directories stored inside the
inode or in inline xattr space. In the second case we didn't call
ext4_check_dir_entry() with the proper constraints, which could result in
accepting a corrupted directory entry as well as false positive filesystem
errors like:
EXT4-fs error (device dm-0): ext4_search_dir:1395: inode #28320400:
block 113246792: comm dockerd: bad entry in directory: directory entry too
close to block end - offset=0, inode=28320403, rec_len=32, name_len=8,
size=4096
Fix the arguments passed to ext4_check_dir_entry().
Fixes: 109ba779d6cc ("ext4: check for directory entries too close to block end")
CC: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20200731162135.8080-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cb2eb1967e73..b92571beab72 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1396,8 +1396,8 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
- bh->b_size, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
+ buf_size, offset))
return -1;
*res_dir = de;
return 1;
@@ -2482,7 +2482,7 @@ int ext4_generic_delete_entry(handle_t *handle,
de = (struct ext4_dir_entry_2 *)entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data, bh->b_size, i))
+ entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7303cb5bfe845f7d43cd9b2dbd37dbb266efda9b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Fri, 31 Jul 2020 18:21:35 +0200
Subject: [PATCH] ext4: fix checking of directory entry validity for inline
directories
ext4_search_dir() and ext4_generic_delete_entry() can be called both for
standard directory blocks and for inline directories stored inside the
inode or in inline xattr space. In the second case we didn't call
ext4_check_dir_entry() with the proper constraints, which could result in
accepting a corrupted directory entry as well as false positive filesystem
errors like:
EXT4-fs error (device dm-0): ext4_search_dir:1395: inode #28320400:
block 113246792: comm dockerd: bad entry in directory: directory entry too
close to block end - offset=0, inode=28320403, rec_len=32, name_len=8,
size=4096
Fix the arguments passed to ext4_check_dir_entry().
Fixes: 109ba779d6cc ("ext4: check for directory entries too close to block end")
CC: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20200731162135.8080-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cb2eb1967e73..b92571beab72 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1396,8 +1396,8 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
- bh->b_size, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
+ buf_size, offset))
return -1;
*res_dir = de;
return 1;
@@ -2482,7 +2482,7 @@ int ext4_generic_delete_entry(handle_t *handle,
de = (struct ext4_dir_entry_2 *)entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data, bh->b_size, i))
+ entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8979ef70850eb469e1094279259d1ef393ffe85f Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd(a)chromium.org>
Date: Tue, 11 Aug 2020 14:28:36 -0700
Subject: [PATCH] opp: Put opp table in dev_pm_opp_set_rate() for empty tables
We get the opp_table pointer at the top of the function and so we should
put the pointer at the end of the function like all other exit paths
from this function do.
Cc: v5.7+ <stable(a)vger.kernel.org> # v5.7+
Fixes: aca48b61f963 ("opp: Manage empty OPP tables with clk handle")
Reviewed-by: Rajendra Nayak <rnayak(a)codeaurora.org>
Signed-off-by: Stephen Boyd <swboyd(a)chromium.org>
[ Viresh: Split the patch into two ]
Signed-off-by: Viresh Kumar <viresh.kumar(a)linaro.org>
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 9d7fb45b1786..f2f32786ee45 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -893,8 +893,10 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
* have OPP table for the device, while others don't and
* opp_set_rate() just needs to behave like clk_set_rate().
*/
- if (!_get_opp_count(opp_table))
- return 0;
+ if (!_get_opp_count(opp_table)) {
+ ret = 0;
+ goto put_opp_table;
+ }
if (!opp_table->required_opp_tables && !opp_table->regulators &&
!opp_table->paths) {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ddf75be47ca748f8b12d28ac64d624354fddf189 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Mon, 3 Aug 2020 13:09:01 +0200
Subject: [PATCH] spi: Prevent adding devices below an unregistering controller
CONFIG_OF_DYNAMIC and CONFIG_ACPI allow adding SPI devices at runtime
using a DeviceTree overlay or DSDT patch. CONFIG_SPI_SLAVE allows the
same via sysfs.
But there are no precautions to prevent adding a device below a
controller that's being removed. Such a device is unusable and may not
even be able to unbind cleanly as it becomes inaccessible once the
controller has been torn down. E.g. it is then impossible to quiesce
the device's interrupt.
of_spi_notify() and acpi_spi_notify() do hold a ref on the controller,
but otherwise run lockless against spi_unregister_controller().
Fix by holding the spi_add_lock in spi_unregister_controller() and
bailing out of spi_add_device() if the controller has been unregistered
concurrently.
Fixes: ce79d54ae447 ("spi/of: Add OF notifier handler")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v3.19+
Cc: Geert Uytterhoeven <geert+renesas(a)glider.be>
Cc: Octavian Purdila <octavian.purdila(a)intel.com>
Cc: Pantelis Antoniou <pantelis.antoniou(a)konsulko.com>
Link: https://lore.kernel.org/r/a8c3205088a969dc8410eec1eba9aface60f36af.15964510…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index c3008e423f59..c6ea760ea5f0 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -1017,4 +1017,7 @@ config SPI_SLAVE_SYSTEM_CONTROL
endif # SPI_SLAVE
+config SPI_DYNAMIC
+ def_bool ACPI || OF_DYNAMIC || SPI_SLAVE
+
endif # SPI
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 0b260484b4f5..92b8fb416dca 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -475,6 +475,12 @@ static LIST_HEAD(spi_controller_list);
*/
static DEFINE_MUTEX(board_lock);
+/*
+ * Prevents addition of devices with same chip select and
+ * addition of devices below an unregistering controller.
+ */
+static DEFINE_MUTEX(spi_add_lock);
+
/**
* spi_alloc_device - Allocate a new SPI device
* @ctlr: Controller to which device is connected
@@ -554,7 +560,6 @@ static int spi_dev_check(struct device *dev, void *data)
*/
int spi_add_device(struct spi_device *spi)
{
- static DEFINE_MUTEX(spi_add_lock);
struct spi_controller *ctlr = spi->controller;
struct device *dev = ctlr->dev.parent;
int status;
@@ -582,6 +587,13 @@ int spi_add_device(struct spi_device *spi)
goto done;
}
+ /* Controller may unregister concurrently */
+ if (IS_ENABLED(CONFIG_SPI_DYNAMIC) &&
+ !device_is_registered(&ctlr->dev)) {
+ status = -ENODEV;
+ goto done;
+ }
+
/* Descriptors take precedence */
if (ctlr->cs_gpiods)
spi->cs_gpiod = ctlr->cs_gpiods[spi->chip_select];
@@ -2797,6 +2809,10 @@ void spi_unregister_controller(struct spi_controller *ctlr)
struct spi_controller *found;
int id = ctlr->bus_num;
+ /* Prevent addition of new devices, unregister existing ones */
+ if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
+ mutex_lock(&spi_add_lock);
+
device_for_each_child(&ctlr->dev, NULL, __unregister);
/* First make sure that this controller was ever added */
@@ -2817,6 +2833,9 @@ void spi_unregister_controller(struct spi_controller *ctlr)
if (found == ctlr)
idr_remove(&spi_master_idr, id);
mutex_unlock(&board_lock);
+
+ if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
+ mutex_unlock(&spi_add_lock);
}
EXPORT_SYMBOL_GPL(spi_unregister_controller);
Tascam FE-8 is known to support communication by asynchronous transaction
only. That support can be implemented in a userspace application, and the
snd-firewire-ctl-services project already has it. However, the ALSA
firewire-tascam driver is still bound to the model.
This commit changes the device entries so that the model is excluded. In
commit 53b3ffee7885 ("ALSA: firewire-tascam: change device probing
processing"), I addressed the concern that the version field in the
configuration differs depending on the installed firmware. However, as far
as I checked, the version number is fixed. It's safe to put the version
number back into the modalias.
Fixes: 53b3ffee7885 ("ALSA: firewire-tascam: change device probing processing")
Cc: <stable(a)vger.kernel.org> # 4.4+
Signed-off-by: Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
---
sound/firewire/tascam/tascam.c | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/sound/firewire/tascam/tascam.c b/sound/firewire/tascam/tascam.c
index 5dac0d9fc58e..75f2edd8e78f 100644
--- a/sound/firewire/tascam/tascam.c
+++ b/sound/firewire/tascam/tascam.c
@@ -39,9 +39,6 @@ static const struct snd_tscm_spec model_specs[] = {
.midi_capture_ports = 2,
.midi_playback_ports = 4,
},
- // This kernel module doesn't support FE-8 because the most of features
- // can be implemented in userspace without any specific support of this
- // module.
};
static int identify_model(struct snd_tscm *tscm)
@@ -211,11 +208,39 @@ static void snd_tscm_remove(struct fw_unit *unit)
}
static const struct ieee1394_device_id snd_tscm_id_table[] = {
+ // Tascam, FW-1884.
+ {
+ .match_flags = IEEE1394_MATCH_VENDOR_ID |
+ IEEE1394_MATCH_SPECIFIER_ID |
+ IEEE1394_MATCH_VERSION,
+ .vendor_id = 0x00022e,
+ .specifier_id = 0x00022e,
+ .version = 0x800000,
+ },
+ // Tascam, FE-8 (.version = 0x800001)
+ // This kernel module doesn't support FE-8 because the most of features
+ // can be implemented in userspace without any specific support of this
+ // module.
+ //
+ // .version = 0x800002 is unknown.
+ //
+ // Tascam, FW-1082.
+ {
+ .match_flags = IEEE1394_MATCH_VENDOR_ID |
+ IEEE1394_MATCH_SPECIFIER_ID |
+ IEEE1394_MATCH_VERSION,
+ .vendor_id = 0x00022e,
+ .specifier_id = 0x00022e,
+ .version = 0x800003,
+ },
+ // Tascam, FW-1804.
{
.match_flags = IEEE1394_MATCH_VENDOR_ID |
- IEEE1394_MATCH_SPECIFIER_ID,
+ IEEE1394_MATCH_SPECIFIER_ID |
+ IEEE1394_MATCH_VERSION,
.vendor_id = 0x00022e,
.specifier_id = 0x00022e,
+ .version = 0x800004,
},
{}
};
--
2.25.1
The patch titled
Subject: mm, page_alloc: fix core hung in free_pcppages_bulk()
has been removed from the -mm tree. Its filename was
mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Charan Teja Reddy <charante(a)codeaurora.org>
Subject: mm, page_alloc: fix core hung in free_pcppages_bulk()
The following race is observed with repeated online and offline cycles,
and a delay between two successive onlines, of memory blocks in the
movable zone.
P1                                     P2

Online the first memory block in
the movable zone. The pcp struct
values are initialized to default
values, i.e., pcp->high = 0 &
pcp->batch = 1.

                                       Allocate the pages from the
                                       movable zone.

Try to online the second memory
block in the movable zone; it has
entered online_pages() but has yet
to call zone_pcp_update().

                                       This process has entered the
                                       exit path, so it tries to
                                       release the order-0 pages to
                                       pcp lists through
                                       free_unref_page_commit().

                                       As pcp->high = 0 and
                                       pcp->count = 1, it proceeds to
                                       call free_pcppages_bulk().

Update the pcp values; the new pcp
values are, say, pcp->high = 378,
pcp->batch = 63.

                                       Read the pcp's batch value using
                                       READ_ONCE() and pass it to
                                       free_pcppages_bulk(); the values
                                       passed here are batch = 63,
                                       count = 1.

                                       Since the number of pages in the
                                       pcp lists is less than ->batch,
                                       it gets stuck in the
                                       while (list_empty(list)) loop
                                       with interrupts disabled, thus
                                       hanging the core.
Avoid this by ensuring free_pcppages_bulk() is called with a proper count
of pcp list pages.
The mentioned race is somewhat easily reproducible without [1] because pcp
values are not updated for the first memory block onlined, and thus there
is a wide enough race window for P2 between the alloc+free and the pcp
struct values update through the onlining of the second memory block.
With [1], the race still exists but it is very narrow, as we update the
pcp struct values for the first memory block onlined itself.
This is not limited to the movable zone, it could also happen in cases
with the normal zone (e.g., hotplug to a node that only has DMA memory, or
no other memory yet).
[1]: https://patchwork.kernel.org/patch/11696389/
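To see why the stale count hangs, here is a minimal userspace simulation
(an assumption-laden model of the free_pcppages_bulk() inner loop with
three migratetype lists, not kernel code): without the min() clamp, once
the queued pages run out the scan never finds a non-empty list and spins
forever.

    #include <stdio.h>

    #define MIGRATE_TYPES 3

    int main(void)
    {
            int lists[MIGRATE_TYPES] = { 1, 0, 0 }; /* pcp->count == 1 page queued */
            int queued = 1;
            int count = 63;                         /* stale pcp->batch from P1 */
            int mt = 0;

            count = count < queued ? count : queued; /* the fix: min(pcp->count, count) */

            while (count) {
                    /* rotate to a non-empty list, as the kernel loop does */
                    while (lists[mt] == 0)
                            mt = (mt + 1) % MIGRATE_TYPES; /* spins forever without the clamp */
                    lists[mt]--;
                    count--;
            }
            puts("all queued pages freed");
            return 0;
    }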
Link: http://lkml.kernel.org/r/1597150703-19003-1-git-send-email-charante@codeaur…
Fixes: 5f8dcc21211a ("page-allocator: split per-cpu list into one-list-per-migrate-type")
Signed-off-by: Charan Teja Reddy <charante(a)codeaurora.org>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Rientjes <rientjes(a)google.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Vinayak Menon <vinmenon(a)codeaurora.org>
Cc: <stable(a)vger.kernel.org> [2.6+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/mm/page_alloc.c~mm-page_alloc-fix-core-hung-in-free_pcppages_bulk
+++ a/mm/page_alloc.c
@@ -1302,6 +1302,11 @@ static void free_pcppages_bulk(struct zo
struct page *page, *tmp;
LIST_HEAD(head);
+ /*
+ * Ensure proper count is passed which otherwise would stuck in the
+ * below while (list_empty(list)) loop.
+ */
+ count = min(pcp->count, count);
while (count) {
struct list_head *list;
_
Patches currently in -mm which might be from charante(a)codeaurora.org are
The patch titled
Subject: mm: include CMA pages in lowmem_reserve at boot
has been removed from the -mm tree. Its filename was
mm-include-cma-pages-in-lowmem_reserve-at-boot.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Doug Berger <opendmb(a)gmail.com>
Subject: mm: include CMA pages in lowmem_reserve at boot
The lowmem_reserve arrays provide a means of applying pressure against
allocations from lower zones that were targeted at higher zones. Their
values are a function of the number of pages managed by higher zones and
are assigned by a call to the setup_per_zone_lowmem_reserve() function.
The function is initially called at boot time by the function
init_per_zone_wmark_min() and may be called later by accesses of the
/proc/sys/vm/lowmem_reserve_ratio sysctl file.
The function init_per_zone_wmark_min() was moved up from a module_init to
a core_initcall to resolve a sequencing issue with khugepaged.
Unfortunately this created a sequencing issue with CMA page accounting.
The CMA pages are added to the managed page count of a zone when
cma_init_reserved_areas() is called at boot also as a core_initcall. This
makes it uncertain whether the CMA pages will be added to the managed page
counts of their zones before or after the call to
init_per_zone_wmark_min() as it becomes dependent on link order. With the
current link order the pages are added to the managed count after the
lowmem_reserve arrays are initialized at boot.
This means the lowmem_reserve values at boot may be lower than the values
used later if /proc/sys/vm/lowmem_reserve_ratio is accessed even if the
ratio values are unchanged.
In many cases the difference is not significant, but for example
an ARM platform with 1GB of memory and the following memory layout
[ 0.000000] cma: Reserved 256 MiB at 0x0000000030000000
[ 0.000000] Zone ranges:
[ 0.000000] DMA [mem 0x0000000000000000-0x000000002fffffff]
[ 0.000000] Normal empty
[ 0.000000] HighMem [mem 0x0000000030000000-0x000000003fffffff]
would result in 0 lowmem_reserve for the DMA zone. This would allow
userspace to deplete the DMA zone easily. Funnily enough
$ cat /proc/sys/vm/lowmem_reserve_ratio
would fix up the situation because it forces setup_per_zone_lowmem_reserve
as a side effect.
This commit breaks the link order dependency by invoking
init_per_zone_wmark_min() as a postcore_initcall so that the CMA pages
have the chance to be properly accounted in their zone(s) and allowing the
lowmem_reserve arrays to receive consistent values.
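A hedged sketch of the ordering being enforced (kernel initcall macro
usage only; cma_init_reserved_areas() is registered at core_initcall
level per the message above, and this fragment is illustrative rather
than compilable on its own):

    /* Entries at the same initcall level run in link order; moving
     * init_per_zone_wmark_min() one level later makes the ordering
     * explicit instead of link-order dependent. */
    core_initcall(cma_init_reserved_areas);      /* CMA pages become "managed" */
    postcore_initcall(init_per_zone_wmark_min);  /* now runs strictly after */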
Link: http://lkml.kernel.org/r/1597423766-27849-1-git-send-email-opendmb@gmail.com
Fixes: bc22af74f271 ("mm: update min_free_kbytes from khugepaged after core initialization")
Signed-off-by: Doug Berger <opendmb(a)gmail.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Jason Baron <jbaron(a)akamai.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/page_alloc.c~mm-include-cma-pages-in-lowmem_reserve-at-boot
+++ a/mm/page_alloc.c
@@ -7888,7 +7888,7 @@ int __meminit init_per_zone_wmark_min(vo
return 0;
}
-core_initcall(init_per_zone_wmark_min)
+postcore_initcall(init_per_zone_wmark_min)
/*
* min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
_
Patches currently in -mm which might be from opendmb(a)gmail.com are
The patch titled
Subject: squashfs: avoid bio_alloc() failure with 1Mbyte blocks
has been removed from the -mm tree. Its filename was
squashfs-avoid-bio_alloc-failure-with-1mbyte-blocks.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Phillip Lougher <phillip(a)squashfs.org.uk>
Subject: squashfs: avoid bio_alloc() failure with 1Mbyte blocks
This is a regression introduced by the patch "migrate from ll_rw_block
usage to BIO".
bio_alloc() is limited to 256 pages (1 Mbyte). This can cause a failure
when reading filesystems with a 1 Mbyte block size. The problem is that a
datablock can be fully (or almost fully) uncompressed, requiring 256
pages, but, because blocks are not aligned to page boundaries, it may
require 257 pages to read.
bio_kmalloc() can handle 1024 pages, so use it for this edge condition.
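A quick arithmetic sketch of the 257-page claim (the 2048-byte starting
offset is an assumed value for illustration): a 1 Mbyte block that starts
mid-page straddles one extra page.

    #include <stdio.h>

    int main(void)
    {
            unsigned long page = 4096;
            unsigned long block = 1024 * 1024;  /* 1 Mbyte datablock */
            unsigned long offset = 2048;        /* block starts mid-page (illustrative) */
            unsigned long first = offset / page;
            unsigned long last = (offset + block - 1) / page;

            /* prints 257: one more than bio_alloc()'s 256-page limit */
            printf("pages needed: %lu\n", last - first + 1);
            return 0;
    }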
Link: http://lkml.kernel.org/r/20200815035637.15319-1-phillip@squashfs.org.uk
Fixes: 93e72b3c612a ("squashfs: migrate from ll_rw_block usage to BIO")
Signed-off-by: Phillip Lougher <phillip(a)squashfs.org.uk>
Reported-by: Nicolas Prochazka <nicolas.prochazka(a)gmail.com>
Reported-by: Tomoatsu Shimada <shimada(a)walbrix.com>
Reviewed-by: Guenter Roeck <groeck(a)chromium.org>
Cc: Philippe Liard <pliard(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Adrien Schildknecht <adrien+dev(a)schischi.me>
Cc: Daniel Rosenberg <drosen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/squashfs/block.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/fs/squashfs/block.c~squashfs-avoid-bio_alloc-failure-with-1mbyte-blocks
+++ a/fs/squashfs/block.c
@@ -87,7 +87,11 @@ static int squashfs_bio_read(struct supe
int error, i;
struct bio *bio;
- bio = bio_alloc(GFP_NOIO, page_count);
+ if (page_count <= BIO_MAX_PAGES)
+ bio = bio_alloc(GFP_NOIO, page_count);
+ else
+ bio = bio_kmalloc(GFP_NOIO, page_count);
+
if (!bio)
return -ENOMEM;
_
Patches currently in -mm which might be from phillip(a)squashfs.org.uk are
The patch titled
Subject: uprobes: __replace_page() avoid BUG in munlock_vma_page()
has been removed from the -mm tree. Its filename was
uprobes-__replace_page-avoid-bug-in-munlock_vma_page.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: uprobes: __replace_page() avoid BUG in munlock_vma_page()
syzbot crashed on the VM_BUG_ON_PAGE(PageTail) in munlock_vma_page(), when
called from uprobes __replace_page(). Which of many ways to fix it?
Settled on not calling it when PageCompound (since Head and Tail are equal
in this context, PageCompound is the usual check in uprobes.c, and the
prior use of FOLL_SPLIT_PMD will have cleared PageMlocked already).
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008161338360.20413@eggly.anvils
Fixes: 5a52c9df62b4 ("uprobe: use FOLL_SPLIT_PMD instead of FOLL_SPLIT")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Acked-by: Song Liu <songliubraving(a)fb.com>
Acked-by: Oleg Nesterov <oleg(a)redhat.com>
Reviewed-by: Srikar Dronamraju <srikar(a)linux.vnet.ibm.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/events/uprobes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/events/uprobes.c~uprobes-__replace_page-avoid-bug-in-munlock_vma_page
+++ a/kernel/events/uprobes.c
@@ -205,7 +205,7 @@ static int __replace_page(struct vm_area
try_to_free_swap(old_page);
page_vma_mapped_walk_done(&pvmw);
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page))
munlock_vma_page(old_page);
put_page(old_page);
_
Patches currently in -mm which might be from hughd(a)google.com are
The patch titled
Subject: kernel/relay.c: fix memleak on destroy relay channel
has been removed from the -mm tree. Its filename was
kernel-relayc-fix-memleak-on-destroy-relay-channel.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Wei Yongjun <weiyongjun1(a)huawei.com>
Subject: kernel/relay.c: fix memleak on destroy relay channel
kmemleak reports a memory leak as follows:
unreferenced object 0x607ee4e5f948 (size 8):
comm "syz-executor.1", pid 2098, jiffies 4295031601 (age 288.468s)
hex dump (first 8 bytes):
00 00 00 00 00 00 00 00 ........
backtrace:
[<00000000ca1de2fa>] relay_open kernel/relay.c:583 [inline]
[<00000000ca1de2fa>] relay_open+0xb6/0x970 kernel/relay.c:563
[<0000000038ae5a4b>] do_blk_trace_setup+0x4a8/0xb20 kernel/trace/blktrace.c:557
[<00000000d5e778e9>] __blk_trace_setup+0xb6/0x150 kernel/trace/blktrace.c:597
[<0000000038fdf803>] blk_trace_ioctl+0x146/0x280 kernel/trace/blktrace.c:738
[<00000000ce25a0ca>] blkdev_ioctl+0xb2/0x6a0 block/ioctl.c:613
[<00000000579e47e0>] block_ioctl+0xe5/0x120 fs/block_dev.c:1871
[<00000000b1588c11>] vfs_ioctl fs/ioctl.c:48 [inline]
[<00000000b1588c11>] __do_sys_ioctl fs/ioctl.c:753 [inline]
[<00000000b1588c11>] __se_sys_ioctl fs/ioctl.c:739 [inline]
[<00000000b1588c11>] __x64_sys_ioctl+0x170/0x1ce fs/ioctl.c:739
[<0000000088fc9942>] do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46
[<000000004f6dd57a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
'chan->buf' is allocated in relay_open() by alloc_percpu() but is not
freed when the relay channel is destroyed. Fix it by adding free_percpu()
before returning from relay_destroy_channel().
Link: http://lkml.kernel.org/r/20200817122826.48518-1-weiyongjun1@huawei.com
Fixes: 017c59c042d0 ("relay: Use per CPU constructs for the relay channel buffer pointers")
Signed-off-by: Wei Yongjun <weiyongjun1(a)huawei.com>
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Reviewed-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Michel Lespinasse <walken(a)google.com>
Cc: Daniel Axtens <dja(a)axtens.net>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Akash Goel <akash.goel(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/relay.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/relay.c~kernel-relayc-fix-memleak-on-destroy-relay-channel
+++ a/kernel/relay.c
@@ -197,6 +197,7 @@ free_buf:
static void relay_destroy_channel(struct kref *kref)
{
struct rchan *chan = container_of(kref, struct rchan, kref);
+ free_percpu(chan->buf);
kfree(chan);
}
_
Patches currently in -mm which might be from weiyongjun1(a)huawei.com are
The patch titled
Subject: romfs: fix uninitialized memory leak in romfs_dev_read()
has been removed from the -mm tree. Its filename was
romfs-fix-uninitialized-memory-leak-in-romfs_dev_read.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Jann Horn <jannh(a)google.com>
Subject: romfs: fix uninitialized memory leak in romfs_dev_read()
romfs has a superblock field that limits the size of the filesystem; data
beyond that limit is never accessed.
romfs_dev_read() fetches a caller-supplied number of bytes from the
backing device. It returns 0 on success or an error code on failure;
therefore, its API can't represent short reads, it's all-or-nothing.
However, when romfs_dev_read() detects that the requested operation would
cross the filesystem size limit, it currently silently truncates the
requested number of bytes. This means, for example, that when the content of a
file with size 0x1000 starts one byte before the filesystem size limit,
->readpage() will only fill a single byte of the supplied page while
leaving the rest uninitialized, leaking that uninitialized memory to
userspace.
Fix it by returning an error code instead of truncating the read when the
requested read operation would go beyond the end of the filesystem.
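The shape of the corrected check is worth spelling out. Below is a hedged
sketch (read_in_bounds is a hypothetical helper name, not part of romfs)
of the overflow-safe bounds test the fix collapses the two conditions
into:

    #include <stdbool.h>
    #include <stddef.h>

    /* True if [pos, pos + buflen) lies entirely within [0, limit).
     * Written as "buflen > limit - pos" after checking pos < limit,
     * so pos + buflen is never computed and cannot wrap around. */
    static bool read_in_bounds(size_t pos, size_t buflen, size_t limit)
    {
            return pos < limit && buflen <= limit - pos;
    }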
Link: http://lkml.kernel.org/r/20200818013202.2246365-1-jannh@google.com
Fixes: da4458bda237 ("NOMMU: Make it possible for RomFS to use MTD devices directly")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/romfs/storage.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/fs/romfs/storage.c~romfs-fix-uninitialized-memory-leak-in-romfs_dev_read
+++ a/fs/romfs/storage.c
@@ -217,10 +217,8 @@ int romfs_dev_read(struct super_block *s
size_t limit;
limit = romfs_maxsize(sb);
- if (pos >= limit)
+ if (pos >= limit || buflen > limit - pos)
return -EIO;
- if (buflen > limit - pos)
- buflen = limit - pos;
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd)
_
Patches currently in -mm which might be from jannh(a)google.com are
The patch titled
Subject: mm/vunmap: add cond_resched() in vunmap_pmd_range
has been removed from the -mm tree. Its filename was
mm-vunmap-add-cond_resched-in-vunmap_pmd_range.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
Subject: mm/vunmap: add cond_resched() in vunmap_pmd_range
Like zap_pte_range(), add cond_resched() so that we can avoid softlockups,
as reported below. On a non-preemptible kernel with a large I/O map region
(like the one we get when using persistent memory in sector mode), an
unmap of the namespace can report the softlockups below.
[22724.027334] watchdog: BUG: soft lockup - CPU#49 stuck for 23s! [ndctl:50777]
NIP [c0000000000dc224] plpar_hcall+0x38/0x58
LR [c0000000000d8898] pSeries_lpar_hpte_invalidate+0x68/0xb0
Call Trace:
[c0000004e87a7780] [c0000004fb197c00] 0xc0000004fb197c00 (unreliable)
[c0000004e87a7810] [c00000000007f4e4] flush_hash_page+0x114/0x200
[c0000004e87a7890] [c0000000000833cc] hpte_need_flush+0x2dc/0x540
[c0000004e87a7950] [c0000000003f5798] vunmap_page_range+0x538/0x6f0
[c0000004e87a7a70] [c0000000003f76d0] free_unmap_vmap_area+0x30/0x70
[c0000004e87a7aa0] [c0000000003f7a6c] remove_vm_area+0xfc/0x140
[c0000004e87a7ad0] [c0000000003f7dd8] __vunmap+0x68/0x270
[c0000004e87a7b50] [c000000000079de4] __iounmap.part.0+0x34/0x60
[c0000004e87a7bb0] [c000000000376394] memunmap+0x54/0x70
[c0000004e87a7bd0] [c000000000881d7c] release_nodes+0x28c/0x300
[c0000004e87a7c40] [c00000000087a65c] device_release_driver_internal+0x16c/0x280
[c0000004e87a7c80] [c000000000876fc4] unbind_store+0x124/0x170
[c0000004e87a7cd0] [c000000000875be4] drv_attr_store+0x44/0x60
[c0000004e87a7cf0] [c00000000057c734] sysfs_kf_write+0x64/0x90
[c0000004e87a7d10] [c00000000057bc10] kernfs_fop_write+0x1b0/0x290
[c0000004e87a7d60] [c000000000488e6c] __vfs_write+0x3c/0x70
[c0000004e87a7d80] [c00000000048c868] vfs_write+0xd8/0x260
[c0000004e87a7dd0] [c00000000048ccac] ksys_write+0xdc/0x130
[c0000004e87a7e20] [c00000000000b588] system_call+0x5c/0x70
Link: http://lkml.kernel.org/r/20200807075933.310240-1-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Reported-by: Harish Sriram <harish(a)linux.ibm.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmalloc.c | 2 ++
1 file changed, 2 insertions(+)
--- a/mm/vmalloc.c~mm-vunmap-add-cond_resched-in-vunmap_pmd_range
+++ a/mm/vmalloc.c
@@ -104,6 +104,8 @@ static void vunmap_pmd_range(pud_t *pud,
if (pmd_none_or_clear_bad(pmd))
continue;
vunmap_pte_range(pmd, addr, next, mask);
+
+ cond_resched();
} while (pmd++, addr = next, addr != end);
}
_
Patches currently in -mm which might be from aneesh.kumar(a)linux.ibm.com are
The patch titled
Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()
has been removed from the -mm tree. Its filename was
khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()
syzbot crashes on the VM_BUG_ON_MM(khugepaged_test_exit(mm), mm) in
__khugepaged_enter(): yes, when one thread is about to dump core, has set
core_state, and is waiting for others, another might do something calling
__khugepaged_enter(), which now crashes because I lumped the core_state
test (known as "mmget_still_valid") into khugepaged_test_exit(). I still
think it's best to lump them together, so just in this exceptional case,
check mm->mm_users directly instead of khugepaged_test_exit().
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008141503370.18085@eggly.anvils
Fixes: bbe98f9cadff ("khugepaged: khugepaged_test_exit() check mmget_still_valid()")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Acked-by: Yang Shi <shy828301(a)gmail.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/khugepaged.c~khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter
+++ a/mm/khugepaged.c
@@ -466,7 +466,7 @@ int __khugepaged_enter(struct mm_struct
return -ENOMEM;
/* __khugepaged_exit() must not run from under us */
- VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
+ VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm);
if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
free_mm_slot(mm_slot);
return 0;
_
Patches currently in -mm which might be from hughd(a)google.com are
The following commit has been merged into the efi/urgent branch of tip:
Commit-ID: c8502eb2d43b6b9b1dc382299a4d37031be63876
Gitweb: https://git.kernel.org/tip/c8502eb2d43b6b9b1dc382299a4d37031be63876
Author: Arvind Sankar <nivedita(a)alum.mit.edu>
AuthorDate: Fri, 17 Jul 2020 15:45:26 -04:00
Committer: Ard Biesheuvel <ardb(a)kernel.org>
CommitterDate: Thu, 20 Aug 2020 11:18:36 +02:00
efi/x86: Mark kernel rodata non-executable for mixed mode
When remapping the kernel rodata section RO in the EFI pagetables, the
protection flags that were used for the text section are being reused,
but the rodata section should not be marked executable.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Arvind Sankar <nivedita(a)alum.mit.edu>
Link: https://lore.kernel.org/r/20200717194526.3452089-1-nivedita@alum.mit.edu
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
arch/x86/platform/efi/efi_64.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 413583f..6af4da1 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -259,6 +259,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT;
rodata = __pa(__start_rodata);
pfn = rodata >> PAGE_SHIFT;
+
+ pf = _PAGE_NX | _PAGE_ENC;
if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) {
pr_err("Failed to map kernel rodata 1:1\n");
return 1;
The following commit has been merged into the efi/urgent branch of tip:
Commit-ID: a37ca6a2af9df2972372b918f09390c9303acfbd
Gitweb: https://git.kernel.org/tip/a37ca6a2af9df2972372b918f09390c9303acfbd
Author: Arvind Sankar <nivedita(a)alum.mit.edu>
AuthorDate: Wed, 29 Jul 2020 15:33:00 -04:00
Committer: Ard Biesheuvel <ardb(a)kernel.org>
CommitterDate: Thu, 20 Aug 2020 11:18:55 +02:00
efi/libstub: Handle NULL cmdline
Treat a NULL cmdline the same as empty. Although this is unlikely to
happen in practice, the x86 kernel entry does check for NULL cmdline and
handles it, so do it here as well.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Arvind Sankar <nivedita(a)alum.mit.edu>
Link: https://lore.kernel.org/r/20200729193300.598448-1-nivedita@alum.mit.edu
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
drivers/firmware/efi/libstub/efi-stub-helper.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 37ff34e..f53652a 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -187,10 +187,14 @@ int efi_printk(const char *fmt, ...)
*/
efi_status_t efi_parse_options(char const *cmdline)
{
- size_t len = strlen(cmdline) + 1;
+ size_t len;
efi_status_t status;
char *str, *buf;
+ if (!cmdline)
+ return EFI_SUCCESS;
+
+ len = strlen(cmdline) + 1;
status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, len, (void **)&buf);
if (status != EFI_SUCCESS)
return status;
The patch titled
Subject: mm: madvise: fix vma user-after-free
has been added to the -mm tree. Its filename is
mm-madvise-fix-vma-user-after-free.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-madvise-fix-vma-user-after-fre…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-madvise-fix-vma-user-after-fre…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Yang Shi <shy828301(a)gmail.com>
Subject: mm: madvise: fix vma user-after-free
The syzbot reported the below use-after-free:
BUG: KASAN: use-after-free in madvise_willneed mm/madvise.c:293 [inline]
BUG: KASAN: use-after-free in madvise_vma mm/madvise.c:942 [inline]
BUG: KASAN: use-after-free in do_madvise.part.0+0x1c8b/0x1cf0 mm/madvise.c:1145
Read of size 8 at addr ffff8880a6163eb0 by task syz-executor.0/9996
CPU: 0 PID: 9996 Comm: syz-executor.0 Not tainted 5.9.0-rc1-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x18f/0x20d lib/dump_stack.c:118
print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383
__kasan_report mm/kasan/report.c:513 [inline]
kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
madvise_willneed mm/madvise.c:293 [inline]
madvise_vma mm/madvise.c:942 [inline]
do_madvise.part.0+0x1c8b/0x1cf0 mm/madvise.c:1145
do_madvise mm/madvise.c:1169 [inline]
__do_sys_madvise mm/madvise.c:1171 [inline]
__se_sys_madvise mm/madvise.c:1169 [inline]
__x64_sys_madvise+0xd9/0x110 mm/madvise.c:1169
do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45d4d9
Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f04f7464c78 EFLAGS: 00000246 ORIG_RAX: 000000000000001c
RAX: ffffffffffffffda RBX: 0000000000020800 RCX: 000000000045d4d9
RDX: 0000000000000003 RSI: 0000000000600003 RDI: 0000000020000000
RBP: 000000000118d020 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cfec
R13: 00007ffc579cce7f R14: 00007f04f74659c0 R15: 000000000118cfec
Allocated by task 9992:
kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
kasan_set_track mm/kasan/common.c:56 [inline]
__kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461
slab_post_alloc_hook mm/slab.h:518 [inline]
slab_alloc mm/slab.c:3312 [inline]
kmem_cache_alloc+0x138/0x3a0 mm/slab.c:3482
vm_area_alloc+0x1c/0x110 kernel/fork.c:347
mmap_region+0x8e5/0x1780 mm/mmap.c:1743
do_mmap+0xcf9/0x11d0 mm/mmap.c:1545
vm_mmap_pgoff+0x195/0x200 mm/util.c:506
ksys_mmap_pgoff+0x43a/0x560 mm/mmap.c:1596
do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Freed by task 9992:
kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
kasan_set_track+0x1c/0x30 mm/kasan/common.c:56
kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355
__kasan_slab_free+0xd8/0x120 mm/kasan/common.c:422
__cache_free mm/slab.c:3418 [inline]
kmem_cache_free.part.0+0x67/0x1f0 mm/slab.c:3693
remove_vma+0x132/0x170 mm/mmap.c:184
remove_vma_list mm/mmap.c:2613 [inline]
__do_munmap+0x743/0x1170 mm/mmap.c:2869
do_munmap mm/mmap.c:2877 [inline]
mmap_region+0x257/0x1780 mm/mmap.c:1716
do_mmap+0xcf9/0x11d0 mm/mmap.c:1545
vm_mmap_pgoff+0x195/0x200 mm/util.c:506
ksys_mmap_pgoff+0x43a/0x560 mm/mmap.c:1596
do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x44/0xa9
This is because the vma is accessed after releasing mmap_lock; someone
else can acquire the mmap_lock in that window, and the vma is gone by the
time it is dereferenced.
Releasing mmap_lock only after accessing the vma fixes the problem.
Link: https://lkml.kernel.org/r/20200816141204.162624-1-shy828301@gmail.com
Fixes: 692fe62433d4c ("mm: Handle MADV_WILLNEED through vfs_fadvise()")
Reported-by: syzbot+b90df26038d1d5d85c97(a)syzkaller.appspotmail.com
Signed-off-by: Yang Shi <shy828301(a)gmail.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/madvise.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/madvise.c~mm-madvise-fix-vma-user-after-free
+++ a/mm/madvise.c
@@ -289,9 +289,9 @@ static long madvise_willneed(struct vm_a
*/
*prev = NULL; /* tell sys_madvise we drop mmap_lock */
get_file(file);
- mmap_read_unlock(current->mm);
offset = (loff_t)(start - vma->vm_start)
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ mmap_read_unlock(current->mm);
vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
fput(file);
mmap_read_lock(current->mm);
_
Patches currently in -mm which might be from shy828301(a)gmail.com are
mm-madvise-fix-vma-user-after-free.patch
From: Nick Desaulniers <ndesaulniers(a)google.com>
Basically, consider .text.{hot|unlikely|unknown}.* part of .text, too.
When compiling with profiling information (collected via PGO
instrumentations or AutoFDO sampling), Clang will separate code into
.text.hot, .text.unlikely, or .text.unknown sections based on profiling
information. After D79600 (clang-11), these sections will have a
trailing `.` suffix, i.e. .text.hot., .text.unlikely., .text.unknown..
When using -ffunction-sections together with profiling information,
either explicitly (FGKASLR) or implicitly (LTO), code may be placed in
sections following the convention:
.text.hot.<foo>, .text.unlikely.<bar>, .text.unknown.<baz>
where <foo>, <bar>, and <baz> are functions. (This produces one section
per function; we generally try to merge these all back via linker script
so that we don't have 50k sections).
For the above cases, we need to teach our linker scripts that such
sections might exist and that we'd explicitly like them grouped
together, otherwise we can wind up with code outside of the
_stext/_etext boundaries that might not be mapped properly for some
architectures, resulting in boot failures.
If the linker script is not told about possible input sections, then
where the section is placed as output is a heuristic-laden mess that's
non-portable between linkers (i.e. BFD and LLD), and has resulted in many
hard to debug bugs. Kees Cook is working on cleaning this up by adding
--orphan-handling=warn linker flag used in ARCH=powerpc to additional
architectures. In the case of linker scripts, borrowing from the Zen of
Python: explicit is better than implicit.
Also, ld.bfd's internal linker script considers .text.hot AND
.text.hot.* to be part of .text, as well as .text.unlikely and
.text.unlikely.*. I didn't see support for .text.unknown.*, and didn't
see Clang producing such code in our kernel builds, but I see code in
LLVM that can produce such section names if profiling information is
missing. That may point to a larger issue with generating or collecting
profiles, but I would much rather be safe and explicit than have to
debug yet another issue related to orphan section placement.
Reported-by: Jian Cai <jiancai(a)google.com>
Suggested-by: Fāng-ruì Sòng <maskray(a)google.com>
Tested-by: Luis Lozano <llozano(a)google.com>
Tested-by: Manoj Gupta <manojgupta(a)google.com>
Acked-by: Kees Cook <keescook(a)chromium.org>
Cc: stable(a)vger.kernel.org
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=add44f8d5c5c0…
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=1de778ed23ce7…
Link: https://reviews.llvm.org/D79600
Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1084760
Debugged-by: Luis Lozano <llozano(a)google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
---
include/asm-generic/vmlinux.lds.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 2593957f6e8b..af5211ca857c 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -561,7 +561,10 @@
*/
#define TEXT_TEXT \
ALIGN_FUNCTION(); \
- *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \
+ *(.text.hot .text.hot.*) \
+ *(TEXT_MAIN .text.fixup) \
+ *(.text.unlikely .text.unlikely.*) \
+ *(.text.unknown .text.unknown.*) \
NOINSTR_TEXT \
*(.text..refcount) \
*(.ref.text) \
--
2.25.1
When the primary firmware node pointer is removed from a
device (set to NULL), the secondary firmware node pointer,
when it exists, is made the primary node for the device.
However, the secondary firmware node pointer of the original
primary firmware node is never cleared (set to NULL).
To avoid a situation where the secondary firmware node pointer
points to a non-existent object, clear it properly
when the primary node is removed from a device in
set_primary_fwnode().
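A minimal standalone model of the hazard (types pared down for
illustration; this mirrors the fix in the diff below, not the real driver
core structures):
#include <stdio.h>
struct fwnode_handle {
	struct fwnode_handle *secondary;
};
/* Hand the device over to the secondary node and sever the primary's
 * back-reference so it cannot be left dangling.
 */
static void drop_primary(struct fwnode_handle **dev_fwnode)
{
	struct fwnode_handle *fn = *dev_fwnode;

	*dev_fwnode = fn->secondary;	/* secondary becomes the node in use */
	fn->secondary = NULL;		/* the fix: clear the stale pointer */
}
int main(void)
{
	struct fwnode_handle secondary = { NULL };
	struct fwnode_handle primary = { &secondary };
	struct fwnode_handle *dev_fwnode = &primary;

	drop_primary(&dev_fwnode);
	printf("primary->secondary cleared: %s\n",
	       primary.secondary == NULL ? "yes" : "no");
	return 0;
}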
Fixes: 97badf873ab6 ("device property: Make it possible to use secondary firmware nodes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
---
drivers/base/core.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index ac1046a382bc0..f6f620aa94086 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -4264,9 +4264,9 @@ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode)
*/
void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
{
- if (fwnode) {
- struct fwnode_handle *fn = dev->fwnode;
+ struct fwnode_handle *fn = dev->fwnode;
+ if (fwnode) {
if (fwnode_is_primary(fn))
fn = fn->secondary;
@@ -4276,8 +4276,12 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
}
dev->fwnode = fwnode;
} else {
- dev->fwnode = fwnode_is_primary(dev->fwnode) ?
- dev->fwnode->secondary : NULL;
+ if (fwnode_is_primary(fn)) {
+ dev->fwnode = fn->secondary;
+ fn->secondary = NULL;
+ } else {
+ dev->fwnode = NULL;
+ }
}
}
EXPORT_SYMBOL_GPL(set_primary_fwnode);
--
2.28.0
This is the start of the stable review cycle for the 5.4.60 release.
There are 152 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat, 22 Aug 2020 09:15:09 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.60-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.60-rc1
hersen wu <hersenxs.wu(a)amd.com>
drm/amd/display: dchubbub p-state warning during surface planes switch
Sandeep Raghuraman <sandy.8925(a)gmail.com>
drm/amdgpu: Fix bug where DPM is not enabled after hibernate and resume
Xin Xiong <xiongx18(a)fudan.edu.cn>
drm: fix drm_dp_mst_port refcount leaks in drm_dp_mst_allocate_vcpi
Marius Iacob <themariusus(a)gmail.com>
drm: Added orientation quirk for ASUS tablet model T103HAF
Denis Efremov <efremov(a)linux.com>
drm/panfrost: Use kvfree() to free bo->sgts
Denis Efremov <efremov(a)linux.com>
drm/radeon: fix fb_div check in ni_init_smc_spll_table()
Tomasz Maciej Nowak <tmn505(a)gmail.com>
arm64: dts: marvell: espressobin: add ethernet alias
Hugh Dickins <hughd(a)google.com>
khugepaged: retract_page_tables() remember to test exit
Geert Uytterhoeven <geert+renesas(a)glider.be>
sh: landisk: Add missing initialization of sh_io_port_base
Zhang Rui <rui.zhang(a)intel.com>
perf/x86/rapl: Fix missing psys sysfs attributes
Daniel Díaz <daniel.diaz(a)linaro.org>
tools build feature: Quote CC and CXX for their arguments
Vincent Whitchurch <vincent.whitchurch(a)axis.com>
perf bench mem: Always memset source before memcpy
Dinghao Liu <dinghao.liu(a)zju.edu.cn>
ALSA: echoaudio: Fix potential Oops in snd_echo_resume()
Ondrej Mosnacek <omosnace(a)redhat.com>
crypto: algif_aead - fix uninitialized ctx->init
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
mfd: dln2: Run event handler loop under spinlock
Dhananjay Phadke <dphadke(a)linux.microsoft.com>
i2c: iproc: fix race between client unreg and isr
Tiezhu Yang <yangtiezhu(a)loongson.cn>
test_kmod: avoid potential double free in trigger_config_run_type()
Colin Ian King <colin.king(a)canonical.com>
fs/ufs: avoid potential u32 multiplication overflow
Eric Biggers <ebiggers(a)google.com>
fs/minix: remove expected error message in block_to_path()
Eric Biggers <ebiggers(a)google.com>
fs/minix: fix block limit check for V1 filesystems
Eric Biggers <ebiggers(a)google.com>
fs/minix: set s_maxbytes correctly
Jeffrey Mitchell <jeffrey.mitchell(a)starlab.io>
nfs: Fix getxattr kernel panic and memory overflow
Wang Hai <wanghai38(a)huawei.com>
net: qcom/emac: add missed clk_disable_unprepare in error path of emac_clks_phase1_init
Dan Carpenter <dan.carpenter(a)oracle.com>
drm/vmwgfx: Fix two list_for_each loop exit tests
Dan Carpenter <dan.carpenter(a)oracle.com>
drm/vmwgfx: Use correct vmw_legacy_display_unit pointer
Christophe Leroy <christophe.leroy(a)csgroup.eu>
recordmcount: Fix build failure on non arm64
Colin Ian King <colin.king(a)canonical.com>
Input: sentelic - fix error return when fsp_reg_write fails
Dilip Kota <eswara.kota(a)linux.intel.com>
x86/tsr: Fix tsc frequency enumeration bug on Lightning Mountain SoC
Dan Carpenter <dan.carpenter(a)oracle.com>
md-cluster: Fix potential error pointer dereference in resize_bitmaps()
Krzysztof Sobota <krzysztof.sobota(a)nokia.com>
watchdog: initialize device before misc_register
Scott Mayhew <smayhew(a)redhat.com>
nfs: nfs_file_write() should check for writeback errors
Ewan D. Milne <emilne(a)redhat.com>
scsi: lpfc: nvmet: Avoid hang / use-after-free again when destroying targetport
Stafford Horne <shorne(a)gmail.com>
openrisc: Fix oops caused when dumping stack
Jane Chu <jane.chu(a)oracle.com>
libnvdimm/security: ensure sysfs poll thread woke up and fetch updated attr
Jane Chu <jane.chu(a)oracle.com>
libnvdimm/security: fix a typo
Nicolas Saenz Julienne <nsaenzjulienne(a)suse.de>
clk: bcm2835: Do not use prediv with bcm2711's PLLs
Zhihao Cheng <chengzhihao1(a)huawei.com>
ubifs: Fix wrong orphan node deletion in ubifs_jnl_update|rename
Scott Mayhew <smayhew(a)redhat.com>
nfs: ensure correct writeback errors are returned on close()
Wolfram Sang <wsa+renesas(a)sang-engineering.com>
i2c: rcar: avoid race when unregistering slave
Thomas Hebb <tommyhebb(a)gmail.com>
tools build feature: Use CC and CXX from parent
Rayagonda Kokatanur <rayagonda.kokatanur(a)broadcom.com>
pwm: bcm-iproc: handle clk_get_rate() return
Xu Wang <vulab(a)iscas.ac.cn>
clk: clk-atlas6: fix return value check in atlas6_clk_init()
Konrad Dybcio <konradybcio(a)gmail.com>
clk: qcom: gcc-sdm660: Fix up gcc_mss_mnoc_bimc_axi_clk
Wolfram Sang <wsa+renesas(a)sang-engineering.com>
i2c: rcar: slave: only send STOP event when we have been addressed
Liu Yi L <yi.l.liu(a)intel.com>
iommu/vt-d: Enforce PASID devTLB field mask
Jonathan Marek <jonathan(a)marek.ca>
clk: qcom: clk-alpha-pll: remove unused/incorrect PLL_CAL_VAL
Jonathan Marek <jonathan(a)marek.ca>
clk: qcom: gcc: fix sm8150 GPU and NPU clocks
Colin Ian King <colin.king(a)canonical.com>
iommu/omap: Check for failure of a call to omap_iommu_dump_ctx
Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
selftests/powerpc: ptrace-pkey: Don't update expected UAMOR value
Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
selftests/powerpc: ptrace-pkey: Update the test to mark an invalid pkey correctly
Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
selftests/powerpc: ptrace-pkey: Rename variables to make it easier to follow code
Cristian Ciocaltea <cristian.ciocaltea(a)gmail.com>
clk: actions: Fix h_clk for Actions S500 SoC
Ming Lei <ming.lei(a)redhat.com>
dm rq: don't call blk_mq_queue_stopped() in dm_stop_queue()
Steve Longerbeam <slongerbeam(a)gmail.com>
gpu: ipu-v3: image-convert: Wait for all EOFs before completing a tile
Steve Longerbeam <slongerbeam(a)gmail.com>
gpu: ipu-v3: image-convert: Combine rotate/no-rotate irq handlers
Herbert Xu <herbert(a)gondor.apana.org.au>
crypto: caam - Remove broken arc4 support
Yoshihiro Shimoda <yoshihiro.shimoda.uh(a)renesas.com>
mmc: renesas_sdhi_internal_dmac: clean up the code for dma complete
Mark Zhang <markz(a)mellanox.com>
RDMA/counter: Allow manually bind QPs with different pids to same counter
Mark Zhang <markz(a)mellanox.com>
RDMA/counter: Only bind user QPs in auto mode
Vladimir Oltean <vladimir.oltean(a)nxp.com>
devres: keep both device name and resource name in pretty name
Herbert Xu <herbert(a)gondor.apana.org.au>
crypto: af_alg - Fix regression on empty requests
Johan Hovold <johan(a)kernel.org>
USB: serial: ftdi_sio: fix break and sysrq handling
Johan Hovold <johan(a)kernel.org>
USB: serial: ftdi_sio: clean up receive processing
Johan Hovold <johan(a)kernel.org>
USB: serial: ftdi_sio: make process-packet buffer unsigned
Jesper Dangaard Brouer <brouer(a)redhat.com>
selftests/bpf: test_progs use another shell exit on non-actions
Jesper Dangaard Brouer <brouer(a)redhat.com>
selftests/bpf: Test_progs indicate to shell on non-actions
Yishai Hadas <yishaih(a)mellanox.com>
IB/uverbs: Set IOVA on IB MR in uverbs layer
Paul Kocialkowski <paul.kocialkowski(a)bootlin.com>
media: rockchip: rga: Only set output CSC mode for RGB input
Paul Kocialkowski <paul.kocialkowski(a)bootlin.com>
media: rockchip: rga: Introduce color fmt macros and refactor CSC mode logic
Jason Gunthorpe <jgg(a)nvidia.com>
RDMA/ipoib: Fix ABBA deadlock with ipoib_reap_ah()
Kamal Heib <kamalheib1(a)gmail.com>
RDMA/ipoib: Return void from ipoib_ib_dev_stop()
Qiushi Wu <wu000273(a)umn.edu>
platform/chrome: cros_ec_ishtp: Fix a double-unlock issue
Boris Brezillon <boris.brezillon(a)collabora.com>
mtd: rawnand: fsl_upm: Remove unused mtd var
Eric Dumazet <edumazet(a)google.com>
octeontx2-af: change (struct qmem)->entry_sz from u8 to u16
Charles Keepax <ckeepax(a)opensource.cirrus.com>
mfd: arizona: Ensure 32k clock is put on driver unbind and error
Herbert Xu <herbert(a)gondor.apana.org.au>
crypto: algif_aead - Only wake up when ctx->more is zero
Paul Cercueil <paul(a)crapouillou.net>
pinctrl: ingenic: Properly detect GPIO direction when configured for IRQ
Mike Marshall <hubcap(a)omnibond.com>
orangefs: get rid of knob code...
Liu Ying <victor.liu(a)nxp.com>
drm/imx: imx-ldb: Disable both channels for split mode in enc->disable()
Sibi Sankar <sibis(a)codeaurora.org>
remoteproc: qcom_q6v5_mss: Validate modem blob firmware size before load
Sibi Sankar <sibis(a)codeaurora.org>
remoteproc: qcom_q6v5_mss: Validate MBA firmware size before load
Sibi Sankar <sibis(a)codeaurora.org>
remoteproc: qcom: q6v5: Update running state before requesting stop
Adrian Hunter <adrian.hunter(a)intel.com>
perf intel-pt: Fix duplicate branch after CBR
Adrian Hunter <adrian.hunter(a)intel.com>
perf intel-pt: Fix FUP packet state
Kees Cook <keescook(a)chromium.org>
module: Correctly truncate sysfs sections output
Anton Blanchard <anton(a)ozlabs.org>
pseries: Fix 64 bit logical memory block panic
Jeff Layton <jlayton(a)kernel.org>
ceph: handle zero-length feature mask in session messages
Jeff Layton <jlayton(a)kernel.org>
ceph: set sec_context xattr on symlink creation
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
watchdog: f71808e_wdt: clear watchdog timeout occurred flag
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
watchdog: f71808e_wdt: remove use of wrong watchdog_info option
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
watchdog: f71808e_wdt: indicate WDIOF_CARDRESET support in watchdog_info.options
Steven Rostedt (VMware) <rostedt(a)goodmis.org>
tracing: Move pipe reference to trace array instead of current_tracer
Steven Rostedt (VMware) <rostedt(a)goodmis.org>
tracing: Use trace_sched_process_free() instead of exit() for pid tracing
Kevin Hao <haokexin(a)gmail.com>
tracing/hwlat: Honor the tracing_cpumask
Muchun Song <songmuchun(a)bytedance.com>
kprobes: Fix NULL pointer dereference at kprobe_ftrace_handler
Chengming Zhou <zhouchengming(a)bytedance.com>
ftrace: Setup correct FTRACE_FL_REGS flags for module
Jia He <justin.he(a)arm.com>
mm/memory_hotplug: fix unpaired mem_hotplug_begin/done
Michal Koutný <mkoutny(a)suse.com>
mm/page_counter.c: fix protection usage propagation
Junxiao Bi <junxiao.bi(a)oracle.com>
ocfs2: change slot number type s16 to u16
Hugh Dickins <hughd(a)google.com>
khugepaged: collapse_pte_mapped_thp() protect the pmd lock
Hugh Dickins <hughd(a)google.com>
khugepaged: collapse_pte_mapped_thp() flush the right range
Mikulas Patocka <mpatocka(a)redhat.com>
ext2: fix missing percpu_counter_inc
Paul Cercueil <paul(a)crapouillou.net>
MIPS: qi_lb60: Fix routing to audio amplifier
Huacai Chen <chenhc(a)lemote.com>
MIPS: CPU#0 is not hotpluggable
Lukas Wunner <lukas(a)wunner.de>
driver core: Avoid binding drivers to dead devices
Johannes Berg <johannes.berg(a)intel.com>
mac80211: fix misplaced while instead of if
Coly Li <colyli(a)suse.de>
bcache: fix overflow in offset_to_stripe()
Coly Li <colyli(a)suse.de>
bcache: allocate meta data pages as compound pages
ChangSyun Peng <allenpeng(a)synology.com>
md/raid5: Fix Force reconstruct-write io stuck in degraded raid5
Kees Cook <keescook(a)chromium.org>
net/compat: Add missing sock updates for SCM_RIGHTS
Jonathan McDowell <noodles(a)earth.li>
net: stmmac: dwmac1000: provide multicast filter fallback
Jonathan McDowell <noodles(a)earth.li>
net: ethernet: stmmac: Disable hardware multicast filter
Eugeniu Rosca <erosca(a)de.adit-jv.com>
media: vsp1: dl: Fix NULL pointer dereference on unbind
Paul Cercueil <paul(a)crapouillou.net>
pinctrl: ingenic: Enhance support for IRQ_TYPE_EDGE_BOTH
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc: Fix circular dependency between percpu.h and mmu.h
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc: Allow 4224 bytes of stack expansion for the signal frame
Christophe Leroy <christophe.leroy(a)csgroup.eu>
powerpc/ptdump: Fix build failure in hashpagetable.c
Paul Aurich <paul(a)darkrain42.org>
cifs: Fix leak when handling lease break for cached root fid
Max Filippov <jcmvbkbc(a)gmail.com>
xtensa: fix xtensa_pmu_setup prototype
Max Filippov <jcmvbkbc(a)gmail.com>
xtensa: add missing exclusive access state management
Alexandru Ardelean <alexandru.ardelean(a)analog.com>
iio: dac: ad5592r: fix unbalanced mutex unlocks in ad5592r_read_raw()
Christian Eggers <ceggers(a)arri.de>
dt-bindings: iio: io-channel-mux: Fix compatible string in example code
Shaokun Zhang <zhangshaokun(a)hisilicon.com>
arm64: perf: Correct the event index in sysfs
Pavel Machek <pavel(a)denx.de>
btrfs: fix return value mixup in btrfs_get_extent
Josef Bacik <josef(a)toxicpanda.com>
btrfs: make sure SB_I_VERSION doesn't get unset by remount
Filipe Manana <fdmanana(a)suse.com>
btrfs: fix memory leaks after failure to lookup checksums during inode logging
Qu Wenruo <wqu(a)suse.com>
btrfs: inode: fix NULL pointer dereference if inode doesn't need compression
Josef Bacik <josef(a)toxicpanda.com>
btrfs: only search for left_info if there is no right_info in try_merge_free_space
David Sterba <dsterba(a)suse.com>
btrfs: fix messages after changing compression level by remount
Filipe Manana <fdmanana(a)suse.com>
btrfs: fix race between page release and a fast fsync
Josef Bacik <josef(a)toxicpanda.com>
btrfs: don't WARN if we abort a transaction with EROFS
Josef Bacik <josef(a)toxicpanda.com>
btrfs: sysfs: use NOFS for device creation
Qu Wenruo <wqu(a)suse.com>
btrfs: avoid possible signal interruption of btrfs_drop_snapshot() on relocation tree
David Sterba <dsterba(a)suse.com>
btrfs: add missing check for nocow and compression inode flags
Qu Wenruo <wqu(a)suse.com>
btrfs: relocation: review the call sites which can be interrupted by signal
Josef Bacik <josef(a)toxicpanda.com>
btrfs: move the chunk_mutex in btrfs_read_chunk_tree
Josef Bacik <josef(a)toxicpanda.com>
btrfs: open device without device_list_mutex
Anand Jain <anand.jain(a)oracle.com>
btrfs: don't traverse into the seed devices in show_devname
Filipe Manana <fdmanana(a)suse.com>
btrfs: remove no longer needed use of log_writers for the log root tree
Filipe Manana <fdmanana(a)suse.com>
btrfs: stop incremening log_batch for the log root tree when syncing log
Tom Rix <trix(a)redhat.com>
btrfs: ref-verify: fix memory leak in add_block_entry
Qu Wenruo <wqu(a)suse.com>
btrfs: don't allocate anonymous block device for user invisible roots
Qu Wenruo <wqu(a)suse.com>
btrfs: free anon block device right after subvolume deletion
David Sterba <dsterba(a)suse.com>
btrfs: allow use of global block reserve for balance item deletion
Ansuel Smith <ansuelsmth(a)gmail.com>
PCI: qcom: Add support for tx term offset for rev 2.1.0
Ansuel Smith <ansuelsmth(a)gmail.com>
PCI: qcom: Define some PARF params needed for ipq8064 SoC
Rajat Jain <rajatja(a)google.com>
PCI: Add device even if driver attach failed
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
PCI: Mark AMD Navi10 GPU rev 0x00 ATS as broken
Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
PCI: hotplug: ACPI: Fix context refcounting in acpiphp_grab_context()
Guenter Roeck <linux(a)roeck-us.net>
genirq/PM: Always unlock IRQ descriptor in rearm_wake_irq()
Thomas Gleixner <tglx(a)linutronix.de>
genirq/affinity: Make affinity setting if activated opt-in
Steve French <stfrench(a)microsoft.com>
smb3: warn on confusing error scenario with sec=krb5
-------------
Diffstat:
.../bindings/iio/multiplexer/io-channel-mux.txt | 2 +-
Makefile | 4 +-
.../boot/dts/marvell/armada-3720-espressobin.dts | 6 +
arch/arm64/kernel/perf_event.c | 13 +-
arch/mips/boot/dts/ingenic/qi_lb60.dts | 2 +-
arch/mips/kernel/topology.c | 2 +-
arch/openrisc/kernel/stacktrace.c | 18 ++-
arch/powerpc/include/asm/percpu.h | 4 +-
arch/powerpc/mm/fault.c | 7 +-
arch/powerpc/mm/ptdump/hashpagetable.c | 2 +-
arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +-
arch/sh/boards/mach-landisk/setup.c | 3 +
arch/x86/events/rapl.c | 2 +-
arch/x86/kernel/apic/vector.c | 4 +
arch/x86/kernel/tsc_msr.c | 9 +-
arch/xtensa/include/asm/thread_info.h | 4 +
arch/xtensa/kernel/asm-offsets.c | 3 +
arch/xtensa/kernel/entry.S | 11 ++
arch/xtensa/kernel/perf_event.c | 2 +-
crypto/af_alg.c | 11 +-
crypto/algif_aead.c | 10 +-
crypto/algif_skcipher.c | 11 +-
drivers/base/dd.c | 4 +-
drivers/clk/actions/owl-s500.c | 2 +-
drivers/clk/bcm/clk-bcm2835.c | 25 +++-
drivers/clk/qcom/clk-alpha-pll.c | 2 -
drivers/clk/qcom/gcc-sdm660.c | 3 +
drivers/clk/qcom/gcc-sm8150.c | 8 +-
drivers/clk/sirf/clk-atlas6.c | 2 +-
drivers/crypto/caam/caamalg.c | 29 -----
drivers/crypto/caam/compat.h | 1 -
.../drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c | 69 +++++++++-
drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 5 +-
drivers/gpu/drm/drm_dp_mst_topology.c | 7 +-
drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 +
drivers/gpu/drm/imx/imx-ldb.c | 7 +-
drivers/gpu/drm/panfrost/panfrost_gem.c | 2 +-
drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +-
drivers/gpu/drm/radeon/ni_dpm.c | 2 +-
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 +-
drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 5 +-
drivers/gpu/ipu-v3/ipu-image-convert.c | 145 +++++++++++++--------
drivers/i2c/busses/i2c-bcm-iproc.c | 13 +-
drivers/i2c/busses/i2c-rcar.c | 15 ++-
drivers/iio/dac/ad5592r-base.c | 4 +-
drivers/infiniband/core/counters.c | 4 +-
drivers/infiniband/core/uverbs_cmd.c | 4 +
drivers/infiniband/hw/cxgb4/mem.c | 1 -
drivers/infiniband/hw/mlx4/mr.c | 1 -
drivers/infiniband/ulp/ipoib/ipoib.h | 2 +-
drivers/infiniband/ulp/ipoib/ipoib_ib.c | 67 +++++-----
drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +
drivers/input/mouse/sentelic.c | 2 +-
drivers/iommu/omap-iommu-debug.c | 3 +
drivers/irqchip/irq-gic-v3-its.c | 5 +-
drivers/md/bcache/bcache.h | 2 +-
drivers/md/bcache/bset.c | 2 +-
drivers/md/bcache/btree.c | 2 +-
drivers/md/bcache/journal.c | 4 +-
drivers/md/bcache/super.c | 2 +-
drivers/md/bcache/writeback.c | 14 +-
drivers/md/bcache/writeback.h | 19 ++-
drivers/md/dm-rq.c | 3 -
drivers/md/md-cluster.c | 1 +
drivers/md/raid5.c | 3 +-
drivers/media/platform/rockchip/rga/rga-hw.c | 29 +++--
drivers/media/platform/rockchip/rga/rga-hw.h | 5 +
drivers/media/platform/vsp1/vsp1_dl.c | 2 +
drivers/mfd/arizona-core.c | 18 +++
drivers/mfd/dln2.c | 4 +
drivers/mmc/host/renesas_sdhi_internal_dmac.c | 18 ++-
drivers/mtd/nand/raw/fsl_upm.c | 1 -
drivers/net/ethernet/marvell/octeontx2/af/common.h | 2 +-
drivers/net/ethernet/qualcomm/emac/emac.c | 17 ++-
.../net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 1 +
.../net/ethernet/stmicro/stmmac/dwmac1000_core.c | 3 +
drivers/nvdimm/security.c | 13 +-
drivers/pci/bus.c | 6 +-
drivers/pci/controller/dwc/pcie-qcom.c | 41 +++++-
drivers/pci/hotplug/acpiphp_glue.c | 14 +-
drivers/pci/quirks.c | 5 +-
drivers/pinctrl/pinctrl-ingenic.c | 9 +-
drivers/platform/chrome/cros_ec_ishtp.c | 4 +-
drivers/pwm/pwm-bcm-iproc.c | 9 +-
drivers/remoteproc/qcom_q6v5.c | 2 +
drivers/remoteproc/qcom_q6v5_mss.c | 11 +-
drivers/scsi/lpfc/lpfc_nvmet.c | 2 +-
drivers/usb/serial/ftdi_sio.c | 57 ++++----
drivers/watchdog/f71808e_wdt.c | 13 +-
drivers/watchdog/watchdog_dev.c | 18 +--
fs/btrfs/ctree.h | 4 +-
fs/btrfs/disk-io.c | 13 +-
fs/btrfs/extent-tree.c | 9 +-
fs/btrfs/extent_io.c | 16 ++-
fs/btrfs/free-space-cache.c | 4 +-
fs/btrfs/inode.c | 20 ++-
fs/btrfs/ioctl.c | 30 +++--
fs/btrfs/ref-verify.c | 2 +
fs/btrfs/relocation.c | 12 +-
fs/btrfs/super.c | 41 +++---
fs/btrfs/sysfs.c | 3 +
fs/btrfs/tree-log.c | 22 +---
fs/btrfs/volumes.c | 44 ++++++-
fs/ceph/dir.c | 4 +
fs/ceph/mds_client.c | 6 +-
fs/cifs/smb2misc.c | 73 ++++++++---
fs/cifs/smb2pdu.c | 2 +
fs/ext2/ialloc.c | 3 +-
fs/minix/inode.c | 12 +-
fs/minix/itree_v1.c | 12 +-
fs/minix/itree_v2.c | 13 +-
fs/minix/minix.h | 1 -
fs/nfs/file.c | 17 ++-
fs/nfs/nfs4file.c | 5 +-
fs/nfs/nfs4proc.c | 2 -
fs/nfs/nfs4xdr.c | 6 +-
fs/ocfs2/ocfs2.h | 4 +-
fs/ocfs2/suballoc.c | 4 +-
fs/ocfs2/super.c | 4 +-
fs/orangefs/file.c | 26 +---
fs/orangefs/inode.c | 39 +-----
fs/orangefs/orangefs-kernel.h | 4 -
fs/ubifs/journal.c | 10 +-
fs/ufs/super.c | 2 +-
include/crypto/if_alg.h | 4 +-
include/linux/intel-iommu.h | 4 +-
include/linux/irq.h | 13 ++
include/net/sock.h | 4 +
kernel/irq/manage.c | 6 +-
kernel/irq/pm.c | 8 +-
kernel/kprobes.c | 7 +
kernel/module.c | 22 +++-
kernel/trace/ftrace.c | 15 ++-
kernel/trace/trace.c | 12 +-
kernel/trace/trace.h | 2 +-
kernel/trace/trace_events.c | 4 +-
kernel/trace/trace_hwlat.c | 5 +-
lib/devres.c | 11 +-
lib/test_kmod.c | 2 +-
mm/khugepaged.c | 70 +++++-----
mm/memory_hotplug.c | 5 +-
mm/page_counter.c | 6 +-
net/compat.c | 1 +
net/core/sock.c | 21 +++
net/mac80211/sta_info.c | 2 +-
scripts/recordmcount.c | 2 +
sound/pci/echoaudio/echoaudio.c | 2 -
tools/build/Makefile.feature | 2 +-
tools/build/feature/Makefile | 2 -
tools/perf/bench/mem-functions.c | 21 +--
.../perf/util/intel-pt-decoder/intel-pt-decoder.c | 29 ++---
tools/testing/selftests/bpf/test_progs.c | 5 +
.../testing/selftests/powerpc/ptrace/ptrace-pkey.c | 55 ++++----
153 files changed, 1120 insertions(+), 636 deletions(-)
From: Randy Dunlap <rdunlap(a)infradead.org>
[ Upstream commit c7fabbc51352f50cc58242a6dc3b9c1a3599849b ]
Drop duplicated words in sound/pci/.
{and, the, at}
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Link: https://lore.kernel.org/r/20200806021926.32418-1-rdunlap@infradead.org
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
sound/pci/cs46xx/cs46xx_lib.c | 2 +-
sound/pci/cs46xx/dsp_spos_scb_lib.c | 2 +-
sound/pci/hda/hda_codec.c | 2 +-
sound/pci/hda/hda_generic.c | 2 +-
sound/pci/hda/patch_sigmatel.c | 2 +-
sound/pci/ice1712/prodigy192.c | 2 +-
sound/pci/oxygen/xonar_dg.c | 2 +-
7 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c
index 2706f271a83b0..8a174c170e0aa 100644
--- a/sound/pci/cs46xx/cs46xx_lib.c
+++ b/sound/pci/cs46xx/cs46xx_lib.c
@@ -780,7 +780,7 @@ static void snd_cs46xx_set_capture_sample_rate(struct snd_cs46xx *chip, unsigned
rate = 48000 / 9;
/*
- * We can not capture at at rate greater than the Input Rate (48000).
+ * We can not capture at a rate greater than the Input Rate (48000).
* Return an error if an attempt is made to stray outside that limit.
*/
if (rate > 48000)
diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c
index 7488e1b7a7707..4e726d39b05d1 100644
--- a/sound/pci/cs46xx/dsp_spos_scb_lib.c
+++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c
@@ -1742,7 +1742,7 @@ int cs46xx_iec958_pre_open (struct snd_cs46xx *chip)
struct dsp_spos_instance * ins = chip->dsp_spos_instance;
if ( ins->spdif_status_out & DSP_SPDIF_STATUS_OUTPUT_ENABLED ) {
- /* remove AsynchFGTxSCB and and PCMSerialInput_II */
+ /* remove AsynchFGTxSCB and PCMSerialInput_II */
cs46xx_dsp_disable_spdif_out (chip);
/* save state */
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 825d9b27dbe12..4962a9d8a572b 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -3496,7 +3496,7 @@ EXPORT_SYMBOL_GPL(snd_hda_set_power_save);
* @nid: NID to check / update
*
* Check whether the given NID is in the amp list. If it's in the list,
- * check the current AMP status, and update the the power-status according
+ * check the current AMP status, and update the power-status according
* to the mute status.
*
* This function is supposed to be set or called from the check_power_status
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 869c322ddae31..7cd1047a4edf3 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -837,7 +837,7 @@ static void activate_amp_in(struct hda_codec *codec, struct nid_path *path,
}
}
-/* sync power of each widget in the the given path */
+/* sync power of each widget in the given path */
static hda_nid_t path_power_update(struct hda_codec *codec,
struct nid_path *path,
bool allow_powerdown)
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index d1a6d20ace0da..80b72d0702c5e 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -862,7 +862,7 @@ static int stac_auto_create_beep_ctls(struct hda_codec *codec,
static struct snd_kcontrol_new beep_vol_ctl =
HDA_CODEC_VOLUME(NULL, 0, 0, 0);
- /* check for mute support for the the amp */
+ /* check for mute support for the amp */
if ((caps & AC_AMPCAP_MUTE) >> AC_AMPCAP_MUTE_SHIFT) {
const struct snd_kcontrol_new *temp;
if (spec->anabeep_nid == nid)
diff --git a/sound/pci/ice1712/prodigy192.c b/sound/pci/ice1712/prodigy192.c
index 3919aed39ca03..5e52086d7b986 100644
--- a/sound/pci/ice1712/prodigy192.c
+++ b/sound/pci/ice1712/prodigy192.c
@@ -31,7 +31,7 @@
* Experimentally I found out that only a combination of
* OCKS0=1, OCKS1=1 (128fs, 64fs output) and ice1724 -
* VT1724_MT_I2S_MCLK_128X=0 (256fs input) yields correct
- * sampling rate. That means the the FPGA doubles the
+ * sampling rate. That means that the FPGA doubles the
* MCK01 rate.
*
* Copyright (c) 2003 Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/oxygen/xonar_dg.c b/sound/pci/oxygen/xonar_dg.c
index 4cf3200e988b0..df44135e1b0c9 100644
--- a/sound/pci/oxygen/xonar_dg.c
+++ b/sound/pci/oxygen/xonar_dg.c
@@ -39,7 +39,7 @@
* GPIO 4 <- headphone detect
* GPIO 5 -> enable ADC analog circuit for the left channel
* GPIO 6 -> enable ADC analog circuit for the right channel
- * GPIO 7 -> switch green rear output jack between CS4245 and and the first
+ * GPIO 7 -> switch green rear output jack between CS4245 and the first
* channel of CS4361 (mechanical relay)
* GPIO 8 -> enable output to speakers
*
--
2.25.1
From: Randy Dunlap <rdunlap(a)infradead.org>
[ Upstream commit c7fabbc51352f50cc58242a6dc3b9c1a3599849b ]
Drop duplicated words in sound/pci/.
{and, the, at}
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Link: https://lore.kernel.org/r/20200806021926.32418-1-rdunlap@infradead.org
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
sound/pci/cs46xx/cs46xx_lib.c | 2 +-
sound/pci/cs46xx/dsp_spos_scb_lib.c | 2 +-
sound/pci/hda/hda_codec.c | 2 +-
sound/pci/hda/hda_generic.c | 2 +-
sound/pci/hda/patch_sigmatel.c | 2 +-
sound/pci/ice1712/prodigy192.c | 2 +-
sound/pci/oxygen/xonar_dg.c | 2 +-
7 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c
index 528102cc2d5d0..d824ff4ae3e3b 100644
--- a/sound/pci/cs46xx/cs46xx_lib.c
+++ b/sound/pci/cs46xx/cs46xx_lib.c
@@ -780,7 +780,7 @@ static void snd_cs46xx_set_capture_sample_rate(struct snd_cs46xx *chip, unsigned
rate = 48000 / 9;
/*
- * We can not capture at at rate greater than the Input Rate (48000).
+ * We can not capture at a rate greater than the Input Rate (48000).
* Return an error if an attempt is made to stray outside that limit.
*/
if (rate > 48000)
diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c
index 7488e1b7a7707..4e726d39b05d1 100644
--- a/sound/pci/cs46xx/dsp_spos_scb_lib.c
+++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c
@@ -1742,7 +1742,7 @@ int cs46xx_iec958_pre_open (struct snd_cs46xx *chip)
struct dsp_spos_instance * ins = chip->dsp_spos_instance;
if ( ins->spdif_status_out & DSP_SPDIF_STATUS_OUTPUT_ENABLED ) {
- /* remove AsynchFGTxSCB and and PCMSerialInput_II */
+ /* remove AsynchFGTxSCB and PCMSerialInput_II */
cs46xx_dsp_disable_spdif_out (chip);
/* save state */
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index cbe0248225c1c..4e67614f15f8e 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -3496,7 +3496,7 @@ EXPORT_SYMBOL_GPL(snd_hda_set_power_save);
* @nid: NID to check / update
*
* Check whether the given NID is in the amp list. If it's in the list,
- * check the current AMP status, and update the the power-status according
+ * check the current AMP status, and update the power-status according
* to the mute status.
*
* This function is supposed to be set or called from the check_power_status
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 949c90a859fab..184089c5e8cbc 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -820,7 +820,7 @@ static void activate_amp_in(struct hda_codec *codec, struct nid_path *path,
}
}
-/* sync power of each widget in the the given path */
+/* sync power of each widget in the given path */
static hda_nid_t path_power_update(struct hda_codec *codec,
struct nid_path *path,
bool allow_powerdown)
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index d1a6d20ace0da..80b72d0702c5e 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -862,7 +862,7 @@ static int stac_auto_create_beep_ctls(struct hda_codec *codec,
static struct snd_kcontrol_new beep_vol_ctl =
HDA_CODEC_VOLUME(NULL, 0, 0, 0);
- /* check for mute support for the the amp */
+ /* check for mute support for the amp */
if ((caps & AC_AMPCAP_MUTE) >> AC_AMPCAP_MUTE_SHIFT) {
const struct snd_kcontrol_new *temp;
if (spec->anabeep_nid == nid)
diff --git a/sound/pci/ice1712/prodigy192.c b/sound/pci/ice1712/prodigy192.c
index 3919aed39ca03..5e52086d7b986 100644
--- a/sound/pci/ice1712/prodigy192.c
+++ b/sound/pci/ice1712/prodigy192.c
@@ -31,7 +31,7 @@
* Experimentally I found out that only a combination of
* OCKS0=1, OCKS1=1 (128fs, 64fs output) and ice1724 -
* VT1724_MT_I2S_MCLK_128X=0 (256fs input) yields correct
- * sampling rate. That means the the FPGA doubles the
+ * sampling rate. That means that the FPGA doubles the
* MCK01 rate.
*
* Copyright (c) 2003 Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/oxygen/xonar_dg.c b/sound/pci/oxygen/xonar_dg.c
index 4cf3200e988b0..df44135e1b0c9 100644
--- a/sound/pci/oxygen/xonar_dg.c
+++ b/sound/pci/oxygen/xonar_dg.c
@@ -39,7 +39,7 @@
* GPIO 4 <- headphone detect
* GPIO 5 -> enable ADC analog circuit for the left channel
* GPIO 6 -> enable ADC analog circuit for the right channel
- * GPIO 7 -> switch green rear output jack between CS4245 and and the first
+ * GPIO 7 -> switch green rear output jack between CS4245 and the first
* channel of CS4361 (mechanical relay)
* GPIO 8 -> enable output to speakers
*
--
2.25.1
From: Randy Dunlap <rdunlap(a)infradead.org>
[ Upstream commit c7fabbc51352f50cc58242a6dc3b9c1a3599849b ]
Drop duplicated words in sound/pci/.
{and, the, at}
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Link: https://lore.kernel.org/r/20200806021926.32418-1-rdunlap@infradead.org
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
sound/pci/cs46xx/cs46xx_lib.c | 2 +-
sound/pci/cs46xx/dsp_spos_scb_lib.c | 2 +-
sound/pci/hda/hda_codec.c | 2 +-
sound/pci/hda/hda_generic.c | 2 +-
sound/pci/hda/patch_sigmatel.c | 2 +-
sound/pci/ice1712/prodigy192.c | 2 +-
sound/pci/oxygen/xonar_dg.c | 2 +-
7 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c
index 0020fd0efc466..09c547f4cc186 100644
--- a/sound/pci/cs46xx/cs46xx_lib.c
+++ b/sound/pci/cs46xx/cs46xx_lib.c
@@ -780,7 +780,7 @@ static void snd_cs46xx_set_capture_sample_rate(struct snd_cs46xx *chip, unsigned
rate = 48000 / 9;
/*
- * We can not capture at at rate greater than the Input Rate (48000).
+ * We can not capture at a rate greater than the Input Rate (48000).
* Return an error if an attempt is made to stray outside that limit.
*/
if (rate > 48000)
diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c
index 7488e1b7a7707..4e726d39b05d1 100644
--- a/sound/pci/cs46xx/dsp_spos_scb_lib.c
+++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c
@@ -1742,7 +1742,7 @@ int cs46xx_iec958_pre_open (struct snd_cs46xx *chip)
struct dsp_spos_instance * ins = chip->dsp_spos_instance;
if ( ins->spdif_status_out & DSP_SPDIF_STATUS_OUTPUT_ENABLED ) {
- /* remove AsynchFGTxSCB and and PCMSerialInput_II */
+ /* remove AsynchFGTxSCB and PCMSerialInput_II */
cs46xx_dsp_disable_spdif_out (chip);
/* save state */
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 7d65fe31c8257..a56f018d586f5 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -3394,7 +3394,7 @@ EXPORT_SYMBOL_GPL(snd_hda_set_power_save);
* @nid: NID to check / update
*
* Check whether the given NID is in the amp list. If it's in the list,
- * check the current AMP status, and update the the power-status according
+ * check the current AMP status, and update the power-status according
* to the mute status.
*
* This function is supposed to be set or called from the check_power_status
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 28ef409a9e6ae..9dee657ce9e27 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -823,7 +823,7 @@ static void activate_amp_in(struct hda_codec *codec, struct nid_path *path,
}
}
-/* sync power of each widget in the the given path */
+/* sync power of each widget in the given path */
static hda_nid_t path_power_update(struct hda_codec *codec,
struct nid_path *path,
bool allow_powerdown)
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 7cd147411b22d..f7896a9ae3d65 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -863,7 +863,7 @@ static int stac_auto_create_beep_ctls(struct hda_codec *codec,
static struct snd_kcontrol_new beep_vol_ctl =
HDA_CODEC_VOLUME(NULL, 0, 0, 0);
- /* check for mute support for the the amp */
+ /* check for mute support for the amp */
if ((caps & AC_AMPCAP_MUTE) >> AC_AMPCAP_MUTE_SHIFT) {
const struct snd_kcontrol_new *temp;
if (spec->anabeep_nid == nid)
diff --git a/sound/pci/ice1712/prodigy192.c b/sound/pci/ice1712/prodigy192.c
index 3919aed39ca03..5e52086d7b986 100644
--- a/sound/pci/ice1712/prodigy192.c
+++ b/sound/pci/ice1712/prodigy192.c
@@ -31,7 +31,7 @@
* Experimentally I found out that only a combination of
* OCKS0=1, OCKS1=1 (128fs, 64fs output) and ice1724 -
* VT1724_MT_I2S_MCLK_128X=0 (256fs input) yields correct
- * sampling rate. That means the the FPGA doubles the
+ * sampling rate. That means that the FPGA doubles the
* MCK01 rate.
*
* Copyright (c) 2003 Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/oxygen/xonar_dg.c b/sound/pci/oxygen/xonar_dg.c
index 4cf3200e988b0..df44135e1b0c9 100644
--- a/sound/pci/oxygen/xonar_dg.c
+++ b/sound/pci/oxygen/xonar_dg.c
@@ -39,7 +39,7 @@
* GPIO 4 <- headphone detect
* GPIO 5 -> enable ADC analog circuit for the left channel
* GPIO 6 -> enable ADC analog circuit for the right channel
- * GPIO 7 -> switch green rear output jack between CS4245 and and the first
+ * GPIO 7 -> switch green rear output jack between CS4245 and the first
* channel of CS4361 (mechanical relay)
* GPIO 8 -> enable output to speakers
*
--
2.25.1
From: Marcos Paulo de Souza <mpdesouza(a)suse.com>
[BUG]
After commit 9afc66498a0b ("btrfs: block-group: refactor how we read one
block group item"), cache->length is being assigned after calling
btrfs_create_block_group_cache. This causes a problem since
set_free_space_tree_thresholds calculates the free-space threshold used to
decide if the free-space tree should convert from extents to bitmaps.
The current code calls set_free_space_tree_thresholds with cache->length
being 0, which then makes cache->bitmap_high_thresh zero. This
implies the system will always use bitmaps instead of extents, which is
not desired if the block group is not fragmented.
This behavior can be seen in a test that expects to repair filesystems
with FREE_SPACE_EXTENT and FREE_SPACE_BITMAP, but the current code only
creates FREE_SPACE_BITMAP.
[FIX]
Call set_free_space_tree_thresholds after setting cache->length.
Link: https://github.com/kdave/btrfs-progs/issues/251
Fixes: 9afc66498a0b ("btrfs: block-group: refactor how we read one block group item")
CC: stable(a)vger.kernel.org # 5.8+
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Marcos Paulo de Souza <mpdesouza(a)suse.com>
---
Changes from v1:
* Add a warning in set_free_space_tree_thresholds when bg->length is zero (Qu)
fs/btrfs/block-group.c | 4 +++-
fs/btrfs/free-space-tree.c | 3 +++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 44fdfa2eeb2e..01e8ba1da1d3 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1798,7 +1798,6 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
cache->fs_info = fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
- set_free_space_tree_thresholds(cache);
cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
@@ -1908,6 +1907,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
read_block_group_item(cache, path, key);
+ set_free_space_tree_thresholds(cache);
+
if (need_clear) {
/*
* When we mount with old space cache, we need to
@@ -2128,6 +2129,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
return -ENOMEM;
cache->length = size;
+ set_free_space_tree_thresholds(cache);
cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 8b1f5c8897b7..1d191fbc754b 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -22,6 +22,9 @@ void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
size_t bitmap_size;
u64 num_bitmaps, total_bitmap_size;
+ if (cache->length == 0)
+ btrfs_warn(cache->fs_info, "block group length is zero");
+
/*
* We convert to bitmaps when the disk space required for using extents
* exceeds that required for using bitmaps.
--
2.28.0
From: Andy Lutomirski <luto(a)kernel.org>
Trying to clear DR7 around a #DB from usermode malfunctions if we
schedule when delivering SIGTRAP. Rather than trying to define a
special no-recursion region, just allow a single level of recursion.
We do the same thing for NMI, and it hasn't caused any problems yet.
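A hedged standalone model of the save/restore idiom (illustrative only;
the kernel's actual helpers are local_db_save()/local_db_restore() in the
diff below):
#include <stdio.h>
/* Each handler saves the current DR7 value, clears it, and restores it
 * on exit. A single nested #DB then saves the already-cleared value, so
 * breakpoints stay disabled throughout and come back exactly once on
 * the way out.
 */
static unsigned long dr7 = 0x401;	/* some breakpoint enabled */
static unsigned long db_save(void)
{
	unsigned long prev = dr7;

	dr7 = 0;			/* disable breakpoints while handling */
	return prev;
}
static void db_restore(unsigned long prev)
{
	dr7 = prev;
}
int main(void)
{
	unsigned long outer = db_save();	/* outer #DB: dr7 -> 0 */
	unsigned long inner = db_save();	/* nested #DB: saves 0 */

	db_restore(inner);			/* dr7 still 0 */
	db_restore(outer);			/* dr7 back to 0x401 */
	printf("dr7 restored to %#lx\n", dr7);
	return 0;
}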
Fixes: 9f58fdde95c9 ("x86/db: Split out dr6/7 handling")
Reported-by: Kyle Huey <me(a)kylehuey.com>
Debugged-by: Josh Poimboeuf <jpoimboe(a)redhat.com>
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/8b9bd05f187231df008d48cf818a6a311cbd5c98.15978823…
---
arch/x86/kernel/traps.c | 65 ++++++++++++++++++++++--------------------------
1 file changed, 31 insertions(+), 34 deletions(-)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struc
#endif
}
-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+static __always_inline unsigned long debug_read_clear_dr6(void)
{
- /*
- * Disable breakpoints during exception handling; recursive exceptions
- * are exceedingly 'fun'.
- *
- * Since this function is NOKPROBE, and that also applies to
- * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
- * HW_BREAKPOINT_W on our stack)
- *
- * Entry text is excluded for HW_BP_X and cpu_entry_area, which
- * includes the entry stack is excluded for everything.
- */
- *dr7 = local_db_save();
+ unsigned long dr6;
/*
* The Intel SDM says:
@@ -755,15 +744,12 @@ static __always_inline void debug_enter(
*
* Keep it simple: clear DR6 immediately.
*/
- get_debugreg(*dr6, 6);
+ get_debugreg(dr6, 6);
set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
- *dr6 &= ~DR6_RESERVED;
-}
+ dr6 &= ~DR6_RESERVED;
-static __always_inline void debug_exit(unsigned long dr7)
-{
- local_db_restore(dr7);
+ return dr6;
}
/*
@@ -863,6 +849,18 @@ static void handle_debug(struct pt_regs
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
+ /*
+ * Disable breakpoints during exception handling; recursive exceptions
+ * are exceedingly 'fun'.
+ *
+ * Since this function is NOKPROBE, and that also applies to
+ * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+ * HW_BREAKPOINT_W on our stack)
+ *
+ * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+ * includes the entry stack is excluded for everything.
+ */
+ unsigned long dr7 = local_db_save();
bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
@@ -883,6 +881,8 @@ static __always_inline void exc_debug_ke
instrumentation_end();
idtentry_exit_nmi(regs, irq_state);
+
+ local_db_restore(dr7);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -894,6 +894,15 @@ static __always_inline void exc_debug_us
*/
WARN_ON_ONCE(!user_mode(regs));
+ /*
+ * NB: We can't easily clear DR7 here because
+ * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
+ * user memory, etc. This means that a recursive #DB is possible. If
+ * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+ * Since we're not on the IST stack right now, everything will be
+ * fine.
+ */
+
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
@@ -907,36 +916,24 @@ static __always_inline void exc_debug_us
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_kernel(regs, dr6);
- debug_exit(dr7);
+ exc_debug_kernel(regs, debug_read_clear_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_user(regs, dr6);
- debug_exit(dr7);
+ exc_debug_user(regs, debug_read_clear_dr6());
}
#else
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
+ unsigned long dr6 = debug_read_clear_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
else
exc_debug_kernel(regs, dr6);
-
- debug_exit(dr7);
}
#endif
From: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Sometimes re-plugging a USB device during system sleep renders the device
useless:
[ 173.418345] xhci_hcd 0000:00:14.0: Get port status 2-4 read: 0x14203e2, return 0x10262
...
[ 176.496485] usb 2-4: Waited 2000ms for CONNECT
[ 176.496781] usb usb2-port4: status 0000.0262 after resume, -19
[ 176.497103] usb 2-4: can't resume, status -19
[ 176.497438] usb usb2-port4: logical disconnect
Because PLS equals XDEV_RESUME, the xHCI driver reports U3 to usbcore,
even though the CAS bit is flagged.
So prioritize CAS over XDEV_RESUME to let usbcore handle a warm reset
for the port.
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-hub.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index c3554e37e09f..4e14e164cb68 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -740,15 +740,6 @@ static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci,
{
u32 pls = status_reg & PORT_PLS_MASK;
- /* resume state is a xHCI internal state.
- * Do not report it to usb core, instead, pretend to be U3,
- * thus usb core knows it's not ready for transfer
- */
- if (pls == XDEV_RESUME) {
- *status |= USB_SS_PORT_LS_U3;
- return;
- }
-
/* When the CAS bit is set then warm reset
* should be performed on port
*/
@@ -770,6 +761,16 @@ static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci,
*/
pls |= USB_PORT_STAT_CONNECTION;
} else {
+ /*
+ * Resume state is an xHCI internal state. Do not report it to
+ * usb core, instead, pretend to be U3, thus usb core knows
+ * it's not ready for transfer.
+ */
+ if (pls == XDEV_RESUME) {
+ *status |= USB_SS_PORT_LS_U3;
+ return;
+ }
+
/*
* If CAS bit isn't set but the Port is already at
* Compliance Mode, fake a connection so the USB core
--
2.17.1
From: Marcos Paulo de Souza <mpdesouza(a)suse.com>
[BUG]
After commit 9afc66498a0b ("btrfs: block-group: refactor how we read one
block group item"), cache->length is being assigned after calling
btrfs_create_block_group_cache. This causes a problem since
set_free_space_tree_thresholds calculates the free-space threshold used to
decide if the free-space tree should convert from extents to bitmaps.
The current code calls set_free_space_tree_thresholds with cache->length
being 0, which then makes cache->bitmap_high_thresh zero. This
implies the system will always use bitmaps instead of extents, which is
not desired if the block group is not fragmented.
This behavior can be seen in a test that expects to repair filesystems
with FREE_SPACE_EXTENT and FREE_SPACE_BITMAP, but the current code only
creates FREE_SPACE_BITMAP.
[FIX]
Call set_free_space_tree_thresholds after setting cache->length.
Link: https://github.com/kdave/btrfs-progs/issues/251
Fixes: 9afc66498a0b ("btrfs: block-group: refactor how we read one block group item")
CC: stable(a)vger.kernel.org # 5.8+
Signed-off-by: Marcos Paulo de Souza <mpdesouza(a)suse.com>
---
fs/btrfs/block-group.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 44fdfa2eeb2e..01e8ba1da1d3 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1798,7 +1798,6 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
cache->fs_info = fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
- set_free_space_tree_thresholds(cache);
cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
@@ -1908,6 +1907,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
read_block_group_item(cache, path, key);
+ set_free_space_tree_thresholds(cache);
+
if (need_clear) {
/*
* When we mount with old space cache, we need to
@@ -2128,6 +2129,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
return -ENOMEM;
cache->length = size;
+ set_free_space_tree_thresholds(cache);
cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
--
2.28.0
SWP_FS is used to make swap_{read,write}page() go through
the filesystem, and it's only used for swap files over
NFS. So, !SWP_FS means non-NFS for now; it could be either
file backed or device backed. The same goes for the legacy
SWP_FILE.
So in order to achieve the goal of the original patch,
SWP_BLKDEV should be used instead.
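As a rough illustration of the predicate change (flag values and helper
names below are made up for this sketch, not the kernel's definitions in
include/linux/swap.h):
#include <stdbool.h>
#include <stdio.h>
enum { SWP_FS = 1 << 0, SWP_BLKDEV = 1 << 1 };
/* Old check: also lets non-NFS *file-backed* swap take the THP cluster
 * allocation path, which is unsafe. */
static bool cluster_ok_old(unsigned int flags)
{
	return !(flags & SWP_FS);
}
/* New check: only swap backed directly by a block device qualifies. */
static bool cluster_ok_new(unsigned int flags)
{
	return flags & SWP_BLKDEV;
}
int main(void)
{
	unsigned int file_backed = 0;	/* plain swapfile: neither flag set */

	printf("old: %d, new: %d\n", cluster_ok_old(file_backed),
	       cluster_ok_new(file_backed));	/* old: 1, new: 0 */
	return 0;
}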
FS corruption can be observed with SSD device + XFS +
fragmented swapfile due to CONFIG_THP_SWAP=y.
I reproduced the issue with the following details:
Environment:
QEMU + upstream kernel + buildroot + NVMe (2 GB)
Kernel config:
CONFIG_BLK_DEV_NVME=y
CONFIG_THP_SWAP=y
Some reproducible steps:
mkfs.xfs -f /dev/nvme0n1
mkdir /tmp/mnt
mount /dev/nvme0n1 /tmp/mnt
bs="32k"
sz="1024m" # doesn't matter too much, I also tried 16m
xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
xfs_io -f -c "pwrite -F -S 0 -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fsync" /tmp/mnt/sw
mkswap /tmp/mnt/sw
swapon /tmp/mnt/sw
stress --vm 2 --vm-bytes 600M # doesn't matter too much as well
Symptoms:
- FS corruption (e.g. checksum failure)
- memory corruption at: 0xd2808010
- segfault
Fixes: f0eea189e8e9 ("mm, THP, swap: Don't allocate huge cluster for file backed swap device")
Fixes: 38d8b4e6bdc8 ("mm, THP, swap: delay splitting THP during swap out")
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: Rafael Aquini <aquini(a)redhat.com>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Gao Xiang <hsiangkao(a)redhat.com>
---
v1: https://lore.kernel.org/r/20200819195613.24269-1-hsiangkao@redhat.com
changes since v1:
- improve commit message description
Hi Andrew,
Kindly consider this one instead if no other concerns...
Thanks,
Gao Xiang
mm/swapfile.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c26916e95fd..2937daf3ca02 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1074,7 +1074,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
goto nextsi;
}
if (size == SWAPFILE_CLUSTER) {
- if (!(si->flags & SWP_FS))
+ if (si->flags & SWP_BLKDEV)
n_ret = swap_alloc_cluster(si, swp_entries);
} else
n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
--
2.18.1
From: Charan Teja Reddy <charante(a)codeaurora.org>
Subject: mm, page_alloc: fix core hung in free_pcppages_bulk()
The following race is observed with repeated online and offline cycles,
with a delay between two successive onlines of memory blocks in the
movable zone.

P1: Online the first memory block in the movable zone. The pcp struct
    values are initialized to their defaults, i.e., pcp->high = 0 and
    pcp->batch = 1.
P2: Allocate pages from the movable zone.
P1: Try to online the second memory block in the movable zone; it has
    entered online_pages() but has yet to call zone_pcp_update().
P2: This process enters the exit path and tries to release its order-0
    pages to the pcp lists through free_unref_page_commit().
P2: As pcp->high = 0 and pcp->count = 1, it proceeds to call
    free_pcppages_bulk().
P1: Update the pcp values, so the new values are, say, pcp->high = 378
    and pcp->batch = 63.
P2: Read the pcp's batch value using READ_ONCE() and pass it to
    free_pcppages_bulk(); the values passed are batch = 63, count = 1.
P2: Since the number of pages on the pcp lists is less than ->batch,
    it gets stuck in the while (list_empty(list)) loop with interrupts
    disabled, hence a core hang.

Avoid this by ensuring free_pcppages_bulk() is called with a proper
count of pcp list pages.
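A tiny standalone model of the fix (illustrative only; the kernel change
is the one-line clamp in the diff below):
#include <stdio.h>
/* Clamp the requested drain count to what the pcp list actually holds,
 * so the drain loop can always find a non-empty list and terminates.
 */
static int clamp_count(int requested, int pcp_count)
{
	return requested < pcp_count ? requested : pcp_count;
}
int main(void)
{
	/* the racy case above: batch = 63 but only 1 page is queued */
	printf("drain %d page(s)\n", clamp_count(63, 1));	/* prints 1 */
	return 0;
}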
The mentioned race is somewhat easily reproducible without [1] because
the pcp's are not updated for the first memory block online, and thus
there is a wide enough race window for P2 between the alloc+free and the
pcp struct values update through onlining of the second memory block.
With [1], the race still exists but is very narrow, as we update the pcp
struct values for the first memory block online itself.
This is not limited to the movable zone, it could also happen in cases
with the normal zone (e.g., hotplug to a node that only has DMA memory, or
no other memory yet).
[1]: https://patchwork.kernel.org/patch/11696389/
Link: http://lkml.kernel.org/r/1597150703-19003-1-git-send-email-charante@codeaur…
Fixes: 5f8dcc21211a ("page-allocator: split per-cpu list into one-list-per-migrate-type")
Signed-off-by: Charan Teja Reddy <charante(a)codeaurora.org>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Rientjes <rientjes(a)google.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Vinayak Menon <vinmenon(a)codeaurora.org>
Cc: <stable(a)vger.kernel.org> [2.6+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/mm/page_alloc.c~mm-page_alloc-fix-core-hung-in-free_pcppages_bulk
+++ a/mm/page_alloc.c
@@ -1302,6 +1302,11 @@ static void free_pcppages_bulk(struct zo
struct page *page, *tmp;
LIST_HEAD(head);
+ /*
+ * Ensure proper count is passed which otherwise would stuck in the
+ * below while (list_empty(list)) loop.
+ */
+ count = min(pcp->count, count);
while (count) {
struct list_head *list;
_
From: Doug Berger <opendmb(a)gmail.com>
Subject: mm: include CMA pages in lowmem_reserve at boot
The lowmem_reserve arrays provide a means of applying pressure against
allocations from lower zones that were targeted at higher zones. Its
values are a function of the number of pages managed by higher zones and
are assigned by a call to the setup_per_zone_lowmem_reserve() function.
The function is initially called at boot time by the function
init_per_zone_wmark_min() and may be called later by accesses of the
/proc/sys/vm/lowmem_reserve_ratio sysctl file.
The function init_per_zone_wmark_min() was moved up from a module_init to
a core_initcall to resolve a sequencing issue with khugepaged.
Unfortunately this created a sequencing issue with CMA page accounting.
The CMA pages are added to the managed page count of a zone when
cma_init_reserved_areas() is called at boot also as a core_initcall. This
makes it uncertain whether the CMA pages will be added to the managed page
counts of their zones before or after the call to
init_per_zone_wmark_min() as it becomes dependent on link order. With the
current link order the pages are added to the managed count after the
lowmem_reserve arrays are initialized at boot.
This means the lowmem_reserve values at boot may be lower than the values
used later if /proc/sys/vm/lowmem_reserve_ratio is accessed even if the
ratio values are unchanged.
In many cases the difference is not significant, but for example
an ARM platform with 1GB of memory and the following memory layout
[ 0.000000] cma: Reserved 256 MiB at 0x0000000030000000
[ 0.000000] Zone ranges:
[ 0.000000] DMA [mem 0x0000000000000000-0x000000002fffffff]
[ 0.000000] Normal empty
[ 0.000000] HighMem [mem 0x0000000030000000-0x000000003fffffff]
would result in 0 lowmem_reserve for the DMA zone. This would allow
userspace to deplete the DMA zone easily. Funnily enough
$ cat /proc/sys/vm/lowmem_reserve_ratio
would fix up the situation because it forces setup_per_zone_lowmem_reserve
as a side effect.
This commit breaks the link order dependency by invoking
init_per_zone_wmark_min() as a postcore_initcall so that the CMA pages
have the chance to be properly accounted in their zone(s) and allowing the
lowmem_reserve arrays to receive consistent values.
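A hedged sketch of the initcall-level ordering being relied on (the
function names here are placeholders for a built-in kernel object, not
the kernel's own functions):
#include <linux/init.h>
#include <linux/printk.h>
/* core_initcalls (level 1) all run before postcore_initcalls (level 2),
 * so the watermark setup no longer depends on link order relative to
 * CMA accounting.
 */
static int __init demo_cma_accounting(void)
{
	pr_info("CMA pages added to zone managed counts\n");
	return 0;
}
core_initcall(demo_cma_accounting);	/* same level as cma_init_reserved_areas() */
static int __init demo_wmark_setup(void)
{
	pr_info("lowmem_reserve computed with CMA pages included\n");
	return 0;
}
postcore_initcall(demo_wmark_setup);	/* next level: after all core_initcalls */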
Link: http://lkml.kernel.org/r/1597423766-27849-1-git-send-email-opendmb@gmail.com
Fixes: bc22af74f271 ("mm: update min_free_kbytes from khugepaged after core initialization")
Signed-off-by: Doug Berger <opendmb(a)gmail.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Jason Baron <jbaron(a)akamai.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/page_alloc.c~mm-include-cma-pages-in-lowmem_reserve-at-boot
+++ a/mm/page_alloc.c
@@ -7888,7 +7888,7 @@ int __meminit init_per_zone_wmark_min(vo
return 0;
}
-core_initcall(init_per_zone_wmark_min)
+postcore_initcall(init_per_zone_wmark_min)
/*
* min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
_
From: Phillip Lougher <phillip(a)squashfs.org.uk>
Subject: squashfs: avoid bio_alloc() failure with 1Mbyte blocks
This is a regression introduced by the patch "migrate from ll_rw_block
usage to BIO".
Bio_alloc() is limited to 256 pages (1 Mbyte). This can cause a failure
when reading filesystems with 1 Mbyte blocks. The problem is that a
datablock can be fully (or almost fully) uncompressed, requiring 256
pages, but, because blocks are not aligned to page boundaries, it may
take 257 pages to read.
Bio_kmalloc() can handle 1024 pages, so use it for this edge condition.
Link: http://lkml.kernel.org/r/20200815035637.15319-1-phillip@squashfs.org.uk
Fixes: 93e72b3c612a ("squashfs: migrate from ll_rw_block usage to BIO")
Signed-off-by: Phillip Lougher <phillip(a)squashfs.org.uk>
Reported-by: Nicolas Prochazka <nicolas.prochazka(a)gmail.com>
Reported-by: Tomoatsu Shimada <shimada(a)walbrix.com>
Reviewed-by: Guenter Roeck <groeck(a)chromium.org>
Cc: Philippe Liard <pliard(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Adrien Schildknecht <adrien+dev(a)schischi.me>
Cc: Daniel Rosenberg <drosen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/squashfs/block.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/fs/squashfs/block.c~squashfs-avoid-bio_alloc-failure-with-1mbyte-blocks
+++ a/fs/squashfs/block.c
@@ -87,7 +87,11 @@ static int squashfs_bio_read(struct supe
int error, i;
struct bio *bio;
- bio = bio_alloc(GFP_NOIO, page_count);
+ if (page_count <= BIO_MAX_PAGES)
+ bio = bio_alloc(GFP_NOIO, page_count);
+ else
+ bio = bio_kmalloc(GFP_NOIO, page_count);
+
if (!bio)
return -ENOMEM;
_
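A standalone sketch of the edge-case arithmetic (PAGE_SIZE assumed to be
4K; names mirror the kernel's but the program is illustrative only): a
fully uncompressed 1 Mbyte block that starts mid-page spans 257 pages,
one more than bio_alloc() can handle.
#include <stdio.h>
#define PAGE_SIZE	4096
#define BIO_MAX_PAGES	256
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
int main(void)
{
	unsigned int block_size = 1024 * 1024;	/* 1 Mbyte squashfs block */
	unsigned int offset = 512;		/* block not page aligned */
	unsigned int page_count = DIV_ROUND_UP(offset + block_size, PAGE_SIZE);
	printf("pages needed: %u (bio_alloc limit %u) -> %s\n",
	       page_count, BIO_MAX_PAGES,
	       page_count <= BIO_MAX_PAGES ? "bio_alloc()" : "bio_kmalloc()");
	return 0;
}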
From: Hugh Dickins <hughd(a)google.com>
Subject: uprobes: __replace_page() avoid BUG in munlock_vma_page()
syzbot crashed on the VM_BUG_ON_PAGE(PageTail) in munlock_vma_page(), when
called from uprobes __replace_page(). Which of many ways to fix it?
Settled on not calling it when PageCompound (since Head and Tail are
equal in this context, PageCompound is the usual check in uprobes.c, and
the prior use of FOLL_SPLIT_PMD will have cleared PageMlocked already).
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008161338360.20413@eggly.anvils
Fixes: 5a52c9df62b4 ("uprobe: use FOLL_SPLIT_PMD instead of FOLL_SPLIT")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Acked-by: Song Liu <songliubraving(a)fb.com>
Acked-by: Oleg Nesterov <oleg(a)redhat.com>
Reviewed-by: Srikar Dronamraju <srikar(a)linux.vnet.ibm.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/events/uprobes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/events/uprobes.c~uprobes-__replace_page-avoid-bug-in-munlock_vma_page
+++ a/kernel/events/uprobes.c
@@ -205,7 +205,7 @@ static int __replace_page(struct vm_area
try_to_free_swap(old_page);
page_vma_mapped_walk_done(&pvmw);
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page))
munlock_vma_page(old_page);
put_page(old_page);
_
From: Jann Horn <jannh(a)google.com>
Subject: romfs: fix uninitialized memory leak in romfs_dev_read()
romfs has a superblock field that limits the size of the filesystem; data
beyond that limit is never accessed.
romfs_dev_read() fetches a caller-supplied number of bytes from the
backing device. It returns 0 on success or an error code on failure;
therefore, its API can't represent short reads, it's all-or-nothing.
However, when romfs_dev_read() detects that the requested operation would
cross the filesystem size limit, it currently silently truncates the
requested number of bytes. This e.g. means that when the content of a
file with size 0x1000 starts one byte before the filesystem size limit,
->readpage() will only fill a single byte of the supplied page while
leaving the rest uninitialized, leaking that uninitialized memory to
userspace.
Fix it by returning an error code instead of truncating the read when the
requested read operation would go beyond the end of the filesystem.
Link: http://lkml.kernel.org/r/20200818013202.2246365-1-jannh@google.com
Fixes: da4458bda237 ("NOMMU: Make it possible for RomFS to use MTD devices directly")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/romfs/storage.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/fs/romfs/storage.c~romfs-fix-uninitialized-memory-leak-in-romfs_dev_read
+++ a/fs/romfs/storage.c
@@ -217,10 +217,8 @@ int romfs_dev_read(struct super_block *s
size_t limit;
limit = romfs_maxsize(sb);
- if (pos >= limit)
+ if (pos >= limit || buflen > limit - pos)
return -EIO;
- if (buflen > limit - pos)
- buflen = limit - pos;
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd)
_
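A standalone sketch of the bounds check (illustrative only; -EIO written
out as a constant): rejecting the read outright keeps the all-or-nothing
contract, and the pos >= limit test also guards the limit - pos
subtraction against underflow.
#include <stdio.h>
#include <stddef.h>
#define EIO 5
static int dev_read_check(size_t pos, size_t buflen, size_t limit)
{
	if (pos >= limit || buflen > limit - pos)
		return -EIO;	/* no silent short reads */
	return 0;
}
int main(void)
{
	size_t limit = 0x1000;
	/* a 0x1000-byte file starting one byte before the limit:
	 * previously truncated to a 1-byte read, now rejected */
	printf("crossing read: %d\n", dev_read_check(limit - 1, 0x1000, limit));
	/* a read fully inside the filesystem is unaffected */
	printf("inside read:   %d\n", dev_read_check(0, 0x800, limit));
	return 0;
}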
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()
syzbot crashes on the VM_BUG_ON_MM(khugepaged_test_exit(mm), mm) in
__khugepaged_enter(): yes, when one thread is about to dump core, has set
core_state, and is waiting for others, another might do something calling
__khugepaged_enter(), which now crashes because I lumped the core_state
test (known as "mmget_still_valid") into khugepaged_test_exit(). I still
think it's best to lump them together, so just in this exceptional case,
check mm->mm_users directly instead of khugepaged_test_exit().
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008141503370.18085@eggly.anvils
Fixes: bbe98f9cadff ("khugepaged: khugepaged_test_exit() check mmget_still_valid()")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Acked-by: Yang Shi <shy828301(a)gmail.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/khugepaged.c~khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter
+++ a/mm/khugepaged.c
@@ -466,7 +466,7 @@ int __khugepaged_enter(struct mm_struct
return -ENOMEM;
/* __khugepaged_exit() must not run from under us */
- VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
+ VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm);
if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
free_mm_slot(mm_slot);
return 0;
_
The patch titled
Subject: mm, THP, swap: fix allocating cluster for swapfile by mistake
has been added to the -mm tree. Its filename is
mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-thp-swap-fix-allocating-cluste…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-thp-swap-fix-allocating-cluste…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Gao Xiang <hsiangkao(a)redhat.com>
Subject: mm, THP, swap: fix allocating cluster for swapfile by mistake
SWP_FS is used to make swap_{read,write}page() go through the filesystem,
and it's only used for swap files over NFS. So, !SWP_FS means non-NFS
for now; the swap could be either file-backed or device-backed. The same
applies to the legacy SWP_FILE flag.
So in order to achieve the goal of the original patch, SWP_BLKDEV should
be used instead.
FS corruption can be observed with SSD device + XFS + fragmented swapfile
due to CONFIG_THP_SWAP=y.
I reproduced the issue with the following details:
Environment:
QEMU + upstream kernel + buildroot + NVMe (2 GB)
Kernel config:
CONFIG_BLK_DEV_NVME=y
CONFIG_THP_SWAP=y
Some reproducible steps:
mkfs.xfs -f /dev/nvme0n1
mkdir /tmp/mnt
mount /dev/nvme0n1 /tmp/mnt
bs="32k"
sz="1024m" # doesn't matter too much, I also tried 16m
xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
xfs_io -f -c "pwrite -F -S 0 -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fsync" /tmp/mnt/sw
mkswap /tmp/mnt/sw
swapon /tmp/mnt/sw
stress --vm 2 --vm-bytes 600M # doesn't matter too much as well
Symptoms:
- FS corruption (e.g. checksum failure)
- memory corruption at: 0xd2808010
- segfault
Link: https://lkml.kernel.org/r/20200820045323.7809-1-hsiangkao@redhat.com
Fixes: f0eea189e8e9 ("mm, THP, swap: Don't allocate huge cluster for file backed swap device")
Fixes: 38d8b4e6bdc8 ("mm, THP, swap: delay splitting THP during swap out")
Signed-off-by: Gao Xiang <hsiangkao(a)redhat.com>
Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Acked-by: Rafael Aquini <aquini(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Carlos Maiolino <cmaiolino(a)redhat.com>
Cc: Eric Sandeen <esandeen(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/swapfile.c~mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake
+++ a/mm/swapfile.c
@@ -1078,7 +1078,7 @@ start_over:
goto nextsi;
}
if (size == SWAPFILE_CLUSTER) {
- if (!(si->flags & SWP_FS))
+ if (si->flags & SWP_BLKDEV)
n_ret = swap_alloc_cluster(si, swp_entries);
} else
n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
_
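A tiny standalone demo of the flag logic (flag values illustrative, not
the kernel's): !(flags & SWP_FS) is also true for a non-NFS swap file,
which is what wrongly enabled cluster allocation there, while
flags & SWP_BLKDEV selects only device-backed swap.
#include <stdio.h>
#define SWP_BLKDEV	(1 << 0)
#define SWP_FS		(1 << 1)
static const char *cluster_old(unsigned int f)
{
	return !(f & SWP_FS) ? "yes" : "no";
}
static const char *cluster_new(unsigned int f)
{
	return (f & SWP_BLKDEV) ? "yes" : "no";
}
int main(void)
{
	unsigned int swapfile_on_xfs = 0;	/* neither flag set */
	unsigned int swap_partition = SWP_BLKDEV;
	printf("swapfile on XFS: old=%s new=%s\n",
	       cluster_old(swapfile_on_xfs), cluster_new(swapfile_on_xfs));
	printf("swap partition:  old=%s new=%s\n",
	       cluster_old(swap_partition), cluster_new(swap_partition));
	return 0;
}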
Patches currently in -mm which might be from hsiangkao(a)redhat.com are
mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 75802ca66354a39ab8e35822747cd08b3384a99a Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx(a)redhat.com>
Date: Thu, 6 Aug 2020 23:26:11 -0700
Subject: [PATCH] mm/hugetlb: fix calculation of
adjust_range_if_pmd_sharing_possible
This is found by code observation only.
Firstly, the worst case scenario should assume the whole range was covered
by pmd sharing. The old algorithm might not work as expected for ranges
like (1g-2m, 1g+2m), where the adjusted range should be (0, 1g+2m) but the
expected range should be (0, 2g).
While at it, remove the loop, since it should not be required. With
that, the new code should also be faster when the invalidated range is
huge.
Mike said:
: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
: adjust to (0, 1g+2m) which is incorrect.
:
: We should cc stable. The original reason for adjusting the range was to
: prevent data corruption (getting wrong page). Since the range is not
: always adjusted correctly, the potential for corruption still exists.
:
: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
: is only going to be called in two cases:
:
: 1) for a single page
: 2) for range == entire vma
:
: In those cases, the current code should produce the correct results.
:
: To be safe, let's just cc stable.
Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 27556d4d49fe..e52c878940bb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5314,25 +5314,21 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
- unsigned long check_addr;
+ unsigned long a_start, a_end;
if (!(vma->vm_flags & VM_MAYSHARE))
return;
- for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
- unsigned long a_start = check_addr & PUD_MASK;
- unsigned long a_end = a_start + PUD_SIZE;
+ /* Extend the range to be PUD aligned for a worst case scenario */
+ a_start = ALIGN_DOWN(*start, PUD_SIZE);
+ a_end = ALIGN(*end, PUD_SIZE);
- /*
- * If sharing is possible, adjust start/end if necessary.
- */
- if (range_in_vma(vma, a_start, a_end)) {
- if (a_start < *start)
- *start = a_start;
- if (a_end > *end)
- *end = a_end;
- }
- }
+ /*
+ * Intersect the range with the vma range, since pmd sharing won't be
+ * across vma after all
+ */
+ *start = max(vma->vm_start, a_start);
+ *end = min(vma->vm_end, a_end);
}
/*
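A standalone demo of the new adjustment for Mike's example, assuming a
1 GiB PUD_SIZE as on x86-64 (macros mirror the kernel's; illustrative
only): the range (1g-2m, 1g+2m) in a (0, 2g) vma now comes out as
(0, 2g) rather than (0, 1g+2m).
#include <stdio.h>
#define PUD_SIZE	(1UL << 30)
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
#define MAX(a, b)	((a) > (b) ? (a) : (b))
#define MIN(a, b)	((a) < (b) ? (a) : (b))
int main(void)
{
	unsigned long vm_start = 0, vm_end = 2UL << 30;		/* (0, 2g) */
	unsigned long start = (1UL << 30) - (2UL << 20);	/* 1g - 2m */
	unsigned long end = (1UL << 30) + (2UL << 20);		/* 1g + 2m */
	/* extend to the PUD-aligned worst case ... */
	unsigned long a_start = ALIGN_DOWN(start, PUD_SIZE);	/* 0 */
	unsigned long a_end = ALIGN(end, PUD_SIZE);		/* 2g */
	/* ... then intersect with the vma */
	start = MAX(vm_start, a_start);
	end = MIN(vm_end, a_end);
	printf("adjusted range: [%#lx, %#lx)\n", start, end);	/* [0, 2g) */
	return 0;
}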
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 75802ca66354a39ab8e35822747cd08b3384a99a Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx(a)redhat.com>
Date: Thu, 6 Aug 2020 23:26:11 -0700
Subject: [PATCH] mm/hugetlb: fix calculation of
adjust_range_if_pmd_sharing_possible
This is found by code observation only.
Firstly, the worst case scenario should assume the whole range was covered
by pmd sharing. The old algorithm might not work as expected for ranges
like (1g-2m, 1g+2m), where the adjusted range should be (0, 1g+2m) but the
expected range should be (0, 2g).
While at it, remove the loop, since it should not be required. With
that, the new code should also be faster when the invalidated range is
huge.
Mike said:
: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
: adjust to (0, 1g+2m) which is incorrect.
:
: We should cc stable. The original reason for adjusting the range was to
: prevent data corruption (getting wrong page). Since the range is not
: always adjusted correctly, the potential for corruption still exists.
:
: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
: is only going to be called in two cases:
:
: 1) for a single page
: 2) for range == entire vma
:
: In those cases, the current code should produce the correct results.
:
: To be safe, let's just cc stable.
Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 27556d4d49fe..e52c878940bb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5314,25 +5314,21 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
- unsigned long check_addr;
+ unsigned long a_start, a_end;
if (!(vma->vm_flags & VM_MAYSHARE))
return;
- for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
- unsigned long a_start = check_addr & PUD_MASK;
- unsigned long a_end = a_start + PUD_SIZE;
+ /* Extend the range to be PUD aligned for a worst case scenario */
+ a_start = ALIGN_DOWN(*start, PUD_SIZE);
+ a_end = ALIGN(*end, PUD_SIZE);
- /*
- * If sharing is possible, adjust start/end if necessary.
- */
- if (range_in_vma(vma, a_start, a_end)) {
- if (a_start < *start)
- *start = a_start;
- if (a_end > *end)
- *end = a_end;
- }
- }
+ /*
+ * Intersect the range with the vma range, since pmd sharing won't be
+ * across vma after all
+ */
+ *start = max(vma->vm_start, a_start);
+ *end = min(vma->vm_end, a_end);
}
/*
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 75802ca66354a39ab8e35822747cd08b3384a99a Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx(a)redhat.com>
Date: Thu, 6 Aug 2020 23:26:11 -0700
Subject: [PATCH] mm/hugetlb: fix calculation of
adjust_range_if_pmd_sharing_possible
This is found by code observation only.
Firstly, the worst case scenario should assume the whole range was covered
by pmd sharing. The old algorithm might not work as expected for ranges
like (1g-2m, 1g+2m), where the adjusted range should be (0, 1g+2m) but the
expected range should be (0, 2g).
While at it, remove the loop, since it should not be required. With
that, the new code should also be faster when the invalidated range is
huge.
Mike said:
: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
: adjust to (0, 1g+2m) which is incorrect.
:
: We should cc stable. The original reason for adjusting the range was to
: prevent data corruption (getting wrong page). Since the range is not
: always adjusted correctly, the potential for corruption still exists.
:
: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
: is only going to be called in two cases:
:
: 1) for a single page
: 2) for range == entire vma
:
: In those cases, the current code should produce the correct results.
:
: To be safe, let's just cc stable.
Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 27556d4d49fe..e52c878940bb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5314,25 +5314,21 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
- unsigned long check_addr;
+ unsigned long a_start, a_end;
if (!(vma->vm_flags & VM_MAYSHARE))
return;
- for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
- unsigned long a_start = check_addr & PUD_MASK;
- unsigned long a_end = a_start + PUD_SIZE;
+ /* Extend the range to be PUD aligned for a worst case scenario */
+ a_start = ALIGN_DOWN(*start, PUD_SIZE);
+ a_end = ALIGN(*end, PUD_SIZE);
- /*
- * If sharing is possible, adjust start/end if necessary.
- */
- if (range_in_vma(vma, a_start, a_end)) {
- if (a_start < *start)
- *start = a_start;
- if (a_end > *end)
- *end = a_end;
- }
- }
+ /*
+ * Intersect the range with the vma range, since pmd sharing won't be
+ * across vma after all
+ */
+ *start = max(vma->vm_start, a_start);
+ *end = min(vma->vm_end, a_end);
}
/*
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 75802ca66354a39ab8e35822747cd08b3384a99a Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx(a)redhat.com>
Date: Thu, 6 Aug 2020 23:26:11 -0700
Subject: [PATCH] mm/hugetlb: fix calculation of
adjust_range_if_pmd_sharing_possible
This is found by code observation only.
Firstly, the worst case scenario should assume the whole range was covered
by pmd sharing. The old algorithm might not work as expected for ranges
like (1g-2m, 1g+2m), where the adjusted range should be (0, 1g+2m) but the
expected range should be (0, 2g).
While at it, remove the loop, since it should not be required. With
that, the new code should also be faster when the invalidated range is
huge.
Mike said:
: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
: adjust to (0, 1g+2m) which is incorrect.
:
: We should cc stable. The original reason for adjusting the range was to
: prevent data corruption (getting wrong page). Since the range is not
: always adjusted correctly, the potential for corruption still exists.
:
: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
: is only going to be called in two cases:
:
: 1) for a single page
: 2) for range == entire vma
:
: In those cases, the current code should produce the correct results.
:
: To be safe, let's just cc stable.
Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 27556d4d49fe..e52c878940bb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5314,25 +5314,21 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
- unsigned long check_addr;
+ unsigned long a_start, a_end;
if (!(vma->vm_flags & VM_MAYSHARE))
return;
- for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
- unsigned long a_start = check_addr & PUD_MASK;
- unsigned long a_end = a_start + PUD_SIZE;
+ /* Extend the range to be PUD aligned for a worst case scenario */
+ a_start = ALIGN_DOWN(*start, PUD_SIZE);
+ a_end = ALIGN(*end, PUD_SIZE);
- /*
- * If sharing is possible, adjust start/end if necessary.
- */
- if (range_in_vma(vma, a_start, a_end)) {
- if (a_start < *start)
- *start = a_start;
- if (a_end > *end)
- *end = a_end;
- }
- }
+ /*
+ * Intersect the range with the vma range, since pmd sharing won't be
+ * across vma after all
+ */
+ *start = max(vma->vm_start, a_start);
+ *end = min(vma->vm_end, a_end);
}
/*
When offlining CPUs, fixup_irqs() migrates all interrupts away from the
outgoing CPU to an online CPU. It's always possible the device sent an
interrupt to the previous CPU destination. The pending-interrupt bit in
the local APIC's IRR identifies such interrupts. apic_soft_disable()
will not capture any new interrupts in the IRR. This causes interrupts
from the device to be lost during CPU offline. The issue was found when
explicitly setting MSI affinity to a CPU and immediately offlining it.
It was simple to recreate with a USB ethernet device and doing I/O to it
while the CPU is offlined. Lost interrupts happen even when Interrupt
Remapping is enabled.
Current code does apic_soft_disable() before migrating interrupts.
native_cpu_disable()
{
...
apic_soft_disable();
cpu_disable_common();
--> fixup_irqs(); // Too late to capture anything in IRR.
}
Just flipping the above call sequence seems to make the IRR checks take
effect, and the lost interrupt is fixed both for legacy MSI and when
interrupt remapping is enabled.
Fixes: 60dcaad5736f ("x86/hotplug: Silence APIC and NMI when CPU is dead")
Link: https://lore.kernel.org/lkml/875zdarr4h.fsf@nanos.tec.linutronix.de/
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
To: linux-kernel(a)vger.kernel.org
To: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Sukumar Ghorai <sukumar.ghorai(a)intel.com>
Cc: Srikanth Nandamuri <srikanth.nandamuri(a)intel.com>
Cc: Evan Green <evgreen(a)chromium.org>
Cc: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Cc: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/kernel/smpboot.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ffbd9a3d78d8..278cc9f92f2f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1603,13 +1603,20 @@ int native_cpu_disable(void)
if (ret)
return ret;
+ cpu_disable_common();
/*
* Disable the local APIC. Otherwise IPI broadcasts will reach
* it. It still responds normally to INIT, NMI, SMI, and SIPI
- * messages.
+ * messages. It's important to do apic_soft_disable() after
+ * fixup_irqs(), because fixup_irqs() called from cpu_disable_common()
+ * depends on IRR being set. After apic_soft_disable() the CPU preserves
+ * the currently set IRR/ISR but new interrupts will not set the IRR.
+ * That would cause interrupts sent to the outgoing CPU before irq
+ * migration completes to be lost. See the SDM Vol 3 section "10.4.7.2
+ * Local APIC State after It Has been Software Disabled" for more
+ * details.
*/
apic_soft_disable();
- cpu_disable_common();
return 0;
}
--
2.13.6
As per PAPR we have to look at both the EPOW sensor value and the event
modifier to identify the type of event and take appropriate action.
Sensor value = 3 (EPOW_SYSTEM_SHUTDOWN) schedules the system to be shut
down after an OS-defined delay (default 10 mins).
EPOW event modifier for sensor value = 3:
We have to initiate an immediate shutdown for most event modifiers,
except value = 2 (system running on UPS).
Checking the firmware documentation, it's clear that we have to wait for
a predefined time before initiating shutdown in that case, and cancel
the shutdown if power is restored within that time. I think commit
79872e35 accidentally enabled immediate poweroff for the
EPOW_SHUTDOWN_ON_UPS event.
We have a user space tool (rtas_errd) on the LPAR to monitor for
EPOW_SHUTDOWN_ON_UPS. Once it gets the event, it initiates shutdown
after a predefined time and keeps monitoring for new EPOW events. If it
receives a "Power restored" event before that time, it cancels the
shutdown; otherwise it shuts the system down once the time expires.
Fixes: 79872e35 (powerpc/pseries: All events of EPOW_SYSTEM_SHUTDOWN must initiate shutdown)
Cc: stable(a)vger.kernel.org # v4.0+
Cc: Tyrel Datwyler <tyreld(a)linux.ibm.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Signed-off-by: Vasant Hegde <hegdevasant(a)linux.vnet.ibm.com>
---
Changes in v2:
- Updated patch description based on mpe, Tyrel comment.
-Vasant
arch/powerpc/platforms/pseries/ras.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index f3736fcd98fc..13c86a292c6d 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier)
case EPOW_SHUTDOWN_ON_UPS:
pr_emerg("Loss of system power detected. System is running on"
" UPS/battery. Check RTAS error log for details\n");
- orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
--
2.26.2
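A standalone sketch of the intended behaviour after the fix (the
modifier value 2 comes from the text above; the other value and the
handler shape are illustrative stand-ins): only the UPS case defers to
the user-space rtas_errd policy instead of powering off immediately.
#include <stdio.h>
#define EPOW_SHUTDOWN_ON_UPS				2
#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3	/* stand-in */
static void handle_system_shutdown(int event_modifier)
{
	switch (event_modifier) {
	case EPOW_SHUTDOWN_ON_UPS:
		/* after the fix: log only; rtas_errd schedules a delayed
		 * shutdown and cancels it if power returns in time */
		printf("on UPS: defer to user space policy\n");
		break;
	default:
		printf("modifier %d: immediate orderly poweroff\n",
		       event_modifier);
		break;
	}
}
int main(void)
{
	handle_system_shutdown(EPOW_SHUTDOWN_ON_UPS);
	handle_system_shutdown(EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS);
	return 0;
}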
From: WANG Cong <xiyou.wangcong(a)gmail.com>
[ Upstream commit 199ab00f3cdb6f154ea93fa76fd80192861a821d ]
Andrey reported an out-of-bounds access in ip6_tnl_xmit(). This is
because we use an IPv4 dst in ip6_tnl_xmit() and cast an IPv4 neigh key
as an IPv6 address:
neigh = dst_neigh_lookup(skb_dst(skb),
&ipv6_hdr(skb)->daddr);
if (!neigh)
goto tx_err_link_failure;
addr6 = (struct in6_addr *)&neigh->primary_key; // <=== HERE
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY)
addr6 = &ipv6_hdr(skb)->daddr;
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
Also, the network header of the skb at this point should still be IPv4
for 4in6 tunnels; we should not just use it as an IPv6 header.
This patch fixes it by checking whether skb->protocol is ETH_P_IPV6: if
it is, we are safe to do the nexthop lookup using skb_dst() and
ipv6_hdr(skb)->daddr; if not (i.e. IPv4), we have no clue which
destination address to pick here and have to rely on callers to fill it
in from the tunnel config, so just fall through to ip6_route_output() to
make the decision.
Fixes: ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.")
Reported-by: Andrey Konovalov <andreyknvl(a)google.com>
Reported-by: syzbot+01400f5fc51cf4747bec(a)syzkaller.appspotmail.com
Tested-by: Andrey Konovalov <andreyknvl(a)google.com>
Cc: Steffen Klassert <steffen.klassert(a)secunet.com>
Signed-off-by: Cong Wang <xiyou.wangcong(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Alessio Balsini <balsini(a)android.com>
---
net/ipv6/ip6_tunnel.c | 32 +++++++++++++++++---------------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index f072a4c4575c..96563990d654 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -972,26 +972,28 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
/* NBMA tunnel */
if (ipv6_addr_any(&t->parms.raddr)) {
- struct in6_addr *addr6;
- struct neighbour *neigh;
- int addr_type;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct in6_addr *addr6;
+ struct neighbour *neigh;
+ int addr_type;
- if (!skb_dst(skb))
- goto tx_err_link_failure;
+ if (!skb_dst(skb))
+ goto tx_err_link_failure;
- neigh = dst_neigh_lookup(skb_dst(skb),
- &ipv6_hdr(skb)->daddr);
- if (!neigh)
- goto tx_err_link_failure;
+ neigh = dst_neigh_lookup(skb_dst(skb),
+ &ipv6_hdr(skb)->daddr);
+ if (!neigh)
+ goto tx_err_link_failure;
- addr6 = (struct in6_addr *)&neigh->primary_key;
- addr_type = ipv6_addr_type(addr6);
+ addr6 = (struct in6_addr *)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
- if (addr_type == IPV6_ADDR_ANY)
- addr6 = &ipv6_hdr(skb)->daddr;
+ if (addr_type == IPV6_ADDR_ANY)
+ addr6 = &ipv6_hdr(skb)->daddr;
- memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
- neigh_release(neigh);
+ memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+ neigh_release(neigh);
+ }
} else if (!fl6->flowi6_mark)
dst = dst_cache_get(&t->dst_cache);
--
2.28.0.297.g1956fa8f8d-goog
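A minimal standalone sketch of the rule the fix enforces (stand-in
types; only skb->protocol and the constants are taken from the patch):
treat the packet as IPv6 only when the protocol says so, otherwise fall
back to the tunnel's own routing decision.
#include <stdio.h>
#include <arpa/inet.h>
#define ETH_P_IPV6 0x86DD
#define ETH_P_IP   0x0800
struct fake_skb {
	unsigned short protocol;
};
static void pick_daddr(const struct fake_skb *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		/* safe: the network header really is IPv6, so the
		 * neighbour key may be read as a struct in6_addr */
		printf("IPv6 payload: use skb_dst()/ipv6_hdr(skb)->daddr\n");
	} else {
		/* 4in6: the header is IPv4; let ip6_route_output()
		 * decide from the tunnel configuration instead */
		printf("IPv4 payload: fall through to ip6_route_output()\n");
	}
}
int main(void)
{
	struct fake_skb v6 = { htons(ETH_P_IPV6) };
	struct fake_skb v4 = { htons(ETH_P_IP) };
	pick_daddr(&v6);
	pick_daddr(&v4);
	return 0;
}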
Let's not try to use PAT attributes for I915_MAP_WC if the CPU doesn't
support PAT.
Fixes: 6056e50033d9 ("drm/i915/gem: Support discontiguous lmem object maps")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: <stable(a)vger.kernel.org> # v5.6+
---
drivers/gpu/drm/i915/gem/i915_gem_pages.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 7050519c87a4..5f1725268988 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -254,6 +254,9 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC)
return NULL;
+ if (type == I915_MAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+ return NULL;
+
/* A single page can always be kmapped */
if (n_pte == 1 && type == I915_MAP_WB)
return kmap(sg_page(sgt->sgl));
--
2.20.1
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a34a0a632dd991a371fec56431d73279f9c54029 Mon Sep 17 00:00:00 2001
From: Xin Xiong <xiongx18(a)fudan.edu.cn>
Date: Sun, 19 Jul 2020 23:45:45 +0800
Subject: [PATCH] drm: fix drm_dp_mst_port refcount leaks in
drm_dp_mst_allocate_vcpi
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
drm_dp_mst_allocate_vcpi() invokes
drm_dp_mst_topology_get_port_validated(), which increases the refcount
of the "port".
The reference counting issues take place in two separate exception
handling paths. Either when “slots” is less than 0 or when
drm_dp_init_vcpi() returns a negative value, the function forgets to
drop the refcount taken by drm_dp_mst_topology_get_port_validated(),
which results in a refcount leak.
Fix these issues by pulling up the error handling when "slots" is less
than 0, and calling drm_dp_mst_topology_put_port() before termination
when drm_dp_init_vcpi() returns a negative value.
Fixes: 1e797f556c61 ("drm/dp: Split drm_dp_mst_allocate_vcpi")
Cc: <stable(a)vger.kernel.org> # v4.12+
Signed-off-by: Xiyu Yang <xiyuyang19(a)fudan.edu.cn>
Signed-off-by: Xin Tan <tanxin.ctf(a)gmail.com>
Signed-off-by: Xin Xiong <xiongx18(a)fudan.edu.cn>
Reviewed-by: Lyude Paul <lyude(a)redhat.com>
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200719154545.GA41231@xin-vi…
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 09b32289497e..b23cb2fec3f3 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -4308,11 +4308,11 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr,
{
int ret;
- port = drm_dp_mst_topology_get_port_validated(mgr, port);
- if (!port)
+ if (slots < 0)
return false;
- if (slots < 0)
+ port = drm_dp_mst_topology_get_port_validated(mgr, port);
+ if (!port)
return false;
if (port->vcpi.vcpi > 0) {
@@ -4328,6 +4328,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr,
if (ret) {
DRM_DEBUG_KMS("failed to init vcpi slots=%d max=63 ret=%d\n",
DIV_ROUND_UP(pbn, mgr->pbn_div), ret);
+ drm_dp_mst_topology_put_port(port);
goto out;
}
DRM_DEBUG_KMS("initing vcpi for pbn=%d slots=%d\n",
The ordering of psp_tmr_terminate() and psp_asd_unload() got reversed
when the patches were applied to stable. This patch does not exist in
Linus' tree because the ordering is correct there; it is for stable
only.
Fixes: 22ff658396b446 ("drm/amdgpu: asd function needs to be unloaded in suspend phase")
Fixes: 2c41c968c6f648 ("drm/amdgpu: add TMR destory function for psp")
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 5.7.x
Cc: Huang Rui <ray.huang(a)amd.com>
---
Make the description more explicit as to why this patch is only for stable.
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 3c6f60c5b1a5..088f43ebdceb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1679,15 +1679,15 @@ static int psp_suspend(void *handle)
}
}
- ret = psp_tmr_terminate(psp);
+ ret = psp_asd_unload(psp);
if (ret) {
- DRM_ERROR("Falied to terminate tmr\n");
+ DRM_ERROR("Failed to unload asd\n");
return ret;
}
- ret = psp_asd_unload(psp);
+ ret = psp_tmr_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to unload asd\n");
+ DRM_ERROR("Falied to terminate tmr\n");
return ret;
}
--
2.25.4
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ec11fe3705a7d434ec3493bdaddb9b6367e7366a Mon Sep 17 00:00:00 2001
From: hersen wu <hersenxs.wu(a)amd.com>
Date: Mon, 22 Jun 2020 13:29:16 -0400
Subject: [PATCH] drm/amd/display: OLED panel backlight adjust not work with
external display connected
[Why]
amdgpu_dm->backlight_caps is for a single eDP only, but the caps are
updated for every connector, so the real eDP caps get overwritten by
other external displays. For an OLED panel, caps->aux_support is set to
1; after an external display is connected, caps->aux_support is reset to
0. This causes OLED backlight adjustment to stop working.
[How]
within update_connector_ext_caps(), update the backlight caps only for
the eDP connector.
Cc: stable(a)vger.kernel.org
Signed-off-by: hersen wu <hersenxs.wu(a)amd.com>
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas(a)amd.com>
Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6f937e25a62c..9c283531e1a2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2028,6 +2028,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
struct amdgpu_display_manager *dm;
struct drm_connector *conn_base;
struct amdgpu_device *adev;
+ struct dc_link *link = NULL;
static const u8 pre_computed_values[] = {
50, 51, 52, 53, 55, 56, 57, 58, 59, 61, 62, 63, 65, 66, 68, 69,
71, 72, 74, 75, 77, 79, 81, 82, 84, 86, 88, 90, 92, 94, 96, 98};
@@ -2035,6 +2036,10 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
if (!aconnector || !aconnector->dc_link)
return;
+ link = aconnector->dc_link;
+ if (link->connector_signal != SIGNAL_TYPE_EDP)
+ return;
+
conn_base = &aconnector->base;
adev = conn_base->dev->dev_private;
dm = &adev->dm;
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8425400759749e23aaa831f2482b96211201af33 Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov(a)linux.com>
Date: Fri, 5 Jun 2020 20:37:43 +0300
Subject: [PATCH] drm/amd/display: Use kvfree() to free coeff in
build_regamma()
Use kvfree() instead of kfree() to free coeff in build_regamma()
because the memory is allocated with kvzalloc().
Fixes: e752058b8671 ("drm/amd/display: Optimize gamma calculations")
Cc: stable(a)vger.kernel.org
Signed-off-by: Denis Efremov <efremov(a)linux.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index 9431b48aecb4..56bb1f9f77ce 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -843,7 +843,7 @@ static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
pow_buffer_ptr = -1; // reset back to no optimize
ret = true;
release:
- kfree(coeff);
+ kvfree(coeff);
return ret;
}
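A kernel-context sketch of the pairing rule (not a standalone program;
the size expression and num_points are illustrative): kvzalloc() may
satisfy a large allocation from vmalloc rather than the slab, and only
kvfree() can release both kinds of memory.
	coeff = kvzalloc(sizeof(*coeff) * num_points, GFP_KERNEL);
	if (!coeff)
		return false;
	/* ... */
	kvfree(coeff);	/* handles slab- and vmalloc-backed memory alike;
			 * kfree() on a vmalloc pointer corrupts state */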
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8425400759749e23aaa831f2482b96211201af33 Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov(a)linux.com>
Date: Fri, 5 Jun 2020 20:37:43 +0300
Subject: [PATCH] drm/amd/display: Use kvfree() to free coeff in
build_regamma()
Use kvfree() instead of kfree() to free coeff in build_regamma()
because the memory is allocated with kvzalloc().
Fixes: e752058b8671 ("drm/amd/display: Optimize gamma calculations")
Cc: stable(a)vger.kernel.org
Signed-off-by: Denis Efremov <efremov(a)linux.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index 9431b48aecb4..56bb1f9f77ce 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -843,7 +843,7 @@ static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
pow_buffer_ptr = -1; // reset back to no optimize
ret = true;
release:
- kfree(coeff);
+ kvfree(coeff);
return ret;
}
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8425400759749e23aaa831f2482b96211201af33 Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov(a)linux.com>
Date: Fri, 5 Jun 2020 20:37:43 +0300
Subject: [PATCH] drm/amd/display: Use kvfree() to free coeff in
build_regamma()
Use kvfree() instead of kfree() to free coeff in build_regamma()
because the memory is allocated with kvzalloc().
Fixes: e752058b8671 ("drm/amd/display: Optimize gamma calculations")
Cc: stable(a)vger.kernel.org
Signed-off-by: Denis Efremov <efremov(a)linux.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index 9431b48aecb4..56bb1f9f77ce 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -843,7 +843,7 @@ static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
pow_buffer_ptr = -1; // reset back to no optimize
ret = true;
release:
- kfree(coeff);
+ kvfree(coeff);
return ret;
}
The patch below was submitted to be applied to the 5.8-stable tree.
I fail to see how this patch meets the stable kernel rules as found at
Documentation/process/stable-kernel-rules.rst.
I could be totally wrong, and if so, please respond to
<stable(a)vger.kernel.org> and let me know why this patch should be
applied. Otherwise, it is now dropped from my patch queues, never to be
seen again.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 1d8d42ba365101fa68d210c0e2ed2bc9582fda6c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Fri, 5 Jun 2020 15:57:50 +0200
Subject: [PATCH] drm/mgag200: Remove declaration of mgag200_mmap() from header
file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Commit 94668ac796a5 ("drm/mgag200: Convert mgag200 driver to VRAM MM")
removed the implementation of mgag200_mmap(). Also remove the declaration.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Acked-by: Sam Ravnborg <sam(a)ravnborg.org>
Fixes: 94668ac796a5 ("drm/mgag200: Convert mgag200 driver to VRAM MM")
Cc: Gerd Hoffmann <kraxel(a)redhat.com>
Cc: Dave Airlie <airlied(a)redhat.com>
Cc: Krzysztof Kozlowski <krzk(a)kernel.org>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Sam Ravnborg <sam(a)ravnborg.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: "Noralf Trønnes" <noralf(a)tronnes.org>
Cc: Armijn Hemel <armijn(a)tjaldur.nl>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Emil Velikov <emil.velikov(a)collabora.com>
Cc: <stable(a)vger.kernel.org> # v5.3+
Link: https://patchwork.freedesktop.org/patch/msgid/20200605135803.19811-2-tzimme…
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 47df62b1ad29..92b6679029fe 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -198,6 +198,5 @@ void mgag200_i2c_destroy(struct mga_i2c_chan *i2c);
int mgag200_mm_init(struct mga_device *mdev);
void mgag200_mm_fini(struct mga_device *mdev);
-int mgag200_mmap(struct file *filp, struct vm_area_struct *vma);
#endif /* __MGAG200_DRV_H__ */
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 119c53d2d4044c59c450c4f5a568d80b9d861856 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Wed, 8 Jul 2020 16:49:11 +0100
Subject: [PATCH] drm/vgem: Replace opencoded version of
drm_gem_dumb_map_offset()
drm_gem_dumb_map_offset() now exists and does everything
vgem_gem_dumb_map() does and *ought* to do.
In particular, vgem_gem_dumb_map() was trying to reject mmapping an
imported dmabuf by checking the existence of obj->filp. Unfortunately,
we always allocated an obj->filp, even if unused for an imported dmabuf.
Instead, the drm_gem_dumb_map_offset(), since commit 90378e589192
("drm/gem: drm_gem_dumb_map_offset(): reject dma-buf"), uses the
obj->import_attach to reject such invalid mmaps.
This prevents vgem from allowing userspace mmapping the dumb handle and
attempting to incorrectly fault in remote pages belonging to another
device, where there may not even be a struct page.
v2: Use the default drm_gem_dumb_map_offset() callback
Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: <stable(a)vger.kernel.org> # v4.13+
Link: https://patchwork.freedesktop.org/patch/msgid/20200708154911.21236-1-chris@…
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index e4dc7b267a0b..a775feda1cc7 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -230,32 +230,6 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
return 0;
}
-static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
- uint32_t handle, uint64_t *offset)
-{
- struct drm_gem_object *obj;
- int ret;
-
- obj = drm_gem_object_lookup(file, handle);
- if (!obj)
- return -ENOENT;
-
- if (!obj->filp) {
- ret = -EINVAL;
- goto unref;
- }
-
- ret = drm_gem_create_mmap_offset(obj);
- if (ret)
- goto unref;
-
- *offset = drm_vma_node_offset_addr(&obj->vma_node);
-unref:
- drm_gem_object_put(obj);
-
- return ret;
-}
-
static struct drm_ioctl_desc vgem_ioctls[] = {
DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_RENDER_ALLOW),
@@ -446,7 +420,6 @@ static struct drm_driver vgem_driver = {
.fops = &vgem_driver_fops,
.dumb_create = vgem_gem_dumb_create,
- .dumb_map_offset = vgem_gem_dumb_map,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
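A kernel-context sketch (not standalone) of the check that matters here,
as performed by drm_gem_dumb_map_offset() since commit 90378e589192:
imported dma-bufs always carry an import_attach, whereas vgem allocates
obj->filp unconditionally, so testing filp could never reject them.
	obj = drm_gem_object_lookup(file, handle);
	if (!obj)
		return -ENOENT;
	/* reject mapping an imported dma-buf: its pages belong to
	 * another device and may not even have a struct page */
	if (obj->import_attach) {
		ret = -EINVAL;
		goto out;
	}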
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 119c53d2d4044c59c450c4f5a568d80b9d861856 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Wed, 8 Jul 2020 16:49:11 +0100
Subject: [PATCH] drm/vgem: Replace opencoded version of
drm_gem_dumb_map_offset()
drm_gem_dumb_map_offset() now exists and does everything
vgem_gem_dumb_map() does and *ought* to do.
In particular, vgem_gem_dumb_map() was trying to reject mmapping an
imported dmabuf by checking the existence of obj->filp. Unfortunately,
we always allocated an obj->filp, even if unused for an imported dmabuf.
Instead, the drm_gem_dumb_map_offset(), since commit 90378e589192
("drm/gem: drm_gem_dumb_map_offset(): reject dma-buf"), uses the
obj->import_attach to reject such invalid mmaps.
This prevents vgem from allowing userspace mmapping the dumb handle and
attempting to incorrectly fault in remote pages belonging to another
device, where there may not even be a struct page.
v2: Use the default drm_gem_dumb_map_offset() callback
Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: <stable(a)vger.kernel.org> # v4.13+
Link: https://patchwork.freedesktop.org/patch/msgid/20200708154911.21236-1-chris@…
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index e4dc7b267a0b..a775feda1cc7 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -230,32 +230,6 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
return 0;
}
-static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
- uint32_t handle, uint64_t *offset)
-{
- struct drm_gem_object *obj;
- int ret;
-
- obj = drm_gem_object_lookup(file, handle);
- if (!obj)
- return -ENOENT;
-
- if (!obj->filp) {
- ret = -EINVAL;
- goto unref;
- }
-
- ret = drm_gem_create_mmap_offset(obj);
- if (ret)
- goto unref;
-
- *offset = drm_vma_node_offset_addr(&obj->vma_node);
-unref:
- drm_gem_object_put(obj);
-
- return ret;
-}
-
static struct drm_ioctl_desc vgem_ioctls[] = {
DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_RENDER_ALLOW),
@@ -446,7 +420,6 @@ static struct drm_driver vgem_driver = {
.fops = &vgem_driver_fops,
.dumb_create = vgem_gem_dumb_create,
- .dumb_map_offset = vgem_gem_dumb_map,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 119c53d2d4044c59c450c4f5a568d80b9d861856 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Wed, 8 Jul 2020 16:49:11 +0100
Subject: [PATCH] drm/vgem: Replace opencoded version of
drm_gem_dumb_map_offset()
drm_gem_dumb_map_offset() now exists and does everything
vgem_gem_dumb_map() does and *ought* to do.
In particular, vgem_gem_dumb_map() was trying to reject mmapping an
imported dmabuf by checking the existence of obj->filp. Unfortunately,
we always allocated an obj->filp, even if unused for an imported dmabuf.
Instead, the drm_gem_dumb_map_offset(), since commit 90378e589192
("drm/gem: drm_gem_dumb_map_offset(): reject dma-buf"), uses the
obj->import_attach to reject such invalid mmaps.
This prevents vgem from allowing userspace mmapping the dumb handle and
attempting to incorrectly fault in remote pages belonging to another
device, where there may not even be a struct page.
v2: Use the default drm_gem_dumb_map_offset() callback
Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: <stable(a)vger.kernel.org> # v4.13+
Link: https://patchwork.freedesktop.org/patch/msgid/20200708154911.21236-1-chris@…
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index e4dc7b267a0b..a775feda1cc7 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -230,32 +230,6 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
return 0;
}
-static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
- uint32_t handle, uint64_t *offset)
-{
- struct drm_gem_object *obj;
- int ret;
-
- obj = drm_gem_object_lookup(file, handle);
- if (!obj)
- return -ENOENT;
-
- if (!obj->filp) {
- ret = -EINVAL;
- goto unref;
- }
-
- ret = drm_gem_create_mmap_offset(obj);
- if (ret)
- goto unref;
-
- *offset = drm_vma_node_offset_addr(&obj->vma_node);
-unref:
- drm_gem_object_put(obj);
-
- return ret;
-}
-
static struct drm_ioctl_desc vgem_ioctls[] = {
DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_RENDER_ALLOW),
@@ -446,7 +420,6 @@ static struct drm_driver vgem_driver = {
.fops = &vgem_driver_fops,
.dumb_create = vgem_gem_dumb_create,
- .dumb_map_offset = vgem_gem_dumb_map,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 119c53d2d4044c59c450c4f5a568d80b9d861856 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Wed, 8 Jul 2020 16:49:11 +0100
Subject: [PATCH] drm/vgem: Replace opencoded version of
drm_gem_dumb_map_offset()
drm_gem_dumb_map_offset() now exists and does everything
vgem_gem_dumb_map() does and *ought* to do.
In particular, vgem_gem_dumb_map() was trying to reject mmapping an
imported dmabuf by checking the existence of obj->filp. Unfortunately,
we always allocated an obj->filp, even if unused for an imported dmabuf.
Instead, the drm_gem_dumb_map_offset(), since commit 90378e589192
("drm/gem: drm_gem_dumb_map_offset(): reject dma-buf"), uses the
obj->import_attach to reject such invalid mmaps.
This prevents vgem from allowing userspace mmapping the dumb handle and
attempting to incorrectly fault in remote pages belonging to another
device, where there may not even be a struct page.
v2: Use the default drm_gem_dumb_map_offset() callback
Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: <stable(a)vger.kernel.org> # v4.13+
Link: https://patchwork.freedesktop.org/patch/msgid/20200708154911.21236-1-chris@…
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index e4dc7b267a0b..a775feda1cc7 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -230,32 +230,6 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
return 0;
}
-static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
- uint32_t handle, uint64_t *offset)
-{
- struct drm_gem_object *obj;
- int ret;
-
- obj = drm_gem_object_lookup(file, handle);
- if (!obj)
- return -ENOENT;
-
- if (!obj->filp) {
- ret = -EINVAL;
- goto unref;
- }
-
- ret = drm_gem_create_mmap_offset(obj);
- if (ret)
- goto unref;
-
- *offset = drm_vma_node_offset_addr(&obj->vma_node);
-unref:
- drm_gem_object_put(obj);
-
- return ret;
-}
-
static struct drm_ioctl_desc vgem_ioctls[] = {
DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_RENDER_ALLOW),
@@ -446,7 +420,6 @@ static struct drm_driver vgem_driver = {
.fops = &vgem_driver_fops,
.dumb_create = vgem_gem_dumb_create,
- .dumb_map_offset = vgem_gem_dumb_map,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 119c53d2d4044c59c450c4f5a568d80b9d861856 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Wed, 8 Jul 2020 16:49:11 +0100
Subject: [PATCH] drm/vgem: Replace opencoded version of
drm_gem_dumb_map_offset()
drm_gem_dumb_map_offset() now exists and does everything
vgem_gem_dumb_map() does and *ought* to do.
In particular, vgem_gem_dumb_map() was trying to reject mmapping an
imported dmabuf by checking the existence of obj->filp. Unfortunately,
we always allocated an obj->filp, even if unused for an imported dmabuf.
Instead, the drm_gem_dumb_map_offset(), since commit 90378e589192
("drm/gem: drm_gem_dumb_map_offset(): reject dma-buf"), uses the
obj->import_attach to reject such invalid mmaps.
This prevents vgem from allowing userspace mmapping the dumb handle and
attempting to incorrectly fault in remote pages belonging to another
device, where there may not even be a struct page.
v2: Use the default drm_gem_dumb_map_offset() callback
Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: <stable(a)vger.kernel.org> # v4.13+
Link: https://patchwork.freedesktop.org/patch/msgid/20200708154911.21236-1-chris@…
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index e4dc7b267a0b..a775feda1cc7 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -230,32 +230,6 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
return 0;
}
-static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
- uint32_t handle, uint64_t *offset)
-{
- struct drm_gem_object *obj;
- int ret;
-
- obj = drm_gem_object_lookup(file, handle);
- if (!obj)
- return -ENOENT;
-
- if (!obj->filp) {
- ret = -EINVAL;
- goto unref;
- }
-
- ret = drm_gem_create_mmap_offset(obj);
- if (ret)
- goto unref;
-
- *offset = drm_vma_node_offset_addr(&obj->vma_node);
-unref:
- drm_gem_object_put(obj);
-
- return ret;
-}
-
static struct drm_ioctl_desc vgem_ioctls[] = {
DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_RENDER_ALLOW),
@@ -446,7 +420,6 @@ static struct drm_driver vgem_driver = {
.fops = &vgem_driver_fops,
.dumb_create = vgem_gem_dumb_create,
- .dumb_map_offset = vgem_gem_dumb_map,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 511b6d9aed417739b6aa49d0b6b4354ad21020f1 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 26 May 2020 10:07:53 +0100
Subject: [PATCH] drm/i915/gt: Do not schedule normal requests immediately
along virtual
When we push a virtual request onto the HW, we update the rq->engine to
point to the physical engine. A request that is then submitted by the
user that waits upon the virtual engine, but along the physical engine
in use, will then see that it is due to be submitted to the same engine
and take a shortcut (and be queued without waiting for the completion
fence). However, the virtual request may be preempted (either by higher
priority users, or by timeslicing) and removed from the physical engine
to be migrated over to one of its siblings. The dependent normal request
however is oblivious to the removal of the virtual request and remains
queued to execute on HW, believing that once it reaches the head of its
queue all of its predecessors will have completed executing!
v2: Beware restriction of signal->execution_mask prior to submission.
Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Testcase: igt/gem_exec_balancer/sliced
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.3+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-2-chris@…
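The key predicate is is_power_of_2() over the combined execution masks: a
request pinned to exactly one physical engine has a single bit set, while
a virtual request spanning several engines has several. A standalone
illustration of that test (assumed field widths, not the i915 API):

#include <stdbool.h>
#include <stdint.h>

/*
 * Illustration only: the OR of both execution masks is a power of two
 * exactly when both requests can only ever run on the same single
 * physical engine; only then is the submit-fence path taken, while
 * anything that may migrate goes through the semaphore wait instead.
 */
static bool confined_to_one_engine(uint32_t to_mask, uint32_t from_mask)
{
	uint32_t combined = to_mask | from_mask;

	return combined != 0 && (combined & (combined - 1)) == 0;
}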
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e64d82f7c830..0d810a62ff46 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1242,6 +1242,25 @@ i915_request_await_execution(struct i915_request *rq,
return 0;
}
+static int
+await_request_submit(struct i915_request *to, struct i915_request *from)
+{
+ /*
+ * If we are waiting on a virtual engine, then it may be
+ * constrained to execute on a single engine *prior* to submission.
+ * When it is submitted, it will be first submitted to the virtual
+ * engine and then passed to the physical engine. We cannot allow
+ * the waiter to be submitted immediately to the physical engine
+ * as it may then bypass the virtual request.
+ */
+ if (to->engine == READ_ONCE(from->engine))
+ return i915_sw_fence_await_sw_fence_gfp(&to->submit,
+ &from->submit,
+ I915_FENCE_GFP);
+ else
+ return __i915_request_await_execution(to, from, NULL);
+}
+
static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
@@ -1263,10 +1282,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
return ret;
}
- if (to->engine == READ_ONCE(from->engine))
- ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
- &from->submit,
- I915_FENCE_GFP);
+ if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
+ ret = await_request_submit(to, from);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
if (ret < 0)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 511b6d9aed417739b6aa49d0b6b4354ad21020f1 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 26 May 2020 10:07:53 +0100
Subject: [PATCH] drm/i915/gt: Do not schedule normal requests immediately
along virtual
When we push a virtual request onto the HW, we update the rq->engine to
point to the physical engine. A request that is then submitted by the
user that waits upon the virtual engine, but along the physical engine
in use, will then see that it is due to be submitted to the same engine
and take a shortcut (and be queued without waiting for the completion
fence). However, the virtual request may be preempted (either by higher
priority users, or by timeslicing) and removed from the physical engine
to be migrated over to one of its siblings. The dependent normal request
however is oblivious to the removal of the virtual request and remains
queued to execute on HW, believing that once it reaches the head of its
queue all of its predecessors will have completed executing!
v2: Beware restriction of signal->execution_mask prior to submission.
Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Testcase: igt/gem_exec_balancer/sliced
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.3+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-2-chris@…
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e64d82f7c830..0d810a62ff46 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1242,6 +1242,25 @@ i915_request_await_execution(struct i915_request *rq,
return 0;
}
+static int
+await_request_submit(struct i915_request *to, struct i915_request *from)
+{
+ /*
+ * If we are waiting on a virtual engine, then it may be
+ * constrained to execute on a single engine *prior* to submission.
+ * When it is submitted, it will be first submitted to the virtual
+ * engine and then passed to the physical engine. We cannot allow
+ * the waiter to be submitted immediately to the physical engine
+ * as it may then bypass the virtual request.
+ */
+ if (to->engine == READ_ONCE(from->engine))
+ return i915_sw_fence_await_sw_fence_gfp(&to->submit,
+ &from->submit,
+ I915_FENCE_GFP);
+ else
+ return __i915_request_await_execution(to, from, NULL);
+}
+
static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
@@ -1263,10 +1282,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
return ret;
}
- if (to->engine == READ_ONCE(from->engine))
- ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
- &from->submit,
- I915_FENCE_GFP);
+ if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
+ ret = await_request_submit(to, from);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
if (ret < 0)
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 511b6d9aed417739b6aa49d0b6b4354ad21020f1 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 26 May 2020 10:07:53 +0100
Subject: [PATCH] drm/i915/gt: Do not schedule normal requests immediately
along virtual
When we push a virtual request onto the HW, we update the rq->engine to
point to the physical engine. A request that is then submitted by the
user that waits upon the virtual engine, but along the physical engine
in use, will then see that it is due to be submitted to the same engine
and take a shortcut (and be queued without waiting for the completion
fence). However, the virtual request may be preempted (either by higher
priority users, or by timeslicing) and removed from the physical engine
to be migrated over to one of its siblings. The dependent normal request
however is oblivious to the removal of the virtual request and remains
queued to execute on HW, believing that once it reaches the head of its
queue all of its predecessors will have completed executing!
v2: Beware restriction of signal->execution_mask prior to submission.
Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Testcase: igt/gem_exec_balancer/sliced
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.3+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-2-chris@…
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e64d82f7c830..0d810a62ff46 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1242,6 +1242,25 @@ i915_request_await_execution(struct i915_request *rq,
return 0;
}
+static int
+await_request_submit(struct i915_request *to, struct i915_request *from)
+{
+ /*
+ * If we are waiting on a virtual engine, then it may be
+ * constrained to execute on a single engine *prior* to submission.
+ * When it is submitted, it will be first submitted to the virtual
+ * engine and then passed to the physical engine. We cannot allow
+ * the waiter to be submitted immediately to the physical engine
+ * as it may then bypass the virtual request.
+ */
+ if (to->engine == READ_ONCE(from->engine))
+ return i915_sw_fence_await_sw_fence_gfp(&to->submit,
+ &from->submit,
+ I915_FENCE_GFP);
+ else
+ return __i915_request_await_execution(to, from, NULL);
+}
+
static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
@@ -1263,10 +1282,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
return ret;
}
- if (to->engine == READ_ONCE(from->engine))
- ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
- &from->submit,
- I915_FENCE_GFP);
+ if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
+ ret = await_request_submit(to, from);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
if (ret < 0)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d9221e9d395c8c80d99d002bea3c9b6dd192d6a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 13 Jul 2020 15:16:36 +0100
Subject: [PATCH] drm/i915: Skip signaling a signaled request
Preempt-to-busy introduces various fascinating complications in that the
requests may complete as we are unsubmitting them from HW. As they may
then signal after unsubmission, we may find ourselves having to cleanup
the signaling request from within the signaling callback. This causes us
to recurse onto the same i915_request.lock.
However, if the request is already signaled (as it will be before we
enter the signal callbacks), we know we can skip the signaling of that
request during submission, neatly evading the spinlock recursion.
unsubmit(ve.rq0) # timeslice expiration or other preemption
-> virtual_submit_request(ve.rq0)
dma_fence_signal(ve.rq0) # request completed before preemption ack
-> submit_notify(ve.rq1)
-> virtual_submit_request(ve.rq1) # sees that we have completed ve.rq0
-> __i915_request_submit(ve.rq0)
[ 264.210142] BUG: spinlock recursion on CPU#2, sample_multi_tr/2093
[ 264.210150] lock: 0xffff9efd6ac55080, .magic: dead4ead, .owner: sample_multi_tr/2093, .owner_cpu: 2
[ 264.210155] CPU: 2 PID: 2093 Comm: sample_multi_tr Tainted: G U
[ 264.210158] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake S UDIMM RVP, BIOS CNLSFWR1.R00.X212.B01.1909060036 09/06/2019
[ 264.210160] Call Trace:
[ 264.210167] dump_stack+0x98/0xda
[ 264.210174] spin_dump.cold+0x24/0x3c
[ 264.210178] do_raw_spin_lock+0x9a/0xd0
[ 264.210184] _raw_spin_lock_nested+0x6a/0x70
[ 264.210314] __i915_request_submit+0x10a/0x3c0 [i915]
[ 264.210415] virtual_submit_request+0x9b/0x380 [i915]
[ 264.210516] submit_notify+0xaf/0x14c [i915]
[ 264.210602] __i915_sw_fence_complete+0x8a/0x230 [i915]
[ 264.210692] i915_sw_fence_complete+0x2d/0x40 [i915]
[ 264.210762] __dma_i915_sw_fence_wake+0x19/0x30 [i915]
[ 264.210767] dma_fence_signal_locked+0xb1/0x1c0
[ 264.210772] dma_fence_signal+0x29/0x50
[ 264.210871] i915_request_wait+0x5cb/0x830 [i915]
[ 264.210876] ? dma_resv_get_fences_rcu+0x294/0x5d0
[ 264.210974] i915_gem_object_wait_fence+0x2f/0x40 [i915]
[ 264.211084] i915_gem_object_wait+0xce/0x400 [i915]
[ 264.211178] i915_gem_wait_ioctl+0xff/0x290 [i915]
Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
References: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: "Nayana, Venkata Ramana" <venkata.ramana.nayana(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.4+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200713141636.29326-1-chris@…
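The shape of the fix is the classic "test the terminal state before taking
the lock that the signaler already holds" pattern. A userspace analog with
made-up names (not the i915 API):

#include <pthread.h>
#include <stdatomic.h>

struct request {
	atomic_bool signaled;
	pthread_spinlock_t lock;
};

/*
 * Analog of the fix: if the request has already signaled, skip the
 * locked signaling setup entirely, so a submit running inside a signal
 * callback can never try to re-acquire the lock it is called under.
 */
static void submit(struct request *rq)
{
	if (atomic_load(&rq->signaled))
		return;	/* nothing left to arm for a completed request */

	pthread_spin_lock(&rq->lock);
	/* ... notify execute callbacks, arm the breadcrumb ... */
	pthread_spin_unlock(&rq->lock);
}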
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d907d538176e..91786310c114 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -314,13 +314,18 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
lockdep_assert_held(&rq->lock);
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+ return true;
+
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
struct intel_context *ce = rq->context;
struct list_head *pos;
spin_lock(&b->irq_lock);
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
+ if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+ goto unlock;
if (!__intel_breadcrumbs_arm_irq(b))
goto unlock;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3bb7320249ae..0b2fe55e6194 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -560,22 +560,25 @@ bool __i915_request_submit(struct i915_request *request)
engine->serial++;
result = true;
-xfer: /* We may be recursing from the signal callback of another i915 fence */
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
-
+xfer:
if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
list_move_tail(&request->sched.link, &engine->active.requests);
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
- __notify_execute_cb(request);
}
- GEM_BUG_ON(!llist_empty(&request->execute_cb));
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
- !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
- !i915_request_enable_breadcrumb(request))
- intel_engine_signal_breadcrumbs(engine);
+ /* We may be recursing from the signal callback of another i915 fence */
+ if (!i915_request_signaled(request)) {
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
+ __notify_execute_cb(request);
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &request->fence.flags) &&
+ !i915_request_enable_breadcrumb(request))
+ intel_engine_signal_breadcrumbs(engine);
- spin_unlock(&request->lock);
+ spin_unlock(&request->lock);
+ GEM_BUG_ON(!llist_empty(&request->execute_cb));
+ }
return result;
}
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d9221e9d395c8c80d99d002bea3c9b6dd192d6a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 13 Jul 2020 15:16:36 +0100
Subject: [PATCH] drm/i915: Skip signaling a signaled request
Preempt-to-busy introduces various fascinating complications in that the
requests may complete as we are unsubmitting them from HW. As they may
then signal after unsubmission, we may find ourselves having to cleanup
the signaling request from within the signaling callback. This causes us
to recurse onto the same i915_request.lock.
However, if the request is already signaled (as it will be before we
enter the signal callbacks), we know we can skip the signaling of that
request during submission, neatly evading the spinlock recursion.
unsubmit(ve.rq0) # timeslice expiration or other preemption
-> virtual_submit_request(ve.rq0)
dma_fence_signal(ve.rq0) # request completed before preemption ack
-> submit_notify(ve.rq1)
-> virtual_submit_request(ve.rq1) # sees that we have completed ve.rq0
-> __i915_request_submit(ve.rq0)
[ 264.210142] BUG: spinlock recursion on CPU#2, sample_multi_tr/2093
[ 264.210150] lock: 0xffff9efd6ac55080, .magic: dead4ead, .owner: sample_multi_tr/2093, .owner_cpu: 2
[ 264.210155] CPU: 2 PID: 2093 Comm: sample_multi_tr Tainted: G U
[ 264.210158] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake S UDIMM RVP, BIOS CNLSFWR1.R00.X212.B01.1909060036 09/06/2019
[ 264.210160] Call Trace:
[ 264.210167] dump_stack+0x98/0xda
[ 264.210174] spin_dump.cold+0x24/0x3c
[ 264.210178] do_raw_spin_lock+0x9a/0xd0
[ 264.210184] _raw_spin_lock_nested+0x6a/0x70
[ 264.210314] __i915_request_submit+0x10a/0x3c0 [i915]
[ 264.210415] virtual_submit_request+0x9b/0x380 [i915]
[ 264.210516] submit_notify+0xaf/0x14c [i915]
[ 264.210602] __i915_sw_fence_complete+0x8a/0x230 [i915]
[ 264.210692] i915_sw_fence_complete+0x2d/0x40 [i915]
[ 264.210762] __dma_i915_sw_fence_wake+0x19/0x30 [i915]
[ 264.210767] dma_fence_signal_locked+0xb1/0x1c0
[ 264.210772] dma_fence_signal+0x29/0x50
[ 264.210871] i915_request_wait+0x5cb/0x830 [i915]
[ 264.210876] ? dma_resv_get_fences_rcu+0x294/0x5d0
[ 264.210974] i915_gem_object_wait_fence+0x2f/0x40 [i915]
[ 264.211084] i915_gem_object_wait+0xce/0x400 [i915]
[ 264.211178] i915_gem_wait_ioctl+0xff/0x290 [i915]
Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
References: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: "Nayana, Venkata Ramana" <venkata.ramana.nayana(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.4+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200713141636.29326-1-chris@…
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d907d538176e..91786310c114 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -314,13 +314,18 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
lockdep_assert_held(&rq->lock);
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+ return true;
+
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
struct intel_context *ce = rq->context;
struct list_head *pos;
spin_lock(&b->irq_lock);
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
+ if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+ goto unlock;
if (!__intel_breadcrumbs_arm_irq(b))
goto unlock;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3bb7320249ae..0b2fe55e6194 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -560,22 +560,25 @@ bool __i915_request_submit(struct i915_request *request)
engine->serial++;
result = true;
-xfer: /* We may be recursing from the signal callback of another i915 fence */
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
-
+xfer:
if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
list_move_tail(&request->sched.link, &engine->active.requests);
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
- __notify_execute_cb(request);
}
- GEM_BUG_ON(!llist_empty(&request->execute_cb));
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
- !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
- !i915_request_enable_breadcrumb(request))
- intel_engine_signal_breadcrumbs(engine);
+ /* We may be recursing from the signal callback of another i915 fence */
+ if (!i915_request_signaled(request)) {
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
+ __notify_execute_cb(request);
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &request->fence.flags) &&
+ !i915_request_enable_breadcrumb(request))
+ intel_engine_signal_breadcrumbs(engine);
- spin_unlock(&request->lock);
+ spin_unlock(&request->lock);
+ GEM_BUG_ON(!llist_empty(&request->execute_cb));
+ }
return result;
}
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d9221e9d395c8c80d99d002bea3c9b6dd192d6a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 13 Jul 2020 15:16:36 +0100
Subject: [PATCH] drm/i915: Skip signaling a signaled request
Preempt-to-busy introduces various fascinating complications in that the
requests may complete as we are unsubmitting them from HW. As they may
then signal after unsubmission, we may find ourselves having to cleanup
the signaling request from within the signaling callback. This causes us
to recurse onto the same i915_request.lock.
However, if the request is already signaled (as it will be before we
enter the signal callbacks), we know we can skip the signaling of that
request during submission, neatly evading the spinlock recursion.
unsubmit(ve.rq0) # timeslice expiration or other preemption
-> virtual_submit_request(ve.rq0)
dma_fence_signal(ve.rq0) # request completed before preemption ack
-> submit_notify(ve.rq1)
-> virtual_submit_request(ve.rq1) # sees that we have completed ve.rq0
-> __i915_request_submit(ve.rq0)
[ 264.210142] BUG: spinlock recursion on CPU#2, sample_multi_tr/2093
[ 264.210150] lock: 0xffff9efd6ac55080, .magic: dead4ead, .owner: sample_multi_tr/2093, .owner_cpu: 2
[ 264.210155] CPU: 2 PID: 2093 Comm: sample_multi_tr Tainted: G U
[ 264.210158] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake S UDIMM RVP, BIOS CNLSFWR1.R00.X212.B01.1909060036 09/06/2019
[ 264.210160] Call Trace:
[ 264.210167] dump_stack+0x98/0xda
[ 264.210174] spin_dump.cold+0x24/0x3c
[ 264.210178] do_raw_spin_lock+0x9a/0xd0
[ 264.210184] _raw_spin_lock_nested+0x6a/0x70
[ 264.210314] __i915_request_submit+0x10a/0x3c0 [i915]
[ 264.210415] virtual_submit_request+0x9b/0x380 [i915]
[ 264.210516] submit_notify+0xaf/0x14c [i915]
[ 264.210602] __i915_sw_fence_complete+0x8a/0x230 [i915]
[ 264.210692] i915_sw_fence_complete+0x2d/0x40 [i915]
[ 264.210762] __dma_i915_sw_fence_wake+0x19/0x30 [i915]
[ 264.210767] dma_fence_signal_locked+0xb1/0x1c0
[ 264.210772] dma_fence_signal+0x29/0x50
[ 264.210871] i915_request_wait+0x5cb/0x830 [i915]
[ 264.210876] ? dma_resv_get_fences_rcu+0x294/0x5d0
[ 264.210974] i915_gem_object_wait_fence+0x2f/0x40 [i915]
[ 264.211084] i915_gem_object_wait+0xce/0x400 [i915]
[ 264.211178] i915_gem_wait_ioctl+0xff/0x290 [i915]
Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
References: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: "Nayana, Venkata Ramana" <venkata.ramana.nayana(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.4+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200713141636.29326-1-chris@…
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d907d538176e..91786310c114 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -314,13 +314,18 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
lockdep_assert_held(&rq->lock);
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+ return true;
+
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
struct intel_context *ce = rq->context;
struct list_head *pos;
spin_lock(&b->irq_lock);
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
+ if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+ goto unlock;
if (!__intel_breadcrumbs_arm_irq(b))
goto unlock;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3bb7320249ae..0b2fe55e6194 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -560,22 +560,25 @@ bool __i915_request_submit(struct i915_request *request)
engine->serial++;
result = true;
-xfer: /* We may be recursing from the signal callback of another i915 fence */
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
-
+xfer:
if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
list_move_tail(&request->sched.link, &engine->active.requests);
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
- __notify_execute_cb(request);
}
- GEM_BUG_ON(!llist_empty(&request->execute_cb));
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
- !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
- !i915_request_enable_breadcrumb(request))
- intel_engine_signal_breadcrumbs(engine);
+ /* We may be recursing from the signal callback of another i915 fence */
+ if (!i915_request_signaled(request)) {
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
+ __notify_execute_cb(request);
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &request->fence.flags) &&
+ !i915_request_enable_breadcrumb(request))
+ intel_engine_signal_breadcrumbs(engine);
- spin_unlock(&request->lock);
+ spin_unlock(&request->lock);
+ GEM_BUG_ON(!llist_empty(&request->execute_cb));
+ }
return result;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24ebec25fb270100e252b19c288e21bd7d8cc7f7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Fri, 29 May 2020 14:12:18 +0100
Subject: [PATCH] arm64: hw_breakpoint: Don't invoke overflow handler on
uaccess watchpoints
Unprivileged memory accesses generated by the so-called "translated"
instructions (e.g. STTR) at EL1 can cause EL0 watchpoints to fire
unexpectedly if kernel debugging is enabled. In such cases, the
hw_breakpoint logic will invoke the user overflow handler which will
typically raise a SIGTRAP back to the current task. This is futile when
returning back to the kernel because (a) the signal won't have been
delivered and (b) userspace can't handle the thing anyway.
Avoid invoking the user overflow handler for watchpoints triggered by
kernel uaccess routines, and instead single-step over the faulting
instruction as we would if no overflow handler had been installed.
(Fixes tag identifies the introduction of unprivileged memory accesses,
which exposed this latent bug in the hw_breakpoint code)
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Fixes: 57f4959bad0a ("arm64: kernel: Add support for User Access Override")
Reported-by: Luis Machado <luis.machado(a)linaro.org>
Signed-off-by: Will Deacon <will(a)kernel.org>
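The decision that the new watchpoint_report() helper encodes can be
condensed as follows (standalone illustration with assumed flags, not the
kernel API):

#include <stdbool.h>

/*
 * Illustration of the reporting decision: a watchpoint programmed for
 * EL0 (userspace) that fires while the CPU is in kernel mode must have
 * been triggered by a uaccess routine, so the kernel steps over the
 * access itself instead of invoking the user overflow handler.
 */
static int decide_step(bool regs_user_mode, bool wp_is_el0,
		       bool default_overflow_handler)
{
	if (!regs_user_mode && wp_is_el0)
		return 1;			/* step in the kernel */

	/* otherwise deliver the event; step only for the default handler */
	return default_overflow_handler ? 1 : 0;
}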
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 0b727edf4104..af234a1e08b7 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -730,6 +730,27 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
return 0;
}
+static int watchpoint_report(struct perf_event *wp, unsigned long addr,
+ struct pt_regs *regs)
+{
+ int step = is_default_overflow_handler(wp);
+ struct arch_hw_breakpoint *info = counter_arch_bp(wp);
+
+ info->trigger = addr;
+
+ /*
+ * If we triggered a user watchpoint from a uaccess routine, then
+ * handle the stepping ourselves since userspace really can't help
+ * us with this.
+ */
+ if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0)
+ step = 1;
+ else
+ perf_bp_event(wp, regs);
+
+ return step;
+}
+
static int watchpoint_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -739,7 +760,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
u64 val;
struct perf_event *wp, **slots;
struct debug_info *debug_info;
- struct arch_hw_breakpoint *info;
struct arch_hw_breakpoint_ctrl ctrl;
slots = this_cpu_ptr(wp_on_reg);
@@ -777,25 +797,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
if (dist != 0)
continue;
- info = counter_arch_bp(wp);
- info->trigger = addr;
- perf_bp_event(wp, regs);
-
- /* Do we need to handle the stepping? */
- if (is_default_overflow_handler(wp))
- step = 1;
+ step = watchpoint_report(wp, addr, regs);
}
- if (min_dist > 0 && min_dist != -1) {
- /* No exact match found. */
- wp = slots[closest_match];
- info = counter_arch_bp(wp);
- info->trigger = addr;
- perf_bp_event(wp, regs);
- /* Do we need to handle the stepping? */
- if (is_default_overflow_handler(wp))
- step = 1;
- }
+ /* No exact match found? */
+ if (min_dist > 0 && min_dist != -1)
+ step = watchpoint_report(slots[closest_match], addr, regs);
+
rcu_read_unlock();
if (!step)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24ebec25fb270100e252b19c288e21bd7d8cc7f7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Fri, 29 May 2020 14:12:18 +0100
Subject: [PATCH] arm64: hw_breakpoint: Don't invoke overflow handler on
uaccess watchpoints
Unprivileged memory accesses generated by the so-called "translated"
instructions (e.g. STTR) at EL1 can cause EL0 watchpoints to fire
unexpectedly if kernel debugging is enabled. In such cases, the
hw_breakpoint logic will invoke the user overflow handler which will
typically raise a SIGTRAP back to the current task. This is futile when
returning back to the kernel because (a) the signal won't have been
delivered and (b) userspace can't handle the thing anyway.
Avoid invoking the user overflow handler for watchpoints triggered by
kernel uaccess routines, and instead single-step over the faulting
instruction as we would if no overflow handler had been installed.
(Fixes tag identifies the introduction of unprivileged memory accesses,
which exposed this latent bug in the hw_breakpoint code)
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Fixes: 57f4959bad0a ("arm64: kernel: Add support for User Access Override")
Reported-by: Luis Machado <luis.machado(a)linaro.org>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 0b727edf4104..af234a1e08b7 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -730,6 +730,27 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
return 0;
}
+static int watchpoint_report(struct perf_event *wp, unsigned long addr,
+ struct pt_regs *regs)
+{
+ int step = is_default_overflow_handler(wp);
+ struct arch_hw_breakpoint *info = counter_arch_bp(wp);
+
+ info->trigger = addr;
+
+ /*
+ * If we triggered a user watchpoint from a uaccess routine, then
+ * handle the stepping ourselves since userspace really can't help
+ * us with this.
+ */
+ if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0)
+ step = 1;
+ else
+ perf_bp_event(wp, regs);
+
+ return step;
+}
+
static int watchpoint_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -739,7 +760,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
u64 val;
struct perf_event *wp, **slots;
struct debug_info *debug_info;
- struct arch_hw_breakpoint *info;
struct arch_hw_breakpoint_ctrl ctrl;
slots = this_cpu_ptr(wp_on_reg);
@@ -777,25 +797,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
if (dist != 0)
continue;
- info = counter_arch_bp(wp);
- info->trigger = addr;
- perf_bp_event(wp, regs);
-
- /* Do we need to handle the stepping? */
- if (is_default_overflow_handler(wp))
- step = 1;
+ step = watchpoint_report(wp, addr, regs);
}
- if (min_dist > 0 && min_dist != -1) {
- /* No exact match found. */
- wp = slots[closest_match];
- info = counter_arch_bp(wp);
- info->trigger = addr;
- perf_bp_event(wp, regs);
- /* Do we need to handle the stepping? */
- if (is_default_overflow_handler(wp))
- step = 1;
- }
+ /* No exact match found? */
+ if (min_dist > 0 && min_dist != -1)
+ step = watchpoint_report(slots[closest_match], addr, regs);
+
rcu_read_unlock();
if (!step)
From: Mike Snitzer <snitzer(a)redhat.com>
Discontinue issuing writethrough write IO in series to the origin and
then cache.
Use bio_clone_fast() to create a new origin clone bio that will be
mapped to the origin device and then bio_chain() it to the bio that gets
remapped to the cache device. The origin clone bio does _not_ have a
copy of the per_bio_data -- as such check_if_tick_bio_needed() will not
be called.
The cache bio (parent bio) will not complete until the origin bio has
completed -- this fulfills bio_clone_fast()'s requirements as well as
the requirement to not complete the original IO until the write IO has
completed to both the origin and cache device.
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
(cherry picked from commit 2df3bae9a6543e90042291707b8db0cbfbae9ee9)
Fixes: 4ec34f2196d125ff781170ddc6c3058c08ec5e73 ("dm bio record:
save/restore bi_end_io and bi_integrity")
Commit 4ec34f21 introduced a mkfs.ext4 hang on an LVM device that had
been modified with lvconvert --cachemode=writethrough.
Cc: stable(a)vger.kernel.org # 4.14.y
Signed-off-by: John Donnelly <john.p.donnelly(a)oracle.com>
Reviewed-by: Somasundaram Krishnasamy <somasundaram.krishnasamy(a)oracle.com>
Conflicts:
	drivers/md/dm-cache-target.c - corrected the usage of
	writethrough_mode(&cache->features) that was caught by the
	compiler, and removed the unused static functions
	writethrough_endio(), defer_writethrough_bio() and
	wake_deferred_writethrough_worker() that generated warnings.
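The completion ordering that bio_chain() provides can be modelled in
userspace with a remaining-count, purely as an illustration of why the
parent (cache) bio cannot complete before the chained origin clone
(names are made up):

#include <stdatomic.h>
#include <stdio.h>

struct io {
	atomic_int remaining;
};

/* The parent "completes" only when its last outstanding leg finishes. */
static void io_endio(struct io *parent, const char *which)
{
	printf("%s write done\n", which);
	if (atomic_fetch_sub(&parent->remaining, 1) == 1)
		printf("original IO completed\n");
}

int main(void)
{
	struct io parent = { .remaining = 2 };	/* origin + cache legs */

	io_endio(&parent, "origin");
	io_endio(&parent, "cache");	/* second completion ends the IO */
	return 0;
}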
---
drivers/md/dm-cache-target.c | 92 ++++++++++++++++++--------------------------
1 file changed, 37 insertions(+), 55 deletions(-)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 69cdb29ef6be..2732d1df05fa 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -450,6 +450,7 @@ struct cache {
struct work_struct migration_worker;
struct delayed_work waker;
struct dm_bio_prison_v2 *prison;
+ struct bio_set *bs;
mempool_t *migration_pool;
@@ -537,11 +538,6 @@ static void wake_deferred_bio_worker(struct cache *cache)
queue_work(cache->wq, &cache->deferred_bio_worker);
}
-static void wake_deferred_writethrough_worker(struct cache *cache)
-{
- queue_work(cache->wq, &cache->deferred_writethrough_worker);
-}
-
static void wake_migration_worker(struct cache *cache)
{
if (passthrough_mode(&cache->features))
@@ -868,16 +864,23 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
spin_unlock_irqrestore(&cache->lock, flags);
}
-static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
- dm_oblock_t oblock)
+static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
+ dm_oblock_t oblock, bool bio_has_pbd)
{
- // FIXME: this is called way too much.
- check_if_tick_bio_needed(cache, bio);
+ if (bio_has_pbd)
+ check_if_tick_bio_needed(cache, bio);
remap_to_origin(cache, bio);
if (bio_data_dir(bio) == WRITE)
clear_discard(cache, oblock_to_dblock(cache, oblock));
}
+static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
+ dm_oblock_t oblock)
+{
+ // FIXME: check_if_tick_bio_needed() is called way too much through this interface
+ __remap_to_origin_clear_discard(cache, bio, oblock, true);
+}
+
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock)
{
@@ -937,57 +940,26 @@ static void issue_op(struct bio *bio, void *context)
accounted_request(cache, bio);
}
-static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
- bio_list_add(&cache->deferred_writethrough_bios, bio);
- spin_unlock_irqrestore(&cache->lock, flags);
-
- wake_deferred_writethrough_worker(cache);
-}
-
-static void writethrough_endio(struct bio *bio)
-{
- struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
-
- dm_unhook_bio(&pb->hook_info, bio);
-
- if (bio->bi_status) {
- bio_endio(bio);
- return;
- }
-
- dm_bio_restore(&pb->bio_details, bio);
- remap_to_cache(pb->cache, bio, pb->cblock);
-
- /*
- * We can't issue this bio directly, since we're in interrupt
- * context. So it gets put on a bio list for processing by the
- * worker thread.
- */
- defer_writethrough_bio(pb->cache, bio);
-}
-
/*
- * FIXME: send in parallel, huge latency as is.
* When running in writethrough mode we need to send writes to clean blocks
- * to both the cache and origin devices. In future we'd like to clone the
- * bio and send them in parallel, but for now we're doing them in
- * series as this is easier.
+ * to both the cache and origin devices. Clone the bio and send them in parallel.
*/
-static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
- dm_oblock_t oblock, dm_cblock_t cblock)
+static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
+ dm_oblock_t oblock, dm_cblock_t cblock)
{
- struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
+ struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs);
+
+ BUG_ON(!origin_bio);
- pb->cache = cache;
- pb->cblock = cblock;
- dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
- dm_bio_record(&pb->bio_details, bio);
+ bio_chain(origin_bio, bio);
+ /*
+ * Passing false to __remap_to_origin_clear_discard() skips
+ * all code that might use per_bio_data (since clone doesn't have it)
+ */
+ __remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
+ submit_bio(origin_bio);
- remap_to_origin_clear_discard(pb->cache, bio, oblock);
+ remap_to_cache(cache, bio, cblock);
}
/*----------------------------------------------------------------
@@ -1873,7 +1845,7 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
} else {
if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
!is_dirty(cache, cblock)) {
- remap_to_origin_then_cache(cache, bio, block, cblock);
+ remap_to_origin_and_cache(cache, bio, block, cblock);
accounted_begin(cache, bio);
} else
remap_to_cache_dirty(cache, bio, block, cblock);
@@ -2132,6 +2104,9 @@ static void destroy(struct cache *cache)
kfree(cache->ctr_args[i]);
kfree(cache->ctr_args);
+ if (cache->bs)
+ bioset_free(cache->bs);
+
kfree(cache);
}
@@ -2589,6 +2564,13 @@ static int cache_create(struct cache_args *ca, struct cache **result)
cache->features = ca->features;
ti->per_io_data_size = get_per_bio_data_size(cache);
+ if (writethrough_mode(&cache->features)) {
+ /* Create bioset for writethrough bios issued to origin */
+ cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!cache->bs)
+ goto bad;
+ }
+
cache->callbacks.congested_fn = cache_is_congested;
dm_table_add_target_callbacks(ti->table, &cache->callbacks);
--
1.8.3.1
Hello, stable-kernel maintainers!
Could you please cherry-pick these commits into the v5.7.x kernel?
commit 0de6db30ef79b391cedd749801a49c485d2daf4b
Author: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Date: Mon Jan 13 23:24:17 2020 -0800
ASoC: tegra: Use device managed resource APIs to get the clock
commit 1e4e0bf136aa4b4aa59c1e6af19844bd6d807794
Author: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Date: Mon Jan 13 23:24:23 2020 -0800
ASoC: tegra: Add audio mclk parent configuration
commit ff5d18cb04f4ecccbcf05b7f83ab6df2a0d95c16
Author: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Date: Mon Jan 13 23:24:24 2020 -0800
ASoC: tegra: Enable audio mclk during tegra_asoc_utils_init()
They will fix a huge warning splat during kernel boot on NVIDIA Tegra
SoCs. For some reason these patches didn't make it into 5.7 when it was
released, and several people have complained about the warnings. Thanks
in advance!
Recently a customer of ours experienced a crash when booting the
system with memory hotplug enabled.
The problem is that Normal zones on different nodes don't get their private
zone->pageset allocated, and keep sharing the initial boot_pageset.
The sharing between zones is normally safe as explained by the comment for
boot_pageset - it's a percpu structure, and manipulations are done with
disabled interrupts, and boot_pageset is set up in a way that any page placed
on its pcplist is immediately flushed to shared zone's freelist, because
pcp->high == 1.
However, the hotplug operation updates pcp->high to a higher value as it
expects to be operating on a private pageset.
The problem is in build_all_zonelists(), which is called when the first range
of pages is onlined for the Normal zone of node X or Y:
if (system_state == SYSTEM_BOOTING) {
build_all_zonelists_init();
} else {
#ifdef CONFIG_MEMORY_HOTPLUG
if (zone)
setup_zone_pageset(zone);
#endif
/* we have to stop all cpus to guarantee there is no user
of zonelist */
stop_machine(__build_all_zonelists, pgdat, NULL);
/* cpuset refresh routine should be here */
}
When called during hotplug, it should execute setup_zone_pageset(zone),
which allocates the private pageset.
However, with memhp_default_state=online this happens too early, while
system_state == SYSTEM_BOOTING is still true, so this step is skipped
(and build_all_zonelists_init() is probably unsafe anyway at this point).
Another hotplug operation on the same zone then leads to
zone_pcp_update(zone), called from online_pages(), which updates
pcp->high for the shared boot_pageset to a value higher than 1.
At that point, pages freed from the Node X and Node Y Normal zones can
end up on the same pcplist, and from there they can be freed to the
wrong zone's freelist, leading to corruption and crashes.
Please note that upstream has fixed this differently (and
unintentionally) by adding another boot state (SYSTEM_SCHEDULING),
which is set before smp_init() and therefore before memory hotplug
events, even with memhp_default_state=online. Backporting that would
be too intrusive.
Signed-off-by: Oscar Salvador <osalvador(a)suse.de>
Debugged-by: Vlastimil Babka <vbabka(a)suse.cz>
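The pcp->high mechanics described above can be modelled with a toy
batcher, only to illustrate why high == 1 makes the shared boot_pageset
safe and a raised high makes it dangerous (names are made up, not kernel
code):

#include <stdio.h>

struct pcp { int count, high; };

/* Pages accumulate on the list until count reaches high, then flush. */
static void free_to_pcp(struct pcp *p, const char *zone)
{
	if (++p->count >= p->high) {
		printf("flushing %d page(s) to %s freelist\n",
		       p->count, zone);
		p->count = 0;
	}
}

int main(void)
{
	struct pcp boot = { .count = 0, .high = 1 };

	free_to_pcp(&boot, "node0/Normal");	/* flushed immediately */

	boot.high = 6;				/* hotplug raised high */
	free_to_pcp(&boot, "node0/Normal");
	free_to_pcp(&boot, "node1/Normal");	/* now held on one list */
	printf("%d page(s) from two zones parked on one pcplist\n",
	       boot.count);
	return 0;
}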
---
include/linux/mmzone.h | 3 ++-
init/main.c | 2 +-
mm/memory_hotplug.c | 10 +++++-----
mm/page_alloc.c | 7 ++++---
4 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e3d7754f25f0..5c7645e156a5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -756,7 +756,8 @@ static inline bool is_dev_zone(const struct zone *zone)
#include <linux/memory_hotplug.h>
extern struct mutex zonelists_mutex;
-void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
+void build_all_zonelists(pg_data_t *pgdat, struct zone *zone,
+ bool hotplug_context);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int classzone_idx, unsigned int alloc_flags,
diff --git a/init/main.c b/init/main.c
index d47860dbe896..7ad08957dd18 100644
--- a/init/main.c
+++ b/init/main.c
@@ -512,7 +512,7 @@ asmlinkage __visible void __init start_kernel(void)
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
boot_cpu_hotplug_init();
- build_all_zonelists(NULL, NULL);
+ build_all_zonelists(NULL, NULL, false);
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 449999657c0b..a4ffe5996317 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1125,7 +1125,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
mutex_lock(&zonelists_mutex);
if (!populated_zone(zone)) {
need_zonelists_rebuild = 1;
- build_all_zonelists(NULL, zone);
+ build_all_zonelists(NULL, zone, true);
}
ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
@@ -1146,7 +1146,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
if (onlined_pages) {
node_states_set_node(nid, &arg);
if (need_zonelists_rebuild)
- build_all_zonelists(NULL, NULL);
+ build_all_zonelists(NULL, NULL, true);
else
zone_pcp_update(zone);
}
@@ -1220,7 +1220,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
* to access not-initialized zonelist, build here.
*/
mutex_lock(&zonelists_mutex);
- build_all_zonelists(pgdat, NULL);
+ build_all_zonelists(pgdat, NULL, true);
mutex_unlock(&zonelists_mutex);
/*
@@ -1276,7 +1276,7 @@ int try_online_node(int nid)
if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
mutex_lock(&zonelists_mutex);
- build_all_zonelists(NULL, NULL);
+ build_all_zonelists(NULL, NULL, true);
mutex_unlock(&zonelists_mutex);
}
@@ -2016,7 +2016,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
if (!populated_zone(zone)) {
zone_pcp_reset(zone);
mutex_lock(&zonelists_mutex);
- build_all_zonelists(NULL, NULL);
+ build_all_zonelists(NULL, NULL, true);
mutex_unlock(&zonelists_mutex);
} else
zone_pcp_update(zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index de00e0fec484..f394dd87fa03 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4608,7 +4608,7 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write,
user_zonelist_order = oldval;
} else if (oldval != user_zonelist_order) {
mutex_lock(&zonelists_mutex);
- build_all_zonelists(NULL, NULL);
+ build_all_zonelists(NULL, NULL, false);
mutex_unlock(&zonelists_mutex);
}
}
@@ -4988,11 +4988,12 @@ build_all_zonelists_init(void)
* (2) call of __init annotated helper build_all_zonelists_init
* [protected by SYSTEM_BOOTING].
*/
-void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
+void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone,
+ bool hotplug_context)
{
set_zonelist_order();
- if (system_state == SYSTEM_BOOTING) {
+ if (system_state == SYSTEM_BOOTING && !hotplug_context) {
build_all_zonelists_init();
} else {
#ifdef CONFIG_MEMORY_HOTPLUG
--
2.26.2