[ Upstream commit 263d6287da1433aba11c5b4046388f2cdf49675c ]
When a VCPU is created, the kvm_vcpu struct is initialized to zero in
kvm_vm_ioctl_create_vcpu(). On VHE systems, the first time
vcpu.arch.mdcr_el2 is loaded on hardware is in vcpu_load(), before it is
set to a sensible value in kvm_arm_setup_debug() later in the run loop. The
result is that KVM executes for a short time with MDCR_EL2 set to zero.
This has several unintended consequences:
* Setting MDCR_EL2.HPMN to 0 is constrained unpredictable according to ARM
DDI 0487G.a, page D13-3820. The behavior specified by the architecture
in this case is for the PE to behave as if MDCR_EL2.HPMN is set to a
value less than or equal to PMCR_EL0.N, which means that an unknown
number of counters are now disabled by MDCR_EL2.HPME, which is zero.
* The host configuration for the other debug features controlled by
MDCR_EL2 is temporarily lost. This has been harmless so far, as Linux
doesn't use the other fields, but that might change in the future.
Let's avoid both issues by initializing the VCPU's mdcr_el2 field in
kvm_vcpu_vcpu_first_run_init(), thus making sure that the MDCR_EL2 register
has a consistent value after each vcpu_load().
[ v5.4 backport: added stub for KVM/arm that fixes compilation errors ]
Fixes: d5a21bcc2995 ("KVM: arm64: Move common VHE/non-VHE trap config in separate functions")
Signed-off-by: Alexandru Elisei <alexandru.elisei(a)arm.com>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20210407144857.199746-3-alexandru.elisei@arm.com
---
arch/arm/include/asm/kvm_host.h | 1 +
arch/arm64/include/asm/kvm_host.h | 1 +
arch/arm64/kvm/debug.c | 88 +++++++++++++++++++++----------
virt/kvm/arm/arm.c | 2 +
4 files changed, 64 insertions(+), 28 deletions(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index dd03d5e01a94..32564b017ba0 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -335,6 +335,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_init_debug(void) {}
+static inline void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index dfa6dc4575be..697702a1a1ff 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -552,6 +552,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
void kvm_arm_init_debug(void);
+void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index dbc890511631..2484b2cca74b 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -68,6 +68,64 @@ void kvm_arm_init_debug(void)
__this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2));
}
+/**
+ * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * This ensures we will trap access to:
+ * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
+ * - Debug ROM Address (MDCR_EL2_TDRA)
+ * - OS related registers (MDCR_EL2_TDOSA)
+ * - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
+ * - Self-hosted Trace Filter controls (MDCR_EL2_TTRF)
+ */
+static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
+{
+ /*
+ * This also clears MDCR_EL2_E2PB_MASK to disable guest access
+ * to the profiling buffer.
+ */
+ vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
+ vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
+ MDCR_EL2_TPMS |
+ MDCR_EL2_TTRF |
+ MDCR_EL2_TPMCR |
+ MDCR_EL2_TDRA |
+ MDCR_EL2_TDOSA);
+
+ /* Is the VM being debugged by userspace? */
+ if (vcpu->guest_debug)
+ /* Route all software debug exceptions to EL2 */
+ vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
+
+ /*
+ * Trap debug register access when one of the following is true:
+ * - Userspace is using the hardware to debug the guest
+ * (KVM_GUESTDBG_USE_HW is set).
+ * - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
+ */
+ if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
+ !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+ vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
+
+ trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
+}
+
+/**
+ * kvm_arm_vcpu_init_debug - setup vcpu debug traps
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * Set vcpu initial mdcr_el2 value.
+ */
+void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ kvm_arm_setup_mdcr_el2(vcpu);
+ preempt_enable();
+}
+
/**
* kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
*/
@@ -83,13 +141,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
* @vcpu: the vcpu pointer
*
* This is called before each entry into the hypervisor to setup any
- * debug related registers. Currently this just ensures we will trap
- * access to:
- * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
- * - Debug ROM Address (MDCR_EL2_TDRA)
- * - OS related registers (MDCR_EL2_TDOSA)
- * - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
- * - Self-hosted Trace Filter controls (MDCR_EL2_TTRF)
+ * debug related registers.
*
* Additionally, KVM only traps guest accesses to the debug registers if
* the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
@@ -101,28 +153,14 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
{
- bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY);
unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
- /*
- * This also clears MDCR_EL2_E2PB_MASK to disable guest access
- * to the profiling buffer.
- */
- vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
- vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
- MDCR_EL2_TPMS |
- MDCR_EL2_TTRF |
- MDCR_EL2_TPMCR |
- MDCR_EL2_TDRA |
- MDCR_EL2_TDOSA);
+ kvm_arm_setup_mdcr_el2(vcpu);
/* Is Guest debugging in effect? */
if (vcpu->guest_debug) {
- /* Route all software debug exceptions to EL2 */
- vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
-
/* Save guest debug state */
save_guest_debug_regs(vcpu);
@@ -176,7 +214,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
- trap_debug = true;
trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
&vcpu->arch.debug_ptr->dbg_bcr[0],
@@ -191,10 +228,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
BUG_ON(!vcpu->guest_debug &&
vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
- /* Trap debug register access */
- if (trap_debug)
- vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
-
/* If KDE or MDE are set, perform a full save/restore cycle. */
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
@@ -203,7 +236,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
- trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 2e7d2b3f2907..4af85605730e 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -579,6 +579,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
vcpu->arch.has_run_once = true;
+ kvm_arm_vcpu_init_debug(vcpu);
+
if (likely(irqchip_in_kernel(kvm))) {
/*
* Map the VGIC hardware resources before running a vcpu the
--
2.31.1
From: Colin Ian King <colin.king(a)canonical.com>
commit af0e1871d79cfbb91f732d2c6fa7558e45c31038 upstream.
The lux_val returned from tsl2583_get_lux can potentially be zero,
so check for this to avoid a division by zero and an overflowed
gain_trim_val.
Fixes clang scan-build warning:
drivers/iio/light/tsl2583.c:345:40: warning: Either the
condition 'lux_val<0' is redundant or there is division
by zero at line 345. [zerodivcond]
Fixes: ac4f6eee8fe8 ("staging: iio: TAOS tsl258x: Device driver")
Signed-off-by: Colin Ian King <colin.king(a)canonical.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
[iwamatsu: Change file path.]
Signed-off-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu(a)toshiba.co.jp>
---
drivers/staging/iio/light/tsl2583.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/staging/iio/light/tsl2583.c b/drivers/staging/iio/light/tsl2583.c
index 3100d960fe2c6d..da2a2ff4cdb998 100644
--- a/drivers/staging/iio/light/tsl2583.c
+++ b/drivers/staging/iio/light/tsl2583.c
@@ -378,6 +378,15 @@ static int taos_als_calibrate(struct iio_dev *indio_dev)
dev_err(&chip->client->dev, "taos_als_calibrate failed to get lux\n");
return lux_val;
}
+
+ /* Avoid division by zero of lux_value later on */
+ if (lux_val == 0) {
+ dev_err(&chip->client->dev,
+ "%s: lux_val of 0 will produce out of range trim_value\n",
+ __func__);
+ return -ENODATA;
+ }
+
gain_trim_val = (unsigned int) (((chip->taos_settings.als_cal_target)
* chip->taos_settings.als_gain_trim) / lux_val);
--
2.30.0
From: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
commit dda32c00c9a0fa103b5d54ef72c477b7aa993679 upstream.
'xhci_urb_enqueue()' is passed a 'mem_flags' argument, because "URBs may be
submitted in interrupt context" (see comment related to 'usb_submit_urb()'
in 'drivers/usb/core/urb.c')
So this flag should be used in all the calling chain.
Up to now, 'xhci_check_maxpacket()' which is only called from
'xhci_urb_enqueue()', uses GFP_KERNEL.
Be safe and pass the mem_flags to this function as well.
Fixes: ddba5cd0aeff ("xhci: Use command structures when queuing commands on the command ring")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-4-mathias.nyman@linux.intel…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[iwamatsu: Adjust context]
Signed-off-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu(a)toshiba.co.jp>
---
drivers/usb/host/xhci.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index bd6e3555c04793..b1994b03341fe8 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1302,7 +1302,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
* we need to issue an evaluate context command and wait on it.
*/
static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
- unsigned int ep_index, struct urb *urb)
+ unsigned int ep_index, struct urb *urb, gfp_t mem_flags)
{
struct xhci_container_ctx *out_ctx;
struct xhci_input_control_ctx *ctrl_ctx;
@@ -1333,7 +1333,7 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
* changes max packet sizes.
*/
- command = xhci_alloc_command(xhci, false, true, GFP_KERNEL);
+ command = xhci_alloc_command(xhci, false, true, mem_flags);
if (!command)
return -ENOMEM;
@@ -1440,7 +1440,7 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
*/
if (urb->dev->speed == USB_SPEED_FULL) {
ret = xhci_check_maxpacket(xhci, slot_id,
- ep_index, urb);
+ ep_index, urb, mem_flags);
if (ret < 0) {
xhci_urb_free_priv(urb_priv);
urb->hcpriv = NULL;
--
2.30.0
We observed several NVMe failures when running with SWIOTLB. The root
cause of the issue is that when data is mapped via SWIOTLB, the address
offset is not preserved. Several device drivers including the NVMe
driver relies on this offset to function correctly.
Even though we discovered the error when running using AMD SEV, we have
reproduced the same error in Rhel 8 without SEV. By adding swiotlb=force
option to the boot command line parameter, NVMe funcionality is
impacted. For example formatting a disk into xfs format returns an
error.
----
Changes in v2:
Rebased patches to 5.4.115
Updated patch description to correct format.
Jianxiong Gao (9):
driver core: add a min_align_mask field to struct
device_dma_parameters
swiotlb: add a IO_TLB_SIZE define
swiotlb: factor out an io_tlb_offset helper
swiotlb: factor out a nr_slots helper
swiotlb: clean up swiotlb_tbl_unmap_single
swiotlb: refactor swiotlb_tbl_map_single
swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single
swiotlb: respect min_align_mask
nvme-pci: set min_align_mask
drivers/nvme/host/pci.c | 1 +
include/linux/device.h | 1 +
include/linux/dma-mapping.h | 16 +++
include/linux/swiotlb.h | 1 +
kernel/dma/swiotlb.c | 269 ++++++++++++++++++++----------------
5 files changed, 169 insertions(+), 119 deletions(-)
--
2.31.1.751.gd2f1c929bd-goog
From: Mark Tomlinson <mark.tomlinson(a)alliedtelesis.co.nz>
commit 175e476b8cdf2a4de7432583b49c871345e4f8a1 upstream.
When a new table value was assigned, it was followed by a write memory
barrier. This ensured that all writes before this point would complete
before any writes after this point. However, to determine whether the
rules are unused, the sequence counter is read. To ensure that all
writes have been done before these reads, a full memory barrier is
needed, not just a write memory barrier. The same argument applies when
incrementing the counter, before the rules are read.
Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic
reported in cc00bcaa5899 (which is still present), while still
maintaining the same speed of replacing tables.
The smb_mb() barriers potentially slow the packet path, however testing
has shown no measurable change in performance on a 4-core MIPS64
platform.
Fixes: 7f5c6d4f665b ("netfilter: get rid of atomic ops in fast path")
Signed-off-by: Mark Tomlinson <mark.tomlinson(a)alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
[Ported to stable, affected barrier is added by d3d40f237480abf3268956daf18cdc56edd32834 in mainline]
Signed-off-by: Pavel Machek (CIP) <pavel(a)denx.de>
---
include/linux/netfilter/x_tables.h | 2 +-
net/netfilter/x_tables.c | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 6923e4049de3..304b60b49526 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -327,7 +327,7 @@ static inline unsigned int xt_write_recseq_begin(void)
* since addend is most likely 1
*/
__this_cpu_add(xt_recseq.sequence, addend);
- smp_wmb();
+ smp_mb();
return addend;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8caae1c5d93d..8878f859c6de 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1146,6 +1146,9 @@ xt_replace_table(struct xt_table *table,
smp_wmb();
table->private = newinfo;
+ /* make sure all cpus see new ->private value */
+ smp_mb();
+
/*
* Even though table entries have now been swapped, other CPU's
* may still be using the old entries. This is okay, because
--
2.11.0
--
DENX Software Engineering GmbH, Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany