The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 0910dd7c9ad45a2605c45fd2bf3d1bcac087687c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101348-gigahertz-cauterize-0303@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0910dd7c9ad45a2605c45fd2bf3d1bcac087687c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 5 Aug 2025 12:05:09 -0700
Subject: [PATCH] KVM: SVM: Skip fastpath emulation on VM-Exit if next RIP
isn't valid
Skip the WRMSR and HLT fastpaths in SVM's VM-Exit handler if the next RIP
isn't valid, e.g. because KVM is running with nrips=false. SVM must
decode and emulate to skip the instruction if the CPU doesn't provide the
next RIP, and getting the instruction bytes to decode requires reading
guest memory. Reading guest memory through the emulator can fault, i.e.
can sleep, which is disallowed since the fastpath handlers run with IRQs
disabled.
BUG: sleeping function called from invalid context at ./include/linux/uaccess.h:106
in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 32611, name: qemu
preempt_count: 1, expected: 0
INFO: lockdep is turned off.
irq event stamp: 30580
hardirqs last enabled at (30579): [<ffffffffc08b2527>] vcpu_run+0x1787/0x1db0 [kvm]
hardirqs last disabled at (30580): [<ffffffffb4f62e32>] __schedule+0x1e2/0xed0
softirqs last enabled at (30570): [<ffffffffb4247a64>] fpu_swap_kvm_fpstate+0x44/0x210
softirqs last disabled at (30568): [<ffffffffb4247a64>] fpu_swap_kvm_fpstate+0x44/0x210
CPU: 298 UID: 0 PID: 32611 Comm: qemu Tainted: G U 6.16.0-smp--e6c618b51cfe-sleep #782 NONE
Tainted: [U]=USER
Hardware name: Google Astoria-Turin/astoria, BIOS 0.20241223.2-0 01/17/2025
Call Trace:
<TASK>
dump_stack_lvl+0x7d/0xb0
__might_resched+0x271/0x290
__might_fault+0x28/0x80
kvm_vcpu_read_guest_page+0x8d/0xc0 [kvm]
kvm_fetch_guest_virt+0x92/0xc0 [kvm]
__do_insn_fetch_bytes+0xf3/0x1e0 [kvm]
x86_decode_insn+0xd1/0x1010 [kvm]
x86_emulate_instruction+0x105/0x810 [kvm]
__svm_skip_emulated_instruction+0xc4/0x140 [kvm_amd]
handle_fastpath_invd+0xc4/0x1a0 [kvm]
vcpu_run+0x11a1/0x1db0 [kvm]
kvm_arch_vcpu_ioctl_run+0x5cc/0x730 [kvm]
kvm_vcpu_ioctl+0x578/0x6a0 [kvm]
__se_sys_ioctl+0x6d/0xb0
do_syscall_64+0x8a/0x2c0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
RIP: 0033:0x7f479d57a94b
</TASK>
Note, this is essentially a reapply of commit 5c30e8101e8d ("KVM: SVM:
Skip WRMSR fastpath on VM-Exit if next RIP isn't valid"), but with
different justification (KVM now grabs SRCU when skipping the instruction
for other reasons).
Fixes: b439eb8ab578 ("Revert "KVM: SVM: Skip WRMSR fastpath on VM-Exit if next RIP isn't valid"")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250805190526.1453366-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d9931c6c4bc6..829d9d46718d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4181,13 +4181,21 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+
+ /*
+ * Next RIP must be provided as IRQs are disabled, and accessing guest
+ * memory to decode the instruction might fault, i.e. might sleep.
+ */
+ if (!nrips || !control->next_rip)
+ return EXIT_FASTPATH_NONE;
if (is_guest_mode(vcpu))
return EXIT_FASTPATH_NONE;
- switch (svm->vmcb->control.exit_code) {
+ switch (control->exit_code) {
case SVM_EXIT_MSR:
- if (!svm->vmcb->control.exit_info_1)
+ if (!control->exit_info_1)
break;
return handle_fastpath_set_msr_irqoff(vcpu);
case SVM_EXIT_HLT:
Regardless of the DeviceContext of a device, we can't give any
guarantees about the DeviceContext of its parent device.
This is very subtle, since it's only caused by a simple typo, i.e.
Self::from_raw(parent)
which preserves the DeviceContext in this case, vs.
Device::from_raw(parent)
which discards the DeviceContext.
(I should have noticed it doing the correct thing in auxiliary::Device
subsequently, but somehow missed it.)
Hence, fix both Device::parent() and auxiliary::Device::parent().
Cc: stable(a)vger.kernel.org
Fixes: a4c9f71e3440 ("rust: device: implement Device::parent()")
Signed-off-by: Danilo Krummrich <dakr(a)kernel.org>
---
rust/kernel/auxiliary.rs | 8 +-------
rust/kernel/device.rs | 4 ++--
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs
index 4163129b4103..e12f78734606 100644
--- a/rust/kernel/auxiliary.rs
+++ b/rust/kernel/auxiliary.rs
@@ -217,13 +217,7 @@ pub fn id(&self) -> u32 {
/// Returns a reference to the parent [`device::Device`], if any.
pub fn parent(&self) -> Option<&device::Device> {
- let ptr: *const Self = self;
- // CAST: `Device<Ctx: DeviceContext>` types are transparent to each other.
- let ptr: *const Device = ptr.cast();
- // SAFETY: `ptr` was derived from `&self`.
- let this = unsafe { &*ptr };
-
- this.as_ref().parent()
+ self.as_ref().parent()
}
}
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
index 23a95324cb0f..343996027c89 100644
--- a/rust/kernel/device.rs
+++ b/rust/kernel/device.rs
@@ -256,7 +256,7 @@ pub(crate) fn as_raw(&self) -> *mut bindings::device {
/// Returns a reference to the parent device, if any.
#[cfg_attr(not(CONFIG_AUXILIARY_BUS), expect(dead_code))]
- pub(crate) fn parent(&self) -> Option<&Self> {
+ pub(crate) fn parent(&self) -> Option<&Device> {
// SAFETY:
// - By the type invariant `self.as_raw()` is always valid.
// - The parent device is only ever set at device creation.
@@ -269,7 +269,7 @@ pub(crate) fn parent(&self) -> Option<&Self> {
// - Since `parent` is not NULL, it must be a valid pointer to a `struct device`.
// - `parent` is valid for the lifetime of `self`, since a `struct device` holds a
// reference count of its parent.
- Some(unsafe { Self::from_raw(parent) })
+ Some(unsafe { Device::from_raw(parent) })
}
}
--
2.51.0
The ucsi_psy_get_current_max function defaults to 0.1A when it is not
clear how much current the partner device can support. But this does
not check the port is connected, and will report 0.1A max current when
nothing is connected. Update ucsi_psy_get_current_max to report 0A when
there is no connection.
Fixes: af833e7f7db3 ("usb: typec: ucsi: psy: Set current max to 100mA for BC 1.2 and Default")
Signed-off-by: Jameson Thies <jthies(a)google.com>
Reviewed-by: Benson Leung <bleung(a)chromium.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
---
drivers/usb/typec/ucsi/psy.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c
index 62a9d68bb66d..8ae900c8c132 100644
--- a/drivers/usb/typec/ucsi/psy.c
+++ b/drivers/usb/typec/ucsi/psy.c
@@ -145,6 +145,11 @@ static int ucsi_psy_get_current_max(struct ucsi_connector *con,
{
u32 pdo;
+ if (!UCSI_CONSTAT(con, CONNECTED)) {
+ val->intval = 0;
+ return 0;
+ }
+
switch (UCSI_CONSTAT(con, PWR_OPMODE)) {
case UCSI_CONSTAT_PWR_OPMODE_PD:
if (con->num_pdos > 0) {
base-commit: e40b984b6c4ce3f80814f39f86f87b2a48f2e662
--
2.51.0.858.gf9c4a03a3a-goog
EL0 exception handlers should always call `exit_to_user_mode()` with
interrupts unmasked.
When handling a completed single-step, we skip the if block and
`local_daif_restore(DAIF_PROCCTX)` never gets called,
which ends up calling `exit_to_user_mode()` with interrupts masked.
This is broken if pNMI is in use, as `do_notify_resume()` will try
to enable interrupts, but `local_irq_enable()` will only change the PMR,
leaving interrupts masked via DAIF.
Move the call to `try_step_suspended_breakpoints()` outside of the check
so that interrupts can be unmasked even if we don't call the step handler.
Fixes: 0ac7584c08ce ("arm64: debug: split single stepping exception entry")
Cc: <stable(a)vger.kernel.org> # 6.17
Signed-off-by: Ada Couprie Diaz <ada.coupriediaz(a)arm.com>
----
This was already broken in a similar fashion in kernels prior to v6.17,
as `local_daif_restore()` was called _after_ the debug handlers, with some
calling `send_user_sigtrap()` which would unmask interrupts via PMR
while leaving them masked by DAIF.
---
arch/arm64/kernel/entry-common.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 2b0c5925502e..780cbcf12695 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -832,6 +832,7 @@ static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr)
static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr)
{
+ bool step_done;
if (!is_ttbr0_addr(regs->pc))
arm64_apply_bp_hardening();
@@ -842,10 +843,11 @@ static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr)
* If we are stepping a suspended breakpoint there's nothing more to do:
* the single-step is complete.
*/
- if (!try_step_suspended_breakpoints(regs)) {
- local_daif_restore(DAIF_PROCCTX);
+ step_done = try_step_suspended_breakpoints(regs)
+ local_daif_restore(DAIF_PROCCTX);
+ if (!step_done)
do_el0_softstep(esr, regs);
- }
+
exit_to_user_mode(regs);
}
base-commit: 449d48b1b99fdaa076166e200132705ac2bee711
--
2.43.0
PCI devices are created via pci_scan_slot() and similar, and are
promptly configured for runtime PM (pci_pm_init()). They are initially
prevented from suspending by way of pm_runtime_forbid(); however, it's
expected that user space may override this via sysfs [1].
Now, sometime after initial scan, a PCI device receives its BAR
configuration (pci_assign_unassigned_bus_resources(), etc.).
If a PCI device is allowed to suspend between pci_scan_slot() and
pci_assign_unassigned_bus_resources(), then pci-driver.c will
save/restore incorrect BAR configuration for the device, and the device
may cease to function.
This behavior races with user space, since user space may enable runtime
PM [1] as soon as it sees the device, which may be before BAR
configuration.
Prevent suspending in this intermediate state by holding a runtime PM
reference until the device is fully initialized and ready for probe().
[1] echo auto > /sys/bus/pci/devices/.../power/control
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
---
drivers/pci/bus.c | 7 +++++++
drivers/pci/pci.c | 6 ++++++
2 files changed, 13 insertions(+)
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index f26aec6ff588..227a8898acac 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -14,6 +14,7 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
@@ -375,6 +376,12 @@ void pci_bus_add_device(struct pci_dev *dev)
put_device(&pdev->dev);
}
+ /*
+ * Now that resources are assigned, drop the reference we grabbed in
+ * pci_pm_init().
+ */
+ pm_runtime_put_noidle(&dev->dev);
+
if (!dn || of_device_is_available(dn))
pci_dev_allow_binding(dev);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b14dd064006c..06a901214f2c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3226,6 +3226,12 @@ void pci_pm_init(struct pci_dev *dev)
pci_pm_power_up_and_verify_state(dev);
pm_runtime_forbid(&dev->dev);
pm_runtime_set_active(&dev->dev);
+ /*
+ * We cannot allow a device to suspend before its resources are
+ * configured. Otherwise, we may allow saving/restoring unexpected BAR
+ * configuration.
+ */
+ pm_runtime_get_noresume(&dev->dev);
pm_runtime_enable(&dev->dev);
}
--
2.51.0.858.gf9c4a03a3a-goog
The L1 substates support requires additional steps to work, see e.g.
section '11.6.6.4 L1 Substate' in the RK3588 TRM V1.0.
These steps are currently missing from the driver.
While this has always been a problem when using e.g.
CONFIG_PCIEASPM_POWER_SUPERSAVE=y, the problem became more apparent after
commit f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for
devicetree platforms"), which enabled ASPM also for
CONFIG_PCIEASPM_DEFAULT=y.
Disable L1 substates until proper support is added.
Cc: stable(a)vger.kernel.org
Fixes: 0e898eb8df4e ("PCI: rockchip-dwc: Add Rockchip RK356X host controller driver")
Fixes: f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms")
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
Changes since v1:
-Remove superfluous dw_pcie_readl_dbi()
drivers/pci/controller/dwc/pcie-dw-rockchip.c | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
index 3e2752c7dd09..84f882abbca5 100644
--- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -200,6 +200,25 @@ static bool rockchip_pcie_link_up(struct dw_pcie *pci)
return FIELD_GET(PCIE_LINKUP_MASK, val) == PCIE_LINKUP;
}
+/*
+ * See e.g. section '11.6.6.4 L1 Substate' in the RK3588 TRM V1.0 for the steps
+ * needed to support L1 substates. Currently, not a single rockchip platform
+ * performs these steps, so disable L1 substates until there is proper support.
+ */
+static void rockchip_pcie_disable_l1sub(struct dw_pcie *pci)
+{
+ u32 cap, l1subcap;
+
+ cap = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS);
+ if (cap) {
+ l1subcap = dw_pcie_readl_dbi(pci, cap + PCI_L1SS_CAP);
+ l1subcap &= ~(PCI_L1SS_CAP_L1_PM_SS | PCI_L1SS_CAP_ASPM_L1_1 |
+ PCI_L1SS_CAP_ASPM_L1_2 | PCI_L1SS_CAP_PCIPM_L1_1 |
+ PCI_L1SS_CAP_PCIPM_L1_2);
+ dw_pcie_writel_dbi(pci, cap + PCI_L1SS_CAP, l1subcap);
+ }
+}
+
static void rockchip_pcie_enable_l0s(struct dw_pcie *pci)
{
u32 cap, lnkcap;
@@ -264,6 +283,7 @@ static int rockchip_pcie_host_init(struct dw_pcie_rp *pp)
irq_set_chained_handler_and_data(irq, rockchip_pcie_intx_handler,
rockchip);
+ rockchip_pcie_disable_l1sub(pci);
rockchip_pcie_enable_l0s(pci);
return 0;
@@ -301,6 +321,7 @@ static void rockchip_pcie_ep_init(struct dw_pcie_ep *ep)
struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
enum pci_barno bar;
+ rockchip_pcie_disable_l1sub(pci);
rockchip_pcie_enable_l0s(pci);
rockchip_pcie_ep_hide_broken_ats_cap_rk3588(ep);
--
2.51.0