commit 057d476fff778f1d3b9f861fdb5437ea1a3cfc99 upstream
Backport for linux-4.14.y stable
A race in xhci USB3 remote wake handling may force device back to suspend
after it initiated resume siganaling, causing a missed resume event or warm
reset of device.
When a USB3 link completes resume signaling and goes to enabled (UO)
state a interrupt is issued and the interrupt handler will clear the
bus_state->port_remote_wakeup resume flag, allowing bus suspend.
If the USB3 roothub thread just finished reading port status before
the interrupt, finding ports still in suspended (U3) state, but hasn't
yet started suspending the hub, then the xhci interrupt handler will clear
the flag that prevented roothub suspend and allow bus to suspend, forcing
all port links back to suspended (U3) state.
Example case:
usb_runtime_suspend() # because all ports still show suspended U3
usb_suspend_both()
hub_suspend(); # successful as hub->wakeup_bits not set yet
==> INTERRUPT
xhci_irq()
handle_port_status()
clear bus_state->port_remote_wakeup
usb_wakeup_notification()
sets hub->wakeup_bits;
kick_hub_wq()
<== END INTERRUPT
hcd_bus_suspend()
xhci_bus_suspend() # success as port_remote_wakeup bits cleared
Fix this by increasing roothub usage count during port resume to prevent
roothub autosuspend, and by making sure bus_state->port_remote_wakeup
flag is only cleared after resume completion is visible, i.e.
after xhci roothub returned U0 or other non-U3 link state link on a
get port status request.
Issue rootcaused by Chiasheng Lee
Cc: Lee Hou-hsun <hou-hsun.lee(a)intel.com>
Cc: Lee Chiasheng <chiasheng.lee(a)intel.com>
Reported-by: Lee Chiasheng <chiasheng.lee(a)intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-hub.c | 8 ++++++++
drivers/usb/host/xhci-ring.c | 6 +-----
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 95503bb9b067..d1363f3fabfa 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -887,6 +887,14 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
status |= USB_PORT_STAT_C_BH_RESET << 16;
if ((raw_port_status & PORT_CEC))
status |= USB_PORT_STAT_C_CONFIG_ERROR << 16;
+
+ /* USB3 remote wake resume signaling completed */
+ if (bus_state->port_remote_wakeup & (1 << wIndex) &&
+ (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME &&
+ (raw_port_status & PORT_PLS_MASK) != XDEV_RECOVERY) {
+ bus_state->port_remote_wakeup &= ~(1 << wIndex);
+ usb_hcd_end_port_resume(&hcd->self, wIndex);
+ }
}
if (hcd->speed < HCD_USB3) {
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 89af395cd89c..61fa3007a74a 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1679,9 +1679,6 @@ static void handle_port_status(struct xhci_hcd *xhci,
usb_hcd_resume_root_hub(hcd);
}
- if (hcd->speed >= HCD_USB3 && (portsc & PORT_PLS_MASK) == XDEV_INACTIVE)
- bus_state->port_remote_wakeup &= ~(1 << faked_port_index);
-
if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_RESUME) {
xhci_dbg(xhci, "port resume event for port %d\n", port_id);
@@ -1700,6 +1697,7 @@ static void handle_port_status(struct xhci_hcd *xhci,
bus_state->port_remote_wakeup |= 1 << faked_port_index;
xhci_test_and_clear_bit(xhci, port_array,
faked_port_index, PORT_PLC);
+ usb_hcd_start_port_resume(&hcd->self, faked_port_index);
xhci_set_link_state(xhci, port_array, faked_port_index,
XDEV_U0);
/* Need to wait until the next link state change
@@ -1737,8 +1735,6 @@ static void handle_port_status(struct xhci_hcd *xhci,
if (slot_id && xhci->devs[slot_id])
xhci_ring_device(xhci, slot_id);
if (bus_state->port_remote_wakeup & (1 << faked_port_index)) {
- bus_state->port_remote_wakeup &=
- ~(1 << faked_port_index);
xhci_test_and_clear_bit(xhci, port_array,
faked_port_index, PORT_PLC);
usb_wakeup_notification(hcd->self.root_hub,
--
2.17.1
commit 057d476fff778f1d3b9f861fdb5437ea1a3cfc99 upstream
Backport for linux-4.19.y stable
A race in xhci USB3 remote wake handling may force device back to suspend
after it initiated resume siganaling, causing a missed resume event or warm
reset of device.
When a USB3 link completes resume signaling and goes to enabled (UO)
state a interrupt is issued and the interrupt handler will clear the
bus_state->port_remote_wakeup resume flag, allowing bus suspend.
If the USB3 roothub thread just finished reading port status before
the interrupt, finding ports still in suspended (U3) state, but hasn't
yet started suspending the hub, then the xhci interrupt handler will clear
the flag that prevented roothub suspend and allow bus to suspend, forcing
all port links back to suspended (U3) state.
Example case:
usb_runtime_suspend() # because all ports still show suspended U3
usb_suspend_both()
hub_suspend(); # successful as hub->wakeup_bits not set yet
==> INTERRUPT
xhci_irq()
handle_port_status()
clear bus_state->port_remote_wakeup
usb_wakeup_notification()
sets hub->wakeup_bits;
kick_hub_wq()
<== END INTERRUPT
hcd_bus_suspend()
xhci_bus_suspend() # success as port_remote_wakeup bits cleared
Fix this by increasing roothub usage count during port resume to prevent
roothub autosuspend, and by making sure bus_state->port_remote_wakeup
flag is only cleared after resume completion is visible, i.e.
after xhci roothub returned U0 or other non-U3 link state link on a
get port status request.
Issue rootcaused by Chiasheng Lee
Cc: <stable(a)vger.kernel.org>
Cc: Lee, Hou-hsun <hou-hsun.lee(a)intel.com>
Reported-by: Lee, Chiasheng <chiasheng.lee(a)intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-hub.c | 8 ++++++++
drivers/usb/host/xhci-ring.c | 3 +--
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 02843c16f9c7..8f180bf7561a 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -868,6 +868,14 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
status |= USB_PORT_STAT_C_BH_RESET << 16;
if ((raw_port_status & PORT_CEC))
status |= USB_PORT_STAT_C_CONFIG_ERROR << 16;
+
+ /* USB3 remote wake resume signaling completed */
+ if (bus_state->port_remote_wakeup & (1 << wIndex) &&
+ (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME &&
+ (raw_port_status & PORT_PLS_MASK) != XDEV_RECOVERY) {
+ bus_state->port_remote_wakeup &= ~(1 << wIndex);
+ usb_hcd_end_port_resume(&hcd->self, wIndex);
+ }
}
if (hcd->speed < HCD_USB3) {
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index b03974958a28..98b67605d3cf 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1609,7 +1609,6 @@ static void handle_port_status(struct xhci_hcd *xhci,
slot_id = xhci_find_slot_id_by_port(hcd, xhci, hcd_portnum + 1);
if (slot_id && xhci->devs[slot_id])
xhci->devs[slot_id]->flags |= VDEV_PORT_ERROR;
- bus_state->port_remote_wakeup &= ~(1 << hcd_portnum);
}
if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_RESUME) {
@@ -1630,6 +1629,7 @@ static void handle_port_status(struct xhci_hcd *xhci,
bus_state->port_remote_wakeup |= 1 << hcd_portnum;
xhci_test_and_clear_bit(xhci, port, PORT_PLC);
xhci_set_link_state(xhci, port, XDEV_U0);
+ usb_hcd_start_port_resume(&hcd->self, hcd_portnum);
/* Need to wait until the next link state change
* indicates the device is actually in U0.
*/
@@ -1669,7 +1669,6 @@ static void handle_port_status(struct xhci_hcd *xhci,
if (slot_id && xhci->devs[slot_id])
xhci_ring_device(xhci, slot_id);
if (bus_state->port_remote_wakeup & (1 << hcd_portnum)) {
- bus_state->port_remote_wakeup &= ~(1 << hcd_portnum);
xhci_test_and_clear_bit(xhci, port, PORT_PLC);
usb_wakeup_notification(hcd->self.root_hub,
hcd_portnum + 1);
--
2.17.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f26a9e959a7b1588c59f7a919b41b67175b211d8 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 9 Dec 2019 02:32:15 +0000
Subject: [PATCH] drm/i915/gt: Detect if we miss WaIdleLiteRestore
In order to avoid confusing the HW, we must never submit an empty ring
during lite-restore, that is we should always advance the RING_TAIL
before submitting to stay ahead of the RING_HEAD.
Normally this is prevented by keeping a couple of spare NOPs in the
request->wa_tail so that on resubmission we can advance the tail. This
relies on the request only being resubmitted once, which is the normal
condition as it is seen once for ELSP[1] and then later in ELSP[0]. On
preemption, the requests are unwound and the tail reset back to the
normal end point (as we know the request is incomplete and therefore its
RING_HEAD is even earlier).
However, if this w/a should fail we would try and resubmit the request
with the RING_TAIL already set to the location of this request's wa_tail
potentially causing a GPU hang. We can spot when we do try and
incorrectly resubmit without advancing the RING_TAIL and spare any
embarrassment by forcing the context restore.
In the case of preempt-to-busy, we leave the requests running on the HW
while we unwind. As the ring is still live, we cannot rewind our
rq->tail without forcing a reload so leave it set to rq->wa_tail and
only force a reload if we resubmit after a lite-restore. (Normally, the
forced reload will be a part of the preemption event.)
Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
Closes: https://gitlab.freedesktop.org/drm/intel/issues/673
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: stable(a)kernel.vger.org
Link: https://patchwork.freedesktop.org/patch/msgid/20191209023215.3519970-1-chri…
(cherry picked from commit 82c69bf58650e644c61aa2bf5100b63a1070fd2f)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 17bb52aeff19..75dd0e0367b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -845,12 +845,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
}
}
-static void unwind_wa_tail(struct i915_request *rq)
-{
- rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
- assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
@@ -863,12 +857,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
&engine->active.requests,
sched.link) {
-
if (i915_request_completed(rq))
continue; /* XXX */
__i915_request_unsubmit(rq);
- unwind_wa_tail(rq);
/*
* Push the request back into the queue for later resubmission.
@@ -1161,13 +1153,29 @@ execlists_schedule_out(struct i915_request *rq)
i915_request_put(rq);
}
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->hw_context;
- u64 desc;
+ u64 desc = ce->lrc_desc;
+ u32 tail;
- ce->lrc_reg_state[CTX_RING_TAIL] =
- intel_ring_set_tail(rq->ring, rq->tail);
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ *
+ * We should never submit the context with the same RING_TAIL twice
+ * just in case we submit an empty ring, which confuses the HW.
+ *
+ * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+ * the normal request to be able to always advance the RING_TAIL on
+ * subsequent resubmissions (for lite restore). Should that fail us,
+ * and we try and submit the same tail again, force the context
+ * reload.
+ */
+ tail = intel_ring_set_tail(rq->ring, rq->tail);
+ if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+ rq->tail = rq->wa_tail;
/*
* Make sure the context image is complete before we submit it to HW.
@@ -1186,13 +1194,11 @@ static u64 execlists_update_context(const struct i915_request *rq)
*/
mb();
- desc = ce->lrc_desc;
- ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
/* Wa_1607138340:tgl */
if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
}
@@ -1703,16 +1709,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return;
}
-
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
}