Since commit f8f80be501aa ("xhci: Use soft retry to recover faster from transaction errors"), unplugging a USB device during enumeration results in errors like this:
[ 364.855321] xhci_hcd 0000:0b:00.0: ERROR Transfer event for disabled endpoint slot 5 ep 2
[ 364.864622] xhci_hcd 0000:0b:00.0: @0000002167656d70 67f03000 00000021 0c000000 05038001
[ 374.934793] xhci_hcd 0000:0b:00.0: Abort failed to stop command ring: -110
[ 374.958793] xhci_hcd 0000:0b:00.0: xHCI host controller not responding, assume dead
[ 374.967590] xhci_hcd 0000:0b:00.0: HC died; cleaning up
[ 374.973984] xhci_hcd 0000:0b:00.0: Timeout while waiting for configure endpoint command
It seems that the Etron xHCI host cannot perform Soft Retry correctly. Apply the XHCI_NO_SOFT_RETRY quirk to disable Soft Retry; with that, the issue is gone.
This patch depends on commit a4a251f8c235 ("usb: xhci: do not perform Soft Retry for some xHCI hosts").
Fixes: f8f80be501aa ("xhci: Use soft retry to recover faster from transaction errors")
Cc: stable@vger.kernel.org
Signed-off-by: Kuangyi Chiang <ki.chiang65@gmail.com>
---
 drivers/usb/host/xhci-pci.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index dda873f3fee7..19f120ed8dd3 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -399,6 +399,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_BROKEN_STREAMS; xhci->quirks |= XHCI_NO_RESET_DEVICE; xhci->quirks |= XHCI_NO_BREAK_CTRL_TD; + xhci->quirks |= XHCI_NO_SOFT_RETRY; } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_EJ188) { @@ -406,6 +407,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_BROKEN_STREAMS; xhci->quirks |= XHCI_NO_RESET_DEVICE; xhci->quirks |= XHCI_NO_BREAK_CTRL_TD; + xhci->quirks |= XHCI_NO_SOFT_RETRY; }
if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
Performing a stability stress test on a USB3.0 2.5G ethernet adapter results in errors like this:
[ 91.441469] r8152 2-3:1.0 eth3: get_registers -71
[ 91.458659] r8152 2-3:1.0 eth3: get_registers -71
[ 91.475911] r8152 2-3:1.0 eth3: get_registers -71
[ 91.493203] r8152 2-3:1.0 eth3: get_registers -71
[ 91.510421] r8152 2-3:1.0 eth3: get_registers -71
The r8152 driver will periodically issue lots of control-IN requests to access the status of ethernet adapter hardware registers during the test.
This happens when the xHCI driver enqueues a control TD that crosses the Link TRB between two ring segments (as shown below) in endpoint zero's transfer ring. It seems the Etron xHCI host cannot process such a TD correctly, which causes a USB transfer error. Having the upper-layer driver retry the control-IN request might work around the problem, but not all drivers do this.
        |     |
        -------
        | TRB | Setup Stage
        -------
        | TRB | Link
        -------
        -------
        | TRB | Data Stage
        -------
        | TRB | Status Stage
        -------
        |     |
To work around this, the xHCI driver should enqueue a No Op TRB if the next available TRB is the Link TRB of the ring segment. This prevents the Setup and Data Stage TRBs from being separated by the Link TRB.
Add a new quirk flag XHCI_NO_BREAK_CTRL_TD to invoke the workaround in xhci_queue_ctrl_tx().
Both EJ168 and EJ188 have the same problem; with this patch applied, the problem is gone.
Fixes: d0e96f5a71a0 ("USB: xhci: Control transfer support.")
Cc: stable@vger.kernel.org
Signed-off-by: Kuangyi Chiang <ki.chiang65@gmail.com>
---
 drivers/usb/host/xhci-pci.c  |  2 ++
 drivers/usb/host/xhci-ring.c | 13 +++++++++++++
 drivers/usb/host/xhci.h      |  1 +
 3 files changed, 16 insertions(+)
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 2fa7f32c2bf9..dda873f3fee7 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -398,12 +398,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_BROKEN_STREAMS; xhci->quirks |= XHCI_NO_RESET_DEVICE; + xhci->quirks |= XHCI_NO_BREAK_CTRL_TD; } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_EJ188) { xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_BROKEN_STREAMS; xhci->quirks |= XHCI_NO_RESET_DEVICE; + xhci->quirks |= XHCI_NO_BREAK_CTRL_TD; }
if (pdev->vendor == PCI_VENDOR_ID_RENESAS && diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 4ea2c3e072a9..1c387d4dc152 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3727,6 +3727,19 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (!urb->setup_packet) return -EINVAL;
+ if (xhci->quirks & XHCI_NO_BREAK_CTRL_TD) { + /* + * If next available TRB is the Link TRB in the ring segment then + * enqueue a No Op TRB, this can prevent the Setup and Data Stage + * TRB to be breaked by the Link TRB. + */ + if (trb_is_link(ep_ring->enqueue + 1)) { + field = TRB_TYPE(TRB_TR_NOOP) | ep_ring->cycle_state; + queue_trb(xhci, ep_ring, false, 0, 0, + TRB_INTR_TARGET(0), field); + } + } + /* 1 TRB for setup, 1 for status */ num_trbs = 2; /* diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 1272d725270a..aedbe8fee8be 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1629,6 +1629,7 @@ struct xhci_hcd { #define XHCI_ZHAOXIN_HOST BIT_ULL(46) #define XHCI_WRITE_64_HI_LO BIT_ULL(47) #define XHCI_NO_RESET_DEVICE BIT_ULL(48) +#define XHCI_NO_BREAK_CTRL_TD BIT_ULL(49)
unsigned int num_active_eps; unsigned int limit_active_eps;
Hi,
This happens when the xHCI driver enqueue a control TD (which cross over the Link TRB between two ring segments, as shown) in the endpoint zero's transfer ring. Seems the Etron xHCI host can not perform this TD correctly, causing the USB transfer error occurred, maybe the upper driver retry that control-IN request can solve problem, but not all drivers do this.
| |
| TRB | Setup Stage
| TRB | Link
| TRB | Data Stage
| TRB | Status Stage
| |
I wonder about a few things.
1. What are the exact symptoms, besides Ethernet driver errors? Any errors from xhci_hcd? What if dynamic debug is enabled?
2. How did you determine that this is the exact cause?
3. Does it happen every time when a Link follows Setup, or only randomly and it takes lots of control transfers to trigger it?
4. How is it even possible? As far as I see, Linux simply queues three TRBs for a control URB. There are 255 slots in a segment, so exactly 85 URBs should fit, and then back to the first slot.
Regards, Michal
> 4. How is it even possible? As far as I see, Linux simply queues three TRBs for a control URB. There are 255 slots in a segment, so exactly 85 URBs should fit, and then back to the first slot.
Not all control transfers have a Data stage TRB.
-Mathias
Hi,
Thank you for the review.
Michał Pecio michal.pecio@gmail.com 於 2024年9月11日 週三 下午3:52寫道:
Hi,
This happens when the xHCI driver enqueue a control TD (which cross over the Link TRB between two ring segments, as shown) in the endpoint zero's transfer ring. Seems the Etron xHCI host can not perform this TD correctly, causing the USB transfer error occurred, maybe the upper driver retry that control-IN request can solve problem, but not all drivers do this.
| |
| TRB | Setup Stage
| TRB | Link
| TRB | Data Stage
| TRB | Status Stage
| |
I wonder about a few things.
- What are the exact symptoms, besides Ethernet driver errors?
Any errors from xhci_hcd? What if dynamic debug is enabled?
The xhci driver receives a transfer event TRB (completion code is "USB Transaction Error") when the issue is triggered.
- How did you determine that this is the exact cause?
The issue is triggered every time when a Link TRB follows a Setup Stage TRB.
- Does it happen every time when a Link follows Setup, or only
randomly and it takes lots of control transfers to trigger it?
Yes, it happens every time.
- How is it even possible? As far as I see, Linux simply queues
three TRBs for a control URB. There are 255 slots in a segemnt, so exactly 85 URBs should fit, and then back to the first slot.
The xhci driver also queues no data control transfers.
Regards, Michal
Thanks, Kuangyi Chiang
On 11.9.2024 8.17, Kuangyi Chiang wrote:
Performing a stability stress test on a USB3.0 2.5G ethernet adapter results in errors like this:
[ 91.441469] r8152 2-3:1.0 eth3: get_registers -71 [ 91.458659] r8152 2-3:1.0 eth3: get_registers -71 [ 91.475911] r8152 2-3:1.0 eth3: get_registers -71 [ 91.493203] r8152 2-3:1.0 eth3: get_registers -71 [ 91.510421] r8152 2-3:1.0 eth3: get_registers -71
The r8152 driver will periodically issue lots of control-IN requests to access the status of ethernet adapter hardware registers during the test.
This happens when the xHCI driver enqueue a control TD (which cross over the Link TRB between two ring segments, as shown) in the endpoint zero's transfer ring. Seems the Etron xHCI host can not perform this TD correctly, causing the USB transfer error occurred, maybe the upper driver retry that control-IN request can solve problem, but not all drivers do this.
| |
| TRB | Setup Stage
| TRB | Link
| TRB | Data Stage
| TRB | Status Stage
| |
What if the link TRB is between Data and Status stage, does that case work normally?
To work around this, the xHCI driver should enqueue a No Op TRB if next available TRB is the Link TRB in the ring segment, this can prevent the Setup and Data Stage TRB to be breaked by the Link TRB.
There are some hosts that need the 'Chain' bit set in the Link TRB, does that work in this case?
Thanks Mathias
Hi,
Thank you for the review.
Mathias Nyman mathias.nyman@linux.intel.com 於 2024年9月11日 週三 下午11:05寫道:
On 11.9.2024 8.17, Kuangyi Chiang wrote:
Performing a stability stress test on a USB3.0 2.5G ethernet adapter results in errors like this:
[ 91.441469] r8152 2-3:1.0 eth3: get_registers -71 [ 91.458659] r8152 2-3:1.0 eth3: get_registers -71 [ 91.475911] r8152 2-3:1.0 eth3: get_registers -71 [ 91.493203] r8152 2-3:1.0 eth3: get_registers -71 [ 91.510421] r8152 2-3:1.0 eth3: get_registers -71
The r8152 driver will periodically issue lots of control-IN requests to access the status of ethernet adapter hardware registers during the test.
This happens when the xHCI driver enqueue a control TD (which cross over the Link TRB between two ring segments, as shown) in the endpoint zero's transfer ring. Seems the Etron xHCI host can not perform this TD correctly, causing the USB transfer error occurred, maybe the upper driver retry that control-IN request can solve problem, but not all drivers do this.
| |
| TRB | Setup Stage
| TRB | Link
| TRB | Data Stage
| TRB | Status Stage
| |
What if the link TRB is between Data and Status stage, does that case work normally?
I am not sure; I have not encountered this case, so it may be OK.
To work around this, the xHCI driver should enqueue a No Op TRB if next available TRB is the Link TRB in the ring segment, this can prevent the Setup and Data Stage TRB to be breaked by the Link TRB.
There are some hosts that need the 'Chain' bit set in the Link TRB, does that work in this case?
No, it doesn't work. It seems to be a hardware issue.
Thanks Mathias
Thanks, Kuangyi Chiang
Sometimes the hub driver does not recognize the USB device connected to the external USB2.0 hub when the system resumes from S4.
After the SetPortFeature(PORT_RESET) request is completed, the hub driver calls the HCD reset_device callback, which will issue a Reset Device command and free all structures associated with endpoints that were disabled.
This happens when the xHCI driver issues a Reset Device command to inform the Etron xHCI host that the USB device associated with a device slot has been reset. It seems that the Etron xHCI host cannot perform this command correctly, which affects the USB device.
To work around this, the xHCI driver should obtain a new device slot with reference to commit 651aaf36a7d7 ("usb: xhci: Handle USB transaction error on address command"), which is another way to inform the Etron xHCI host that the USB device has been reset.
Add a new quirk flag XHCI_NO_RESET_DEVICE to invoke the workaround in xhci_discover_or_reset_device().
Both EJ168 and EJ188 have the same problem; with this patch applied, the problem is gone.
Fixes: 2a8f82c4ceaf ("USB: xhci: Notify the xHC when a device is reset.")
Cc: stable@vger.kernel.org
Signed-off-by: Kuangyi Chiang <ki.chiang65@gmail.com>
---
 drivers/usb/host/xhci-pci.c |  2 ++
 drivers/usb/host/xhci.c     | 19 +++++++++++++++++++
 drivers/usb/host/xhci.h     |  1 +
 3 files changed, 22 insertions(+)
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index dc1e345ab67e..2fa7f32c2bf9 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -397,11 +397,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_EJ168) { xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_BROKEN_STREAMS; + xhci->quirks |= XHCI_NO_RESET_DEVICE; } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_EJ188) { xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_BROKEN_STREAMS; + xhci->quirks |= XHCI_NO_RESET_DEVICE; }
if (pdev->vendor == PCI_VENDOR_ID_RENESAS && diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index efdf4c228b8c..d890a97e0682 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3692,6 +3692,8 @@ void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci, xhci->num_active_eps); }
+static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev); + /* * This submits a Reset Device Command, which will set the device state to 0, * set the device address to 0, and disable all the endpoints except the default @@ -3762,6 +3764,23 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd, SLOT_STATE_DISABLED) return 0;
+ if (xhci->quirks & XHCI_NO_RESET_DEVICE) { + /* + * Obtaining a new device slot to inform the xHCI host that + * the USB device has been reset. + */ + ret = xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); + if (!ret) { + ret = xhci_alloc_dev(hcd, udev); + if (ret == 1) + ret = 0; + else + ret = -EINVAL; + } + return ret; + } + trace_xhci_discover_or_reset_device(slot_ctx);
xhci_dbg(xhci, "Resetting device with slot ID %u\n", slot_id); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index ebd0afd59a60..1272d725270a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1628,6 +1628,7 @@ struct xhci_hcd { #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) #define XHCI_ZHAOXIN_HOST BIT_ULL(46) #define XHCI_WRITE_64_HI_LO BIT_ULL(47) +#define XHCI_NO_RESET_DEVICE BIT_ULL(48)
unsigned int num_active_eps; unsigned int limit_active_eps;
linux-stable-mirror@lists.linaro.org