We hit a deadlock on our device, which runs 5.15.y, when resuming.
After applying this patch, picked from mainline, the issue is resolved.
Please backport it to 6.6.y as well.
Rafael J. Wysocki (1):
PM: sleep: Restore asynchronous device resume optimization
drivers/base/power/main.c | 117 +++++++++++++++++++++-----------------
include/linux/pm.h | 1 +
2 files changed, 65 insertions(+), 53 deletions(-)
--
2.18.0
The Qualcomm serial console implementation is broken and can lose
characters when the serial port is also used for tty output.
Specifically, the console code only waits for the current tx command to
complete when all data has already been written to the fifo. When there
are on-going longer transfers this often means that console output is
lost when the console code inadvertently "hijacks" the current tx
command instead of starting a new one.
This can, for example, be observed during boot when console output that
should have been interspersed with init output is truncated:
[ 9.462317] qcom-snps-eusb2-hsphy fde000.phy: Registered Qcom-eUSB2 phy
[ OK ] Found device KBG50ZNS256G KIOXIA Wi[ 9.471743ndows.
[ 9.539915] xhci-hcd xhci-hcd.0.auto: xHCI Host Controller
Add a new state variable to track how much data has been written to the
fifo and use it to determine when the fifo and shift register are both
empty. This is needed since there is currently no other known way to
determine when the shift register is empty.
This in turn allows the console code to interrupt long transfers without
losing data.
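A minimal sketch of the condition the new counter makes checkable (illustrative only: the helper below is hypothetical, while the real driver polls SE_GENI_M_GP_LENGTH via qcom_geni_serial_poll_bitfield() as in the diff below):

#include <linux/types.h>

/* Sketch: the hardware-consumed count for the current tx command
 * (GP_LENGTH) only reaches tx_queued once both the fifo and the
 * shift register have been emptied. */
static bool geni_tx_fully_drained(unsigned int hw_consumed, unsigned int tx_queued)
{
	return hw_consumed == tx_queued;
}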
Note that the oops-in-progress case is similarly broken as it does not
cancel any active command and also waits for the wrong status flag when
attempting to drain the fifo (TX_FIFO_NOT_EMPTY_EN is only set when
cancelling a command leaves data in the fifo).
Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Fixes: a1fee899e5be ("tty: serial: qcom_geni_serial: Fix softlock")
Fixes: 9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get the port lock")
Cc: stable(a)vger.kernel.org # 4.17
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/tty/serial/qcom_geni_serial.c | 48 ++++++++++++++-------------
1 file changed, 25 insertions(+), 23 deletions(-)
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 7029c39a9a21..be620c5703f5 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -131,6 +131,7 @@ struct qcom_geni_serial_port {
bool brk;
unsigned int tx_remaining;
+ unsigned int tx_queued;
int wakeup_irq;
bool rx_tx_swap;
bool cts_rts_swap;
@@ -144,6 +145,8 @@ static const struct uart_ops qcom_geni_uart_pops;
static struct uart_driver qcom_geni_console_driver;
static struct uart_driver qcom_geni_uart_driver;
+static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport);
+
static inline struct qcom_geni_serial_port *to_dev_port(struct uart_port *uport)
{
return container_of(uport, struct qcom_geni_serial_port, uport);
@@ -308,6 +311,17 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
return qcom_geni_serial_poll_bitfield(uport, offset, field, set ? field : 0);
}
+static void qcom_geni_serial_drain_fifo(struct uart_port *uport)
+{
+ struct qcom_geni_serial_port *port = to_dev_port(uport);
+
+ if (!qcom_geni_serial_main_active(uport))
+ return;
+
+ qcom_geni_serial_poll_bitfield(uport, SE_GENI_M_GP_LENGTH, GP_LENGTH,
+ port->tx_queued);
+}
+
static void qcom_geni_serial_setup_tx(struct uart_port *uport, u32 xmit_size)
{
u32 m_cmd;
@@ -476,7 +490,6 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s,
struct qcom_geni_serial_port *port;
bool locked = true;
unsigned long flags;
- u32 geni_status;
WARN_ON(co->index < 0 || co->index >= GENI_UART_CONS_PORTS);
@@ -490,34 +503,20 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s,
else
uart_port_lock_irqsave(uport, &flags);
- geni_status = readl(uport->membase + SE_GENI_STATUS);
+ if (qcom_geni_serial_main_active(uport)) {
+ /* Wait for completion or drain FIFO */
+ if (!locked || port->tx_remaining == 0)
+ qcom_geni_serial_poll_tx_done(uport);
+ else
+ qcom_geni_serial_drain_fifo(uport);
- if (!locked) {
- /*
- * We can only get here if an oops is in progress then we were
- * unable to get the lock. This means we can't safely access
- * our state variables like tx_remaining. About the best we
- * can do is wait for the FIFO to be empty before we start our
- * transfer, so we'll do that.
- */
- qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
- M_TX_FIFO_NOT_EMPTY_EN, false);
- } else if ((geni_status & M_GENI_CMD_ACTIVE) && !port->tx_remaining) {
- /*
- * It seems we can't interrupt existing transfers if all data
- * has been sent, in which case we need to look for done first.
- */
- qcom_geni_serial_poll_tx_done(uport);
+ qcom_geni_serial_cancel_tx_cmd(uport);
}
__qcom_geni_serial_console_write(uport, s, count);
-
- if (locked) {
- if (port->tx_remaining)
- qcom_geni_serial_setup_tx(uport, port->tx_remaining);
+ if (locked)
uart_port_unlock_irqrestore(uport, flags);
- }
}
static void handle_rx_console(struct uart_port *uport, u32 bytes, bool drop)
@@ -698,6 +697,7 @@ static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
port->tx_remaining = 0;
+ port->tx_queued = 0;
}
static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop)
@@ -924,6 +924,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
if (!port->tx_remaining) {
qcom_geni_serial_setup_tx(uport, pending);
port->tx_remaining = pending;
+ port->tx_queued = 0;
irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
if (!(irq_en & M_TX_FIFO_WATERMARK_EN))
@@ -932,6 +933,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
}
qcom_geni_serial_send_chunk_fifo(uport, chunk);
+ port->tx_queued += chunk;
/*
* The tx fifo watermark is level triggered and latched. Though we had
--
2.44.2
This is an automatically generated email to let you know that the following patch was queued:
Subject: media: qcom: camss: Fix ordering of pm_runtime_enable
Author: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Date: Mon Jul 29 13:42:03 2024 +0100
pm_runtime_enable() should happen prior to vfe_get() since vfe_get() calls
pm_runtime_resume_and_get().
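As a general illustration of the required ordering (a minimal sketch under the usual runtime-PM rules, not the actual camss probe; the function name is made up):

#include <linux/pm_runtime.h>

/* Sketch: enable runtime PM before anything that may resume the device,
 * and disable it again on the error path. */
static int example_probe_order(struct device *dev)
{
	int ret;

	pm_runtime_enable(dev);			/* must happen first */

	ret = pm_runtime_resume_and_get(dev);	/* what vfe_get() ends up calling */
	if (ret < 0)
		goto err_disable;

	/* ... set up and register the rest of the driver ... */

	pm_runtime_put(dev);
	return 0;

err_disable:
	pm_runtime_disable(dev);
	return ret;
}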
This is a basic race condition that doesn't show up for most users and so is
not widely reported. If you blacklist qcom-camss in modules.d and then
modprobe the module post-boot, it is possible to trigger this error reliably.
The kernel log for this error looks like this:
qcom-camss ac5a000.camss: Failed to power up pipeline: -13
Fixes: 02afa816dbbf ("media: camss: Add basic runtime PM support")
Reported-by: Johan Hovold <johan+linaro(a)kernel.org>
Closes: https://lore.kernel.org/lkml/ZoVNHOTI0PKMNt4_@hovoldconsulting.com/
Tested-by: Johan Hovold <johan+linaro(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Reviewed-by: Konrad Dybcio <konradybcio(a)kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/platform/qcom/camss/camss.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
---
diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c
index 51b1d3550421..d64985ca6e88 100644
--- a/drivers/media/platform/qcom/camss/camss.c
+++ b/drivers/media/platform/qcom/camss/camss.c
@@ -2283,6 +2283,8 @@ static int camss_probe(struct platform_device *pdev)
v4l2_async_nf_init(&camss->notifier, &camss->v4l2_dev);
+ pm_runtime_enable(dev);
+
num_subdevs = camss_of_parse_ports(camss);
if (num_subdevs < 0) {
ret = num_subdevs;
@@ -2323,8 +2325,6 @@ static int camss_probe(struct platform_device *pdev)
}
}
- pm_runtime_enable(dev);
-
return 0;
err_register_subdevs:
@@ -2332,6 +2332,7 @@ err_register_subdevs:
err_v4l2_device_unregister:
v4l2_device_unregister(&camss->v4l2_dev);
v4l2_async_nf_cleanup(&camss->notifier);
+ pm_runtime_disable(dev);
err_genpd_cleanup:
camss_genpd_cleanup(camss);
This is an automatically generated email to let you know that the following patch was queued:
Subject: media: qcom: camss: Remove use_count guard in stop_streaming
Author: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Date: Mon Jul 29 13:42:02 2024 +0100
The use_count check was introduced so that multiple concurrent Raw Data
Interfaces (RDIs) could be driven by different virtual channels (VCs) on the
CSIPHY input driving the video pipeline.
This is an invalid use of use_count though, as use_count pertains to the
number of times a video entity has been opened by user-space, not the number
of active streams.
If use_count and the stream-on count don't agree then stop_streaming() will
break, as is currently the case, which has become apparent when using CAMSS
with libcamera's released softisp 0.3.
The use of use_count like this is a bit hacky and right now breaks regular
usage of CAMSS for the single-stream case. Stopping qcam results in the splat
below, after which streaming cannot be started again and any attempt to do so
fails with -EBUSY.
[ 1265.509831] WARNING: CPU: 5 PID: 919 at drivers/media/common/videobuf2/videobuf2-core.c:2183 __vb2_queue_cancel+0x230/0x2c8 [videobuf2_common]
...
[ 1265.510630] Call trace:
[ 1265.510636] __vb2_queue_cancel+0x230/0x2c8 [videobuf2_common]
[ 1265.510648] vb2_core_streamoff+0x24/0xcc [videobuf2_common]
[ 1265.510660] vb2_ioctl_streamoff+0x5c/0xa8 [videobuf2_v4l2]
[ 1265.510673] v4l_streamoff+0x24/0x30 [videodev]
[ 1265.510707] __video_do_ioctl+0x190/0x3f4 [videodev]
[ 1265.510732] video_usercopy+0x304/0x8c4 [videodev]
[ 1265.510757] video_ioctl2+0x18/0x34 [videodev]
[ 1265.510782] v4l2_ioctl+0x40/0x60 [videodev]
...
[ 1265.510944] videobuf2_common: driver bug: stop_streaming operation is leaving buffer 0 in active state
[ 1265.511175] videobuf2_common: driver bug: stop_streaming operation is leaving buffer 1 in active state
[ 1265.511398] videobuf2_common: driver bug: stop_streaming operation is leaving buffer 2 in active st
One CAMSS-specific way to handle multiple VCs on the same RDI might be (see the sketch after this list):
- Reference count each pipeline enable for CSIPHY, CSID, VFE and RDIx.
- The video buffers are already associated with msm_vfeN_rdiX so
release video buffers when told to do so by stop_streaming.
- Only release the power-domains for the CSIPHY, CSID and VFE when
their internal refcounts drop.
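A hedged sketch of what such per-block reference counting could look like (purely illustrative; the structure and helpers below are hypothetical and not existing CAMSS code):

#include <linux/atomic.h>

/* Hypothetical per-block refcount: power a block up for its first user
 * and down again only when its last user goes away. */
struct pipeline_block {
	atomic_t enable_count;
	int (*power_on)(struct pipeline_block *blk);
	void (*power_off)(struct pipeline_block *blk);
};

static int pipeline_block_get(struct pipeline_block *blk)
{
	int ret = 0;

	if (atomic_inc_return(&blk->enable_count) == 1) {
		ret = blk->power_on(blk);	/* first stream powers it up */
		if (ret)
			atomic_dec(&blk->enable_count);
	}
	return ret;
}

static void pipeline_block_put(struct pipeline_block *blk)
{
	if (atomic_dec_and_test(&blk->enable_count))
		blk->power_off(blk);		/* last stream powers it down */
}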
Either way, refusing to release video buffers based on use_count is
erroneous and should be reverted. The silicon-enabling code for selecting
VCs is perfectly fine. It's a "known missing feature" that concurrent VCs
won't work with CAMSS right now.
Initial testing with this code didn't show an error, but SoftISP and "real"
usage with Google Hangouts break the upstream code pretty quickly, so we need
to do a partial revert and take another pass at VCs.
This commit partially reverts commit 89013969e232 ("media: camss: sm8250:
Pipeline starting and stopping for multiple virtual channels")
Fixes: 89013969e232 ("media: camss: sm8250: Pipeline starting and stopping for multiple virtual channels")
Reported-by: Johan Hovold <johan+linaro(a)kernel.org>
Closes: https://lore.kernel.org/lkml/ZoVNHOTI0PKMNt4_@hovoldconsulting.com/
Tested-by: Johan Hovold <johan+linaro(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
drivers/media/platform/qcom/camss/camss-video.c | 6 ------
1 file changed, 6 deletions(-)
---
diff --git a/drivers/media/platform/qcom/camss/camss-video.c b/drivers/media/platform/qcom/camss/camss-video.c
index cd72feca618c..3b8fc31d957c 100644
--- a/drivers/media/platform/qcom/camss/camss-video.c
+++ b/drivers/media/platform/qcom/camss/camss-video.c
@@ -297,12 +297,6 @@ static void video_stop_streaming(struct vb2_queue *q)
ret = v4l2_subdev_call(subdev, video, s_stream, 0);
- if (entity->use_count > 1) {
- /* Don't stop if other instances of the pipeline are still running */
- dev_dbg(video->camss->dev, "Video pipeline still used, don't stop streaming.\n");
- return;
- }
-
if (ret) {
dev_err(video->camss->dev, "Video pipeline stop failed: %d\n", ret);
return;
From: Mathieu Tortuyaux <mtortuyaux(a)microsoft.com>
[ Upstream commit 89add40066f9ed9abe5f7f886fe5789ff7e0c50e ]
Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb
for GSO packets.
The function already checks that a checksum requested with
VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. But for GSO packets
this might not hold for segs after segmentation.
Syzkaller demonstrated that this warning in skb_checksum_help can be reached
offset = skb_checksum_start_offset(skb);
ret = -EINVAL;
if (WARN_ON_ONCE(offset >= skb_headlen(skb)))
By injecting a TSO packet:
WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0
ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774
ip_finish_output_gso net/ipv4/ip_output.c:279 [inline]
__ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301
iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4850 [inline]
netdev_start_xmit include/linux/netdevice.h:4864 [inline]
xmit_one net/core/dev.c:3595 [inline]
dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611
__dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261
packet_snd net/packet/af_packet.c:3073 [inline]
The geometry of the bad input packet at tcp_gso_segment:
[ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0
[ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244
[ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0))
[ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536
ip_summed=3 complete_sw=0 valid=0 level=0)
Mitigate with stricter input validation.
csum_offset: for GSO packets, deduce the correct value from gso_type.
This is already done for USO. Extend it to TSO. Let UFO be:
udp[46]_ufo_fragment ignores these fields and always computes the
checksum in software.
csum_start: finding the real offset requires parsing to the transport
header. Do not add a parser, use existing segmentation parsing. Thanks
to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded.
Again test both TSO and USO. Do not test UFO for the above reason, and
do not test UDP tunnel offload.
GSO packets are almost always CHECKSUM_PARTIAL. USO packets may be
CHECKSUM_NONE since commit 10154db ("udp: Allow GSO transmit
from devices with no checksum offload"), but even then these fields
are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So there is no
need to test for ip_summed == CHECKSUM_PARTIAL first.
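As an illustration of the csum_offset deduction described above (a sketch only, assuming the usual skb_shared_info gso_type flags; this is not the exact upstream hunk):

#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include <linux/udp.h>

/* Sketch: for TSO/USO the checksum offset is implied by the gso type,
 * so any other csum_offset can be rejected up front. */
static bool gso_csum_offset_valid(const struct sk_buff *skb)
{
	unsigned int gso_type = skb_shinfo(skb)->gso_type;

	if (gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
		return skb->csum_offset == offsetof(struct tcphdr, check);
	if (gso_type & SKB_GSO_UDP_L4)
		return skb->csum_offset == offsetof(struct udphdr, check);

	return true;	/* UFO: these fields are ignored, nothing to check */
}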
This revises an existing fix mentioned in the Fixes tag, which broke
small packets with GSO offload, as detected by kselftests.
Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871
Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org
Fixes: e269d79 ("net: missing check virtio")
Cc: stable(a)vger.kernel.org
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gma…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Mathieu Tortuyaux <mtortuyaux(a)microsoft.com>
---
Hi,
This patch fixes network failures on OpenStack VMs running with Kernel
5.15.165.
In 5.15.165, the commit "net: missing check virtio" breaks networking
on VMs that use virtio under some conditions.
I slightly adapted the patch so that it applies to this branch (5.15.y).
Once patched and compiled, it has been successfully tested on Flatcar CI
with kernel 5.15.165.
NOTE: This patch has already been backported to other stable branches
(like 6.6.y)
Thanks,
Mathieu - @tormath1
include/linux/virtio_net.h | 17 ++++++-----------
net/ipv4/tcp_offload.c | 3 +++
net/ipv4/udp_offload.c | 4 ++++
3 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 29b19d0a324c..d9410d97158d 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -51,7 +51,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
unsigned int thlen = 0;
unsigned int p_off = 0;
unsigned int ip_proto;
- u64 ret, remainder, gso_size;
if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
@@ -88,16 +87,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16));
- if (hdr->gso_size) {
- gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
- ret = div64_u64_rem(skb->len, gso_size, &remainder);
- if (!(ret && (hdr->gso_size > needed) &&
- ((remainder > needed) || (remainder == 0)))) {
- return -EINVAL;
- }
- skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG;
- }
-
if (!pskb_may_pull(skb, needed))
return -EINVAL;
@@ -163,6 +152,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
if (gso_size == GSO_BY_FRAGS)
return -EINVAL;
+ if ((gso_type & SKB_GSO_TCPV4) ||
+ (gso_type & SKB_GSO_TCPV6)) {
+ if (skb->csum_offset != offsetof(struct tcphdr, check))
+ return -EINVAL;
+ }
+
/* Too small packets are not really GSO ones. */
if (skb->len - nh_off > gso_size) {
shinfo->gso_size = gso_size;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fc61cd3fea65..76684cbd63a4 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -71,6 +71,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (thlen < sizeof(*th))
goto out;
+ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
+ goto out;
+
if (!pskb_may_pull(skb, thlen))
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index c61268849948..61773a26fb34 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -279,6 +279,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (unlikely(skb_checksum_start(gso_skb) !=
+ skb_transport_header(gso_skb)))
+ return ERR_PTR(-EINVAL);
+
skb_pull(gso_skb, sizeof(*uh));
/* clear destructor to avoid skb_segment assigning it to tail */
--
2.44.2
devm_kasprintf() can return a NULL pointer on failure, but the returned
value is not checked. Add the missing check of the returned value.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 0bb4e9187ea4 ("mt76: mt7615: fix hwmon temp sensor mem use-after-free")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/net/wireless/mediatek/mt76/mt7615/init.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index f7722f67db57..0b9ebdcda221 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -56,6 +56,9 @@ int mt7615_thermal_init(struct mt7615_dev *dev)
name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7615_%s",
wiphy_name(wiphy));
+ if (!name)
+ return -ENOMEM;
+
hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, dev,
mt7615_hwmon_groups);
return PTR_ERR_OR_ZERO(hwmon);
--
2.25.1
From: Breno Leitao <leitao(a)debian.org>
[ Upstream commit f8321fa75102246d7415a6af441872f6637c93ab ]
After the commit bdacf3e34945 ("net: Use nested-BH locking for
napi_alloc_cache.") was merged, the following warning began to appear:
WARNING: CPU: 5 PID: 1 at net/core/skbuff.c:1451 napi_skb_cache_put+0x82/0x4b0
__warn+0x12f/0x340
napi_skb_cache_put+0x82/0x4b0
napi_skb_cache_put+0x82/0x4b0
report_bug+0x165/0x370
handle_bug+0x3d/0x80
exc_invalid_op+0x1a/0x50
asm_exc_invalid_op+0x1a/0x20
__free_old_xmit+0x1c8/0x510
napi_skb_cache_put+0x82/0x4b0
__free_old_xmit+0x1c8/0x510
__free_old_xmit+0x1c8/0x510
__pfx___free_old_xmit+0x10/0x10
The issue arises because virtio is assuming it's running in NAPI context
even when it's not, such as in the netpoll case.
To resolve this, modify virtnet_poll_tx() to only set NAPI when budget
is available. Same for virtnet_poll_cleantx(), which always assumed that
it was in a NAPI context.
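Put differently (a minimal illustrative helper, not part of the patch itself):

#include <linux/types.h>

/* Sketch: netpoll invokes the poll callback with budget == 0, so the
 * per-CPU NAPI skb cache may only be used when budget is non-zero;
 * this is what the !!budget argument in the diff below encodes. */
static inline bool poll_runs_in_napi(int budget)
{
	return budget != 0;
}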
Fixes: df133f3f9625 ("virtio_net: bulk free tx skbs")
Suggested-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Reviewed-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Jason Wang <jasowang(a)redhat.com>
Reviewed-by: Heng Qi <hengqi(a)linux.alibaba.com>
Link: https://patch.msgid.link/20240712115325.54175-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
[Shivani: Modified to apply on v4.19.y-v5.10.y]
Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
---
drivers/net/virtio_net.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f7ed99561..99dea89b2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1497,7 +1497,7 @@ static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
return false;
}
-static void virtnet_poll_cleantx(struct receive_queue *rq)
+static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int index = vq2rxq(rq->vq);
@@ -1508,7 +1508,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
return;
if (__netif_tx_trylock(txq)) {
- free_old_xmit_skbs(sq, true);
+ free_old_xmit_skbs(sq, !!budget);
__netif_tx_unlock(txq);
}
@@ -1525,7 +1525,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
unsigned int received;
unsigned int xdp_xmit = 0;
- virtnet_poll_cleantx(rq);
+ virtnet_poll_cleantx(rq, budget);
received = virtnet_receive(rq, budget, &xdp_xmit);
@@ -1598,7 +1598,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
- free_old_xmit_skbs(sq, true);
+ free_old_xmit_skbs(sq, !!budget);
opaque = virtqueue_enable_cb_prepare(sq->vq);
--
2.39.4
Streams should flush their TRB cache, re-read TRBs, and start executing
TRBs from the beginning of the new dequeue pointer after a 'Set TR Dequeue
Pointer' command.
Cadence controllers may fail to start from the beginning of the dequeue
TRB because they don't clear the opaque 'RsvdO' field of the stream context
during the 'Set TR Dequeue' command. This stream context area is where the xHC
stores information about the last partially executed TD when a stream
is stopped. The xHC uses this information to resume the transfer where it left
off mid-TD when the stream is restarted.
This patch fixes the issue by clearing all RsvdO fields before initializing a
new stream transfer using a 'Set TR Dequeue Pointer' command.
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
Changelog:
v3:
- changed patch to patch Cadence specific
v2:
- removed restoring of EDTLA field
drivers/usb/cdns3/host.c | 4 +++-
drivers/usb/host/xhci-pci.c | 7 +++++++
drivers/usb/host/xhci-ring.c | 14 ++++++++++++++
drivers/usb/host/xhci.h | 1 +
4 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c
index ceca4d839dfd..7ba760ee62e3 100644
--- a/drivers/usb/cdns3/host.c
+++ b/drivers/usb/cdns3/host.c
@@ -62,7 +62,9 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = {
.resume_quirk = xhci_cdns3_resume_quirk,
};
-static const struct xhci_plat_priv xhci_plat_cdnsp_xhci;
+static const struct xhci_plat_priv xhci_plat_cdnsp_xhci = {
+ .quirks = XHCI_CDNS_SCTX_QUIRK,
+};
static int __cdns_host_init(struct cdns *cdns)
{
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index b9ae5c2a2527..9199dbfcea07 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -74,6 +74,9 @@
#define PCI_DEVICE_ID_ASMEDIA_2142_XHCI 0x2142
#define PCI_DEVICE_ID_ASMEDIA_3242_XHCI 0x3242
+#define PCI_DEVICE_ID_CADENCE 0x17CD
+#define PCI_DEVICE_ID_CADENCE_SSP 0x0200
+
static const char hcd_name[] = "xhci_hcd";
static struct hc_driver __read_mostly xhci_pci_hc_driver;
@@ -532,6 +535,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH;
}
+ if (pdev->vendor == PCI_DEVICE_ID_CADENCE &&
+ pdev->device == PCI_DEVICE_ID_CADENCE_SSP)
+ xhci->quirks |= XHCI_CDNS_SCTX_QUIRK;
+
/* xHC spec requires PCI devices to support D3hot and D3cold */
if (xhci->hci_version >= 0x120)
xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 1dde53f6eb31..a1ad2658c0c7 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1386,6 +1386,20 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
struct xhci_stream_ctx *ctx =
&ep->stream_info->stream_ctx_array[stream_id];
deq = le64_to_cpu(ctx->stream_ring) & SCTX_DEQ_MASK;
+
+ /*
+ * Cadence xHCI controllers store some endpoint state
+ * information within Rsvd0 fields of Stream Endpoint
+ * context. This field is not cleared during Set TR
+ * Dequeue Pointer command which causes XDMA to skip
+ * over transfer ring and leads to data loss on stream
+ * pipe.
+ * To fix this issue driver must clear Rsvd0 field.
+ */
+ if (xhci->quirks & XHCI_CDNS_SCTX_QUIRK) {
+ ctx->reserved[0] = 0;
+ ctx->reserved[1] = 0;
+ }
} else {
deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK;
}
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 101e74c9060f..4cbd58eed214 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1907,6 +1907,7 @@ struct xhci_hcd {
#define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45)
#define XHCI_ZHAOXIN_HOST BIT_ULL(46)
#define XHCI_WRITE_64_HI_LO BIT_ULL(47)
+#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48)
unsigned int num_active_eps;
unsigned int limit_active_eps;
--
2.43.0
In the unlikely case that waiting for the firmware to signal its booted status
times out in the fast reset path, the cache lines for the entire FW VM address
space must be flushed before reloading the regions; otherwise stale values
eventually lead to a scheduler job timeout.
Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block")
Cc: stable(a)vger.kernel.org
Signed-off-by: Adrián Larumbe <adrian.larumbe(a)collabora.com>
Acked-by: Liviu Dudau <liviu.dudau(a)arm.com>
---
drivers/gpu/drm/panthor/panthor_fw.c | 8 +++++++-
drivers/gpu/drm/panthor/panthor_mmu.c | 21 ++++++++++++++++++---
drivers/gpu/drm/panthor/panthor_mmu.h | 1 +
3 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
index 857f3f11258a..ef232c0c2049 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -1089,6 +1089,12 @@ int panthor_fw_post_reset(struct panthor_device *ptdev)
panthor_fw_stop(ptdev);
ptdev->fw->fast_reset = false;
drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
+
+ ret = panthor_vm_flush_all(ptdev->fw->vm);
+ if (ret) {
+ drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)");
+ return ret;
+ }
}
/* Reload all sections, including RO ones. We're not supposed
@@ -1099,7 +1105,7 @@ int panthor_fw_post_reset(struct panthor_device *ptdev)
ret = panthor_fw_start(ptdev);
if (ret) {
- drm_err(&ptdev->base, "FW slow reset failed");
+ drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW )");
return ret;
}
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index d47972806d50..bbc12728437f 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -576,6 +576,12 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
if (as_nr < 0)
return 0;
+ /*
+ * If the AS number is greater than zero, then we can be sure
+ * the device is up and running, so we don't need to explicitly
+ * power it up
+ */
+
if (op != AS_COMMAND_UNLOCK)
lock_region(ptdev, as_nr, iova, size);
@@ -874,14 +880,23 @@ static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size)
if (!drm_dev_enter(&ptdev->base, &cookie))
return 0;
- /* Flush the PTs only if we're already awake */
- if (pm_runtime_active(ptdev->base.dev))
- ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT);
+ ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT);
drm_dev_exit(cookie);
return ret;
}
+/**
+ * panthor_vm_flush_all() - Flush L2 caches for the entirety of a VM's AS
+ * @vm: VM whose cache to flush
+ *
+ * Return: 0 on success, a negative error code if flush failed.
+ */
+int panthor_vm_flush_all(struct panthor_vm *vm)
+{
+ return panthor_vm_flush_range(vm, vm->base.mm_start, vm->base.mm_range);
+}
+
static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
{
struct panthor_device *ptdev = vm->ptdev;
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
index f3c1ed19f973..6788771071e3 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.h
+++ b/drivers/gpu/drm/panthor/panthor_mmu.h
@@ -31,6 +31,7 @@ panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset);
int panthor_vm_active(struct panthor_vm *vm);
void panthor_vm_idle(struct panthor_vm *vm);
int panthor_vm_as(struct panthor_vm *vm);
+int panthor_vm_flush_all(struct panthor_vm *vm);
struct panthor_heap_pool *
panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create);
--
2.46.0
On Mon, 29 Jul 2024 16:32:36 +0200, Greg Kroah-Hartman wrote:
> In the Linux kernel, the following vulnerability has been resolved:
>
> udp: Set SOCK_RCU_FREE earlier in udp_lib_get_port().
>
> [...]
>
> We had the same bug in TCP and fixed it in commit 871019b22d1b ("net:
> set SOCK_RCU_FREE before inserting socket into hashtable").
>
> Let's apply the same fix for UDP.
>
> [...]
>
> The Linux kernel CVE team has assigned CVE-2024-41041 to this issue.
>
>
> Affected and fixed versions
> ===========================
>
> Issue introduced in 4.20 with commit 6acc9b432e67 and fixed in 5.4.280 with commit 7a67c4e47626
> Issue introduced in 4.20 with commit 6acc9b432e67 and fixed in 5.10.222 with commit 9f965684c57c
These versions don't have the TCP fix backported. Please do so.
Thanks,
Siddh