When we fail to refill the receive buffers, we schedule a delayed worker to retry later. However, this worker creates some concurrency issues. For example, when the worker runs concurrently with virtnet_xdp_set, both need to temporarily disable queue's NAPI before enabling again. Without proper synchronization, a deadlock can happen when napi_disable() is called on an already disabled NAPI. That napi_disable() call will be stuck and so will the subsequent napi_enable() call.
To simplify the logic and avoid further problems, we will instead retry refilling in the next NAPI poll.
Fixes: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") Reported-by: Paolo Abeni pabeni@redhat.com Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/drv-hw-dbg/results/400961/3-x... Cc: stable@vger.kernel.org Suggested-by: Xuan Zhuo xuanzhuo@linux.alibaba.com Signed-off-by: Bui Quang Minh minhquangbui99@gmail.com --- drivers/net/virtio_net.c | 48 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1bb3aeca66c6..f986abf0c236 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3046,16 +3046,16 @@ static int virtnet_receive(struct receive_queue *rq, int budget, else packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
+ u64_stats_set(&stats.packets, packets); if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { - if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { - spin_lock(&vi->refill_lock); - if (vi->refill_enabled) - schedule_delayed_work(&vi->refill, 0); - spin_unlock(&vi->refill_lock); - } + if (!try_fill_recv(vi, rq, GFP_ATOMIC)) + /* We need to retry refilling in the next NAPI poll so + * we must return budget to make sure the NAPI is + * repolled. + */ + packets = budget; }
- u64_stats_set(&stats.packets, packets); u64_stats_update_begin(&rq->stats.syncp); for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { size_t offset = virtnet_rq_stats_desc[i].offset; @@ -3230,9 +3230,10 @@ static int virtnet_open(struct net_device *dev)
for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) - /* Make sure we have some buffers: if oom use wq. */ - if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); + /* Pre-fill rq agressively, to make sure we are ready to + * get packets immediately. + */ + try_fill_recv(vi, &vi->rq[i], GFP_KERNEL);
err = virtnet_enable_queue_pair(vi, i); if (err < 0) @@ -3472,16 +3473,15 @@ static void __virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq, bool refill) { - bool running = netif_running(vi->dev); - bool schedule_refill = false; + if (netif_running(vi->dev)) { + /* Pre-fill rq agressively, to make sure we are ready to get + * packets immediately. + */ + if (refill) + try_fill_recv(vi, rq, GFP_KERNEL);
- if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) - schedule_refill = true; - if (running) virtnet_napi_enable(rq); - - if (schedule_refill) - schedule_delayed_work(&vi->refill, 0); + } }
static void virtnet_rx_resume_all(struct virtnet_info *vi) @@ -3829,11 +3829,13 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) } succ: vi->curr_queue_pairs = queue_pairs; - /* virtnet_open() will refill when device is going to up. */ - spin_lock_bh(&vi->refill_lock); - if (dev->flags & IFF_UP && vi->refill_enabled) - schedule_delayed_work(&vi->refill, 0); - spin_unlock_bh(&vi->refill_lock); + if (dev->flags & IFF_UP) { + local_bh_disable(); + for (int i = 0; i < vi->curr_queue_pairs; ++i) + virtqueue_napi_schedule(&vi->rq[i].napi, vi->rq[i].vq); + + local_bh_enable(); + }
return 0; }
On Tue, Jan 06, 2026 at 10:04:36PM +0700, Bui Quang Minh wrote:
When we fail to refill the receive buffers, we schedule a delayed worker to retry later. However, this worker creates some concurrency issues. For example, when the worker runs concurrently with virtnet_xdp_set, both need to temporarily disable queue's NAPI before enabling again. Without proper synchronization, a deadlock can happen when napi_disable() is called on an already disabled NAPI. That napi_disable() call will be stuck and so will the subsequent napi_enable() call.
To simplify the logic and avoid further problems, we will instead retry refilling in the next NAPI poll.
Fixes: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") Reported-by: Paolo Abeni pabeni@redhat.com Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/drv-hw-dbg/results/400961/3-x... Cc: stable@vger.kernel.org Suggested-by: Xuan Zhuo xuanzhuo@linux.alibaba.com Signed-off-by: Bui Quang Minh minhquangbui99@gmail.com
Acked-by: Michael S. Tsirkin mst@redhat.com
and CC stable I think. Can you do that pls?
drivers/net/virtio_net.c | 48 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1bb3aeca66c6..f986abf0c236 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3046,16 +3046,16 @@ static int virtnet_receive(struct receive_queue *rq, int budget, else packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
- u64_stats_set(&stats.packets, packets); if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {spin_lock(&vi->refill_lock);if (vi->refill_enabled)schedule_delayed_work(&vi->refill, 0);spin_unlock(&vi->refill_lock);}
if (!try_fill_recv(vi, rq, GFP_ATOMIC))/* We need to retry refilling in the next NAPI poll so* we must return budget to make sure the NAPI is* repolled.*/ }packets = budget;
- u64_stats_set(&stats.packets, packets); u64_stats_update_begin(&rq->stats.syncp); for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { size_t offset = virtnet_rq_stats_desc[i].offset;
@@ -3230,9 +3230,10 @@ static int virtnet_open(struct net_device *dev) for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if oom use wq. */if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))schedule_delayed_work(&vi->refill, 0);
/* Pre-fill rq agressively, to make sure we are ready to* get packets immediately.*/try_fill_recv(vi, &vi->rq[i], GFP_KERNEL);err = virtnet_enable_queue_pair(vi, i); if (err < 0) @@ -3472,16 +3473,15 @@ static void __virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq, bool refill) {
- bool running = netif_running(vi->dev);
- bool schedule_refill = false;
- if (netif_running(vi->dev)) {
/* Pre-fill rq agressively, to make sure we are ready to get* packets immediately.*/if (refill)try_fill_recv(vi, rq, GFP_KERNEL);
- if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
schedule_refill = true;- if (running) virtnet_napi_enable(rq);
- if (schedule_refill)
schedule_delayed_work(&vi->refill, 0);
- }
} static void virtnet_rx_resume_all(struct virtnet_info *vi) @@ -3829,11 +3829,13 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) } succ: vi->curr_queue_pairs = queue_pairs;
- /* virtnet_open() will refill when device is going to up. */
- spin_lock_bh(&vi->refill_lock);
- if (dev->flags & IFF_UP && vi->refill_enabled)
schedule_delayed_work(&vi->refill, 0);- spin_unlock_bh(&vi->refill_lock);
- if (dev->flags & IFF_UP) {
local_bh_disable();for (int i = 0; i < vi->curr_queue_pairs; ++i)virtqueue_napi_schedule(&vi->rq[i].napi, vi->rq[i].vq);local_bh_enable();- }
return 0; } -- 2.43.0
On 1/6/26 22:29, Michael S. Tsirkin wrote:
On Tue, Jan 06, 2026 at 10:04:36PM +0700, Bui Quang Minh wrote:
When we fail to refill the receive buffers, we schedule a delayed worker to retry later. However, this worker creates some concurrency issues. For example, when the worker runs concurrently with virtnet_xdp_set, both need to temporarily disable queue's NAPI before enabling again. Without proper synchronization, a deadlock can happen when napi_disable() is called on an already disabled NAPI. That napi_disable() call will be stuck and so will the subsequent napi_enable() call.
To simplify the logic and avoid further problems, we will instead retry refilling in the next NAPI poll.
Fixes: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") Reported-by: Paolo Abeni pabeni@redhat.com Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/drv-hw-dbg/results/400961/3-x... Cc: stable@vger.kernel.org Suggested-by: Xuan Zhuo xuanzhuo@linux.alibaba.com Signed-off-by: Bui Quang Minh minhquangbui99@gmail.com
Acked-by: Michael S. Tsirkin mst@redhat.com
and CC stable I think. Can you do that pls?
I've added Cc stable already.
Thanks for you review.
drivers/net/virtio_net.c | 48 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1bb3aeca66c6..f986abf0c236 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3046,16 +3046,16 @@ static int virtnet_receive(struct receive_queue *rq, int budget, else packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
- u64_stats_set(&stats.packets, packets); if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {spin_lock(&vi->refill_lock);if (vi->refill_enabled)schedule_delayed_work(&vi->refill, 0);spin_unlock(&vi->refill_lock);}
if (!try_fill_recv(vi, rq, GFP_ATOMIC))/* We need to retry refilling in the next NAPI poll so* we must return budget to make sure the NAPI is* repolled.*/ }packets = budget;
- u64_stats_set(&stats.packets, packets); u64_stats_update_begin(&rq->stats.syncp); for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { size_t offset = virtnet_rq_stats_desc[i].offset;
@@ -3230,9 +3230,10 @@ static int virtnet_open(struct net_device *dev) for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if oom use wq. */if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))schedule_delayed_work(&vi->refill, 0);
/* Pre-fill rq agressively, to make sure we are ready to* get packets immediately.*/try_fill_recv(vi, &vi->rq[i], GFP_KERNEL);err = virtnet_enable_queue_pair(vi, i); if (err < 0) @@ -3472,16 +3473,15 @@ static void __virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq, bool refill) {
- bool running = netif_running(vi->dev);
- bool schedule_refill = false;
- if (netif_running(vi->dev)) {
/* Pre-fill rq agressively, to make sure we are ready to get* packets immediately.*/if (refill)try_fill_recv(vi, rq, GFP_KERNEL);
- if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
schedule_refill = true;- if (running) virtnet_napi_enable(rq);
- if (schedule_refill)
schedule_delayed_work(&vi->refill, 0);
- } }
static void virtnet_rx_resume_all(struct virtnet_info *vi) @@ -3829,11 +3829,13 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) } succ: vi->curr_queue_pairs = queue_pairs;
- /* virtnet_open() will refill when device is going to up. */
- spin_lock_bh(&vi->refill_lock);
- if (dev->flags & IFF_UP && vi->refill_enabled)
schedule_delayed_work(&vi->refill, 0);- spin_unlock_bh(&vi->refill_lock);
- if (dev->flags & IFF_UP) {
local_bh_disable();for (int i = 0; i < vi->curr_queue_pairs; ++i)virtqueue_napi_schedule(&vi->rq[i].napi, vi->rq[i].vq);local_bh_enable();- }
return 0; } -- 2.43.0
linux-stable-mirror@lists.linaro.org