Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and the hrtimer is initialized. This causes a kernel panic.

 ? page_fault_oops+0x136/0x2b0
 ? page_counter_cancel+0x2e/0x80
 ? do_user_addr_fault+0x2f2/0x640
 ? refill_obj_stock+0xc4/0x110
 ? exc_page_fault+0x71/0x160
 ? asm_exc_page_fault+0x27/0x30
 ? __mmdrop+0x10/0x180
 ? __mmdrop+0xec/0x180
 ? hrtimer_active+0xd/0x50
 hrtimer_try_to_cancel+0x2c/0xf0
 hrtimer_cancel+0x15/0x30
 napi_disable+0x65/0x90
 mana_destroy_rxq+0x4c/0x2f0
 mana_create_rxq.isra.0+0x56c/0x6d0
 ? mana_uncfg_vport+0x50/0x50
 mana_alloc_queues+0x21b/0x320
 ? skb_dequeue+0x5f/0x80

Cc: stable@vger.kernel.org
Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
---
V3 -> V2: Instead of using a napi-internal attribute, use an atomic attribute to verify that napi is initialized for a particular txq / rxq.
V2 -> V1: Addressed the comment on cleaning up napi for the queues where queue creation was successful.
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
 include/net/mana/mana.h                       |  4 +++
 2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39f56973746d..bd303c89cfa6 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct mana_port_context *apc)
 
         for (i = 0; i < apc->num_queues; i++) {
                 napi = &apc->tx_qp[i].tx_cq.napi;
-                napi_synchronize(napi);
-                napi_disable(napi);
-                netif_napi_del(napi);
-
+                if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
+                        napi_synchronize(napi);
+                        napi_disable(napi);
+                        netif_napi_del(napi);
+                        atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
+                }
                 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
                 mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context *apc,
                 txq->ndev = net;
                 txq->net_txq = netdev_get_tx_queue(net, i);
                 txq->vp_offset = apc->tx_vp_offset;
+                atomic_set(&txq->napi_initialized, 0);
                 skb_queue_head_init(&txq->pending_skbs);
 
                 memset(&spec, 0, sizeof(spec));
@@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 
                 netif_napi_add_tx(net, &cq->napi, mana_poll);
                 napi_enable(&cq->napi);
+                atomic_set(&txq->napi_initialized, 1);
 
                 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
         }
@@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 
         napi = &rxq->rx_cq.napi;
 
-        if (validate_state)
-                napi_synchronize(napi);
+        if (atomic_read(&rxq->napi_initialized)) {
 
-        napi_disable(napi);
+                if (validate_state)
+                        napi_synchronize(napi);
 
-        xdp_rxq_info_unreg(&rxq->xdp_rxq);
+                napi_disable(napi);
 
-        netif_napi_del(napi);
+                netif_napi_del(napi);
+                atomic_set(&rxq->napi_initialized, 0);
+        }
+
+        xdp_rxq_info_unreg(&rxq->xdp_rxq);
 
         mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
@@ -2199,6 +2207,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
         rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
         rxq->rxq_idx = rxq_idx;
         rxq->rxobj = INVALID_MANA_HANDLE;
+        atomic_set(&rxq->napi_initialized, 0);
 
         mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
                            &rxq->headroom);
@@ -2286,6 +2295,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
         napi_enable(&cq->napi);
 
+        atomic_set(&rxq->napi_initialized, 1);
+
         mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 out:
         if (!err)
@@ -2336,7 +2347,6 @@ static void mana_destroy_vport(struct mana_port_context *apc)
                 rxq = apc->rxqs[rxq_idx];
                 if (!rxq)
                         continue;
 
-                mana_destroy_rxq(apc, rxq, true);
                 apc->rxqs[rxq_idx] = NULL;
         }
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 7caa334f4888..be75abd63dc8 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -98,6 +98,8 @@ struct mana_txq {
 
         atomic_t pending_sends;
 
+        atomic_t napi_initialized;
+
         struct mana_stats_tx stats;
 };
 
@@ -335,6 +337,8 @@ struct mana_rxq {
         bool xdp_flush;
         int xdp_rc; /* XDP redirect return code */
 
+        atomic_t napi_initialized;
+
         struct page_pool *page_pool;
 
         /* MUST BE THE LAST MEMBER:
-----Original Message-----
From: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Sent: Thursday, August 29, 2024 4:37 AM
To: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang <haiyangz@microsoft.com>; wei.liu@kernel.org; Dexuan Cui <decui@microsoft.com>; davem@davemloft.net; Long Li <longli@microsoft.com>; ssengar@linux.microsoft.com; linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-rdma@vger.kernel.org
Cc: Souradeep Chakrabarti <schakrabarti@microsoft.com>; Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>; stable@vger.kernel.org
Subject: [PATCH V3 net] net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup
Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and hrtimer is initialized. It causes kernel panic.
 ? page_fault_oops+0x136/0x2b0
 ? page_counter_cancel+0x2e/0x80
 ? do_user_addr_fault+0x2f2/0x640
 ? refill_obj_stock+0xc4/0x110
 ? exc_page_fault+0x71/0x160
 ? asm_exc_page_fault+0x27/0x30
 ? __mmdrop+0x10/0x180
 ? __mmdrop+0xec/0x180
 ? hrtimer_active+0xd/0x50
 hrtimer_try_to_cancel+0x2c/0xf0
 hrtimer_cancel+0x15/0x30
 napi_disable+0x65/0x90
 mana_destroy_rxq+0x4c/0x2f0
 mana_create_rxq.isra.0+0x56c/0x6d0
 ? mana_uncfg_vport+0x50/0x50
 mana_alloc_queues+0x21b/0x320
 ? skb_dequeue+0x5f/0x80

Cc: stable@vger.kernel.org
Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
V3 -> V2: Instead of using napi internal attribute, using an atomic attribute to verify napi is initialized for a particular txq / rxq.
V2 -> V1: Addressed the comment on cleaning up napi for the queues, where queue creation was successful.
 drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
 include/net/mana/mana.h                       |  4 +++
 2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39f56973746d..bd303c89cfa6 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct mana_port_context *apc)
 
         for (i = 0; i < apc->num_queues; i++) {
                 napi = &apc->tx_qp[i].tx_cq.napi;
-                napi_synchronize(napi);
-                napi_disable(napi);
-                netif_napi_del(napi);
-
+                if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
+                        napi_synchronize(napi);
+                        napi_disable(napi);
+                        netif_napi_del(napi);
+                        atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
+                }
                 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
                 mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context *apc,
                 txq->ndev = net;
                 txq->net_txq = netdev_get_tx_queue(net, i);
                 txq->vp_offset = apc->tx_vp_offset;
+                atomic_set(&txq->napi_initialized, 0);
                 skb_queue_head_init(&txq->pending_skbs);
 
                 memset(&spec, 0, sizeof(spec));
@@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 
                 netif_napi_add_tx(net, &cq->napi, mana_poll);
                 napi_enable(&cq->napi);
+                atomic_set(&txq->napi_initialized, 1);
 
                 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
         }
@@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 
         napi = &rxq->rx_cq.napi;
 
-        if (validate_state)
-                napi_synchronize(napi);
+        if (atomic_read(&rxq->napi_initialized)) {
 
-        napi_disable(napi);
+                if (validate_state)
+                        napi_synchronize(napi);
 
-        xdp_rxq_info_unreg(&rxq->xdp_rxq);
+                napi_disable(napi);
 
-        netif_napi_del(napi);
+                netif_napi_del(napi);
+                atomic_set(&rxq->napi_initialized, 0);
+        }
+
+        xdp_rxq_info_unreg(&rxq->xdp_rxq);
 
         mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
@@ -2199,6 +2207,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
         rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
         rxq->rxq_idx = rxq_idx;
         rxq->rxobj = INVALID_MANA_HANDLE;
+        atomic_set(&rxq->napi_initialized, 0);
 
         mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
                            &rxq->headroom);
@@ -2286,6 +2295,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
         napi_enable(&cq->napi);
 
+        atomic_set(&rxq->napi_initialized, 1);
+
         mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 out:
         if (!err)
@@ -2336,7 +2347,6 @@ static void mana_destroy_vport(struct mana_port_context *apc)
                 rxq = apc->rxqs[rxq_idx];
                 if (!rxq)
                         continue;
 
-                mana_destroy_rxq(apc, rxq, true);
                 apc->rxqs[rxq_idx] = NULL;
         }
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 7caa334f4888..be75abd63dc8 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -98,6 +98,8 @@ struct mana_txq {
 
         atomic_t pending_sends;
 
+        atomic_t napi_initialized;
+
         struct mana_stats_tx stats;
 };
 
@@ -335,6 +337,8 @@ struct mana_rxq {
         bool xdp_flush;
         int xdp_rc; /* XDP redirect return code */
 
+        atomic_t napi_initialized;
+
         struct page_pool *page_pool;
 
         /* MUST BE THE LAST MEMBER:
-- 
2.34.1
I agree with the idea of adding napi_initialized to track the state.
But the Rx/Tx queue creation/removal are either on the same thread, or protected by rtnl_lock, correct? So napi_initialized should be a regular variable, not atomic_t.
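(For illustration only: a non-atomic variant along these lines might look like the sketch below. It is hypothetical, not the submitted patch; it assumes the create/destroy paths are serialized by rtnl_lock as noted above and reuses the names from the patch.)

        /* Sketch (hypothetical): plain flag in struct mana_txq instead of atomic_t */
        bool napi_initialized;

        /* mana_create_txq(): set the flag only after NAPI is fully set up */
        netif_napi_add_tx(net, &cq->napi, mana_poll);
        napi_enable(&cq->napi);
        txq->napi_initialized = true;

        /* mana_destroy_txq(): tear NAPI down only if it was initialized */
        if (apc->tx_qp[i].txq.napi_initialized) {
                napi_synchronize(napi);
                napi_disable(napi);
                netif_napi_del(napi);
                apc->tx_qp[i].txq.napi_initialized = false;
        }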
Thanks,
- Haiyang
-----Original Message-----
From: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Sent: Thursday, August 29, 2024 4:37 AM
To: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang <haiyangz@microsoft.com>; wei.liu@kernel.org; Dexuan Cui <decui@microsoft.com>; davem@davemloft.net; Long Li <longli@microsoft.com>; ssengar@linux.microsoft.com; linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-rdma@vger.kernel.org
Cc: Souradeep Chakrabarti <schakrabarti@microsoft.com>; Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>; stable@vger.kernel.org
Subject: [PATCH V3 net] net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup
Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and hrtimer is initialized. It causes kernel panic.
 ? page_fault_oops+0x136/0x2b0
 ? page_counter_cancel+0x2e/0x80
 ? do_user_addr_fault+0x2f2/0x640
 ? refill_obj_stock+0xc4/0x110
 ? exc_page_fault+0x71/0x160
 ? asm_exc_page_fault+0x27/0x30
 ? __mmdrop+0x10/0x180
 ? __mmdrop+0xec/0x180
 ? hrtimer_active+0xd/0x50
 hrtimer_try_to_cancel+0x2c/0xf0
 hrtimer_cancel+0x15/0x30
 napi_disable+0x65/0x90
 mana_destroy_rxq+0x4c/0x2f0
 mana_create_rxq.isra.0+0x56c/0x6d0
 ? mana_uncfg_vport+0x50/0x50
 mana_alloc_queues+0x21b/0x320
 ? skb_dequeue+0x5f/0x80

Cc: stable@vger.kernel.org
Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
V3 -> V2: Instead of using napi internal attribute, using an atomic attribute to verify napi is initialized for a particular txq / rxq.
V2 -> V1: Addressed the comment on cleaning up napi for the queues, where queue creation was successful.
 drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
 include/net/mana/mana.h                       |  4 +++
 2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39f56973746d..bd303c89cfa6 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct mana_port_context *apc)
 
         for (i = 0; i < apc->num_queues; i++) {
                 napi = &apc->tx_qp[i].tx_cq.napi;
-                napi_synchronize(napi);
-                napi_disable(napi);
-                netif_napi_del(napi);
-
+                if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
+                        napi_synchronize(napi);
+                        napi_disable(napi);
+                        netif_napi_del(napi);
+                        atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
+                }
                 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
                 mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context *apc,
                 txq->ndev = net;
                 txq->net_txq = netdev_get_tx_queue(net, i);
                 txq->vp_offset = apc->tx_vp_offset;
+                atomic_set(&txq->napi_initialized, 0);
                 skb_queue_head_init(&txq->pending_skbs);
 
                 memset(&spec, 0, sizeof(spec));
@@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 
                 netif_napi_add_tx(net, &cq->napi, mana_poll);
                 napi_enable(&cq->napi);
+                atomic_set(&txq->napi_initialized, 1);
 
                 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
         }
@@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 
         napi = &rxq->rx_cq.napi;
 
-        if (validate_state)
-                napi_synchronize(napi);
+        if (atomic_read(&rxq->napi_initialized)) {
 
-        napi_disable(napi);
+                if (validate_state)
+                        napi_synchronize(napi);
 
-        xdp_rxq_info_unreg(&rxq->xdp_rxq);
+                napi_disable(napi);
 
-        netif_napi_del(napi);
+                netif_napi_del(napi);
+                atomic_set(&rxq->napi_initialized, 0);
+        }
+
+        xdp_rxq_info_unreg(&rxq->xdp_rxq);
 
         mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
@@ -2199,6 +2207,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
         rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
         rxq->rxq_idx = rxq_idx;
         rxq->rxobj = INVALID_MANA_HANDLE;
+        atomic_set(&rxq->napi_initialized, 0);
 
         mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
                            &rxq->headroom);
@@ -2286,6 +2295,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
         napi_enable(&cq->napi);
 
+        atomic_set(&rxq->napi_initialized, 1);
+
         mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 out:
         if (!err)
@@ -2336,7 +2347,6 @@ static void mana_destroy_vport(struct mana_port_context *apc)
                 rxq = apc->rxqs[rxq_idx];
                 if (!rxq)
                         continue;
 
-                mana_destroy_rxq(apc, rxq, true);
Also, this line removal is not necessary.
On Thu, Aug 29, 2024 at 01:36:50AM -0700, Souradeep Chakrabarti wrote:
Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and hrtimer is initialized. It causes kernel panic.
 ? page_fault_oops+0x136/0x2b0
 ? page_counter_cancel+0x2e/0x80
 ? do_user_addr_fault+0x2f2/0x640
 ? refill_obj_stock+0xc4/0x110
 ? exc_page_fault+0x71/0x160
 ? asm_exc_page_fault+0x27/0x30
 ? __mmdrop+0x10/0x180
 ? __mmdrop+0xec/0x180
 ? hrtimer_active+0xd/0x50
 hrtimer_try_to_cancel+0x2c/0xf0
 hrtimer_cancel+0x15/0x30
 napi_disable+0x65/0x90
 mana_destroy_rxq+0x4c/0x2f0
 mana_create_rxq.isra.0+0x56c/0x6d0
 ? mana_uncfg_vport+0x50/0x50
 mana_alloc_queues+0x21b/0x320
 ? skb_dequeue+0x5f/0x80

Cc: stable@vger.kernel.org
Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
V3 -> V2: Instead of using napi internal attribute, using an atomic attribute to verify napi is initialized for a particular txq / rxq.
V2 -> V1: Addressed the comment on cleaning up napi for the queues, where queue creation was successful.
 drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
 include/net/mana/mana.h                       |  4 +++
 2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39f56973746d..bd303c89cfa6 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct mana_port_context *apc)
 
         for (i = 0; i < apc->num_queues; i++) {
                 napi = &apc->tx_qp[i].tx_cq.napi;
-                napi_synchronize(napi);
-                napi_disable(napi);
-                netif_napi_del(napi);
-
+                if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
+                        napi_synchronize(napi);
+                        napi_disable(napi);
+                        netif_napi_del(napi);
+                        atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
+                }
                 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
                 mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context *apc,
                 txq->ndev = net;
                 txq->net_txq = netdev_get_tx_queue(net, i);
                 txq->vp_offset = apc->tx_vp_offset;
+                atomic_set(&txq->napi_initialized, 0);
                 skb_queue_head_init(&txq->pending_skbs);
 
                 memset(&spec, 0, sizeof(spec));
@@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 
                 netif_napi_add_tx(net, &cq->napi, mana_poll);
                 napi_enable(&cq->napi);
+                atomic_set(&txq->napi_initialized, 1);
 
                 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
         }
@@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 
         napi = &rxq->rx_cq.napi;
 
-        if (validate_state)
-                napi_synchronize(napi);
+        if (atomic_read(&rxq->napi_initialized)) {
 
-        napi_disable(napi);
+                if (validate_state)
+                        napi_synchronize(napi);
Is this validate_state flag still needed? The new napi_initialized variable will make sure the napi_synchronize() is called only for rxqs that have napi_enabled.
Regards,
Shradha.
 
-        xdp_rxq_info_unreg(&rxq->xdp_rxq);
+                napi_disable(napi);
 
-        netif_napi_del(napi);
+                netif_napi_del(napi);
+                atomic_set(&rxq->napi_initialized, 0);
+        }
+
+        xdp_rxq_info_unreg(&rxq->xdp_rxq);
 
         mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
@@ -2199,6 +2207,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
         rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
         rxq->rxq_idx = rxq_idx;
         rxq->rxobj = INVALID_MANA_HANDLE;
+        atomic_set(&rxq->napi_initialized, 0);
 
         mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
                            &rxq->headroom);
@@ -2286,6 +2295,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
         napi_enable(&cq->napi);
 
+        atomic_set(&rxq->napi_initialized, 1);
+
         mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 out:
         if (!err)
@@ -2336,7 +2347,6 @@ static void mana_destroy_vport(struct mana_port_context *apc)
                 rxq = apc->rxqs[rxq_idx];
                 if (!rxq)
                         continue;
 
-                mana_destroy_rxq(apc, rxq, true);
                 apc->rxqs[rxq_idx] = NULL;
         }
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 7caa334f4888..be75abd63dc8 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -98,6 +98,8 @@ struct mana_txq {
 
         atomic_t pending_sends;
 
+        atomic_t napi_initialized;
+
         struct mana_stats_tx stats;
 };
 
@@ -335,6 +337,8 @@ struct mana_rxq {
         bool xdp_flush;
         int xdp_rc; /* XDP redirect return code */
 
+        atomic_t napi_initialized;
+
         struct page_pool *page_pool;
 
         /* MUST BE THE LAST MEMBER:
-- 
2.34.1
-----Original Message-----
From: Shradha Gupta <shradhagupta@linux.microsoft.com>
Sent: Friday, August 30, 2024 2:14 AM
To: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang <haiyangz@microsoft.com>; wei.liu@kernel.org; Dexuan Cui <decui@microsoft.com>; davem@davemloft.net; Long Li <longli@microsoft.com>; ssengar@linux.microsoft.com; linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-rdma@vger.kernel.org; Souradeep Chakrabarti <schakrabarti@microsoft.com>; stable@vger.kernel.org
Subject: Re: [PATCH V3 net] net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup
On Thu, Aug 29, 2024 at 01:36:50AM -0700, Souradeep Chakrabarti wrote:
Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and hrtimer is initialized. It causes kernel panic.
 ? page_fault_oops+0x136/0x2b0
 ? page_counter_cancel+0x2e/0x80
 ? do_user_addr_fault+0x2f2/0x640
 ? refill_obj_stock+0xc4/0x110
 ? exc_page_fault+0x71/0x160
 ? asm_exc_page_fault+0x27/0x30
 ? __mmdrop+0x10/0x180
 ? __mmdrop+0xec/0x180
 ? hrtimer_active+0xd/0x50
 hrtimer_try_to_cancel+0x2c/0xf0
 hrtimer_cancel+0x15/0x30
 napi_disable+0x65/0x90
 mana_destroy_rxq+0x4c/0x2f0
 mana_create_rxq.isra.0+0x56c/0x6d0
 ? mana_uncfg_vport+0x50/0x50
 mana_alloc_queues+0x21b/0x320
 ? skb_dequeue+0x5f/0x80

Cc: stable@vger.kernel.org
Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
V3 -> V2: Instead of using napi internal attribute, using an atomic attribute to verify napi is initialized for a particular txq / rxq.
V2 -> V1: Addressed the comment on cleaning up napi for the queues, where queue creation was successful.
 drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
 include/net/mana/mana.h                       |  4 +++
 2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39f56973746d..bd303c89cfa6 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct mana_port_context *apc)
 
         for (i = 0; i < apc->num_queues; i++) {
                 napi = &apc->tx_qp[i].tx_cq.napi;
-                napi_synchronize(napi);
-                napi_disable(napi);
-                netif_napi_del(napi);
-
+                if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
+                        napi_synchronize(napi);
+                        napi_disable(napi);
+                        netif_napi_del(napi);
+                        atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
+                }
                 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
                 mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context *apc,
                 txq->ndev = net;
                 txq->net_txq = netdev_get_tx_queue(net, i);
                 txq->vp_offset = apc->tx_vp_offset;
+                atomic_set(&txq->napi_initialized, 0);
                 skb_queue_head_init(&txq->pending_skbs);
 
                 memset(&spec, 0, sizeof(spec));
@@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 
                 netif_napi_add_tx(net, &cq->napi, mana_poll);
                 napi_enable(&cq->napi);
+                atomic_set(&txq->napi_initialized, 1);
 
                 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
         }
@@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 
         napi = &rxq->rx_cq.napi;
 
-        if (validate_state)
-                napi_synchronize(napi);
+        if (atomic_read(&rxq->napi_initialized)) {
 
-        napi_disable(napi);
+                if (validate_state)
+                        napi_synchronize(napi);
Is this validate_state flag still needed? The new napi_initialized variable will make sure the napi_synchronize() is called only for rxqs that have napi_enabled.
Yes, the validate_state flag for mana_destroy_rxq() can be combined/renamed.
@Souradeep Chakrabarti, As Jakub suggested in V2, you may:
Rename the parameter validate_state to napi_initialized. The two callers below can still call like before.
1  mana_en.c  mana_create_rxq     2296  mana_destroy_rxq(apc, rxq, false);
2  mana_en.c  mana_destroy_vport  2340  mana_destroy_rxq(apc, rxq, true);
So you don't need to add napi_initialized to struct mana_rxq.
In mana_destroy_rxq() you can have code like this:

        if (napi_initialized) {
                napi = &rxq->rx_cq.napi;
                napi_synchronize(napi);
                napi_disable(napi);
                xdp_rxq_info_unreg(&rxq->xdp_rxq);
                netif_napi_del(napi);
        }
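(For reference, a minimal sketch of the prototype change this implies; hypothetical, with the parameter renamed as suggested above:)

        static void mana_destroy_rxq(struct mana_port_context *apc,
                                     struct mana_rxq *rxq, bool napi_initialized)

The two call sites listed above would keep passing false from the mana_create_rxq() error path and true from mana_destroy_vport(), just as they do today with validate_state.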
For mana_destroy_txq, it looks more complex; your adding napi_initialized to struct mana_txq is good.
Thanks,
- Haiyang