After calling irq_set_affinity_and_hint(), the cpumask pointer is saved in desc->affinity_hint, and will be used later when reading /proc/irq/<num>/affinity_hint. So the cpumask variable needs to be persistent. Otherwise, we are accessing freed memory when reading the affinity_hint file.
Also, need to clear affinity_hint before free_irq(), otherwise there is a one-time warning and stack trace during module unloading:
[ 243.948687] WARNING: CPU: 10 PID: 1589 at kernel/irq/manage.c:1913 free_irq+0x318/0x360 ... [ 243.948753] Call Trace: [ 243.948754] <TASK> [ 243.948760] mana_gd_remove_irqs+0x78/0xc0 [mana] [ 243.948767] mana_gd_remove+0x3e/0x80 [mana] [ 243.948773] pci_device_remove+0x3d/0xb0 [ 243.948778] device_remove+0x46/0x70 [ 243.948782] device_release_driver_internal+0x1fe/0x280 [ 243.948785] driver_detach+0x4e/0xa0 [ 243.948787] bus_remove_driver+0x70/0xf0 [ 243.948789] driver_unregister+0x35/0x60 [ 243.948792] pci_unregister_driver+0x44/0x90 [ 243.948794] mana_driver_exit+0x14/0x3fe [mana] [ 243.948800] __do_sys_delete_module.constprop.0+0x185/0x2f0
To fix the bug, use the persistent mask, cpumask_of(cpu#), and set affinity_hint to NULL before freeing the IRQ, as required by free_irq().
Cc: stable@vger.kernel.org Fixes: 71fa6887eeca ("net: mana: Assign interrupts to CPUs based on NUMA nodes") Signed-off-by: Haiyang Zhang haiyangz@microsoft.com --- .../net/ethernet/microsoft/mana/gdma_main.c | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index b144f2237748..a55d42332e20 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1218,8 +1218,6 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) struct gdma_context *gc = pci_get_drvdata(pdev); struct gdma_irq_context *gic; unsigned int max_irqs; - u16 *cpus; - cpumask_var_t req_mask; int nvec, irq; int err, i = 0, j;
@@ -1240,21 +1238,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) goto free_irq_vector; }
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) { - err = -ENOMEM; - goto free_irq; - } - - cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL); - if (!cpus) { - err = -ENOMEM; - goto free_mask; - } - for (i = 0; i < nvec; i++) - cpus[i] = cpumask_local_spread(i, gc->numa_node); - for (i = 0; i < nvec; i++) { - cpumask_set_cpu(cpus[i], req_mask); gic = &gc->irq_contexts[i]; gic->handler = NULL; gic->arg = NULL; @@ -1269,17 +1253,16 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) irq = pci_irq_vector(pdev, i); if (irq < 0) { err = irq; - goto free_mask; + goto free_irq; }
err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); if (err) - goto free_mask; - irq_set_affinity_and_hint(irq, req_mask); - cpumask_clear(req_mask); + goto free_irq; + + irq_set_affinity_and_hint(irq, cpumask_of(cpumask_local_spread + (i, gc->numa_node))); } - free_cpumask_var(req_mask); - kfree(cpus);
err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); if (err) @@ -1290,13 +1273,12 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
return 0;
-free_mask: - free_cpumask_var(req_mask); - kfree(cpus); free_irq: for (j = i - 1; j >= 0; j--) { irq = pci_irq_vector(pdev, j); gic = &gc->irq_contexts[j]; + + irq_update_affinity_hint(irq, NULL); free_irq(irq, gic); }
@@ -1324,6 +1306,9 @@ static void mana_gd_remove_irqs(struct pci_dev *pdev) continue;
gic = &gc->irq_contexts[i]; + + /* Need to clear the hint before free_irq */ + irq_update_affinity_hint(irq, NULL); free_irq(irq, gic); }
From: LKML haiyangz lkmlhyz@microsoft.com On Behalf Of Haiyang Zhang Sent: Wednesday, February 1, 2023 1:47 PM
After calling irq_set_affinity_and_hint(), the cpumask pointer is saved in desc->affinity_hint, and will be used later when reading /proc/irq/<num>/affinity_hint. So the cpumask variable needs to be persistent. Otherwise, we are accessing freed memory when reading the affinity_hint file.
Also, need to clear affinity_hint before free_irq(), otherwise there is a one-time warning and stack trace during module unloading:
[ 243.948687] WARNING: CPU: 10 PID: 1589 at kernel/irq/manage.c:1913 free_irq+0x318/0x360 ... [ 243.948753] Call Trace: [ 243.948754] <TASK> [ 243.948760] mana_gd_remove_irqs+0x78/0xc0 [mana] [ 243.948767] mana_gd_remove+0x3e/0x80 [mana] [ 243.948773] pci_device_remove+0x3d/0xb0 [ 243.948778] device_remove+0x46/0x70 [ 243.948782] device_release_driver_internal+0x1fe/0x280 [ 243.948785] driver_detach+0x4e/0xa0 [ 243.948787] bus_remove_driver+0x70/0xf0 [ 243.948789] driver_unregister+0x35/0x60 [ 243.948792] pci_unregister_driver+0x44/0x90 [ 243.948794] mana_driver_exit+0x14/0x3fe [mana] [ 243.948800] __do_sys_delete_module.constprop.0+0x185/0x2f0
To fix the bug, use the persistent mask, cpumask_of(cpu#), and set affinity_hint to NULL before freeing the IRQ, as required by free_irq().
Cc: stable@vger.kernel.org Fixes: 71fa6887eeca ("net: mana: Assign interrupts to CPUs based on NUMA nodes") Signed-off-by: Haiyang Zhang haiyangz@microsoft.com
.../net/ethernet/microsoft/mana/gdma_main.c | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index b144f2237748..a55d42332e20 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1218,8 +1218,6 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) struct gdma_context *gc = pci_get_drvdata(pdev); struct gdma_irq_context *gic; unsigned int max_irqs;
- u16 *cpus;
- cpumask_var_t req_mask; int nvec, irq; int err, i = 0, j;
@@ -1240,21 +1238,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) goto free_irq_vector; }
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) {
err = -ENOMEM;
goto free_irq;
- }
- cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL);
- if (!cpus) {
err = -ENOMEM;
goto free_mask;
- }
- for (i = 0; i < nvec; i++)
cpus[i] = cpumask_local_spread(i, gc->numa_node);
- for (i = 0; i < nvec; i++) {
gic = &gc->irq_contexts[i]; gic->handler = NULL; gic->arg = NULL;cpumask_set_cpu(cpus[i], req_mask);
@@ -1269,17 +1253,16 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) irq = pci_irq_vector(pdev, i); if (irq < 0) { err = irq;
goto free_mask;
goto free_irq;
}
err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); if (err)
goto free_mask;
irq_set_affinity_and_hint(irq, req_mask);
cpumask_clear(req_mask);
goto free_irq;
irq_set_affinity_and_hint(irq, cpumask_of(cpumask_local_spread
}(i, gc->numa_node)));
free_cpumask_var(req_mask);
kfree(cpus);
err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); if (err)
@@ -1290,13 +1273,12 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
return 0;
-free_mask:
- free_cpumask_var(req_mask);
- kfree(cpus);
free_irq: for (j = i - 1; j >= 0; j--) { irq = pci_irq_vector(pdev, j); gic = &gc->irq_contexts[j];
free_irq(irq, gic); }irq_update_affinity_hint(irq, NULL);
@@ -1324,6 +1306,9 @@ static void mana_gd_remove_irqs(struct pci_dev *pdev) continue;
gic = &gc->irq_contexts[i];
/* Need to clear the hint before free_irq */
free_irq(irq, gic); }irq_update_affinity_hint(irq, NULL);
-- 2.25.1
Reviewed-by: Michael Kelley
On Wed, Feb 01, 2023 at 01:46:53PM -0800, Haiyang Zhang wrote:
After calling irq_set_affinity_and_hint(), the cpumask pointer is saved in desc->affinity_hint, and will be used later when reading /proc/irq/<num>/affinity_hint. So the cpumask variable needs to be persistent. Otherwise, we are accessing freed memory when reading the affinity_hint file.
Also, need to clear affinity_hint before free_irq(), otherwise there is a one-time warning and stack trace during module unloading:
[ 243.948687] WARNING: CPU: 10 PID: 1589 at kernel/irq/manage.c:1913 free_irq+0x318/0x360 ... [ 243.948753] Call Trace: [ 243.948754] <TASK> [ 243.948760] mana_gd_remove_irqs+0x78/0xc0 [mana] [ 243.948767] mana_gd_remove+0x3e/0x80 [mana] [ 243.948773] pci_device_remove+0x3d/0xb0 [ 243.948778] device_remove+0x46/0x70 [ 243.948782] device_release_driver_internal+0x1fe/0x280 [ 243.948785] driver_detach+0x4e/0xa0 [ 243.948787] bus_remove_driver+0x70/0xf0 [ 243.948789] driver_unregister+0x35/0x60 [ 243.948792] pci_unregister_driver+0x44/0x90 [ 243.948794] mana_driver_exit+0x14/0x3fe [mana] [ 243.948800] __do_sys_delete_module.constprop.0+0x185/0x2f0
To fix the bug, use the persistent mask, cpumask_of(cpu#), and set affinity_hint to NULL before freeing the IRQ, as required by free_irq().
Cc: stable@vger.kernel.org Fixes: 71fa6887eeca ("net: mana: Assign interrupts to CPUs based on NUMA nodes") Signed-off-by: Haiyang Zhang haiyangz@microsoft.com
.../net/ethernet/microsoft/mana/gdma_main.c | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-)
Thanks, Reviewed-by: Leon Romanovsky leonro@nvidia.com
-----Original Message----- From: Jakub Kicinski kuba@kernel.org Sent: Thursday, February 2, 2023 2:38 PM To: Haiyang Zhang haiyangz@microsoft.com Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui decui@microsoft.com; KY Srinivasan kys@microsoft.com; Paul Rosswurm paulros@microsoft.com; olaf@aepfle.de; vkuznets@redhat.com; davem@davemloft.net; linux-kernel@vger.kernel.org; stable@vger.kernel.org Subject: Re: [PATCH net,v2] net: mana: Fix accessing freed irq affinity_hint
On Wed, 1 Feb 2023 13:46:53 -0800 Haiyang Zhang wrote:
irq_set_affinity_and_hint(irq,
cpumask_of(cpumask_local_spread
(i, gc->numa_node)));
The line break here looks ugly. Please use a local variable for the mask or the cpu.
Will do.
linux-stable-mirror@lists.linaro.org