From: Mike Marciniszyn mike.marciniszyn@cornelisnetworks.com
A panic can result when AIP is enabled:
[ 8.644728] BUG: unable to handle kernel NULL pointer dereference at 000000000000000 [ 8.657708] PGD 0 P4D 0 [ 8.664488] Oops: 0000 1 SMP PTI [ 8.672190] CPU: 70 PID: 981 Comm: systemd-udevd Tainted: G OE --------- - - 4.18.0-240.el8.x86_64 #1 [ 8.687916] Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.01.01.0005.101720141054 10/17/2014 [ 8.703340] RIP: 0010:__bitmap_and+0x1b/0x70 [ 8.741702] RSP: 0018:ffff99aa0845f9f0 EFLAGS: 00010246 [ 8.751757] RAX: 0000000000000000 RBX: ffff8d5a6fc18000 RCX: 0000000000000048 [ 8.764203] RDX: 0000000000000000 RSI: ffffffffc06336f0 RDI: ffff8d5a8fa67750 [ 8.776990] RBP: 0000000000000079 R08: 0000000fffffffff R09: 0000000000000000 [ 8.789768] R10: 0000000000000000 R11: 0000000000000001 R12: ffffffffc06336f0 [ 8.802007] R13: 00000000000000a0 R14: ffff8d5a6fc18000 R15: 0000000000000003 [ 8.814317] FS: 00007fec137a5980(0000) GS:ffff8d5a9fa80000(0000) knlGS:0000000000000000 [ 8.827629] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.838309] CR2: 0000000000000000 CR3: 0000000a04b48002 CR4: 00000000001606e0 [ 8.850502] Call Trace: [ 8.857950] hfi1_num_netdev_contexts+0x7c/0x110 [hfi1] [ 8.868295] hfi1_init_dd+0xd7f/0x1a90 [hfi1] [ 8.877681] ? pci_bus_read_config_dword+0x49/0x70 [ 8.887567] ? pci_mmcfg_read+0x3e/0xe0 [ 8.896797] do_init_one.isra.18+0x336/0x640 [hfi1] [ 8.906958] local_pci_probe+0x41/0x90 [ 8.915784] pci_device_probe+0x105/0x1c0 [ 8.925002] really_probe+0x212/0x440 [ 8.933687] driver_probe_device+0x49/0xc0 [ 8.942918] device_driver_attach+0x50/0x60 [ 8.952553] __driver_attach+0x61/0x130 [ 8.961553] ? device_driver_attach+0x60/0x60 [ 8.971122] bus_for_each_dev+0x77/0xc0 [ 8.979912] ? klist_add_tail+0x3b/0x70 [ 8.988886] bus_add_driver+0x14d/0x1e0 [ 8.998175] ? dev_init+0x10b/0x10b [hfi1] [ 9.007531] driver_register+0x6b/0xb0 [ 9.016757] ? dev_init+0x10b/0x10b [hfi1] [ 9.026220] hfi1_mod_init+0x1e6/0x20a [hfi1] [ 9.035601] do_one_initcall+0x46/0x1c3 [ 9.043958] ? 
free_unref_page_commit+0x91/0x100 [ 9.053460] ? _cond_resched+0x15/0x30 [ 9.062426] ? kmem_cache_alloc_trace+0x140/0x1c0 [ 9.071982] do_init_module+0x5a/0x220 [ 9.080574] load_module+0x14b4/0x17e0 [ 9.088911] ? __do_sys_finit_module+0xa8/0x110 [ 9.098231] __do_sys_finit_module+0xa8/0x110 [ 9.107307] do_syscall_64+0x5b/0x1a0
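The issue happens when pcibus_to_node() returns NUMA_NO_NODE: hfi1_num_netdev_contexts() passes that value straight to cpumask_of_node(), and the resulting mask is then dereferenced in __bitmap_and().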
Fix this issue by moving the initialization of dd->node to the hfi1_devdata allocation, removing the other pcibus_to_node() calls in the probe path, and using dd->node instead.
Affinity logic is adjusted to use a new field dd->affinity_entry as a guard instead of dd->node.
Fixes: 4730f4a6c6b2 ("IB/hfi1: Activate the dummy netdev") Cc: stable@vger.kernel.org Signed-off-by: Mike Marciniszyn mike.marciniszyn@cornelisnetworks.com Signed-off-by: Dennis Dalessandro dennis.dalessandro@cornelisnetworks.com --- drivers/infiniband/hw/hfi1/affinity.c | 21 +++++---------------- drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/init.c | 10 +++++++++- drivers/infiniband/hw/hfi1/netdev_rx.c | 3 +-- 4 files changed, 16 insertions(+), 19 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 2a91b8d..04b1e8f 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -632,22 +632,11 @@ static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd, */ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) { - int node = pcibus_to_node(dd->pcidev->bus); struct hfi1_affinity_node *entry; const struct cpumask *local_mask; int curr_cpu, possible, i, ret; bool new_entry = false;
- /* - * If the BIOS does not have the NUMA node information set, select - * NUMA 0 so we get consistent performance. - */ - if (node < 0) { - dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n"); - node = 0; - } - dd->node = node; - local_mask = cpumask_of_node(dd->node); if (cpumask_first(local_mask) >= nr_cpu_ids) local_mask = topology_core_cpumask(0); @@ -660,7 +649,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) * create an entry in the global affinity structure and initialize it. */ if (!entry) { - entry = node_affinity_allocate(node); + entry = node_affinity_allocate(dd->node); if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); @@ -751,6 +740,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (new_entry) node_affinity_add_tail(entry);
+ dd->affinity_entry = entry; mutex_unlock(&node_affinity.lock);
return 0; @@ -766,10 +756,9 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd) { struct hfi1_affinity_node *entry;
- if (dd->node < 0) - return; - mutex_lock(&node_affinity.lock); + if (!dd->affinity_entry) + goto unlock; entry = node_affinity_lookup(dd->node); if (!entry) goto unlock; @@ -780,8 +769,8 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd) */ _dev_comp_vect_cpu_mask_clean_up(dd, entry); unlock: + dd->affinity_entry = NULL; mutex_unlock(&node_affinity.lock); - dd->node = NUMA_NO_NODE; }
/* diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 024ef6e..d341b8a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1403,6 +1403,7 @@ struct hfi1_devdata { spinlock_t irq_src_lock; int vnic_num_vports; struct net_device *dummy_netdev; + struct hfi1_affinity_node *affinity_entry;
/* Keeps track of IPoIB RSM rule users */ atomic_t ipoib_rsm_usr_num; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e4f8db4..6d03aa0 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1277,7 +1277,6 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, dd->pport = (struct hfi1_pportdata *)(dd + 1); dd->pcidev = pdev; pci_set_drvdata(pdev, dd); - dd->node = NUMA_NO_NODE;
ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b, GFP_KERNEL); @@ -1287,6 +1286,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, goto bail; } rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit); + /* + * If the BIOS does not have the NUMA node information set, select + * NUMA 0 so we get consistent performance. + */ + dd->node = pcibus_to_node(pdev->bus); + if (dd->node == NUMA_NO_NODE) { + dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n"); + dd->node = 0; + }
/* * Initialize all locks for the device. This needs to be as early as diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index cec02e8..c1fa53d 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -173,8 +173,7 @@ u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts, return 0; }
- cpumask_and(node_cpu_mask, cpu_mask, - cpumask_of_node(pcibus_to_node(dd->pcidev->bus))); + cpumask_and(node_cpu_mask, cpu_mask, cpumask_of_node(dd->node));
available_cpus = cpumask_weight(node_cpu_mask);