Fix the IO hang that arises because of MSIx vector not having a mapped online CPU upon receiving completion. This patch sets up a reply queue mapping to CPUs based on the IRQ affinity retrieved using pci_irq_get_affinity() API.
aac_setup_reply_map() is an explicit mapping for internally generated (non-SCSI) cmds. The SCSI cmds take the blk_mq route, and the non-SCSI cmds are mapped to the reply_map.
Reviewed-by: Gilbert Wu gilbert.wu@microchip.com Signed-off-by: Sagar Biradar Sagar.Biradar@microchip.com --- drivers/scsi/aacraid/aacraid.h | 1 + drivers/scsi/aacraid/comminit.c | 32 ++++++++++++++++++++++++++++++++ drivers/scsi/aacraid/commsup.c | 6 +++++- drivers/scsi/aacraid/linit.c | 25 +++++++++++++++++++++++++ drivers/scsi/aacraid/src.c | 13 +++++++++++-- 5 files changed, 74 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 5e115e8b2ba4..20f8560a3038 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1678,6 +1678,7 @@ struct aac_dev u32 handle_pci_error; bool init_reset; u8 soft_reset_support; + unsigned int *reply_map; };
#define aac_adapter_interrupt(dev) \ diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index bd99c5492b7d..6f4e40cdaade 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -33,6 +33,8 @@
#include "aacraid.h"
+void aac_setup_reply_map(struct aac_dev *dev); + struct aac_common aac_config = { .irq_mod = 1 }; @@ -630,6 +632,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
if (aac_is_src(dev)) aac_define_int_mode(dev); + + aac_setup_reply_map(dev); + /* * Ok now init the communication subsystem */ @@ -658,3 +663,30 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) return dev; }
+/* + * aac_setup_reply_map - This is an explicit mapping for + * internally generated (non-SCSI) cmds which need to be + * serviced outside of IO requests. + * The SCSI cmds take the blk_mq mechanism, + * and the non-SCSI cmds are mapped to the reply_map. + */ +void aac_setup_reply_map(struct aac_dev *dev) +{ + const struct cpumask *mask; + unsigned int i, cpu = 1; + + for (i = 1; i < dev->max_msix; i++) { + mask = pci_irq_get_affinity(dev->pdev, i); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) { + dev->reply_map[cpu] = i; + } + } + return; + +fallback: + for_each_possible_cpu(cpu) + dev->reply_map[cpu] = 0; +} diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index deb32c9f4b3e..3f062e4013ab 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev) struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd) { struct fib *fibptr; + u32 blk_tag; + int i;
- fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag]; + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); + i = blk_mq_unique_tag_to_tag(blk_tag); + fibptr = &dev->fibs[i]; /* * Null out fields that depend on being zero at the start of * each I/O diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 5ba5c18b77b4..077adbcde909 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -34,6 +34,7 @@ #include <linux/delay.h> #include <linux/kthread.h> #include <linux/msdos_partition.h> +#include <linux/blk-mq-pci.h>
#include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -505,6 +506,16 @@ static int aac_slave_configure(struct scsi_device *sdev) return 0; }
+static void aac_map_queues(struct Scsi_Host *shost) +{ + struct aac_dev *aac = (struct aac_dev *)shost->hostdata; + + blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + aac->pdev, 0); +} + + + /** * aac_change_queue_depth - alter queue depths * @sdev: SCSI device we are considering @@ -1489,6 +1500,7 @@ static struct scsi_host_template aac_driver_template = { .bios_param = aac_biosparm, .shost_groups = aac_host_groups, .slave_configure = aac_slave_configure, + .map_queues = aac_map_queues, .change_queue_depth = aac_change_queue_depth, .sdev_groups = aac_dev_groups, .eh_abort_handler = aac_eh_abort, @@ -1668,6 +1680,14 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) goto out_free_host; }
+ aac->reply_map = kzalloc(sizeof(unsigned int) * nr_cpu_ids, + GFP_KERNEL); + if (!aac->reply_map) { + error = -ENOMEM; + dev_err(&pdev->dev, "reply_map allocation failed\n"); + goto out_free_host; + } + spin_lock_init(&aac->fib_lock);
mutex_init(&aac->ioctl_mutex); @@ -1776,6 +1796,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = AAC_MAX_LUN;
pci_set_drvdata(pdev, shost); + shost->nr_hw_queues = aac->max_msix; + shost->host_tagset = 1;
error = scsi_add_host(shost, &pdev->dev); if (error) @@ -1797,6 +1819,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) aac->comm_addr, aac->comm_phys); kfree(aac->queues); aac_adapter_ioremap(aac, 0); + /* By now we should have configured the reply_map */ + kfree(aac->reply_map); kfree(aac->fibs); kfree(aac->fsa_dev); out_free_host: @@ -1918,6 +1942,7 @@ static void aac_remove_one(struct pci_dev *pdev)
aac_adapter_ioremap(aac, 0);
+ kfree(aac->reply_map); kfree(aac->fibs); kfree(aac->fsa_dev);
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 11ef58204e96..46c0f4df995d 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -493,6 +493,8 @@ static int aac_src_deliver_message(struct fib *fib) #endif
u16 vector_no; + struct scsi_cmnd *scmd; + u32 blk_tag;
atomic_inc(&q->numpending);
@@ -505,8 +507,15 @@ static int aac_src_deliver_message(struct fib *fib) if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3) && dev->sa_firmware) vector_no = aac_get_vector(dev); - else - vector_no = fib->vector_no; + else { + if (!fib->vector_no || !fib->callback_data) { + vector_no = dev->reply_map[raw_smp_processor_id()]; + } else { + scmd = (struct scsi_cmnd *)fib->callback_data; + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); + vector_no = blk_mq_unique_tag_to_hwq(blk_tag); + } + }
if (native_hba) { if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {
Hi,
Thanks for your patch.
FYI: kernel test robot notices the stable kernel rule is not satisfied.
Rule: 'Cc: stable@vger.kernel.org' or 'commit <sha1> upstream.' Subject: [PATCH v2] aacraid: reply queue mapping to CPUs based of IRQ affinity Link: https://lore.kernel.org/stable/20230428210751.29722-1-sagar.biradar%40microc...
The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
On 28/04/2023 22:07, Sagar Biradar wrote:
Fix the IO hang that arises because of MSIx vector not having a mapped online CPU upon receiving completion. This patch sets up a reply queue mapping to CPUs based on the IRQ affinity retrieved using pci_irq_get_affinity() API.
aac_setup_reply_map() is an explicit mapping for internally generated (non-SCSI) cmds. The SCSI cmds take the blk_mq route, and the non-SCSI cmds are mapped to the reply_map.
This now looks better.
I would still prefer if no reply_map was used even for internal commands. As I see, you have two alternatives (to using reply_map): - instead of using a driver-internal reply_map, lookup CPU->HW queue mapping for internal commands by using shost->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[raw_smp_processor_id()] Ideally when we finally support reserved commands for SCSI ML we will have a better solution for this. - if it is possible to send driver internal commands on a specific HW queue always, then reserve a dedicated HW queue for them (and always send on that HW queue). You may reserve this HW queue by omitting 1x HW queue from pci_alloc_irq_vectors_affinity() for affinity spread
Reviewed-by: Gilbert Wu gilbert.wu@microchip.com Signed-off-by: Sagar Biradar Sagar.Biradar@microchip.com
drivers/scsi/aacraid/aacraid.h | 1 + drivers/scsi/aacraid/comminit.c | 32 ++++++++++++++++++++++++++++++++ drivers/scsi/aacraid/commsup.c | 6 +++++- drivers/scsi/aacraid/linit.c | 25 +++++++++++++++++++++++++ drivers/scsi/aacraid/src.c | 13 +++++++++++-- 5 files changed, 74 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 5e115e8b2ba4..20f8560a3038 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1678,6 +1678,7 @@ struct aac_dev u32 handle_pci_error; bool init_reset; u8 soft_reset_support;
- unsigned int *reply_map; };
#define aac_adapter_interrupt(dev) \ diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index bd99c5492b7d..6f4e40cdaade 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -33,6 +33,8 @@ #include "aacraid.h" +void aac_setup_reply_map(struct aac_dev *dev);
- struct aac_common aac_config = { .irq_mod = 1 };
@@ -630,6 +632,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) if (aac_is_src(dev)) aac_define_int_mode(dev);
- aac_setup_reply_map(dev);
- /*
*/
- Ok now init the communication subsystem
@@ -658,3 +663,30 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) return dev; } +/*
- aac_setup_reply_map - This is an explicit mapping for
- internally generated (non-SCSI) cmds which need to be
- serviced outside of IO requests.
- The SCSI cmds take the blk_mq mechanism,
- and the non-SCSI cmds are mapped to the reply_map.
- */
+void aac_setup_reply_map(struct aac_dev *dev) +{
- const struct cpumask *mask;
- unsigned int i, cpu = 1;
- for (i = 1; i < dev->max_msix; i++) {
mask = pci_irq_get_affinity(dev->pdev, i);
if (!mask)
goto fallback;
for_each_cpu(cpu, mask) {
dev->reply_map[cpu] = i;
}
- }
- return;
+fallback:
- for_each_possible_cpu(cpu)
dev->reply_map[cpu] = 0;
+} diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index deb32c9f4b3e..3f062e4013ab 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev) struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd) { struct fib *fibptr;
- u32 blk_tag;
- int i;
- fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
- blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
- i = blk_mq_unique_tag_to_tag(blk_tag);
- fibptr = &dev->fibs[i]; /*
- Null out fields that depend on being zero at the start of
- each I/O
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 5ba5c18b77b4..077adbcde909 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -34,6 +34,7 @@ #include <linux/delay.h> #include <linux/kthread.h> #include <linux/msdos_partition.h> +#include <linux/blk-mq-pci.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -505,6 +506,16 @@ static int aac_slave_configure(struct scsi_device *sdev) return 0; } +static void aac_map_queues(struct Scsi_Host *shost) +{
- struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
I don't think that you need a explicit casting ...
- blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
aac->pdev, 0);
+}
- /**
- aac_change_queue_depth - alter queue depths
- @sdev: SCSI device we are considering
@@ -1489,6 +1500,7 @@ static struct scsi_host_template aac_driver_template = { .bios_param = aac_biosparm, .shost_groups = aac_host_groups, .slave_configure = aac_slave_configure,
- .map_queues = aac_map_queues, .change_queue_depth = aac_change_queue_depth, .sdev_groups = aac_dev_groups, .eh_abort_handler = aac_eh_abort,
@@ -1668,6 +1680,14 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) goto out_free_host; }
- aac->reply_map = kzalloc(sizeof(unsigned int) * nr_cpu_ids,
GFP_KERNEL);
- if (!aac->reply_map) {
error = -ENOMEM;
dev_err(&pdev->dev, "reply_map allocation failed\n");
goto out_free_host;
- }
- spin_lock_init(&aac->fib_lock);
mutex_init(&aac->ioctl_mutex); @@ -1776,6 +1796,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = AAC_MAX_LUN; pci_set_drvdata(pdev, shost);
- shost->nr_hw_queues = aac->max_msix;
- shost->host_tagset = 1;
error = scsi_add_host(shost, &pdev->dev); if (error) @@ -1797,6 +1819,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) aac->comm_addr, aac->comm_phys); kfree(aac->queues); aac_adapter_ioremap(aac, 0);
- /* By now we should have configured the reply_map */
- kfree(aac->reply_map); kfree(aac->fibs); kfree(aac->fsa_dev); out_free_host:
@@ -1918,6 +1942,7 @@ static void aac_remove_one(struct pci_dev *pdev) aac_adapter_ioremap(aac, 0);
- kfree(aac->reply_map); kfree(aac->fibs); kfree(aac->fsa_dev);
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 11ef58204e96..46c0f4df995d 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -493,6 +493,8 @@ static int aac_src_deliver_message(struct fib *fib) #endif u16 vector_no;
- struct scsi_cmnd *scmd;
- u32 blk_tag;
atomic_inc(&q->numpending); @@ -505,8 +507,15 @@ static int aac_src_deliver_message(struct fib *fib) if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3) && dev->sa_firmware) vector_no = aac_get_vector(dev);
else
vector_no = fib->vector_no;
else {
if (!fib->vector_no || !fib->callback_data) {
vector_no = dev->reply_map[raw_smp_processor_id()];
} else {
scmd = (struct scsi_cmnd *)fib->callback_data;
blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
vector_no = blk_mq_unique_tag_to_hwq(blk_tag);
}
}
if (native_hba) { if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {
linux-stable-mirror@lists.linaro.org