Backport commit 38adf94e166e3cb4eb89683458ca578051e8218d and its dependencies to linux-stable 5.4.y.
Dependent commits: 314d48dd224897e35ddcaf5a1d7d133b5adddeb7 e08f2ae850929d40e66268ee47e443e7ea56eeb7
When running test cases to stress an NVMe device, race conditions / deadlocks are seen every couple of days or so, where multiple threads are trying to acquire ctrl->subsystem->lock or ctrl->scan_lock.
The test cases send a lot of nvme-cli requests to do Sanitize, Format, FW Download, FW Activate, Flush, Get Log, Identify, and reset requests to two controllers that share a namespace. Some of those commands target a namespace, some target a controller. The commands are sent in random order and random mix to the two controllers.
The test cases do not wait for nvme-cli requests to finish before sending more. So, for example, there could be multiple reset requests and multiple format requests outstanding at the same time as a sanitize, on both paths at the same time, etc. Many of these test cases include combos that don't really make sense in the context of NVMe; however, they are used to create as much stress as possible.
This patchset fixes this issue.
Similar issue with a detailed call trace/log was discussed in the LKML Link: https://lore.kernel.org/linux-nvme/04580CD6-7652-459D-ABDD-732947B4A6DF@javi...
Revanth Rajashekar (3): nvme: Cleanup and rename nvme_block_nr() nvme: Introduce nvme_lba_to_sect() nvme: consolidate chunk_sectors settings
drivers/nvme/host/core.c | 40 +++++++++++++++++++--------------------- drivers/nvme/host/nvme.h | 16 +++++++++++++--- 2 files changed, 32 insertions(+), 24 deletions(-)
-- 2.17.1
From: Damien Le Moal damien.lemoal@wdc.com
commit 314d48dd224897e35ddcaf5a1d7d133b5adddeb7
Rename nvme_block_nr() to nvme_sect_to_lba() and use SECTOR_SHIFT instead of its hard coded value 9. Also add a comment to describe this helper.
Signed-off-by: Damien Le Moal damien.lemoal@wdc.com Signed-off-by: Revanth Rajashekar revanth.rajashekar@intel.com --- drivers/nvme/host/core.c | 6 +++--- drivers/nvme/host/nvme.h | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 071b63146..f7d32eeee 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -633,7 +633,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, }
__rq_for_each_bio(bio, req) { - u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); + u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
if (n < segments) { @@ -674,7 +674,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes; cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id); cmnd->write_zeroes.slba = - cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); cmnd->write_zeroes.control = 0; @@ -698,7 +698,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); - cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ed0226086..f93ba2da1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -421,9 +421,12 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); }
-static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) +/* + * Convert a 512B sector number to a device logical block number. + */ +static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) { - return (sector >> (ns->lba_shift - 9)); + return sector >> (ns->lba_shift - SECTOR_SHIFT); }
static inline void nvme_end_request(struct request *req, __le16 status, -- 2.17.1
From: Damien Le Moal damien.lemoal@wdc.com
commit e08f2ae850929d40e66268ee47e443e7ea56eeb7
Introduce the new helper function nvme_lba_to_sect() to convert a device logical block number to a 512B sector number. Use this new helper in obvious places, cleaning up the code.
Signed-off-by: Damien Le Moal damien.lemoal@wdc.com Signed-off-by: Revanth Rajashekar revanth.rajashekar@intel.com --- drivers/nvme/host/core.c | 14 +++++++------- drivers/nvme/host/nvme.h | 8 ++++++++ 2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f7d32eeee..9ac27c9a1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1694,7 +1694,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_set_chunk_size(struct nvme_ns *ns) { - u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9)); + u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); }
@@ -1731,8 +1731,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) { - u32 max_sectors; - unsigned short bs = 1 << ns->lba_shift; + u64 max_blocks;
if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) @@ -1748,11 +1747,12 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) * nvme_init_identify() if available. */ if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9; + max_blocks = (u64)USHRT_MAX + 1; else - max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9; + max_blocks = ns->ctrl->max_hw_sectors + 1;
- blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); + blk_queue_max_write_zeroes_sectors(disk->queue, + nvme_lba_to_sect(ns, max_blocks)); }
static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, @@ -1786,7 +1786,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { - sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9); + sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); unsigned short bs = 1 << ns->lba_shift; u32 atomic_bs, phys_bs, io_opt;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f93ba2da1..73065952f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -429,6 +429,14 @@ static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) return sector >> (ns->lba_shift - SECTOR_SHIFT); }
+/* + * Convert a device logical block number to a 512B sector number. + */ +static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) +{ + return lba << (ns->lba_shift - SECTOR_SHIFT); +} + static inline void nvme_end_request(struct request *req, __le16 status, union nvme_result result) { -- 2.17.1
From: Keith Busch kbusch@kernel.org
commit 38adf94e166e3cb4eb89683458ca578051e8218d
Move the quirked chunk_sectors setting to the same location as noiob so one place registers this setting. And since the noiob value is only used locally, remove the member from struct nvme_ns.
Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Revanth Rajashekar revanth.rajashekar@intel.com --- drivers/nvme/host/core.c | 22 ++++++++++------------ drivers/nvme/host/nvme.h | 1 - 2 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9ac27c9a1..f8068d150 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1692,12 +1692,6 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) } #endif /* CONFIG_BLK_DEV_INTEGRITY */
-static void nvme_set_chunk_size(struct nvme_ns *ns) -{ - u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); - blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); -} - static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1852,6 +1846,7 @@ static void nvme_update_disk_info(struct gendisk *disk, static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + u32 iob;
/* * If identify namespace failed, use default 512 byte block size so @@ -1860,7 +1855,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; - ns->noiob = le16_to_cpu(id->noiob); + + if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ns->ctrl->max_hw_sectors)) + iob = ns->ctrl->max_hw_sectors; + else + iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* the PI implementation requires metadata equal t10 pi tuple size */ @@ -1869,8 +1870,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else ns->pi_type = 0;
- if (ns->noiob) - nvme_set_chunk_size(ns); + if (iob) + blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { @@ -2221,9 +2222,6 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ctrl->max_hw_sectors)) - blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 73065952f..a177b918f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -376,7 +376,6 @@ struct nvme_ns { #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 #define NVME_NS_ANA_PENDING 2 - u16 noiob;
struct nvme_fault_inject fault_inject;
-- 2.17.1
On Tue, Sep 22, 2020 at 08:58:05PM -0600, Revanth Rajashekar wrote:
Backport commit 38adf94e166e3cb4eb89683458ca578051e8218d and its dependencies to linux-stable 5.4.y.
Dependent commits: 314d48dd224897e35ddcaf5a1d7d133b5adddeb7 e08f2ae850929d40e66268ee47e443e7ea56eeb7
When running test cases to stress an NVMe device, race conditions / deadlocks are seen every couple of days or so, where multiple threads are trying to acquire ctrl->subsystem->lock or ctrl->scan_lock.
The test cases send a lot of nvme-cli requests to do Sanitize, Format, FW Download, FW Activate, Flush, Get Log, Identify, and reset requests to two controllers that share a namespace. Some of those commands target a namespace, some target a controller. The commands are sent in random order and random mix to the two controllers.
The test cases do not wait for nvme-cli requests to finish before sending more. So, for example, there could be multiple reset requests and multiple format requests outstanding at the same time as a sanitize, on both paths at the same time, etc. Many of these test cases include combos that don't really make sense in the context of NVMe; however, they are used to create as much stress as possible.
This patchset fixes this issue.
Similar issue with a detailed call trace/log was discussed in the LKML Link: https://lore.kernel.org/linux-nvme/04580CD6-7652-459D-ABDD-732947B4A6DF@javi...
Revanth Rajashekar (3): nvme: Cleanup and rename nvme_block_nr() nvme: Introduce nvme_lba_to_sect() nvme: consolidate chunk_sectors settings
drivers/nvme/host/core.c | 40 +++++++++++++++++++--------------------- drivers/nvme/host/nvme.h | 16 +++++++++++++--- 2 files changed, 32 insertions(+), 24 deletions(-)
For some reason you forgot to credit, and cc: all of the people on the original patches, which isn't very nice, don't you think?
Next time please be more careful.
greg k-h
On 10/5/2020 7:23 AM, Greg KH wrote:
On Tue, Sep 22, 2020 at 08:58:05PM -0600, Revanth Rajashekar wrote:
Backport commit 38adf94e166e3cb4eb89683458ca578051e8218d and it's dependencies to linux-stable 5.4.y.
Dependent commits: 314d48dd224897e35ddcaf5a1d7d133b5adddeb7 e08f2ae850929d40e66268ee47e443e7ea56eeb7
When running test cases to stress NVMe device, a race condition / deadlocks is seen every couple of days or so where multiple threads are trying to acquire ctrl->subsystem->lock or ctrl->scan_lock.
The test cases send a lot nvme-cli requests to do Sanitize, Format, FW Download, FW Activate, Flush, Get Log, Identify, and reset requests to two controllers that share a namespace. Some of those commands target a namespace, some target a controller. The commands are sent in random order and random mix to the two controllers.
The test cases does not wait for nvme-cli requests to finish before sending more. So for example, there could be multiple reset requests, multiple format requests, outstanding at the same time as a sanitize, on both paths at the same time, etc. Many of these test cases include combos that don't really make sense in the context of NVMe, however it is used to create as much stress as possible.
This patchset fixes this issue.
Similar issue with a detailed call trace/log was discussed in the LKML Link: https://lore.kernel.org/linux-nvme/04580CD6-7652-459D-ABDD-732947B4A6DF@javi...
Revanth Rajashekar (3): nvme: Cleanup and rename nvme_block_nr() nvme: Introduce nvme_lba_to_sect() nvme: consolidate chunk_sectors settings
drivers/nvme/host/core.c | 40 +++++++++++++++++++--------------------- drivers/nvme/host/nvme.h | 16 +++++++++++++--- 2 files changed, 32 insertions(+), 24 deletions(-)
For some reason you forgot to credit, and cc: all of the people on the original patches, which isn't very nice, don't you think?
Next time please be more careful.
greg k-h
I'm really sorry for leaving a few people who are on the original patches off the cc list. I thought only the signed-off names go in there. I will make sure this won't happen again — thanks for pointing it out :)
Thanks! Revanth
linux-stable-mirror@lists.linaro.org