Hi,
We are running kernel 4.4.0-22 and the patch below does not seem to be present in the mpt3sas driver. Can you please confirm?
As a reminder the patch was related to a Security Erase ATA command that requires a very long timeout like 100 minutes or more and the drive retains a busy status. And the driver should not try to send other commands or reset the drive.
Thanks,
Igor Rybak
CTO
MediaClone Inc
6900 Canby Ave Ste 107
Reseda, CA 91335
USA
+1-818-654-6286
________________________________________
From: Sreekanth Reddy [sreekanth.reddy(a)broadcom.com]
Sent: Thursday, November 10, 2016 8:38 PM
To: Andrey Grodzovsky
Cc: PDL-MPT-FUSIONLINUX; Igor Rybak; Ezra Kohavi; linux-scsi(a)vger.kernel.org; Sathya Prakash; Chaitra P B; Suganath Prabu Subramani; Hannes Reinecke; stable(a)vger.kernel.org
Subject: Re: [PATCH] [SCSI] mpt3sas: Fix secure erase premature termination (v4)
On Thu, Nov 10, 2016 at 8:05 PM, Andrey Grodzovsky <andrey2805(a)gmail.com> wrote:
> Problem:
> This is a work around for a bug with LSI Fusion MPT SAS2 when
> pefroming secure erase. Due to the very long time the operation
> takes commands issued during the erase will time out and will trigger
> execution of abort hook. Even though the abort hook is called for
> the specific command which timed out this leads to entire device halt
> (scsi_state terminated) and premature termination of the secured erase.
>
> Fix:
> Set device state to busy while erase in progress to reject any incoming
> commands until the erase is done. The device is blocked any way during
> this time and cannot execute any other command.
> More data and logs can be found here -
> https://drive.google.com/file/d/0B9ocOHYHbbS1Q3VMdkkzeWFkTjg/view
>
> v2: Update according to example patch by Hannes Reinecke to apply
> the blocking logic to any ATA 12/16 command.
>
> v3: Use SCSI commands opcodes definitions instead of value and
> correct identation.
>
> v4: Fix checkpath errors and warning.
>
> Signed-off-by: Andrey Grodzovsky <andrey2805(a)gmail.com>
> Cc: <linux-scsi(a)vger.kernel.org>
> Cc: Sathya Prakash <sathya.prakash(a)broadcom.com>
> Cc: Chaitra P B <chaitra.basappa(a)broadcom.com>
> Cc: Suganath Prabu Subramani <suganath-prabu.subramani(a)broadcom.com>
> Cc: Sreekanth Reddy <Sreekanth.Reddy(a)broadcom.com>
> Cc: Hannes Reinecke <hare(a)suse.de>
> Cc: <stable(a)vger.kernel.org>
Acked-by: Sreekanth Reddy <Sreekanth.Reddy(a)broadcom.com>
> ---
> drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> index 5a97e32..c032319 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> @@ -3500,6 +3500,10 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
> SAM_STAT_CHECK_CONDITION;
> }
>
> +static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
> +{
> + return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
> +}
>
> /**
> * _scsih_qcmd - main scsi request entry point
> @@ -3528,6 +3532,14 @@ _scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
> scsi_print_command(scmd);
> #endif
>
> + /**
> + * Lock the device for any subsequent command until
> + * command is done.
> + */
> + if (ata_12_16_cmd(scmd))
> + scsi_internal_device_block(scmd->device);
> +
> +
> sas_device_priv_data = scmd->device->hostdata;
> if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
> scmd->result = DID_NO_CONNECT << 16;
> @@ -4062,6 +4074,10 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
> if (scmd == NULL)
> return 1;
>
> + if (ata_12_16_cmd(scmd))
> + scsi_internal_device_unblock(scmd->device, SDEV_RUNNING);
> +
> +
> mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
>
> if (mpi_reply == NULL) {
> --
> 2.1.4
>
From: Evan Wang <xswang(a)marvell.com>
There maybe some special operations needed when stop engine
for AHCI IP of different vendors, so it is necessary to override
ahci_stop_engine() when stop the engine like overriding start_engine().
Evan Wang (2):
libahci: Allow drivers to override stop_engine
ata: ahci: mvebu: override ahci_stop_engine for mvebu AHCI
drivers/ata/ahci.c | 6 ++---
drivers/ata/ahci.h | 7 ++++++
drivers/ata/ahci_mvebu.c | 56 +++++++++++++++++++++++++++++++++++++++++++++
drivers/ata/ahci_qoriq.c | 2 +-
drivers/ata/ahci_xgene.c | 4 ++--
drivers/ata/libahci.c | 20 +++++++++-------
drivers/ata/sata_highbank.c | 2 +-
7 files changed, 82 insertions(+), 15 deletions(-)
--
1.9.1
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 021ad274d7dc31611d4f47f7dd4ac7a224526f30 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui(a)microsoft.com>
Date: Thu, 15 Mar 2018 14:20:53 +0000
Subject: [PATCH] PCI: hv: Serialize the present and eject work items
When we hot-remove the device, we first receive a PCI_EJECT message and
then receive a PCI_BUS_RELATIONS message with bus_rel->device_count == 0.
The first message is offloaded to hv_eject_device_work(), and the second
is offloaded to pci_devices_present_work(). Both the paths can be running
list_del(&hpdev->list_entry), causing general protection fault, because
system_wq can run them concurrently.
The patch eliminates the race condition.
Since access to present/eject work items is serialized, we do not need the
hbus->enum_sem anymore, so remove it.
Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs")
Link: https://lkml.kernel.org/r/KL1P15301MB00064DA6B4D221123B5241CFBFD70@KL1P1530…
Tested-by: Adrian Suhov <v-adsuho(a)microsoft.com>
Tested-by: Chris Valean <v-chvale(a)microsoft.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
[lorenzo.pieralisi(a)arm.com: squashed semaphore removal patch]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Reviewed-by: Michael Kelley <mikelley(a)microsoft.com>
Acked-by: Haiyang Zhang <haiyangz(a)microsoft.com>
Cc: <stable(a)vger.kernel.org> # v4.6+
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Jack Morgenstein <jackm(a)mellanox.com>
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 2faf38eab785..b7fd5c157d73 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -447,7 +447,6 @@ struct hv_pcibus_device {
spinlock_t device_list_lock; /* Protect lists below */
void __iomem *cfg_addr;
- struct semaphore enum_sem;
struct list_head resources_for_children;
struct list_head children;
@@ -461,6 +460,8 @@ struct hv_pcibus_device {
struct retarget_msi_interrupt retarget_msi_interrupt_params;
spinlock_t retarget_msi_interrupt_lock;
+
+ struct workqueue_struct *wq;
};
/*
@@ -1590,12 +1591,8 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
* It must also treat the omission of a previously observed device as
* notification that the device no longer exists.
*
- * Note that this function is a work item, and it may not be
- * invoked in the order that it was queued. Back to back
- * updates of the list of present devices may involve queuing
- * multiple work items, and this one may run before ones that
- * were sent later. As such, this function only does something
- * if is the last one in the queue.
+ * Note that this function is serialized with hv_eject_device_work(),
+ * because both are pushed to the ordered workqueue hbus->wq.
*/
static void pci_devices_present_work(struct work_struct *work)
{
@@ -1616,11 +1613,6 @@ static void pci_devices_present_work(struct work_struct *work)
INIT_LIST_HEAD(&removed);
- if (down_interruptible(&hbus->enum_sem)) {
- put_hvpcibus(hbus);
- return;
- }
-
/* Pull this off the queue and process it if it was the last one. */
spin_lock_irqsave(&hbus->device_list_lock, flags);
while (!list_empty(&hbus->dr_list)) {
@@ -1637,7 +1629,6 @@ static void pci_devices_present_work(struct work_struct *work)
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
if (!dr) {
- up(&hbus->enum_sem);
put_hvpcibus(hbus);
return;
}
@@ -1724,7 +1715,6 @@ static void pci_devices_present_work(struct work_struct *work)
break;
}
- up(&hbus->enum_sem);
put_hvpcibus(hbus);
kfree(dr);
}
@@ -1770,7 +1760,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
get_hvpcibus(hbus);
- schedule_work(&dr_wrk->wrk);
+ queue_work(hbus->wq, &dr_wrk->wrk);
}
/**
@@ -1848,7 +1838,7 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
get_pcichild(hpdev, hv_pcidev_ref_pnp);
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
get_hvpcibus(hpdev->hbus);
- schedule_work(&hpdev->wrk);
+ queue_work(hpdev->hbus->wq, &hpdev->wrk);
}
/**
@@ -2461,13 +2451,18 @@ static int hv_pci_probe(struct hv_device *hdev,
spin_lock_init(&hbus->config_lock);
spin_lock_init(&hbus->device_list_lock);
spin_lock_init(&hbus->retarget_msi_interrupt_lock);
- sema_init(&hbus->enum_sem, 1);
init_completion(&hbus->remove_event);
+ hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
+ hbus->sysdata.domain);
+ if (!hbus->wq) {
+ ret = -ENOMEM;
+ goto free_bus;
+ }
ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
hv_pci_onchannelcallback, hbus);
if (ret)
- goto free_bus;
+ goto destroy_wq;
hv_set_drvdata(hdev, hbus);
@@ -2536,6 +2531,8 @@ static int hv_pci_probe(struct hv_device *hdev,
hv_free_config_window(hbus);
close:
vmbus_close(hdev->channel);
+destroy_wq:
+ destroy_workqueue(hbus->wq);
free_bus:
free_page((unsigned long)hbus);
return ret;
@@ -2615,6 +2612,7 @@ static int hv_pci_remove(struct hv_device *hdev)
irq_domain_free_fwnode(hbus->sysdata.fwnode);
put_hvpcibus(hbus);
wait_for_completion(&hbus->remove_event);
+ destroy_workqueue(hbus->wq);
free_page((unsigned long)hbus);
return 0;
}
The current rescind processing code will not correctly handle
the case where the host immediately rescinds a channel that has
been offerred. In this case, we could be blocked in the open call and
since the channel is rescinded, the host will not respond and we could
be blocked forever in the vmbus open call.i Fix this problem.
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
(cherry picked from commit 7fa32e5ec28b1609abc0b797b58267f725fc3964)
Signed-off-by: Mohammed Gamal <mgamal(a)redhat.com>
---
drivers/hv/channel.c | 10 ++++++++--
drivers/hv/channel_mgmt.c | 7 ++++---
include/linux/hyperv.h | 1 +
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 1c967f5..1d58986 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -659,22 +659,28 @@ void vmbus_close(struct vmbus_channel *channel)
*/
return;
}
- mutex_lock(&vmbus_connection.channel_mutex);
/*
* Close all the sub-channels first and then close the
* primary channel.
*/
list_for_each_safe(cur, tmp, &channel->sc_list) {
cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
- vmbus_close_internal(cur_channel);
if (cur_channel->rescind) {
+ wait_for_completion(&cur_channel->rescind_event);
+ mutex_lock(&vmbus_connection.channel_mutex);
+ vmbus_close_internal(cur_channel);
hv_process_channel_removal(
cur_channel->offermsg.child_relid);
+ } else {
+ mutex_lock(&vmbus_connection.channel_mutex);
+ vmbus_close_internal(cur_channel);
}
+ mutex_unlock(&vmbus_connection.channel_mutex);
}
/*
* Now close the primary.
*/
+ mutex_lock(&vmbus_connection.channel_mutex);
vmbus_close_internal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
}
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index ec5454f..c21020b 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -333,6 +333,7 @@ static struct vmbus_channel *alloc_channel(void)
return NULL;
spin_lock_init(&channel->lock);
+ init_completion(&channel->rescind_event);
INIT_LIST_HEAD(&channel->sc_list);
INIT_LIST_HEAD(&channel->percpu_list);
@@ -898,6 +899,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
/*
* Now wait for offer handling to complete.
*/
+ vmbus_rescind_cleanup(channel);
while (READ_ONCE(channel->probe_done) == false) {
/*
* We wait here until any channel offer is currently
@@ -913,7 +915,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
- vmbus_rescind_cleanup(channel);
return;
}
/*
@@ -922,7 +923,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
*/
dev = get_device(&channel->device_obj->device);
if (dev) {
- vmbus_rescind_cleanup(channel);
vmbus_device_unregister(channel->device_obj);
put_device(dev);
}
@@ -936,13 +936,14 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
* 2. Then close the primary channel.
*/
mutex_lock(&vmbus_connection.channel_mutex);
- vmbus_rescind_cleanup(channel);
if (channel->state == CHANNEL_OPEN_STATE) {
/*
* The channel is currently not open;
* it is safe for us to cleanup the channel.
*/
hv_process_channel_removal(rescind->child_relid);
+ } else {
+ complete(&channel->rescind_event);
}
mutex_unlock(&vmbus_connection.channel_mutex);
}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 1552a5f..465d5db 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -724,6 +724,7 @@ struct vmbus_channel {
u8 monitor_bit;
bool rescind; /* got rescind msg */
+ struct completion rescind_event;
u32 ringbuffer_gpadlhandle;
--
1.8.3.1