4.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit d7d94d48a272fd7583dc3c83acb8f5ed4ef456a4 ]
We can race with the snd timeout and the per-request timeout and end up requeuing the same request twice. We can't use the send_complete completion to tell if everything is ok because we hold the tx_lock during send, so the timeout stuff will block waiting to mark the socket dead, and we could be marked complete and still requeue. Instead add a flag to the socket so we know whether we've been requeued yet.
Signed-off-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin alexander.levin@microsoft.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- drivers/block/nbd.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-)
--- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -112,12 +112,15 @@ struct nbd_device { struct task_struct *task_setup; };
+#define NBD_CMD_REQUEUED 1 + struct nbd_cmd { struct nbd_device *nbd; int index; int cookie; struct completion send_complete; blk_status_t status; + unsigned long flags; };
#if IS_ENABLED(CONFIG_DEBUG_FS) @@ -146,6 +149,14 @@ static inline struct device *nbd_to_dev( return disk_to_dev(nbd->disk); }
+static void nbd_requeue_cmd(struct nbd_cmd *cmd) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + + if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags)) + blk_mq_requeue_request(req, true); +} + static const char *nbdcmd_to_ascii(int cmd) { switch (cmd) { @@ -328,7 +339,7 @@ static enum blk_eh_timer_return nbd_xmit nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); } - blk_mq_requeue_request(req, true); + nbd_requeue_cmd(cmd); nbd_config_put(nbd); return BLK_EH_NOT_HANDLED; } @@ -484,6 +495,7 @@ static int nbd_send_cmd(struct nbd_devic nsock->pending = req; nsock->sent = sent; } + set_bit(NBD_CMD_REQUEUED, &cmd->flags); return BLK_STS_RESOURCE; } dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -525,6 +537,7 @@ send_pages: */ nsock->pending = req; nsock->sent = sent; + set_bit(NBD_CMD_REQUEUED, &cmd->flags); return BLK_STS_RESOURCE; } dev_err(disk_to_dev(nbd->disk), @@ -793,7 +806,7 @@ again: */ blk_mq_start_request(req); if (unlikely(nsock->pending && nsock->pending != req)) { - blk_mq_requeue_request(req, true); + nbd_requeue_cmd(cmd); ret = 0; goto out; } @@ -806,7 +819,7 @@ again: dev_err_ratelimited(disk_to_dev(nbd->disk), "Request send failed, requeueing\n"); nbd_mark_nsock_dead(nbd, nsock, 1); - blk_mq_requeue_request(req, true); + nbd_requeue_cmd(cmd); ret = 0; } out: @@ -831,6 +844,7 @@ static blk_status_t nbd_queue_rq(struct * done sending everything over the wire. */ init_completion(&cmd->send_complete); + clear_bit(NBD_CMD_REQUEUED, &cmd->flags);
/* We can be called directly from the user space process, which means we * could possibly have signals pending so our sendmsg will fail. In @@ -1446,6 +1460,7 @@ static int nbd_init_request(struct blk_m { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->nbd = set->driver_data; + cmd->flags = 0; return 0; }