I'm resending with a cover letter as I realize I didn't provide enough info.
This series fixes critical qp flush bugs in iw_cxgb4. Pleaes apply to 4.14-stable.
Thanks,
Steve
----
Steve Wise (5): iw_cxgb4: only call the cq comp_handler when the cq is armed iw_cxgb4: atomically flush the qp iw_cxgb4: only clear the ARMED bit if a notification is needed iw_cxgb4: reflect the original WR opcode in drain cqes iw_cxgb4: when flushing, complete all wrs in a chain
drivers/infiniband/hw/cxgb4/cq.c | 7 +- drivers/infiniband/hw/cxgb4/ev.c | 8 ++- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 - drivers/infiniband/hw/cxgb4/qp.c | 119 ++++++++++++++++++++++++++------- drivers/infiniband/hw/cxgb4/t4.h | 6 ++ 5 files changed, 107 insertions(+), 35 deletions(-)
commit cbb40fadd31c6bbc59104e58ac95c6ef492d038b upstream.
The ULPs completion handler should only be called if the CQ is armed for notification.
Signed-off-by: Steve Wise swise@opengridcomputing.com Signed-off-by: Doug Ledford dledford@redhat.com --- drivers/infiniband/hw/cxgb4/ev.c | 8 +++++--- drivers/infiniband/hw/cxgb4/qp.c | 20 ++++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 8f963df..9d25298 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -109,9 +109,11 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
- spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + if (t4_clear_cq_armed(&chp->cq)) { + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + } }
void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e694536..282046e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -817,10 +817,12 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) t4_swcq_produce(cq); spin_unlock_irqrestore(&schp->lock, flag);
- spin_lock_irqsave(&schp->comp_handler_lock, flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, - schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + if (t4_clear_cq_armed(&schp->cq)) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } }
static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) @@ -846,10 +848,12 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) t4_swcq_produce(cq); spin_unlock_irqrestore(&rchp->lock, flag);
- spin_lock_irqsave(&rchp->comp_handler_lock, flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, - rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + if (t4_clear_cq_armed(&rchp->cq)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } }
int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
commit bc52e9ca74b9a395897bb640c6671b2cbf716032 upstream.
__flush_qp() has a race condition where during the flush operation, the qp lock is released allowing another thread to possibly post a WR, which corrupts the queue state, possibly causing crashes. The lock was released to preserve the cq/qp locking hierarchy of cq first, then qp. However releasing the qp lock is not necessary; both RQ and SQ CQ locks can be acquired first, followed by the qp lock, and then the RQ and SQ flushing can be done w/o unlocking.
Signed-off-by: Steve Wise swise@opengridcomputing.com Signed-off-by: Doug Ledford dledford@redhat.com --- drivers/infiniband/hw/cxgb4/qp.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 282046e..c7402f4 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1271,31 +1271,34 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
- /* locking hierarchy: cq lock first, then qp lock. */ + /* locking hierarchy: cqs lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, flag); + if (schp != rchp) + spin_lock(&schp->lock); spin_lock(&qhp->lock);
if (qhp->wq.flushed) { spin_unlock(&qhp->lock); + if (schp != rchp) + spin_unlock(&schp->lock); spin_unlock_irqrestore(&rchp->lock, flag); return; } qhp->wq.flushed = 1; + t4_set_wq_in_error(&qhp->wq);
c4iw_flush_hw_cq(rchp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&rchp->lock, flag);
- /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&schp->lock, flag); - spin_lock(&qhp->lock); if (schp != rchp) c4iw_flush_hw_cq(schp); sq_flushed = c4iw_flush_sq(qhp); + spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&schp->lock, flag); + if (schp != rchp) + spin_unlock(&schp->lock); + spin_unlock_irqrestore(&rchp->lock, flag);
if (schp == rchp) { if (t4_clear_cq_armed(&rchp->cq) && @@ -1329,8 +1332,8 @@ static void flush_qp(struct c4iw_qp *qhp) rchp = to_c4iw_cq(qhp->ibqp.recv_cq); schp = to_c4iw_cq(qhp->ibqp.send_cq);
- t4_set_wq_in_error(&qhp->wq); if (qhp->ibqp.uobject) { + t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
commit 335ebf6fa35ca1c59b73f76fad19b249d3550e86 upstream.
In __flush_qp(), the CQ ARMED bit was being cleared regardless of whether any notification is actually needed. This resulted in the iser termination logic getting stuck in ib_drain_sq() because the CQ was not marked ARMED and thus the drain CQE notification wasn't triggered.
This new bug was exposed when this commit was merged:
commit cbb40fadd31c ("iw_cxgb4: only call the cq comp_handler when the cq is armed")
Signed-off-by: Steve Wise swise@opengridcomputing.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com --- drivers/infiniband/hw/cxgb4/qp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index c7402f4..74864c4 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1301,21 +1301,21 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, spin_unlock_irqrestore(&rchp->lock, flag);
if (schp == rchp) { - if (t4_clear_cq_armed(&rchp->cq) && - (rq_flushed || sq_flushed)) { + if ((rq_flushed || sq_flushed) && + t4_clear_cq_armed(&rchp->cq)) { spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); } } else { - if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) { + if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) { spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); } - if (t4_clear_cq_armed(&schp->cq) && sq_flushed) { + if (sq_flushed && t4_clear_cq_armed(&schp->cq)) { spin_lock_irqsave(&schp->comp_handler_lock, flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
commit 96a236ed286776554fbd227c6d2876fd3b5dc65d upstream.
The flush/drain logic was not retaining the original wr opcode in its completion. This can cause problems if the application uses the completion opcode to make decisions.
Use bit 10 of the CQE header word to indicate the CQE is a special drain completion, and save the original WR opcode in the cqe header opcode field.
Fixes: 4fe7c2962e11 ("iw_cxgb4: refactor sq/rq drain logic")
Cc: stable@vger.kernel.org Signed-off-by: Steve Wise swise@opengridcomputing.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com --- drivers/infiniband/hw/cxgb4/cq.c | 7 ++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 -- drivers/infiniband/hw/cxgb4/qp.c | 46 +++++++++++++++++++++++++++++++--- drivers/infiniband/hw/cxgb4/t4.h | 6 +++++ 4 files changed, 50 insertions(+), 11 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 514c100..73feeee 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -410,7 +410,7 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) { - if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) { + if (DRAIN_CQE(cqe)) { WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid); return 0; } @@ -509,7 +509,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, /* * Special cqe for drain WR completions... */ - if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { + if (DRAIN_CQE(hw_cqe)) { *cookie = CQE_DRAIN_COOKIE(hw_cqe); *cqe = *hw_cqe; goto skip_cqe; @@ -766,9 +766,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) c4iw_invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); break; - case C4IW_DRAIN_OPCODE: - wc->opcode = IB_WC_SEND; - break; default: pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", CQE_OPCODE(&cqe), CQE_QPID(&cqe)); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 819a306..20c4811 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -631,8 +631,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state) return IB_QPS_ERR; }
-#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN - static inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 74864c4..6e91c69 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -794,21 +794,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) return 0; }
-static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +static int ib_to_fw_opcode(int ib_opcode) +{ + int opcode; + + switch (ib_opcode) { + case IB_WR_SEND_WITH_INV: + opcode = FW_RI_SEND_WITH_INV; + break; + case IB_WR_SEND: + opcode = FW_RI_SEND; + break; + case IB_WR_RDMA_WRITE: + opcode = FW_RI_RDMA_WRITE; + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + opcode = FW_RI_READ_REQ; + break; + case IB_WR_REG_MR: + opcode = FW_RI_FAST_REGISTER; + break; + case IB_WR_LOCAL_INV: + opcode = FW_RI_LOCAL_INV; + break; + default: + opcode = -EINVAL; + } + return opcode; +} + +static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *schp; unsigned long flag; struct t4_cq *cq; + int opcode;
schp = to_c4iw_cq(qhp->ibqp.send_cq); cq = &schp->cq;
+ opcode = ib_to_fw_opcode(wr->opcode); + if (opcode < 0) + return opcode; + cqe.u.drain_cookie = wr->wr_id; cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_OPCODE_V(opcode) | CQE_TYPE_V(1) | CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | CQE_QPID_V(qhp->wq.sq.qid));
spin_lock_irqsave(&schp->lock, flag); @@ -823,6 +859,7 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) schp->ibcq.cq_context); spin_unlock_irqrestore(&schp->comp_handler_lock, flag); } + return 0; }
static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) @@ -837,9 +874,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
cqe.u.drain_cookie = wr->wr_id; cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_OPCODE_V(FW_RI_SEND) | CQE_TYPE_V(0) | CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | CQE_QPID_V(qhp->wq.sq.qid));
spin_lock_irqsave(&rchp->lock, flag); @@ -879,7 +917,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - complete_sq_drain_wr(qhp, wr); + err = complete_sq_drain_wr(qhp, wr); return err; } num_wrs = t4_sq_avail(&qhp->wq); diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index bcb80ca6..80b390e 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -197,6 +197,11 @@ struct t4_cqe { #define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M) #define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S)
+#define CQE_DRAIN_S 10 +#define CQE_DRAIN_M 0x1 +#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M) +#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S) + #define CQE_STATUS_S 5 #define CQE_STATUS_M 0x1F #define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M) @@ -213,6 +218,7 @@ struct t4_cqe { #define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S)
#define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header))) +#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header))) #define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header))) #define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header))) #define SQ_TYPE(x) (CQE_TYPE((x)))
commit d14587334580bc94d3ee11e8320e0c157f91ae8f upstream.
If a wr chain was posted and needed to be flushed, only the first wr in the chain was completed with FLUSHED status. The rest were never completed. This caused isert to hang on shutdown due to the missing completions which left iscsi IO commands referenced, stalling the shutdown.
Fixes: 4fe7c2962e11 ("iw_cxgb4: refactor sq/rq drain logic")
Cc: stable@vger.kernel.org Signed-off-by: Steve Wise swise@opengridcomputing.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com --- drivers/infiniband/hw/cxgb4/qp.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 6e91c69..f311ea7 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -862,6 +862,22 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) return 0; }
+static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int ret = 0; + + while (wr) { + ret = complete_sq_drain_wr(qhp, wr); + if (ret) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + return ret; +} + static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) { struct t4_cqe cqe = {}; @@ -894,6 +910,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } }
+static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +{ + while (wr) { + complete_rq_drain_wr(qhp, wr); + wr = wr->next; + } +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -917,7 +941,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - err = complete_sq_drain_wr(qhp, wr); + err = complete_sq_drain_wrs(qhp, wr, bad_wr); return err; } num_wrs = t4_sq_avail(&qhp->wq); @@ -1066,7 +1090,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - complete_rq_drain_wr(qhp, wr); + complete_rq_drain_wrs(qhp, wr); return err; } num_wrs = t4_rq_avail(&qhp->wq);
On Tue, Jan 09, 2018 at 09:53:14AM -0800, Steve Wise wrote:
I'm resending with a cover letter as I realize I didn't provide enough info.
This series fixes critical qp flush bugs in iw_cxgb4. Pleaes apply to 4.14-stable.
All now applied, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org