To perform DMAs, the QAT device requires that the scatterlist structures be mapped and translated into a format that the firmware can understand. This format consists of a scatter-gather list (SGL) descriptor header, the struct qat_alg_buf_list, followed by a variable number of flat buffer descriptors, the struct qat_alg_buf.
These data structures are allocated and mapped every time a request is received from the skcipher and aead APIs. In an OOM situation, this behaviour might lead to a deadlock if an allocation fails.
Based on the conversation in [1], increase the size of the aead and skcipher request contexts to include an SGL descriptor that can handle a maximum of 4 flat buffers. If a request exceeds 4 flat buffers, memory is allocated dynamically.
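To illustrate the idea, the source-buffer path in qat_alg_sgl_to_bufl() ends up choosing between the pre-allocated descriptor and a dynamic allocation roughly as in the excerpt below (simplified from the diff that follows; error paths and the destination-buffer handling are omitted):

	if (n > QAT_MAX_BUFF_DESC) {
		/* Too many entries: fall back to a dynamic allocation */
		bufl = kzalloc_node(sz, GFP_ATOMIC, node);
		if (unlikely(!bufl))
			return -ENOMEM;
	} else {
		/* Use the SGL descriptor embedded in the request context */
		bufl = &qat_req->buf.sgl_src.sgl_hdr;
		memset(bufl, 0, sizeof(struct qat_alg_buf_list));
		qat_req->buf.sgl_src_valid = true;
	}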
In addition, remove the CRYPTO_ALG_ALLOCATES_MEMORY flag from both aead and skcipher alg structures.
[1] https://lore.kernel.org/linux-crypto/20200722072932.GA27544@gondor.apana.org...
Cc: stable@vger.kernel.org
Fixes: d370cec32194 ("crypto: qat - Intel(R) QAT crypto interface")
Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Marco Chiappero <marco.chiappero@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
---
 drivers/crypto/qat/qat_common/qat_algs.c   | 77 ++++++++++++----------
 drivers/crypto/qat/qat_common/qat_crypto.h | 24 +++++++
 2 files changed, 67 insertions(+), 34 deletions(-)
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index f998ed58457c..b9228f3a26de 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -46,19 +46,6 @@ static DEFINE_MUTEX(algs_lock);
 static unsigned int active_devs;
 
-struct qat_alg_buf {
-	u32 len;
-	u32 resrvd;
-	u64 addr;
-} __packed;
-
-struct qat_alg_buf_list {
-	u64 resrvd;
-	u32 num_bufs;
-	u32 num_mapped_bufs;
-	struct qat_alg_buf bufers[];
-} __packed __aligned(64);
-
 /* Common content descriptor */
 struct qat_alg_cd {
 	union {
@@ -693,7 +680,10 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
 				 bl->bufers[i].len, DMA_BIDIRECTIONAL);
 
 	dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
-	kfree(bl);
+
+	if (!qat_req->buf.sgl_src_valid)
+		kfree(bl);
+
 	if (blp != blpout) {
 		/* If out of place operation dma unmap only data */
 		int bufless = blout->num_bufs - blout->num_mapped_bufs;
@@ -704,7 +694,9 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
 					 DMA_BIDIRECTIONAL);
 		}
 		dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE);
-		kfree(blout);
+
+		if (!qat_req->buf.sgl_dst_valid)
+			kfree(blout);
 	}
 }
 
@@ -721,15 +713,24 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
 	dma_addr_t blp = DMA_MAPPING_ERROR;
 	dma_addr_t bloutp = DMA_MAPPING_ERROR;
 	struct scatterlist *sg;
-	size_t sz_out, sz = struct_size(bufl, bufers, n + 1);
+	size_t sz_out, sz = struct_size(bufl, bufers, n);
+	int node = dev_to_node(&GET_DEV(inst->accel_dev));
 
 	if (unlikely(!n))
 		return -EINVAL;
 
-	bufl = kzalloc_node(sz, GFP_ATOMIC,
-			    dev_to_node(&GET_DEV(inst->accel_dev)));
-	if (unlikely(!bufl))
-		return -ENOMEM;
+	qat_req->buf.sgl_src_valid = false;
+	qat_req->buf.sgl_dst_valid = false;
+
+	if (n > QAT_MAX_BUFF_DESC) {
+		bufl = kzalloc_node(sz, GFP_ATOMIC, node);
+		if (unlikely(!bufl))
+			return -ENOMEM;
+	} else {
+		bufl = &qat_req->buf.sgl_src.sgl_hdr;
+		memset(bufl, 0, sizeof(struct qat_alg_buf_list));
+		qat_req->buf.sgl_src_valid = true;
+	}
 
 	for_each_sg(sgl, sg, n, i)
 		bufl->bufers[i].addr = DMA_MAPPING_ERROR;
@@ -760,12 +761,18 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
 		struct qat_alg_buf *bufers;
 
 		n = sg_nents(sglout);
-		sz_out = struct_size(buflout, bufers, n + 1);
+		sz_out = struct_size(buflout, bufers, n);
 		sg_nctr = 0;
-		buflout = kzalloc_node(sz_out, GFP_ATOMIC,
-				       dev_to_node(&GET_DEV(inst->accel_dev)));
-		if (unlikely(!buflout))
-			goto err_in;
+
+		if (n > QAT_MAX_BUFF_DESC) {
+			buflout = kzalloc_node(sz_out, GFP_ATOMIC, node);
+			if (unlikely(!buflout))
+				goto err_in;
+		} else {
+			buflout = &qat_req->buf.sgl_dst.sgl_hdr;
+			memset(buflout, 0, sizeof(struct qat_alg_buf_list));
+			qat_req->buf.sgl_dst_valid = true;
+		}
 
 		bufers = buflout->bufers;
 		for_each_sg(sglout, sg, n, i)
@@ -810,7 +817,9 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
 			dma_unmap_single(dev, buflout->bufers[i].addr,
 					 buflout->bufers[i].len,
 					 DMA_BIDIRECTIONAL);
-	kfree(buflout);
+
+	if (!qat_req->buf.sgl_dst_valid)
+		kfree(buflout);
 
 err_in:
 	if (!dma_mapping_error(dev, blp))
@@ -823,7 +832,8 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
 					 bufl->bufers[i].len,
 					 DMA_BIDIRECTIONAL);
 
-	kfree(bufl);
+	if (!qat_req->buf.sgl_src_valid)
+		kfree(bufl);
 
 	dev_err(dev, "Failed to map buf for dma\n");
 	return -ENOMEM;
@@ -1434,7 +1444,7 @@ static struct aead_alg qat_aeads[] = { {
 		.cra_name = "authenc(hmac(sha1),cbc(aes))",
 		.cra_driver_name = "qat_aes_cbc_hmac_sha1",
 		.cra_priority = 4001,
-		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
+		.cra_flags = CRYPTO_ALG_ASYNC,
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_ctxsize = sizeof(struct qat_alg_aead_ctx),
 		.cra_module = THIS_MODULE,
@@ -1451,7 +1461,7 @@ static struct aead_alg qat_aeads[] = { {
 		.cra_name = "authenc(hmac(sha256),cbc(aes))",
 		.cra_driver_name = "qat_aes_cbc_hmac_sha256",
 		.cra_priority = 4001,
-		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
+		.cra_flags = CRYPTO_ALG_ASYNC,
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_ctxsize = sizeof(struct qat_alg_aead_ctx),
 		.cra_module = THIS_MODULE,
@@ -1468,7 +1478,7 @@ static struct aead_alg qat_aeads[] = { {
 		.cra_name = "authenc(hmac(sha512),cbc(aes))",
 		.cra_driver_name = "qat_aes_cbc_hmac_sha512",
 		.cra_priority = 4001,
-		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
+		.cra_flags = CRYPTO_ALG_ASYNC,
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_ctxsize = sizeof(struct qat_alg_aead_ctx),
 		.cra_module = THIS_MODULE,
@@ -1486,7 +1496,7 @@ static struct skcipher_alg qat_skciphers[] = { {
 	.base.cra_name = "cbc(aes)",
 	.base.cra_driver_name = "qat_aes_cbc",
 	.base.cra_priority = 4001,
-	.base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
+	.base.cra_flags = CRYPTO_ALG_ASYNC,
 	.base.cra_blocksize = AES_BLOCK_SIZE,
 	.base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
 	.base.cra_alignmask = 0,
@@ -1504,7 +1514,7 @@ static struct skcipher_alg qat_skciphers[] = { {
 	.base.cra_name = "ctr(aes)",
 	.base.cra_driver_name = "qat_aes_ctr",
 	.base.cra_priority = 4001,
-	.base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
+	.base.cra_flags = CRYPTO_ALG_ASYNC,
 	.base.cra_blocksize = 1,
 	.base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
 	.base.cra_alignmask = 0,
@@ -1522,8 +1532,7 @@ static struct skcipher_alg qat_skciphers[] = { {
 	.base.cra_name = "xts(aes)",
 	.base.cra_driver_name = "qat_aes_xts",
 	.base.cra_priority = 4001,
-	.base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK |
-			  CRYPTO_ALG_ALLOCATES_MEMORY,
+	.base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 	.base.cra_blocksize = AES_BLOCK_SIZE,
 	.base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
 	.base.cra_alignmask = 0,
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h
index b6a4c95ae003..0928f159ea99 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.h
+++ b/drivers/crypto/qat/qat_common/qat_crypto.h
@@ -21,6 +21,26 @@ struct qat_crypto_instance {
 	atomic_t refctr;
 };
 
+#define QAT_MAX_BUFF_DESC	4
+
+struct qat_alg_buf {
+	u32 len;
+	u32 resrvd;
+	u64 addr;
+} __packed;
+
+struct qat_alg_buf_list {
+	u64 resrvd;
+	u32 num_bufs;
+	u32 num_mapped_bufs;
+	struct qat_alg_buf bufers[];
+} __packed;
+
+struct qat_alg_fixed_buf_list {
+	struct qat_alg_buf_list sgl_hdr;
+	struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC];
+} __packed __aligned(64);
+
 struct qat_crypto_request_buffs {
 	struct qat_alg_buf_list *bl;
 	dma_addr_t blp;
@@ -28,6 +48,10 @@ struct qat_crypto_request_buffs {
 	dma_addr_t bloutp;
 	size_t sz;
 	size_t sz_out;
+	bool sgl_src_valid;
+	bool sgl_dst_valid;
+	struct qat_alg_fixed_buf_list sgl_src;
+	struct qat_alg_fixed_buf_list sgl_dst;
 };
 
 struct qat_crypto_request;
On Sun, Apr 10, 2022 at 08:47:05PM +0100, Giovanni Cabiddu wrote:
> If a request exceeds 4 flat buffers, memory is allocated dynamically.
> In addition, remove the CRYPTO_ALG_ALLOCATES_MEMORY flag from both aead and skcipher alg structures.
There is nothing that says that algorithms can ignore !CRYPTO_ALG_ALLOCATES_MEMORY if there are too many scatterlist entries. See the comment above the definition of CRYPTO_ALG_ALLOCATES_MEMORY.
If you need to introduce this constraint, then you will need to audit the users of !CRYPTO_ALG_ALLOCATES_MEMORY to verify that none of them are issuing requests that violate this constraint, then add this to the documentation comment for CRYPTO_ALG_ALLOCATES_MEMORY.
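For context, users that rely on this guarantee request a non-allocating implementation by passing the flag in the allocation mask, roughly as in the sketch below (illustrative only; this is the pattern dm-crypt uses, not code from this patch):

	struct crypto_skcipher *tfm;

	/* Ask the crypto API for a cbc(aes) implementation that does not
	 * allocate memory while processing requests. */
	tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ALLOCATES_MEMORY);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);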
- Eric
Belated response on this - I was out last week.
On Mon, Apr 11, 2022 at 05:37:24PM +0000, Eric Biggers wrote:
> On Sun, Apr 10, 2022 at 08:47:05PM +0100, Giovanni Cabiddu wrote:
> > If a request exceeds 4 flat buffers, memory is allocated dynamically.
> > In addition, remove the CRYPTO_ALG_ALLOCATES_MEMORY flag from both aead and skcipher alg structures.
> There is nothing that says that algorithms can ignore !CRYPTO_ALG_ALLOCATES_MEMORY if there are too many scatterlist entries. See the comment above the definition of CRYPTO_ALG_ALLOCATES_MEMORY.
From the conversation in [1], I assumed that a cap on the number of pre-allocated entries in the scatterlists had already been agreed.
> If you need to introduce this constraint, then you will need to audit the users of !CRYPTO_ALG_ALLOCATES_MEMORY to verify that none of them are issuing requests that violate this constraint, then add this to the documentation comment for CRYPTO_ALG_ALLOCATES_MEMORY.
Makes sense. I see that the only users of !CRYPTO_ALG_ALLOCATES_MEMORY are dm-crypt and dm-integrity, but I haven't yet audited them to check whether they use more than 4 entries.
Regards,
[1] https://lore.kernel.org/linux-crypto/20200722072932.GA27544@gondor.apana.org...
On Mon, Apr 11, 2022 at 05:37:24PM +0000, Eric Biggers wrote:
> On Sun, Apr 10, 2022 at 08:47:05PM +0100, Giovanni Cabiddu wrote:
> > If a request exceeds 4 flat buffers, memory is allocated dynamically.
> > In addition, remove the CRYPTO_ALG_ALLOCATES_MEMORY flag from both aead and skcipher alg structures.
> There is nothing that says that algorithms can ignore !CRYPTO_ALG_ALLOCATES_MEMORY if there are too many scatterlist entries. See the comment above the definition of CRYPTO_ALG_ALLOCATES_MEMORY.
> If you need to introduce this constraint, then you will need to audit the users of !CRYPTO_ALG_ALLOCATES_MEMORY to verify that none of them are issuing requests that violate this constraint, then add this to the documentation comment for CRYPTO_ALG_ALLOCATES_MEMORY.
Belatedly...
Adding to this thread my colleague Lucas, who did an audit of the users of !CRYPTO_ALG_ALLOCATES_MEMORY to understand whether we can add a constraint to the definition of CRYPTO_ALG_ALLOCATES_MEMORY.
Regards,
On Mon, Jul 11, 2022 at 03:21:39PM +0100, Giovanni Cabiddu wrote:
> On Mon, Apr 11, 2022 at 05:37:24PM +0000, Eric Biggers wrote:
> > On Sun, Apr 10, 2022 at 08:47:05PM +0100, Giovanni Cabiddu wrote:
> > > If a request exceeds 4 flat buffers, memory is allocated dynamically.
> > > In addition, remove the CRYPTO_ALG_ALLOCATES_MEMORY flag from both aead and skcipher alg structures.
> > There is nothing that says that algorithms can ignore !CRYPTO_ALG_ALLOCATES_MEMORY if there are too many scatterlist entries. See the comment above the definition of CRYPTO_ALG_ALLOCATES_MEMORY.
> > If you need to introduce this constraint, then you will need to audit the users of !CRYPTO_ALG_ALLOCATES_MEMORY to verify that none of them are issuing requests that violate this constraint, then add this to the documentation comment for CRYPTO_ALG_ALLOCATES_MEMORY.
> Belatedly...
> Adding to this thread my colleague Lucas, who did an audit of the users of !CRYPTO_ALG_ALLOCATES_MEMORY to understand whether we can add a constraint to the definition of CRYPTO_ALG_ALLOCATES_MEMORY.
> Regards,
> --
> Giovanni
An audit was done on the users of !CRYPTO_ALG_ALLOCATES_MEMORY: dm-crypt and dm-integrity. dm-crypt uses scatterlists with at most 4 entries, but dm-integrity may allocate memory for scatterlists with an arch-dependent, system-bounded number of entries. Therefore the constraint in https://lore.kernel.org/linux-crypto/20200722072932.GA27544@gondor.apana.org... cannot be introduced.
A way to solve the problem might be to forward requests with more than 4 entries in a scatterlist to an implementation that does not allocate memory. However, this would always introduce a performance penalty for requests with scatterlists of more than 4 entries in algorithms backed by HW accelerators, even if the requestor does not require this restriction. A way to solve this might be to register two versions of the same algorithm, one without CRYPTO_ALG_ALLOCATES_MEMORY that forwards to SW and one with CRYPTO_ALG_ALLOCATES_MEMORY set that doesn't. Any suggestions?
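As a rough sketch of that last option (purely hypothetical; driver names, priorities and the split of the ops are made up for illustration), the driver would register the same cra_name twice with different driver names and flags:

	static struct skcipher_alg qat_aes_cbc_algs[] = { {
		/* HW-only instance: may allocate memory for large SGLs */
		.base.cra_name		= "cbc(aes)",
		.base.cra_driver_name	= "qat_aes_cbc",
		.base.cra_priority	= 4001,
		.base.cra_flags		= CRYPTO_ALG_ASYNC |
					  CRYPTO_ALG_ALLOCATES_MEMORY,
		/* ... ops that always submit to the accelerator ... */
	}, {
		/* Non-allocating instance: requests with more than 4
		 * scatterlist entries are handed to a SW fallback */
		.base.cra_name		= "cbc(aes)",
		.base.cra_driver_name	= "qat_aes_cbc_nomem",
		.base.cra_priority	= 4000,
		.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
		/* ... ops that redirect large requests to the fallback ... */
	} };

	/* both would be registered with
	 * crypto_register_skciphers(qat_aes_cbc_algs, ARRAY_SIZE(qat_aes_cbc_algs)); */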
Adding Horia Geantă and dm-devel based on the previous thread.
Thanks.