Remove nosva limitation to permit nosva run
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- wd.c | 4 ---- wd_alg.c | 44 +------------------------------------------- wd_util.c | 4 ---- 3 files changed, 1 insertion(+), 51 deletions(-)
diff --git a/wd.c b/wd.c index 75a9469..5fa8feb 100644 --- a/wd.c +++ b/wd.c @@ -235,10 +235,6 @@ static int get_dev_info(struct uacce_dev *dev) ret = get_int_attr(dev, "flags", &dev->flags); if (ret < 0) return ret; - else if (!((unsigned int)dev->flags & UACCE_DEV_SVA)) { - WD_ERR("skip none sva uacce device!\n"); - return -WD_ENODEV; - }
ret = get_int_attr(dev, "region_mmio_size", &value); if (ret < 0) diff --git a/wd_alg.c b/wd_alg.c index 08f0e2e..45619ba 100644 --- a/wd_alg.c +++ b/wd_alg.c @@ -23,47 +23,6 @@ static struct wd_alg_list alg_list_head; static struct wd_alg_list *alg_list_tail = &alg_list_head;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - -static bool wd_check_dev_sva(const char *dev_name) -{ - char dev_path[PATH_MAX] = {'\0'}; - char buf[DEV_SVA_SIZE] = {'\0'}; - unsigned int val; - ssize_t ret; - int fd; - - ret = snprintf(dev_path, PATH_STR_SIZE, "%s/%s/%s", SYS_CLASS_DIR, - dev_name, SVA_FILE_NAME); - if (ret < 0) { - WD_ERR("failed to snprintf, device name: %s!\n", dev_name); - return false; - } - - /** - * The opened file is the specified device driver file. - * no need for realpath processing. - */ - fd = open(dev_path, O_RDONLY, 0); - if (fd < 0) { - WD_ERR("failed to open %s(%d)!\n", dev_path, -errno); - return false; - } - - ret = read(fd, buf, DEV_SVA_SIZE - 1); - if (ret <= 0) { - WD_ERR("failed to read anything at %s!\n", dev_path); - close(fd); - return false; - } - close(fd); - - val = strtol(buf, NULL, STR_DECIMAL); - if (val & UACCE_DEV_SVA) - return true; - - return false; -} - static bool wd_check_accel_dev(const char *dev_name) { struct dirent *dev_dir; @@ -80,8 +39,7 @@ static bool wd_check_accel_dev(const char *dev_name) !strncmp(dev_dir->d_name, "..", LINUX_PRTDIR_SIZE)) continue;
- if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name)) && - wd_check_dev_sva(dev_dir->d_name)) { + if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name))) { closedir(wd_class); return true; } diff --git a/wd_util.c b/wd_util.c index f1b27bf..9675098 100644 --- a/wd_util.c +++ b/wd_util.c @@ -1883,10 +1883,6 @@ int wd_init_param_check(struct wd_ctx_config *config, struct wd_sched *sched) return -WD_EINVAL; }
- if (!wd_is_sva(config->ctxs[0].ctx)) { - WD_ERR("invalid: the mode is non sva, please check system!\n"); - return -WD_EINVAL; - }
return 0; }
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- include/uacce.h | 6 +++++- include/wd.h | 16 ++++++++++++++++ wd.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/include/uacce.h b/include/uacce.h index f7fae27..c7a5752 100644 --- a/include/uacce.h +++ b/include/uacce.h @@ -15,6 +15,7 @@ extern "C" {
#define UACCE_CMD_START _IO('W', 0) #define UACCE_CMD_PUT_Q _IO('W', 1) +#define UACCE_CMD_GET_SS_DMA _IOR('W', 3, unsigned long)
/** * UACCE Device flags: @@ -25,7 +26,9 @@ extern "C" { */
enum { - UACCE_DEV_SVA = 0x1, + UACCE_DEV_SVA = 0x1, + UACCE_DEV_NOIOMMU = 0x2, + UACCE_DEV_IOMMUU = 0x80, };
#define UACCE_API_VER_NOIOMMU_SUBFIX "_noiommu" @@ -33,6 +36,7 @@ enum { enum uacce_qfrt { UACCE_QFRT_MMIO = 0, /* device mmio region */ UACCE_QFRT_DUS = 1, /* device user share */ + UACCE_QFRT_SS, /* static share memory */ UACCE_QFRT_MAX, };
diff --git a/include/wd.h b/include/wd.h index b62d355..556a992 100644 --- a/include/wd.h +++ b/include/wd.h @@ -604,6 +604,22 @@ struct wd_capability { struct wd_capability *wd_get_alg_cap(void); void wd_release_alg_cap(struct wd_capability *head);
+/** + * wd_is_noiommu() - Check if the system is noiommu. + * @h_ctx: The handle of context. + * + * Return 1 if noiommu, 0 for has iommu, less than 0 otherwise. + */ +int wd_is_noiommu(handle_t h_ctx); + +/** + * wd_reserve_mem() - Reserve memory iva mmap. + * @h_ctx: The handle of context. + * @size: size of the reserved memory. + * + * Return NULL if fail, pointer of the memory if success. + */ +void *wd_reserve_mem(handle_t h_ctx, size_t size); #ifdef __cplusplus } #endif diff --git a/wd.c b/wd.c index 5fa8feb..5c930f4 100644 --- a/wd.c +++ b/wd.c @@ -578,6 +578,19 @@ int wd_ctx_wait(handle_t h_ctx, __u16 ms) return ret; }
+int wd_is_noiommu(handle_t h_ctx) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + + if (!ctx || !ctx->dev) + return -WD_EINVAL; + + if ((unsigned int)ctx->dev->flags & UACCE_DEV_NOIOMMU) + return 1; + + return 0; +} + int wd_is_sva(handle_t h_ctx) { struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; @@ -974,3 +987,20 @@ alloc_err: return NULL; }
+void *wd_reserve_mem(handle_t h_ctx, size_t size) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + void *ptr; + + if (!ctx) + return NULL; + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED, ctx->fd, UACCE_QFRT_SS * getpagesize()); + if (ptr == MAP_FAILED) { + WD_ERR("wd drv mmap fail!(err = %d)\n", errno); + return NULL; + } + + return ptr; +}
Add new APIs: wd_blkpool_new; wd_blkpool_delete; wd_blkpool_phy; wd_blkpool_alloc; wd_blkpool_free; wd_blkpool_setup; wd_blkpool_destroy_mem; wd_blkpool_create_sglpool; wd_blkpool_destroy_sglpool;
After setting up the blkpool, the app only uses two APIs: wd_blkpool_alloc; wd_blkpool_free;
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- Makefile.am | 4 +- include/wd_bmm.h | 60 ++++ libwd.map | 10 + wd_bmm.c | 727 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 800 insertions(+), 1 deletion(-) create mode 100644 include/wd_bmm.h create mode 100644 wd_bmm.c
diff --git a/Makefile.am b/Makefile.am index c4b9c52..16704d3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -36,7 +36,8 @@ pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \ include/wd_comp.h include/wd_dh.h include/wd_digest.h \ include/wd_rsa.h include/uacce.h include/wd_alg_common.h \ include/wd_ecc.h include/wd_sched.h include/wd_alg.h \ - include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h + include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h \ + include/wd_bmm.h
nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h @@ -60,6 +61,7 @@ libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ v1/wd_bmm.c v1/wd_bmm.h \ v1/wd_ecc.c v1/wd_ecc.h \ v1/wd_sgl.c v1/wd_sgl.h \ + wd_bmm.c \ aes.h sm4.h galois.h \ lib/crypto/aes.c lib/crypto/sm4.c lib/crypto/galois.c \ v1/drv/hisi_qm_udrv.c v1/drv/hisi_qm_udrv.h \ diff --git a/include/wd_bmm.h b/include/wd_bmm.h new file mode 100644 index 0000000..0730cf5 --- /dev/null +++ b/include/wd_bmm.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2025-2026 Huawei Technologies Co.,Ltd. All rights reserved. + * Copyright 2025-2026 Linaro ltd. + */ + +#ifndef _WD_BMM_H +#define _WD_BMM_H + +#include <asm/types.h> +#include "wd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DEFAULT_BLOCK_NM 16384 +#define DEFAULT_ALIGN_SIZE 0x40 +#define DEFAULT_BLOCK_SIZE (1024 * 8) + +/* memory APIs for Algorithm Layer */ +typedef void *(*wd_alloc)(void *usr, size_t size); +typedef void (*wd_free)(void *usr, void *va); + + /* memory VA to DMA address map */ +typedef void *(*wd_map)(void *usr, void *va, size_t sz); +typedef __u32 (*wd_bufsize)(void *usr); + +/* Memory from user, it is given at ctx creating. 
*/ +struct wd_mm_br { + wd_alloc alloc; /* Memory allocation */ + wd_free free; /* Memory free */ + wd_map iova_map; /* get iova from user space VA */ + void *usr; /* data for the above operations */ + wd_bufsize get_bufsize; /* optional */ +}; + +/* Memory pool creating parameters */ +struct wd_blkpool_setup { + __u32 block_size; /* Block buffer size */ + __u32 block_num; /* Block buffer number */ + __u32 align_size; /* Block buffer starting address align size */ + struct wd_mm_br br; /* memory from user if don't use WD memory */ +}; + + +void *wd_blkpool_new(handle_t h_ctx); +void wd_blkpool_delete(void *pool); +int wd_blkpool_setup(void *pool, struct wd_blkpool_setup *setup); +void wd_blkpool_destroy_mem(void *pool); +void *wd_blkpool_alloc(void *pool, size_t size); +void wd_blkpool_free(void *pool, void *va); +void *wd_blkpool_phy(void *pool, void *va); +handle_t wd_blkpool_create_sglpool(void *pool); +void wd_blkpool_destroy_sglpool(void *pool, handle_t sgl_pool); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libwd.map b/libwd.map index 5522ec0..e884671 100644 --- a/libwd.map +++ b/libwd.map @@ -49,5 +49,15 @@ global: wd_enable_drv; wd_disable_drv; wd_get_alg_head; + + wd_blkpool_new; + wd_blkpool_delete; + wd_blkpool_phy; + wd_blkpool_alloc; + wd_blkpool_free; + wd_blkpool_setup; + wd_blkpool_destroy_mem; + wd_blkpool_create_sglpool; + wd_blkpool_destroy_sglpool; local: *; }; diff --git a/wd_bmm.c b/wd_bmm.c new file mode 100644 index 0000000..d252c74 --- /dev/null +++ b/wd_bmm.c @@ -0,0 +1,727 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright 2025-2026 Huawei Technologies Co.,Ltd. All rights reserved. + * Copyright 2025-2026 Linaro ltd. 
+ */ + +/* Block Memory Management (lib): A block memory algorithm */ +#include <asm/byteorder.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/queue.h> +#include <sys/mman.h> +#include <pthread.h> + +#include "wd.h" +#include "wd_bmm.h" + +#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define ALIGN(x, a) __ALIGN_MASK((uintptr_t)(x), (uintptr_t)(a)-1) + +#define TAG_FREE 0x12345678 /* block is free */ +#define TAG_USED 0x87654321 /* block is busy */ +#define MAX_ALIGN_SIZE 0x1000 /* 4KB */ +#define MAX_BLOCK_SIZE 0x10000000 /* 256MB */ +#define BLK_BALANCE_SZ 0x100000ul +#define NUM_TIMES(x) (87 * (x) / 100) + +#define WD_UACCE_GRAN_SIZE 0x10000ull +#define WD_UACCE_GRAN_SHIFT 16 +#define WD_UACCE_GRAN_NUM_MASK 0xfffull + +#define DEFAULT_BLKSIZE_ALIGN 0x1000 + +/* the max sge num in one sgl */ +#define SGE_NUM_IN_SGL 255 + +/* the max sge num in on BD, QM user it be the sgl pool size */ +#define SGL_NUM_IN_BD 256 + +struct wd_blk_hd { + unsigned int blk_tag; + void *blk_dma; + void *blk; + + TAILQ_ENTRY(wd_blk_hd) next; +}; + +TAILQ_HEAD(wd_blk_list, wd_blk_hd); + +struct wd_ss_region { + void *va; + unsigned long long pa; + size_t size; + + TAILQ_ENTRY(wd_ss_region) next; +}; + +TAILQ_HEAD(wd_ss_region_list, wd_ss_region); + +struct wd_blkpool { + pthread_spinlock_t lock; + unsigned int free_blk_num; + unsigned int blk_num; + unsigned int alloc_failures; + struct wd_blk_list head; + void *act_start; + unsigned int hd_sz; + unsigned int blk_sz; + struct wd_blkpool_setup setup; + + handle_t ctx; + void *mem; + unsigned long size; + struct wd_ss_region_list ss_list; + struct wd_ss_region_list *ss_head; + struct hisi_sgl_pool *sgl_pool; + void *sgl_mem; + size_t sgl_size; +}; + +struct hisi_sge { + uintptr_t buff; + void *page_ctrl; + __le32 len; + __le32 pad; + __le32 pad0; + __le32 pad1; +}; + +/* use default hw sgl head size 64B, in little-endian */ +struct 
hisi_sgl { + /* the next sgl address */ + uintptr_t next_dma; + /* the sge num of all the sgl */ + __le16 entry_sum_in_chain; + /* valid sge(has buff) num in this sgl */ + __le16 entry_sum_in_sgl; + /* the sge num in this sgl */ + __le16 entry_length_in_sgl; + __le16 pad0; + __le64 pad1[5]; + /* valid sge buffs total size */ + __le64 entry_size_in_sgl; + struct hisi_sge sge_entries[]; +}; + +struct hisi_sgl_pool { + /* the addr64 align offset base sgl */ + void **sgl_align; + /* the sgl src address array */ + void **sgl; + /* the sgl pool stack depth */ + __u32 depth; + __u32 top; + __u32 sge_num; + __u32 sgl_num; + pthread_spinlock_t lock; + void **phys; +}; + +static struct wd_blk_hd *wd_blk_head(struct wd_blkpool *pool, void *blk) +{ + unsigned long offset = (unsigned long)((uintptr_t)blk - + (uintptr_t)pool->act_start); + unsigned long sz = pool->hd_sz + pool->blk_sz; + unsigned long blk_idx = offset / sz; + + return (struct wd_blk_hd *)((uintptr_t)pool->act_start + blk_idx * sz); +} + +static int pool_params_check(struct wd_blkpool_setup *setup) +{ + if (!setup->block_size || + setup->block_size > MAX_BLOCK_SIZE) { + WD_ERR("Invalid block_size (%x)!\n", + setup->block_size); + return -WD_EINVAL; + } + + /* check parameters, and align_size must be 2^N */ + if (setup->align_size == 0x1 || setup->align_size > MAX_ALIGN_SIZE || + setup->align_size & (setup->align_size - 0x1)) { + WD_ERR("Invalid align_size.\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static inline int calculate_sgl_size(void) +{ + int sgl_size = sizeof(struct hisi_sgl) + + SGE_NUM_IN_SGL * sizeof(struct hisi_sge); + + return ALIGN(sgl_size, DEFAULT_ALIGN_SIZE); +} + +static inline size_t calculate_extra_sgl_size(void) +{ + return SGL_NUM_IN_BD * calculate_sgl_size(); +} + +static int wd_pool_pre_layout(struct wd_blkpool *p, + struct wd_blkpool_setup *sp) +{ + size_t extra_sgl_size = calculate_extra_sgl_size(); + unsigned int asz; + int ret; + + ret = pool_params_check(sp); + if 
(ret) + return ret; + + asz = sp->align_size; + + /* Get actual value by align */ + p->hd_sz = ALIGN(sizeof(struct wd_blk_hd), asz); + p->blk_sz = ALIGN(sp->block_size, asz); + + if (p->size == 0 && !p->mem) { + p->size = (p->hd_sz + p->blk_sz) * + (unsigned long)sp->block_num + asz + + extra_sgl_size; + + /* Make sure memory map granularity size align */ + if (wd_is_noiommu(p->ctx)) + p->size = ALIGN(p->size, WD_UACCE_GRAN_SIZE); + } + + return WD_SUCCESS; +} + +static void *wd_get_phys(struct wd_blkpool *pool, void *va) +{ + struct wd_ss_region *rgn; + + TAILQ_FOREACH(rgn, pool->ss_head, next) { + if (rgn->va <= va && va < rgn->va + rgn->size) + return (void *)(uintptr_t)(rgn->pa + + ((uintptr_t)va - (uintptr_t)rgn->va)); + } + + return NULL; +} + +static int wd_pool_init(struct wd_blkpool *p) +{ + __u32 blk_size = p->setup.block_size; + size_t extra_sgl_size = calculate_extra_sgl_size(); + void *dma_start, *dma_end, *va; + struct wd_blk_hd *hd = NULL; + unsigned int dma_num = 0; + unsigned int i, act_num; + unsigned long loss; + + p->act_start = (void *)ALIGN((uintptr_t)p->mem, p->setup.align_size); + loss = p->act_start - p->mem; + + /* ignore sgl */ + act_num = (p->size - loss - extra_sgl_size) / (p->hd_sz + p->blk_sz); + + /* get dma address and initialize blocks */ + for (i = 0; i < act_num; i++) { + va = (void *)((uintptr_t)p->act_start + p->hd_sz + + (unsigned long)(p->hd_sz + + p->blk_sz) * i); + dma_start = wd_get_phys(p, va); + dma_end = wd_get_phys(p, va + blk_size - 1); + if (!dma_start || !dma_end) { + WD_ERR("wd_get_phys err.\n"); + return -WD_ENOMEM; + } + + if ((uintptr_t)dma_end - (uintptr_t)dma_start != blk_size - 1) + continue; + + hd = (void *)((uintptr_t)va - p->hd_sz); + hd->blk_dma = dma_start; + hd->blk = va; + hd->blk_tag = TAG_FREE; + TAILQ_INSERT_TAIL(&p->head, hd, next); + + dma_num++; + } + + p->free_blk_num = dma_num; + p->blk_num = dma_num; + + return WD_SUCCESS; +} + +static int usr_pool_init(struct wd_blkpool *p) +{ + struct 
wd_blkpool_setup *sp = &p->setup; + size_t extra_sgl_size = calculate_extra_sgl_size(); + __u32 blk_size = sp->block_size; + struct wd_blk_hd *hd = NULL; + unsigned long loss; + unsigned int i, act_num; + + p->act_start = (void *)ALIGN((uintptr_t)p->mem, sp->align_size); + loss = p->act_start - p->mem; + /* ignore sgl */ + act_num = (p->size - loss - extra_sgl_size) / (p->hd_sz + p->blk_sz); + + for (i = 0; i < act_num; i++) { + hd = (void *)((uintptr_t)p->act_start + (p->hd_sz + p->blk_sz) * i); + hd->blk = (void *)((uintptr_t)hd + p->hd_sz); + hd->blk_dma = sp->br.iova_map(sp->br.usr, hd->blk, blk_size); + if (!hd->blk_dma) { + WD_ERR("failed to map usr blk.\n"); + return -WD_ENOMEM; + } + hd->blk_tag = TAG_FREE; + TAILQ_INSERT_TAIL(&p->head, hd, next); + } + + p->free_blk_num = act_num; + p->blk_num = p->free_blk_num; + + return WD_SUCCESS; +} + +static void drv_free_slice(struct wd_blkpool *p) +{ + struct wd_ss_region *rgn; + + while (true) { + rgn = TAILQ_FIRST(&p->ss_list); + if (!rgn) + break; + TAILQ_REMOVE(&p->ss_list, rgn, next); + free(rgn); + } +} + +static void drv_add_slice(struct wd_blkpool *p, struct wd_ss_region *rgn) +{ + struct wd_ss_region *rg; + + rg = TAILQ_LAST(&p->ss_list, wd_ss_region_list); + if (rg) { + if (rg->pa + rg->size == rgn->pa) { + rg->size += rgn->size; + free(rgn); + return; + } + } + + TAILQ_INSERT_TAIL(&p->ss_list, rgn, next); +} + +static void *pool_reserve_mem(struct wd_blkpool *p, size_t size) +{ + struct wd_ss_region *rgn = NULL; + unsigned long info = 0; + size_t tmp = 0; + unsigned long i = 0; + void *ptr = NULL; + int ret = 1; + + if (!p->ctx) + return NULL; + + if (p->mem) + return NULL; + + ptr = wd_reserve_mem(p->ctx, size); + if (!ptr) + return NULL; + + p->ss_head = &p->ss_list; + TAILQ_INIT(&p->ss_list); + + while (ret > 0) { + info = i; + ret = wd_ctx_set_io_cmd(p->ctx, UACCE_CMD_GET_SS_DMA, &info); + if (ret < 0) { + WD_ERR("get DMA fail!\n"); + goto err_out; + } + rgn = malloc(sizeof(*rgn)); + if (!rgn) { + 
WD_ERR("alloc ss region fail!\n"); + goto err_out; + } + memset(rgn, 0, sizeof(*rgn)); + + if (wd_is_noiommu(p->ctx)) + rgn->size = (info & WD_UACCE_GRAN_NUM_MASK) << + WD_UACCE_GRAN_SHIFT; + else + rgn->size = p->size; + rgn->pa = info & (~WD_UACCE_GRAN_NUM_MASK); + rgn->va = ptr + tmp; + tmp += rgn->size; + drv_add_slice(p, rgn); + i++; + } + + return ptr; + +err_out: + drv_free_slice(p); + munmap(p->mem, size); + + return NULL; +} + +static int pool_init(struct wd_blkpool *pool, + struct wd_blkpool_setup *setup) +{ + void *addr = NULL; + + /* use user's memory, and its br alloc function */ + if (setup->br.alloc && setup->br.free) { + if (!pool->mem) { + addr = setup->br.alloc(setup->br.usr, pool->size); + if (!addr) { + WD_ERR("failed to allocate memory in user pool.\n"); + return -EINVAL; + } + pool->mem = addr; + } + if (usr_pool_init(pool)) { + WD_ERR("failed to initialize user pool.\n"); + setup->br.free(setup->br.usr, addr); + return -EINVAL; + } + } else { + if (!pool->mem) { + /* use wd to reserve memory */ + addr = pool_reserve_mem(pool, pool->size); + if (!addr) { + WD_ERR("wd pool failed to reserve memory.\n"); + return -EINVAL; + } + pool->mem = addr; + } + + if (wd_pool_init(pool)) { + WD_ERR("failed to initialize wd pool.\n"); + wd_blkpool_destroy_mem(pool); + return -EINVAL; + } + } + + return 0; +} + +void *wd_blkpool_new(handle_t h_ctx) +{ + struct wd_blkpool *pool; + + if (wd_is_sva(h_ctx)) + return NULL; + + pool = calloc(1, sizeof(*pool)); + if (!pool) { + WD_ERR("failed to malloc pool.\n"); + return NULL; + } + pool->ctx = h_ctx; + + if (pthread_spin_init(&pool->lock, PTHREAD_PROCESS_SHARED) != 0) { + free(pool); + return NULL; + } + return pool; +} + +int wd_blkpool_setup(void *pool, struct wd_blkpool_setup *setup) +{ + struct wd_blkpool *p = pool; + int ret = 0; + + if (!p || !setup) + return -EINVAL; + + pthread_spin_lock(&p->lock); + if (p->mem && p->size != 0) { + if (p->setup.block_size == setup->block_size || + p->blk_sz == 
ALIGN(setup->block_size, setup->align_size)) + goto out; + + /* re-org blk_size, no need reserve mem */ + if (p->free_blk_num != p->blk_num) { + WD_ERR("Can not reset blk pool, as it's in use.\n"); + ret = -EINVAL; + goto out; + } + } + + memcpy(&p->setup, setup, sizeof(p->setup)); + + ret = wd_pool_pre_layout(p, setup); + if (ret) + goto out; + + TAILQ_INIT(&p->head); + + ret = pool_init(p, setup); + +out: + pthread_spin_unlock(&p->lock); + return ret; +} + +void *wd_blkpool_alloc(void *pool, size_t size) +{ + struct wd_blkpool *p = pool; + struct wd_blk_hd *hd; + int ret; + + if (!p) + return NULL; + + if (!p->mem || size > p->blk_sz) { + struct wd_blkpool_setup setup; + /* + * if empty pool, will reserve mem and init pool + * if size > blk_size, will re-org as align 4K if free pool + */ + + memset(&setup, 0, sizeof(setup)); + setup.block_size = ALIGN(size, DEFAULT_BLKSIZE_ALIGN); + setup.block_num = DEFAULT_BLOCK_NM; + setup.align_size = DEFAULT_ALIGN_SIZE; + ret = wd_blkpool_setup(p, &setup); + if (ret) + return NULL; + } + + pthread_spin_lock(&p->lock); + hd = TAILQ_LAST(&p->head, wd_blk_list); + if (unlikely(!hd || hd->blk_tag != TAG_FREE)) { + p->alloc_failures++; + goto out; + } + + /* Delete the block buffer from free list */ + TAILQ_REMOVE(&p->head, hd, next); + p->free_blk_num--; + hd->blk_tag = TAG_USED; + pthread_spin_unlock(&p->lock); + + return hd->blk; + +out: + pthread_spin_unlock(&p->lock); + WD_ERR("Failed to malloc blk.\n"); + + return NULL; +} + +void wd_blkpool_free(void *pool, void *va) +{ + struct wd_blkpool *p = pool; + struct wd_blk_hd *hd; + + if (!p || !va) + return; + + hd = wd_blk_head(p, va); + if (unlikely(hd->blk_tag != TAG_USED)) { + WD_ERR("free block fail!\n"); + return; + } + + pthread_spin_lock(&p->lock); + TAILQ_INSERT_TAIL(&p->head, hd, next); + p->free_blk_num++; + hd->blk_tag = TAG_FREE; + pthread_spin_unlock(&p->lock); +} + +void *wd_blkpool_phy(void *pool, void *va) +{ + struct wd_blkpool *p = pool; + struct wd_blk_hd 
*hd; + unsigned long off, idx; + + if (!pool || !va) + return NULL; + + if (p->sgl_mem != 0 && va >= p->sgl_mem) { + off = (unsigned long) (va - p->sgl_mem); + idx = off / p->sgl_size; + + return p->sgl_pool->phys[idx]; + } + + hd = wd_blk_head(pool, va); + if (hd->blk_tag != TAG_USED || + (uintptr_t)va < (uintptr_t)hd->blk) + return NULL; + + return (void *)((uintptr_t)hd->blk_dma + ((uintptr_t)va - + (uintptr_t)hd->blk)); +} + +int wd_blkpool_get_free_blk_num(void *pool, __u32 *free_num) +{ + struct wd_blkpool *p = pool; + + if (!p || !free_num) { + WD_ERR("get_free_blk_num err, parameter err!\n"); + return -WD_EINVAL; + } + + *free_num = __atomic_load_n(&p->free_blk_num, __ATOMIC_RELAXED); + + return WD_SUCCESS; +} + +int wd_blkpool_alloc_failures(void *pool, __u32 *fail_num) +{ + struct wd_blkpool *p = pool; + + if (!p || !fail_num) { + WD_ERR("get_blk_alloc_failure err, pool is NULL!\n"); + return -WD_EINVAL; + } + + *fail_num = __atomic_load_n(&p->alloc_failures, __ATOMIC_RELAXED); + + return WD_SUCCESS; +} + +__u32 wd_blkpool_blksize(void *pool) +{ + struct wd_blkpool *p = pool; + + if (!p) { + WD_ERR("get blk_size pool is null!\n"); + return 0; + } + + return p->blk_sz; +} + +void wd_blkpool_destroy_mem(void *pool) +{ + struct wd_blkpool_setup *setup; + struct wd_blkpool *p = pool; + + if (!p) { + WD_ERR("pool destroy err, pool is NULL.\n"); + return; + } + + pthread_spin_lock(&p->lock); + if (p->mem) { + setup = &p->setup; + if (setup->br.free) { + setup->br.free(setup->br.usr, p->mem); + } else { + drv_free_slice(p); + munmap(p->mem, p->size); + } + p->mem = NULL; + p->size = 0; + } + pthread_spin_unlock(&p->lock); +} + +void wd_blkpool_delete(void *pool) +{ + struct wd_blkpool *p = pool; + + if (!p) + return; + + wd_blkpool_destroy_mem(pool); + pthread_spin_destroy(&p->lock); + free(p); +} + +handle_t wd_blkpool_create_sglpool(void *pool) +{ + struct wd_blkpool *p = pool; + struct hisi_sgl_pool *sgl_pool; + struct hisi_sgl *sgl_align; + size_t sgl_size = 
calculate_sgl_size(); + size_t extra_sgl_size = calculate_extra_sgl_size(); + struct wd_blkpool_setup *sp; + void *base; + + if (!p) + return 0; + + sgl_pool = calloc(1, sizeof(struct hisi_sgl_pool)); + if (!sgl_pool) { + WD_ERR("failed to alloc memory for sgl_pool!\n"); + return 0; + } + + sgl_pool->sgl_align = calloc(SGL_NUM_IN_BD, sizeof(void *)); + if (!sgl_pool->sgl_align) { + WD_ERR("failed to alloc memory for sgl align!\n"); + goto err_out; + } + + sgl_pool->phys = calloc(SGL_NUM_IN_BD, sizeof(void *)); + if (!sgl_pool->phys) { + WD_ERR("failed to alloc memory for phys!\n"); + goto err_out; + } + + base = (void *)((uintptr_t)p->mem + p->size - extra_sgl_size); + sp = &p->setup; + + for (int i = 0; i < SGL_NUM_IN_BD; i++) { + sgl_align = (struct hisi_sgl *)ALIGN(base + sgl_size * i, DEFAULT_ALIGN_SIZE); + sgl_align->entry_sum_in_chain = SGE_NUM_IN_SGL; + sgl_align->entry_sum_in_sgl = 0; + sgl_align->entry_length_in_sgl = SGE_NUM_IN_SGL; + sgl_align->next_dma = 0; + sgl_pool->sgl_align[i] = sgl_align; + if (sp->br.iova_map) + sgl_pool->phys[i] = sp->br.iova_map(sp->br.usr, sgl_align, sgl_size); + else + sgl_pool->phys[i] = wd_get_phys(p, sgl_align); + } + + if (pthread_spin_init(&sgl_pool->lock, PTHREAD_PROCESS_SHARED) != 0) { + WD_ERR("failed to init sgl pool lock!\n"); + goto err_out; + } + + sgl_pool->sgl_num = SGL_NUM_IN_BD; + sgl_pool->sge_num = SGE_NUM_IN_SGL; + sgl_pool->depth = SGL_NUM_IN_BD; + sgl_pool->top = SGL_NUM_IN_BD; + p->sgl_pool = sgl_pool; + p->sgl_size = sgl_size; + p->sgl_mem = (void *)ALIGN(base, DEFAULT_ALIGN_SIZE); + + return (handle_t)sgl_pool; + +err_out: + if (sgl_pool->phys) + free(sgl_pool->phys); + if (sgl_pool->sgl_align) + free(sgl_pool->sgl_align); + free(sgl_pool); + return (handle_t)0; +} + +void wd_blkpool_destroy_sglpool(void *pool, handle_t h_sgl_pool) +{ + struct hisi_sgl_pool *sgl_pool = (struct hisi_sgl_pool *)h_sgl_pool; + + if (!h_sgl_pool) + return; + + pthread_spin_destroy(&sgl_pool->lock); + if (sgl_pool->phys) + 
free(sgl_pool->phys); + if (sgl_pool->sgl_align) + free(sgl_pool->sgl_align); + free(sgl_pool); +}
During init, all ctxs call wd_blkpool_new. Only the nosva case gets a pointer, while the sva case gets NULL.
During uninit, the blkpool and its related resources are deleted.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- include/wd_alg_common.h | 9 +++++++++ wd_util.c | 9 ++++++++- 2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index fd77426..951995f 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -136,12 +136,21 @@ struct wd_soft_ctx { void *priv; };
+enum wd_blkpool_mode { + BLKPOOL_MODE_MEMCPY, + BLKPOOL_MODE_USER, + BLKPOOL_MODE_MAX, +}; + struct wd_ctx_internal { handle_t ctx; __u8 op_type; __u8 ctx_mode; __u16 sqn; pthread_spinlock_t lock; + void *blkpool; + __u8 blkpool_mode; + handle_t h_sgl_pool; };
struct wd_ctx_config_internal { diff --git a/wd_util.c b/wd_util.c index 9675098..949d467 100644 --- a/wd_util.c +++ b/wd_util.c @@ -12,6 +12,7 @@ #include <string.h> #include <ctype.h> #include "wd_sched.h" +#include "wd_bmm.h" #include "wd_util.h"
#define WD_ASYNC_DEF_POLL_NUM 1 @@ -247,6 +248,7 @@ int wd_init_ctx_config(struct wd_ctx_config_internal *in, WD_ERR("failed to init ctxs lock!\n"); goto err_out; } + ctxs[i].blkpool = wd_blkpool_new(ctxs[i].ctx); }
in->ctxs = ctxs; @@ -298,8 +300,13 @@ void wd_clear_ctx_config(struct wd_ctx_config_internal *in) { __u32 i;
- for (i = 0; i < in->ctx_num; i++) + for (i = 0; i < in->ctx_num; i++) { + if (in->ctxs[i].blkpool) { + wd_blkpool_destroy_sglpool(in->ctxs[i].blkpool, in->ctxs[i].h_sgl_pool); + wd_blkpool_delete(in->ctxs[i].blkpool); + } pthread_spin_destroy(&in->ctxs[i].lock); + }
in->priv = NULL; in->ctx_num = 0;
Add the API wd_comp_setup_blkpool. Other alg.c files will need a wd_xxx_setup_blkpool as well, because the app does not know about the ctx.
It sets up the blkpool for ctx[0], and the sglpool for SGL mode. The blkpool is used by both the app and the driver.
The app needs to call wd_xxx_setup_blkpool for user-pointer mode and SGL mode. The returned blkpool is then used with wd_blkpool_alloc/free.
If the app has not called wd_xxx_setup_blkpool, alloc_sess calls it instead. In that case the uadk library allocates the blkpool and memcpys to/from user memory, with poorer performance.
The driver translates VA to PA when configuring the registers.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- include/wd_comp.h | 2 ++ libwd_comp.map | 1 + wd_comp.c | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+)
diff --git a/include/wd_comp.h b/include/wd_comp.h index 45994ff..a957021 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -10,6 +10,7 @@ #include <numa.h>
#include "wd_alg_common.h" +#include "wd_bmm.h"
#ifdef __cplusplus extern "C" { @@ -256,6 +257,7 @@ void wd_comp_ctx_num_uninit(void); int wd_comp_get_env_param(__u32 node, __u32 type, __u32 mode, __u32 *num, __u8 *is_enable);
+void *wd_comp_setup_blkpool(struct wd_blkpool_setup *setup); #ifdef __cplusplus } #endif diff --git a/libwd_comp.map b/libwd_comp.map index 6b1f8c2..033b476 100644 --- a/libwd_comp.map +++ b/libwd_comp.map @@ -22,6 +22,7 @@ global: wd_comp_get_driver; wd_comp_get_msg; wd_comp_reset_sess; + wd_comp_setup_blkpool;
wd_sched_rr_instance; wd_sched_rr_alloc; diff --git a/wd_comp.c b/wd_comp.c index 647c320..692abf4 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -588,6 +588,27 @@ static int wd_comp_check_params(struct wd_comp_sess *sess, return 0; }
+void *wd_comp_setup_blkpool(struct wd_blkpool_setup *setup) +{ + struct wd_ctx_config_internal *config = &wd_comp_setting.config; + struct wd_ctx_internal *ctx = config->ctxs; + int ret; + + if (setup->block_size < HW_CTX_SIZE) + setup->block_size = HW_CTX_SIZE; + + ret = wd_blkpool_setup(ctx->blkpool, setup); + if (ret) + return NULL; + + ctx->blkpool_mode = BLKPOOL_MODE_USER; + pthread_spin_lock(&ctx->lock); + if (ctx->h_sgl_pool == 0) + ctx->h_sgl_pool = wd_blkpool_create_sglpool(ctx->blkpool); + pthread_spin_unlock(&ctx->lock); + return ctx->blkpool; +} + static int wd_comp_sync_job(struct wd_comp_sess *sess, struct wd_comp_req *req, struct wd_comp_msg *msg)
Support sync: memcpy, user-pointer and SGL cases. Support async: memcpy, user-pointer.
For flat memory: if the user calls wd_xxx_setup_blkpool, user-pointer mode is used. uadk directly uses the app's pointer, assumes it refers to contiguous memory, and translates VA to PA when configuring the registers.
Otherwise, alloc_sess sets up the blkpool and memcpy mode is used: wd_comp allocates contiguous memory for the hardware, memcpys from the src pointer, and memcpys the results to the dst pointer.
For SGL memory: the app has to call wd_xxx_setup_blkpool, and each wd_datalist.data entry has to be contiguous memory.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- drv/hisi_comp.c | 102 ++++++++++++++++++++++++++++---------- drv/hisi_qm_udrv.c | 14 ++++-- drv/hisi_qm_udrv.h | 3 +- drv/hisi_sec.c | 8 +-- include/drv/wd_comp_drv.h | 7 +++ wd_comp.c | 98 ++++++++++++++++++++++++++++++++++++ 6 files changed, 197 insertions(+), 35 deletions(-)
diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index cd558a8..7de055a 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -431,7 +431,14 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, if (msg->ctx_buf) ctx_buf = msg->ctx_buf + RSV_OFFSET;
- fill_buf_addr_deflate(sqe, src, dst, ctx_buf); + if (msg->blkpool) { + fill_buf_addr_deflate(sqe, + wd_blkpool_phy(msg->blkpool, src), + wd_blkpool_phy(msg->blkpool, dst), + wd_blkpool_phy(msg->blkpool, ctx_buf)); + } else { + fill_buf_addr_deflate(sqe, src, dst, ctx_buf); + }
return 0; } @@ -464,32 +471,45 @@ static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) }
static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_comp_msg *msg, struct wd_datalist *list_src, struct wd_datalist *list_dst) { void *hw_sgl_in, *hw_sgl_out; handle_t h_sgl_pool;
- h_sgl_pool = hisi_qm_get_sglpool(h_qp); + if (msg->h_sgl_pool) + h_sgl_pool = msg->h_sgl_pool; + else + h_sgl_pool = hisi_qm_get_sglpool(h_qp); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool!\n"); return -WD_EINVAL; }
- hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, list_src); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, list_src, msg->blkpool); if (unlikely(!hw_sgl_in)) { WD_ERR("failed to get hw sgl in!\n"); - return -WD_ENOMEM; + return -WD_EBUSY; }
- hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, list_dst); + hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, list_dst, msg->blkpool); if (unlikely(!hw_sgl_out)) { WD_ERR("failed to get hw sgl out!\n"); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); - return -WD_ENOMEM; + return -WD_EBUSY; }
- fill_buf_addr_deflate(sqe, hw_sgl_in, hw_sgl_out, NULL); + if (msg->h_sgl_pool) { + fill_buf_addr_deflate(sqe, + wd_blkpool_phy(msg->blkpool, hw_sgl_in), + wd_blkpool_phy(msg->blkpool, hw_sgl_out), + NULL); + msg->hw_sgl_in = hw_sgl_in; + msg->hw_sgl_out = hw_sgl_out; + } else { + fill_buf_addr_deflate(sqe, hw_sgl_in, hw_sgl_out, NULL); + }
return 0; } @@ -543,7 +563,7 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe,
fill_buf_type_sgl(sqe);
- ret = fill_buf_addr_deflate_sgl(h_qp, sqe, list_src, list_dst); + ret = fill_buf_addr_deflate_sgl(h_qp, sqe, msg, list_src, list_dst); if (unlikely(ret)) return ret;
@@ -738,34 +758,48 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe,
fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size);
- h_sgl_pool = hisi_qm_get_sglpool(h_qp); + if (msg->h_sgl_pool) + h_sgl_pool = msg->h_sgl_pool; + else + h_sgl_pool = hisi_qm_get_sglpool(h_qp); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool!\n"); return -WD_EINVAL; }
- hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_src); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_src, msg->blkpool); if (unlikely(!hw_sgl_in)) { WD_ERR("failed to get hw sgl in!\n"); return -WD_ENOMEM; }
- hw_sgl_out_lit = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_dst); + hw_sgl_out_lit = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_dst, msg->blkpool); if (unlikely(!hw_sgl_out_lit)) { WD_ERR("failed to get hw sgl out for literals!\n"); ret = -WD_ENOMEM; goto err_free_sgl_in; }
- hw_sgl_out_seq = hisi_qm_get_hw_sgl(h_sgl_pool, seq_start); + hw_sgl_out_seq = hisi_qm_get_hw_sgl(h_sgl_pool, seq_start, msg->blkpool); if (unlikely(!hw_sgl_out_seq)) { WD_ERR("failed to get hw sgl out for sequences!\n"); ret = -WD_ENOMEM; goto err_free_sgl_out_lit; }
- fill_buf_addr_lz77_zstd(sqe, hw_sgl_in, hw_sgl_out_lit, + if (msg->h_sgl_pool) { + fill_buf_addr_lz77_zstd(sqe, + wd_blkpool_phy(msg->blkpool, hw_sgl_in), + wd_blkpool_phy(msg->blkpool, hw_sgl_out_lit), + wd_blkpool_phy(msg->blkpool, hw_sgl_out_seq), + NULL); + msg->hw_sgl_in = hw_sgl_in; + msg->hw_sgl_out = hw_sgl_out_lit; + msg->hw_sgl_out_seq = hw_sgl_out_seq; + } else { + fill_buf_addr_lz77_zstd(sqe, hw_sgl_in, hw_sgl_out_lit, hw_sgl_out_seq, NULL); + }
return 0;
@@ -1116,27 +1150,41 @@ static int fill_zip_comp_sqe(struct hisi_qp *qp, struct wd_comp_msg *msg, }
static void free_hw_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_comp_msg *msg, enum wd_comp_alg_type alg_type) { void *hw_sgl_in, *hw_sgl_out; handle_t h_sgl_pool;
- h_sgl_pool = hisi_qm_get_sglpool(h_qp); - if (unlikely(!h_sgl_pool)) { - WD_ERR("failed to get sglpool to free hw sgl!\n"); - return; - } + if (msg->h_sgl_pool) { + h_sgl_pool = msg->h_sgl_pool; + if (unlikely(!h_sgl_pool)) { + WD_ERR("failed to get sglpool to free hw sgl!\n"); + return; + } + hisi_qm_put_hw_sgl(h_sgl_pool, msg->hw_sgl_in); + hisi_qm_put_hw_sgl(h_sgl_pool, msg->hw_sgl_out); + if (alg_type == WD_LZ77_ZSTD) + hisi_qm_put_hw_sgl(h_sgl_pool, msg->hw_sgl_out_seq); + } else {
- hw_sgl_in = VA_ADDR(sqe->source_addr_h, sqe->source_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); + h_sgl_pool = hisi_qm_get_sglpool(h_qp); + if (unlikely(!h_sgl_pool)) { + WD_ERR("failed to get sglpool to free hw sgl!\n"); + return; + }
- hw_sgl_out = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); + hw_sgl_in = VA_ADDR(sqe->source_addr_h, sqe->source_addr_l); + hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in);
- if (alg_type == WD_LZ77_ZSTD) { - hw_sgl_out = VA_ADDR(sqe->literals_addr_h, - sqe->literals_addr_l); + hw_sgl_out = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); + + if (alg_type == WD_LZ77_ZSTD) { + hw_sgl_out = VA_ADDR(sqe->literals_addr_h, + sqe->literals_addr_l); + hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); + } } }
@@ -1163,7 +1211,7 @@ static int hisi_zip_comp_send(struct wd_alg_driver *drv, handle_t ctx, void *com ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (unlikely(ret < 0)) { if (msg->req.data_fmt == WD_SGL_BUF) - free_hw_sgl(h_qp, &sqe, msg->alg_type); + free_hw_sgl(h_qp, &sqe, msg, msg->alg_type); if (ret != -WD_EBUSY) WD_ERR("failed to send to hardware, ret = %d!\n", ret);
@@ -1304,7 +1352,7 @@ static int parse_zip_sqe(struct hisi_qp *qp, struct hisi_zip_sqe *sqe, recv_msg->alg_type = alg_type;
if (buf_type == WD_SGL_BUF) - free_hw_sgl((handle_t)qp, sqe, alg_type); + free_hw_sgl((handle_t)qp, sqe, recv_msg, alg_type);
if (unlikely(recv_msg->req.status == WD_IN_EPARA)) dump_zip_msg(recv_msg); diff --git a/drv/hisi_qm_udrv.c b/drv/hisi_qm_udrv.c index 304764e..78f6583 100644 --- a/drv/hisi_qm_udrv.c +++ b/drv/hisi_qm_udrv.c @@ -9,6 +9,7 @@
#include "hisi_qm_udrv.h" #include "wd_util.h" +#include "wd_bmm.h"
#define QM_DBELL_CMD_SQ 0 #define QM_DBELL_CMD_CQ 1 @@ -842,7 +843,8 @@ static void hisi_qm_dump_sgl(void *sgl) } }
-void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) +void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl, + void *blkpool) { struct hisi_sgl_pool *pool = (struct hisi_sgl_pool *)sgl_pool; struct wd_datalist *tmp = sgl; @@ -872,7 +874,10 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) goto err_out; }
- cur->sge_entries[i].buff = (uintptr_t)tmp->data; + if (blkpool) + cur->sge_entries[i].buff = (uintptr_t)wd_blkpool_phy(blkpool, tmp->data); + else + cur->sge_entries[i].buff = (uintptr_t)tmp->data; cur->sge_entries[i].len = tmp->len; cur->entry_sum_in_sgl++; cur->entry_size_in_sgl += tmp->len; @@ -890,7 +895,10 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) WD_ERR("invalid: the sgl pool is not enough!\n"); goto err_out; } - cur->next_dma = (uintptr_t)next; + if (blkpool) + cur->next_dma = (uintptr_t)wd_blkpool_phy(blkpool, next); + else + cur->next_dma = (uintptr_t)next; cur = next; head->entry_sum_in_chain += pool->sge_num; /* In the new sgl chain, the subscript must be reset */ diff --git a/drv/hisi_qm_udrv.h b/drv/hisi_qm_udrv.h index b02e8e7..ddb666e 100644 --- a/drv/hisi_qm_udrv.h +++ b/drv/hisi_qm_udrv.h @@ -162,7 +162,8 @@ void hisi_qm_destroy_sglpool(handle_t sgl_pool); * * Return the hw sgl addr which can fill into the sqe. */ -void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl); +void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl, + void *blkpool);
/** * hisi_qm_put_hw_sgl - Reback the hw sgl to the sgl pool. diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index 747d3a8..a305985 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -1050,7 +1050,7 @@ static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, return -WD_EINVAL; }
- hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in)); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in), NULL); if (!hw_sgl_in) { WD_ERR("failed to get sgl in for hw_v2!\n"); return -WD_EINVAL; @@ -1060,7 +1060,7 @@ static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, hw_sgl_out = *out; } else { hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, - (struct wd_datalist *)(*out)); + (struct wd_datalist *)(*out), NULL); if (!hw_sgl_out) { WD_ERR("failed to get hw sgl out for hw_v2!\n"); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); @@ -1090,7 +1090,7 @@ static int hisi_sec_fill_sgl_v3(handle_t h_qp, __u8 **in, __u8 **out, return -WD_EINVAL; }
- hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in)); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in), NULL); if (!hw_sgl_in) { WD_ERR("failed to get sgl in for hw_v3!\n"); return -WD_EINVAL; @@ -1101,7 +1101,7 @@ static int hisi_sec_fill_sgl_v3(handle_t h_qp, __u8 **in, __u8 **out, sqe->bd_param |= SEC_PBUFF_MODE_MASK_V3; } else { hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, - (struct wd_datalist *)(*out)); + (struct wd_datalist *)(*out), NULL); if (!hw_sgl_out) { WD_ERR("failed to get hw sgl out for hw_v3!\n"); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); diff --git a/include/drv/wd_comp_drv.h b/include/drv/wd_comp_drv.h index 213cf2d..1b8273c 100644 --- a/include/drv/wd_comp_drv.h +++ b/include/drv/wd_comp_drv.h @@ -53,6 +53,13 @@ struct wd_comp_msg { __u32 checksum; /* Request identifier */ __u32 tag; + void *blkpool; + void *src; + void *dst; + handle_t h_sgl_pool; + void *hw_sgl_in; + void *hw_sgl_out; + void *hw_sgl_out_seq; };
struct wd_comp_msg *wd_comp_get_msg(__u32 idx, __u32 tag); diff --git a/wd_comp.c b/wd_comp.c index 692abf4..f26c5da 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -39,6 +39,10 @@ struct wd_comp_sess { __u32 checksum; __u8 *ctx_buf; void *sched_key; + void *blkpool; + __u8 *blkpool_ctxbuf; + handle_t h_sgl_pool; + __u8 blkpool_mode; };
struct wd_comp_setting { @@ -361,6 +365,7 @@ int wd_comp_poll_ctx(__u32 idx, __u32 expt, __u32 *count) struct wd_comp_req *req; __u64 recv_count = 0; __u32 tmp = expt; + void *blkpool = NULL; int ret;
if (unlikely(!count || !expt)) { @@ -374,6 +379,10 @@ int wd_comp_poll_ctx(__u32 idx, __u32 expt, __u32 *count) if (unlikely(ret)) return ret;
+ ctx = config->ctxs; + if (ctx->blkpool && ctx->blkpool_mode == BLKPOOL_MODE_MEMCPY) + blkpool = ctx->blkpool; + ctx = config->ctxs + idx;
do { @@ -394,6 +403,11 @@ int wd_comp_poll_ctx(__u32 idx, __u32 expt, __u32 *count) }
req = &msg->req; + if (blkpool) { + memcpy(msg->dst, req->dst, msg->produced); + wd_blkpool_free(blkpool, msg->req.src); + wd_blkpool_free(blkpool, msg->req.dst); + } req->src_len = msg->in_cons; req->dst_len = msg->produced; req->cb(req, req->cb_param); @@ -436,6 +450,8 @@ static int wd_comp_check_sess_params(struct wd_comp_sess_setup *setup)
handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) { + struct wd_ctx_config_internal *config = &wd_comp_setting.config; + struct wd_ctx_internal *ctx = config->ctxs; struct wd_comp_sess *sess; int ret;
@@ -467,6 +483,28 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) goto sched_err; }
+ if (ctx->blkpool) { + sess->blkpool = ctx->blkpool; + sess->h_sgl_pool = ctx->h_sgl_pool; + sess->blkpool_mode = ctx->blkpool_mode; + + if (ctx->blkpool_mode == BLKPOOL_MODE_MEMCPY) { + struct wd_blkpool_setup blksetup; + + memset(&blksetup, 0, sizeof(blksetup)); + blksetup.block_size = HW_CTX_SIZE; + blksetup.block_num = DEFAULT_BLOCK_NM; + blksetup.align_size = DEFAULT_ALIGN_SIZE; + ret = wd_blkpool_setup(sess->blkpool, &blksetup); + if (ret) + goto sched_err; + } + + sess->blkpool_ctxbuf = wd_blkpool_alloc(sess->blkpool, HW_CTX_SIZE); + if (!sess->blkpool_ctxbuf) + goto sched_err; + } + return (handle_t)sess;
sched_err: @@ -486,6 +524,9 @@ void wd_comp_free_sess(handle_t h_sess) if (sess->ctx_buf) free(sess->ctx_buf);
+ if (sess->blkpool_ctxbuf) + wd_blkpool_free(sess->blkpool, sess->blkpool_ctxbuf); + if (sess->sched_key) free(sess->sched_key);
@@ -506,6 +547,9 @@ int wd_comp_reset_sess(handle_t h_sess) if (sess->ctx_buf) memset(sess->ctx_buf, 0, HW_CTX_SIZE);
+ if (sess->blkpool_ctxbuf) + memset(sess->blkpool_ctxbuf, 0, HW_CTX_SIZE); + return 0; }
@@ -630,6 +674,29 @@ static int wd_comp_sync_job(struct wd_comp_sess *sess, wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); ctx = config->ctxs + idx;
+ if (sess->blkpool) { + msg->ctx_buf = sess->blkpool_ctxbuf; + msg->blkpool = sess->blkpool; + msg->h_sgl_pool = sess->h_sgl_pool; + + if (sess->blkpool_mode == BLKPOOL_MODE_MEMCPY) { + void *src = wd_blkpool_alloc(sess->blkpool, req->src_len); + void *dst = wd_blkpool_alloc(sess->blkpool, req->dst_len); + + if (!src || !dst) + return -ENOMEM; + + /* save */ + msg->src = msg->req.src; + msg->dst = msg->req.dst; + /* replace */ + msg->req.src = src; + msg->req.dst = dst; + + memcpy(msg->req.src, msg->src, req->src_len); + } + } + msg_handle.send = wd_comp_setting.driver->send; msg_handle.recv = wd_comp_setting.driver->recv;
@@ -638,6 +705,14 @@ static int wd_comp_sync_job(struct wd_comp_sess *sess, msg, NULL, config->epoll_en); pthread_spin_unlock(&ctx->lock);
+ if (sess->blkpool) { + if (sess->blkpool_mode == BLKPOOL_MODE_MEMCPY) { + memcpy(msg->dst, req->dst, msg->produced); + wd_blkpool_free(sess->blkpool, msg->req.src); + wd_blkpool_free(sess->blkpool, msg->req.dst); + } + } + return ret; }
@@ -893,6 +968,29 @@ int wd_do_comp_async(handle_t h_sess, struct wd_comp_req *req) msg->tag = tag; msg->stream_mode = WD_COMP_STATELESS;
+ if (sess->blkpool) { + /* async mode has no ctx_buf */ + msg->blkpool = sess->blkpool; + msg->h_sgl_pool = sess->h_sgl_pool; + + if (sess->blkpool_mode == BLKPOOL_MODE_MEMCPY) { + void *src = wd_blkpool_alloc(sess->blkpool, req->src_len); + void *dst = wd_blkpool_alloc(sess->blkpool, req->dst_len); + + if (!src || !dst) + return -ENOMEM; + + /* save */ + msg->src = msg->req.src; + msg->dst = msg->req.dst; + /* replace */ + msg->req.src = src; + msg->req.dst = dst; + + memcpy(msg->req.src, msg->src, req->src_len); + } + } + ret = wd_alg_driver_send(wd_comp_setting.driver, ctx->ctx, msg); if (unlikely(ret < 0)) { WD_ERR("wd comp send error, ret = %d!\n", ret);
sync: memcpy mode ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --opt 0 --sync --pktlen 1024
user pointer mode, --user ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --user --opt 0 --sync --pktlen 1024
sgl mode, --sgl ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --sgl --opt 0 --sync --pktlen 1024
async: memcpy mode ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --opt 0 --async --pktlen 1024
user pointer mode, --user ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --user --opt 0 --async --pktlen 1024
sgl mode, --sgl ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --sgl --opt 0 --async --pktlen 1024
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- uadk_tool/benchmark/uadk_benchmark.c | 8 + uadk_tool/benchmark/uadk_benchmark.h | 2 + uadk_tool/benchmark/zip_uadk_benchmark.c | 249 ++++++++++++++++++++--- 3 files changed, 226 insertions(+), 33 deletions(-)
diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index 1698061..064affc 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -718,6 +718,8 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) {"complevel", required_argument, 0, 16}, {"init2", no_argument, 0, 17}, {"device", required_argument, 0, 18}, + {"user", no_argument, 0, 19}, + {"sgl", no_argument, 0, 20}, {0, 0, 0, 0} };
@@ -789,6 +791,12 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) } strcpy(option->device, optarg); break; + case 19: + option->user = true; + break; + case 20: + option->sgl = true; + break; default: ACC_TST_PRT("invalid: bad input parameter!\n"); print_benchmark_help(); diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index 2739a0e..7fde341 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -81,6 +81,8 @@ struct acc_option { bool latency; u32 sched_type; int task_type; + bool user; + bool sgl; };
enum acc_type { diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index cad8016..cfb1a54 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -8,6 +8,8 @@ #include "include/wd_sched.h" #include "include/fse.h"
+#define HW_CTX_SIZE (64 * 1024) + #define ZIP_TST_PRT printf #define PATH_SIZE 64 #define ZIP_FILE "./zip" @@ -22,6 +24,8 @@ struct uadk_bd { u8 *dst; u32 src_len; u32 dst_len; + void *pool_src; + void *pool_dst; };
struct bd_pool { @@ -32,6 +36,8 @@ struct thread_pool { struct bd_pool *pool; } g_zip_pool;
+void *g_blkpool; + enum ZIP_OP_MODE { BLOCK_MODE, STREAM_MODE @@ -62,6 +68,7 @@ typedef struct uadk_thread_res { struct zip_async_tag *tag; COMP_TUPLE_TAG *ftuple; char *hw_buff_out; + bool sgl; } thread_data;
struct zip_file_head { @@ -145,7 +152,11 @@ static int save_file_data(const char *alg, u32 pkg_len, u32 optype)
// write data for one buffer one buffer to file line. for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - size = write(fd, g_zip_pool.pool[0].bds[j].dst, + if (g_blkpool) + size = write(fd, g_zip_pool.pool[0].bds[j].pool_dst, + fhead->blk_sz[j]); + else + size = write(fd, g_zip_pool.pool[0].bds[j].dst, fhead->blk_sz[j]); if (size < 0) { ZIP_TST_PRT("compress write data error size: %lu!\n", size); @@ -297,6 +308,17 @@ static void uninit_ctx_config2(void) wd_comp_uninit2(); }
+static void init_blkpool(struct acc_option *options) +{ + struct wd_blkpool_setup setup; + + memset(&setup, 0, sizeof(setup)); + setup.block_size = HW_CTX_SIZE; + setup.block_num = DEFAULT_BLOCK_NM; + setup.align_size = DEFAULT_ALIGN_SIZE; + g_blkpool = wd_comp_setup_blkpool(&setup); +} + static int init_ctx_config2(struct acc_option *options) { struct wd_ctx_params cparams = {0}; @@ -333,6 +355,10 @@ static int init_ctx_config2(struct acc_option *options) ZIP_TST_PRT("failed to do comp init2!\n");
free(ctx_set_num); + + if (options->user || options->sgl) + init_blkpool(options); + return ret; }
@@ -506,6 +532,8 @@ static int init_ctx_config(struct acc_option *options) goto free_sched; }
+ if (options->user || options->sgl) + init_blkpool(options); return 0;
free_sched: @@ -614,6 +642,13 @@ static void free_uadk_bd_pool(void) for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { free(g_zip_pool.pool[i].bds[j].src); free(g_zip_pool.pool[i].bds[j].dst); + + if (g_blkpool) { + wd_blkpool_free(g_blkpool, + g_zip_pool.pool[i].bds[j].pool_src); + wd_blkpool_free(g_blkpool, + g_zip_pool.pool[i].bds[j].pool_dst); + } } } free(g_zip_pool.pool[i].bds); @@ -975,6 +1010,7 @@ static void *zip_uadk_blk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct wd_datalist *list_src = NULL, *list_dst = NULL; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -1003,23 +1039,71 @@ static void *zip_uadk_blk_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len;
creq.cb = NULL; - creq.data_fmt = 0; + if (pdata->sgl) + creq.data_fmt = WD_SGL_BUF; + else + creq.data_fmt = 0; creq.priv = 0; creq.status = 0;
+ if (pdata->sgl) { + struct wd_datalist *src, *dst; + + list_src = calloc(MAX_POOL_LENTH_COMP, sizeof(struct wd_datalist)); + list_dst = calloc(MAX_POOL_LENTH_COMP, sizeof(struct wd_datalist)); + + for (i = 0; i < MAX_POOL_LENTH_COMP; i++) { + src = &list_src[i]; + dst = &list_dst[i]; + if (g_blkpool) { + src->data = uadk_pool->bds[i].pool_src; + dst->data = uadk_pool->bds[i].pool_dst; + } else { + src->data = uadk_pool->bds[i].src; + dst->data = uadk_pool->bds[i].dst; + } + src->len = uadk_pool->bds[i].src_len; + dst->len = uadk_pool->bds[i].dst_len; + src->next = (i < MAX_POOL_LENTH_COMP-1) ? &list_src[i+1] : NULL; + dst->next = (i < MAX_POOL_LENTH_COMP-1) ? &list_dst[i+1] : NULL; + } + } + while(1) { - i = count % MAX_POOL_LENTH_COMP; - creq.src = uadk_pool->bds[i].src; - creq.dst = uadk_pool->bds[i].dst; - creq.src_len = uadk_pool->bds[i].src_len; - creq.dst_len = out_len; + if (pdata->sgl) { + creq.list_src = list_src; + creq.list_dst = list_dst;
- ret = wd_do_comp_sync(h_sess, &creq); - if (ret || creq.status) - break; + creq.src_len = uadk_pool->bds[0].src_len * MAX_POOL_LENTH_COMP; + creq.dst_len = out_len * MAX_POOL_LENTH_COMP;
- count++; - uadk_pool->bds[i].dst_len = creq.dst_len; + ret = wd_do_comp_sync(h_sess, &creq); + if (ret || creq.status) + break; + count++; + uadk_pool->bds[0].dst_len = creq.dst_len; + if (get_run_state() == 0) + break; + + } else { + i = count % MAX_POOL_LENTH_COMP; + if (g_blkpool) { + creq.src = uadk_pool->bds[i].pool_src; + creq.dst = uadk_pool->bds[i].pool_dst; + } else { + creq.src = uadk_pool->bds[i].src; + creq.dst = uadk_pool->bds[i].dst; + } + creq.src_len = uadk_pool->bds[i].src_len; + creq.dst_len = out_len; + + ret = wd_do_comp_sync(h_sess, &creq); + if (ret || creq.status) + break; + + count++; + uadk_pool->bds[i].dst_len = creq.dst_len; + } if (get_run_state() == 0) break; } @@ -1028,6 +1112,10 @@ static void *zip_uadk_blk_sync_run(void *arg) cal_avg_latency(count); add_recv_data(count, g_pktlen);
+ if (pdata->sgl) { + free(list_src); + free(list_dst); + } return NULL; }
@@ -1099,6 +1187,7 @@ static void *zip_uadk_blk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct wd_datalist *list_src = NULL, *list_dst = NULL; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -1128,35 +1217,94 @@ static void *zip_uadk_blk_async_run(void *arg) out_len = uadk_pool->bds[0].dst_len;
creq.cb = zip_async_cb; - creq.data_fmt = 0; + if (pdata->sgl) + creq.data_fmt = WD_SGL_BUF; + else + creq.data_fmt = 0; creq.priv = 0; creq.status = 0;
+ if (pdata->sgl) { + struct wd_datalist *src, *dst; + + list_src = calloc(MAX_POOL_LENTH_COMP, sizeof(struct wd_datalist)); + list_dst = calloc(MAX_POOL_LENTH_COMP, sizeof(struct wd_datalist)); + + for (i = 0; i < MAX_POOL_LENTH_COMP; i++) { + src = &list_src[i]; + dst = &list_dst[i]; + if (g_blkpool) { + src->data = uadk_pool->bds[i].pool_src; + dst->data = uadk_pool->bds[i].pool_dst; + } else { + src->data = uadk_pool->bds[i].src; + dst->data = uadk_pool->bds[i].dst; + } + src->len = uadk_pool->bds[i].src_len; + dst->len = uadk_pool->bds[i].dst_len; + src->next = (i < MAX_POOL_LENTH_COMP-1) ? &list_src[i+1] : NULL; + dst->next = (i < MAX_POOL_LENTH_COMP-1) ? &list_dst[i+1] : NULL; + } + } + while(1) { if (get_run_state() == 0) break;
- i = count % MAX_POOL_LENTH_COMP; - creq.src = uadk_pool->bds[i].src; - creq.dst = uadk_pool->bds[i].dst; - creq.src_len = uadk_pool->bds[i].src_len; - creq.dst_len = out_len; - - pdata->tag[i].td_id = pdata->td_id; - pdata->tag[i].bd_idx = i; - creq.cb_param = &pdata->tag[i]; - - ret = wd_do_comp_async(h_sess, &creq); - if (ret == -WD_EBUSY) { - usleep(SEND_USLEEP * try_cnt); - try_cnt++; - if (try_cnt > MAX_TRY_CNT) { - ZIP_TST_PRT("Test compress send fail %d times!\n", MAX_TRY_CNT); - try_cnt = 0; + if (pdata->sgl) { + creq.list_src = list_src; + creq.list_dst = list_dst; + + creq.src_len = uadk_pool->bds[0].src_len * MAX_POOL_LENTH_COMP; + creq.dst_len = out_len * MAX_POOL_LENTH_COMP; + + pdata->tag[0].td_id = pdata->td_id; + pdata->tag[0].bd_idx = i; + creq.cb_param = &pdata->tag[0]; + + ret = wd_do_comp_async(h_sess, &creq); + if (ret == -WD_EBUSY) { + usleep(SEND_USLEEP * try_cnt); + try_cnt++; + if (try_cnt > MAX_TRY_CNT) { + ZIP_TST_PRT("Test compress send fail %d times!\n", + MAX_TRY_CNT); + try_cnt = 0; + } + continue; + } else if (ret || creq.status) { + break; + } + uadk_pool->bds[0].dst_len = creq.dst_len; + } else { + i = count % MAX_POOL_LENTH_COMP; + if (g_blkpool) { + creq.src = uadk_pool->bds[i].pool_src; + creq.dst = uadk_pool->bds[i].pool_dst; + } else { + creq.src = uadk_pool->bds[i].src; + creq.dst = uadk_pool->bds[i].dst; + } + creq.src_len = uadk_pool->bds[i].src_len; + creq.dst_len = out_len; + + pdata->tag[i].td_id = pdata->td_id; + pdata->tag[i].bd_idx = i; + creq.cb_param = &pdata->tag[i]; + + ret = wd_do_comp_async(h_sess, &creq); + if (ret == -WD_EBUSY) { + usleep(SEND_USLEEP * try_cnt); + try_cnt++; + if (try_cnt > MAX_TRY_CNT) { + ZIP_TST_PRT("Test compress send fail %d times!\n", + MAX_TRY_CNT); + try_cnt = 0; + } + continue; + } else if (ret || creq.status) { + break; } - continue; - } else if (ret || creq.status) { - break; } try_cnt = 0; count++; @@ -1202,6 +1350,7 @@ static int zip_uadk_sync_threads(struct acc_option *options) 
threads_args[i].optype = threads_option.optype; threads_args[i].win_sz = threads_option.win_sz; threads_args[i].comp_lv = threads_option.comp_lv; + threads_args[i].sgl = options->sgl; threads_args[i].td_id = i; ret = pthread_create(&tdid[i], NULL, uadk_zip_sync_run, &threads_args[i]); if (ret) { @@ -1293,6 +1442,7 @@ static int zip_uadk_async_threads(struct acc_option *options) } threads_args[i].tag->recv_cnt = 0; threads_args[i].send_cnt = 0; + threads_args[i].sgl = options->sgl; ret = pthread_create(&tdid[i], NULL, uadk_zip_async_run, &threads_args[i]); if (ret) { ZIP_TST_PRT("Create async thread fail!\n"); @@ -1344,6 +1494,35 @@ async_error: return ret; }
+static int load_blkpool_data(void) +{ + int i, j; + int src_len, dst_len; + + if (!g_blkpool) + return 0; + + for (i = 0; i < g_thread_num; i++) { + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + src_len = g_zip_pool.pool[i].bds[j].src_len; + g_zip_pool.pool[i].bds[j].pool_src = + wd_blkpool_alloc(g_blkpool, src_len); + + dst_len = g_zip_pool.pool[i].bds[j].dst_len; + g_zip_pool.pool[i].bds[j].pool_dst = + wd_blkpool_alloc(g_blkpool, dst_len); + + if (!g_zip_pool.pool[i].bds[j].pool_src || + !g_zip_pool.pool[i].bds[j].pool_dst) + return -EINVAL; + + memcpy(g_zip_pool.pool[i].bds[j].pool_src, + g_zip_pool.pool[0].bds[j].src, src_len); + } + } + return 0; +} + int zip_uadk_benchmark(struct acc_option *options) { u32 ptime; @@ -1375,6 +1554,10 @@ int zip_uadk_benchmark(struct acc_option *options) if (ret) return ret;
+ ret = load_blkpool_data(); + if (ret) + return ret; + get_pid_cpu_time(&ptime); time_start(options->times); if (options->syncmode)
This patch addresses the SGL (Scatter-Gather List) next pointer handling issue in both SVA and non-SVA scenarios.
Background: - A single SGL can typically cover 255 SGEs (Scatter-Gather Elements) * 8MB - Multi-SGL cases require proper next pointer handling
Implementation details: 1. In SVA (Shared Virtual Addressing) case: - hisi_sgl.next_dma serves both hardware and CPU access - Address translation is handled automatically by the IOMMU
2. In non-SVA case: - hisi_sgl.next_dma contains DMA address for hardware use only - CPU cannot directly use next_dma to access SGL members - Solution: Reuse one pad1 field as hisi_sgl.next for CPU access
The modification maintains the original 64B hardware SGL header size while adding proper CPU-accessible pointer support. This ensures correct operation in both single and multi-SGL scenarios.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- drv/hisi_qm_udrv.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/drv/hisi_qm_udrv.c b/drv/hisi_qm_udrv.c index 78f6583..0bb16db 100644 --- a/drv/hisi_qm_udrv.c +++ b/drv/hisi_qm_udrv.c @@ -83,7 +83,7 @@ struct hisi_sge {
/* use default hw sgl head size 64B, in little-endian */ struct hisi_sgl { - /* the next sgl address */ + /* the next sgl dma address */ uintptr_t next_dma; /* the sge num of all the sgl */ __le16 entry_sum_in_chain; @@ -92,7 +92,9 @@ struct hisi_sgl { /* the sge num in this sgl */ __le16 entry_length_in_sgl; __le16 pad0; - __le64 pad1[5]; + __le64 pad1[4]; + /* the next sgl address */ + struct hisi_sgl *next; /* valid sge buffs total size */ __le64 entry_size_in_sgl; struct hisi_sge sge_entries[]; @@ -656,6 +658,7 @@ static struct hisi_sgl *hisi_qm_align_sgl(const void *sgl, __u32 sge_num) sgl_align->entry_sum_in_chain = sge_num; sgl_align->entry_sum_in_sgl = 0; sgl_align->entry_length_in_sgl = sge_num; + sgl_align->next = 0; sgl_align->next_dma = 0;
return sgl_align; @@ -777,6 +780,7 @@ static int hisi_qm_sgl_push(struct hisi_sgl_pool *pool, struct hisi_sgl *hw_sgl) return -WD_EINVAL; }
+ hw_sgl->next = 0; hw_sgl->next_dma = 0; hw_sgl->entry_sum_in_sgl = 0; hw_sgl->entry_sum_in_chain = pool->sge_num; @@ -801,7 +805,7 @@ void hisi_qm_put_hw_sgl(handle_t sgl_pool, void *hw_sgl) return;
while (cur) { - next = (struct hisi_sgl *)cur->next_dma; + next = (struct hisi_sgl *)cur->next; ret = hisi_qm_sgl_push(pool, cur); if (ret) break; @@ -833,7 +837,7 @@ static void hisi_qm_dump_sgl(void *sgl) WD_DEBUG("[sgl-%d]->sge_entries[%d].len: %u\n", k, i, tmp->sge_entries[i].len);
- tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; k++;
if (!tmp) { @@ -899,6 +903,7 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl, cur->next_dma = (uintptr_t)wd_blkpool_phy(blkpool, next); else cur->next_dma = (uintptr_t)next; + cur->next = next; cur = next; head->entry_sum_in_chain += pool->sge_num; /* In the new sgl chain, the subscript must be reset */ @@ -957,7 +962,7 @@ static void hisi_qm_sgl_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, offset += tmp->sge_entries[i].len; }
- tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; i = 0; } } @@ -989,7 +994,7 @@ static void hisi_qm_pbuff_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, offset += tmp->sge_entries[i].len; }
- tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; i = 0; } } @@ -1008,7 +1013,7 @@ void hisi_qm_sgl_copy(void *pbuff, void *hw_sgl, __u32 offset, __u32 size, while (len + tmp->entry_size_in_sgl <= offset) { len += tmp->entry_size_in_sgl;
- tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; if (!tmp) return; }
test:
digest: uadk_tool benchmark --alg aes-128-ecb --mode sva --opt 0 \ --sync --pktlen 1024 --seconds 1 --multi 1 --thread 1 uadk_tool benchmark --alg aes-128-ecb --mode sva --opt 0 \ --async --pktlen 1024 --seconds 1 --multi 1 --thread 1
cipher: uadk_tool benchmark --alg sm4-128-cbc --mode sva --opt 0 \ --sync --pktlen 1024 --seconds 1 --multi 1 --thread 1 uadk_tool benchmark --alg sm4-128-cbc --mode sva --opt 0 \ --async --pktlen 1024 --seconds 1 --multi 1 --thread 1
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- drv/hisi_sec.c | 57 ++++++++++++++++++++++++++----------- include/drv/wd_cipher_drv.h | 2 ++ include/drv/wd_digest_drv.h | 2 ++ include/wd_cipher.h | 2 ++ include/wd_digest.h | 2 ++ libwd_crypto.map | 2 ++ wd_cipher.c | 52 +++++++++++++++++++++++++++++++-- wd_digest.c | 45 +++++++++++++++++++++++++++-- 8 files changed, 142 insertions(+), 22 deletions(-)
diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index a305985..1e5d7b2 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -916,10 +916,17 @@ static int fill_cipher_bd2_mode(struct wd_cipher_msg *msg, static void fill_cipher_bd2_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe *sqe) { - sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->key; + if (msg->blkpool) { + sqe->type2.data_src_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->in); + sqe->type2.data_dst_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->out); + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->iv); + sqe->type2.c_key_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->key); + } else { + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->key; + } }
static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, @@ -944,8 +951,6 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_CIPHER; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); - recv_msg->in = (__u8 *)(uintptr_t)sqe->type2.data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->type2.data_dst_addr; temp_msg = wd_cipher_get_msg(qp->q_info.idx, tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -953,6 +958,8 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; @@ -1318,10 +1325,17 @@ static int fill_cipher_bd3_mode(struct wd_cipher_msg *msg, static void fill_cipher_bd3_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) { - sqe->data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; - sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->c_key_addr = (__u64)(uintptr_t)msg->key; + if (msg->blkpool) { + sqe->data_src_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->in); + sqe->data_dst_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->out); + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->iv); + sqe->c_key_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->key); + } else { + sqe->data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->c_key_addr = (__u64)(uintptr_t)msg->key; + } }
static int fill_cipher_bd3(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -1429,8 +1443,6 @@ static void parse_cipher_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_CIPHER; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); - recv_msg->in = (__u8 *)(uintptr_t)sqe->data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->data_dst_addr; temp_msg = wd_cipher_get_msg(qp->q_info.idx, tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -1438,6 +1450,8 @@ static void parse_cipher_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; @@ -1597,7 +1611,6 @@ static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_DIGEST; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); - recv_msg->in = (__u8 *)(uintptr_t)sqe->type2.data_src_addr; temp_msg = wd_digest_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -1742,8 +1755,13 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w
sqe.sds_sa_type |= (__u8)(de | scene); sqe.type2.alen_ivllen |= (__u32)msg->in_bytes; - sqe.type2.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe.type2.mac_addr = (__u64)(uintptr_t)msg->out; + if (msg->blkpool) { + sqe.type2.data_src_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->in); + sqe.type2.mac_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->out); + } else { + sqe.type2.data_src_addr = (__u64)(uintptr_t)msg->in; + sqe.type2.mac_addr = (__u64)(uintptr_t)msg->out; + }
ret = fill_digest_bd2_alg(msg, &sqe); if (ret) @@ -1977,8 +1995,13 @@ static int hisi_sec_digest_send_v3(struct wd_alg_driver *drv, handle_t ctx, void }
sqe.a_len_key = (__u32)msg->in_bytes; - sqe.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe.mac_addr = (__u64)(uintptr_t)msg->out; + if (msg->blkpool) { + sqe.data_src_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->in); + sqe.mac_addr = (__u64)(uintptr_t)wd_blkpool_phy(msg->blkpool, msg->out); + } else { + sqe.data_src_addr = (__u64)(uintptr_t)msg->in; + sqe.mac_addr = (__u64)(uintptr_t)msg->out; + }
ret = fill_digest_bd3_alg(msg, &sqe); if (ret) diff --git a/include/drv/wd_cipher_drv.h b/include/drv/wd_cipher_drv.h index c6d8ddf..b0da45a 100644 --- a/include/drv/wd_cipher_drv.h +++ b/include/drv/wd_cipher_drv.h @@ -48,6 +48,8 @@ struct wd_cipher_msg { __u8 *in; /* output data pointer */ __u8 *out; + void *blkpool; + handle_t h_sgl_pool; };
struct wd_cipher_msg *wd_cipher_get_msg(__u32 idx, __u32 tag); diff --git a/include/drv/wd_digest_drv.h b/include/drv/wd_digest_drv.h index a55ef5b..5e693ae 100644 --- a/include/drv/wd_digest_drv.h +++ b/include/drv/wd_digest_drv.h @@ -59,6 +59,8 @@ struct wd_digest_msg { __u8 *partial_block; /* total of data for stream mode */ __u64 long_data_len; + void *blkpool; + handle_t h_sgl_pool; };
static inline enum hash_block_type get_hash_block_type(struct wd_digest_msg *msg) diff --git a/include/wd_cipher.h b/include/wd_cipher.h index d54f7fe..acb99a5 100644 --- a/include/wd_cipher.h +++ b/include/wd_cipher.h @@ -10,6 +10,7 @@ #include <dlfcn.h> #include <asm/types.h> #include "wd_alg_common.h" +#include "wd_bmm.h"
#ifdef __cplusplus extern "C" { @@ -220,6 +221,7 @@ void wd_cipher_ctx_num_uninit(void); int wd_cipher_get_env_param(__u32 node, __u32 type, __u32 mode, __u32 *num, __u8 *is_enable);
+void *wd_cipher_setup_blkpool(struct wd_blkpool_setup *setup); #ifdef __cplusplus } #endif diff --git a/include/wd_digest.h b/include/wd_digest.h index 6ce31f2..e130ea0 100644 --- a/include/wd_digest.h +++ b/include/wd_digest.h @@ -9,6 +9,7 @@ #include <dlfcn.h>
#include "wd_alg_common.h" +#include "wd_bmm.h"
#ifdef __cplusplus extern "C" { @@ -281,6 +282,7 @@ void wd_digest_ctx_num_uninit(void); int wd_digest_get_env_param(__u32 node, __u32 type, __u32 mode, __u32 *num, __u8 *is_enable);
+void *wd_digest_setup_blkpool(struct wd_blkpool_setup *setup); #ifdef __cplusplus } #endif diff --git a/libwd_crypto.map b/libwd_crypto.map index e8555c9..b430886 100644 --- a/libwd_crypto.map +++ b/libwd_crypto.map @@ -20,6 +20,7 @@ global: wd_cipher_set_driver; wd_cipher_get_driver; wd_cipher_get_msg; + wd_cipher_setup_blkpool;
wd_aead_init; wd_aead_uninit; @@ -66,6 +67,7 @@ global: wd_digest_set_driver; wd_digest_get_driver; wd_digest_get_msg; + wd_digest_setup_blkpool;
wd_rsa_is_crt; wd_rsa_get_key_bits; diff --git a/wd_cipher.c b/wd_cipher.c index 95d6eb6..4b0e637 100644 --- a/wd_cipher.c +++ b/wd_cipher.c @@ -66,6 +66,9 @@ struct wd_cipher_sess { unsigned char key[MAX_CIPHER_KEY_SIZE]; __u32 key_bytes; void *sched_key; + void *blkpool; + unsigned char *pool_key; + handle_t h_sgl_pool; };
struct wd_env_config wd_cipher_env_config; @@ -244,13 +247,36 @@ int wd_cipher_set_key(handle_t h_sess, const __u8 *key, __u32 key_len) }
sess->key_bytes = key_len; - memcpy(sess->key, key, key_len); + if (sess->pool_key) + memcpy(sess->pool_key, key, key_len); + else + memcpy(sess->key, key, key_len);
return 0; }
+void *wd_cipher_setup_blkpool(struct wd_blkpool_setup *setup) +{ + struct wd_ctx_config_internal *config = &wd_cipher_setting.config; + struct wd_ctx_internal *ctx = config->ctxs; + int ret; + + ret = wd_blkpool_setup(ctx->blkpool, setup); + if (ret) + return NULL; + + ctx->blkpool_mode = BLKPOOL_MODE_USER; + pthread_spin_lock(&ctx->lock); + if (ctx->h_sgl_pool == 0) + ctx->h_sgl_pool = wd_blkpool_create_sglpool(ctx->blkpool); + pthread_spin_unlock(&ctx->lock); + return ctx->blkpool; +} + handle_t wd_cipher_alloc_sess(struct wd_cipher_sess_setup *setup) { + struct wd_ctx_config_internal *config = &wd_cipher_setting.config; + struct wd_ctx_internal *ctx = config->ctxs; struct wd_cipher_sess *sess = NULL; bool ret;
@@ -289,6 +315,15 @@ handle_t wd_cipher_alloc_sess(struct wd_cipher_sess_setup *setup) goto free_sess; }
+ if (ctx->blkpool) { + sess->blkpool = ctx->blkpool; + sess->h_sgl_pool = ctx->h_sgl_pool; + + sess->pool_key = wd_blkpool_alloc(sess->blkpool, MAX_CIPHER_KEY_SIZE); + if (!sess->pool_key) + goto free_sess; + } + return (handle_t)sess;
free_sess: @@ -539,7 +574,10 @@ static void fill_request_msg(struct wd_cipher_msg *msg, msg->in_bytes = req->in_bytes; msg->out = req->dst; msg->out_bytes = req->out_bytes; - msg->key = sess->key; + if (sess->blkpool) + msg->key = sess->pool_key; + else + msg->key = sess->key; msg->key_bytes = sess->key_bytes; msg->iv = req->iv; msg->iv_bytes = req->iv_bytes; @@ -719,6 +757,11 @@ int wd_do_cipher_sync(handle_t h_sess, struct wd_cipher_req *req) wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); ctx = config->ctxs + idx;
+ if (sess->blkpool) { + msg.blkpool = sess->blkpool; + msg.h_sgl_pool = sess->h_sgl_pool; + } + ret = send_recv_sync(ctx, &msg); req->state = msg.result;
@@ -759,6 +802,11 @@ int wd_do_cipher_async(handle_t h_sess, struct wd_cipher_req *req) fill_request_msg(msg, req, sess); msg->tag = msg_id;
+ if (sess->blkpool) { + msg->blkpool = sess->blkpool; + msg->h_sgl_pool = sess->h_sgl_pool; + } + ret = wd_alg_driver_send(wd_cipher_setting.driver, ctx->ctx, msg); if (unlikely(ret < 0)) { if (ret != -WD_EBUSY) diff --git a/wd_digest.c b/wd_digest.c index 4c1b0ca..696f1b0 100644 --- a/wd_digest.c +++ b/wd_digest.c @@ -68,6 +68,9 @@ struct wd_digest_sess { __u32 key_bytes; void *sched_key; struct wd_digest_stream_data stream_data; + void *blkpool; + unsigned char *pool_key; + handle_t h_sgl_pool; };
struct wd_env_config wd_digest_env_config; @@ -180,14 +183,38 @@ int wd_digest_set_key(handle_t h_sess, const __u8 *key, __u32 key_len) }
sess->key_bytes = key_len; - if (key_len) - memcpy(sess->key, key, key_len); + if (key_len) { + if (sess->pool_key) + memcpy(sess->pool_key, key, key_len); + else + memcpy(sess->key, key, key_len); + }
return 0; }
+void *wd_digest_setup_blkpool(struct wd_blkpool_setup *setup) +{ + struct wd_ctx_config_internal *config = &wd_digest_setting.config; + struct wd_ctx_internal *ctx = config->ctxs; + int ret; + + ret = wd_blkpool_setup(ctx->blkpool, setup); + if (ret) + return NULL; + + ctx->blkpool_mode = BLKPOOL_MODE_USER; + pthread_spin_lock(&ctx->lock); + if (ctx->h_sgl_pool == 0) + ctx->h_sgl_pool = wd_blkpool_create_sglpool(ctx->blkpool); + pthread_spin_unlock(&ctx->lock); + return ctx->blkpool; +} + handle_t wd_digest_alloc_sess(struct wd_digest_sess_setup *setup) { + struct wd_ctx_config_internal *config = &wd_digest_setting.config; + struct wd_ctx_internal *ctx = config->ctxs; struct wd_digest_sess *sess = NULL; bool ret;
@@ -222,6 +249,15 @@ handle_t wd_digest_alloc_sess(struct wd_digest_sess_setup *setup) goto err_sess; }
+ if (ctx->blkpool) { + sess->blkpool = ctx->blkpool; + sess->h_sgl_pool = ctx->h_sgl_pool; + + sess->pool_key = wd_blkpool_alloc(sess->blkpool, MAX_HMAC_KEY_SIZE); + if (!sess->pool_key) + goto err_sess; + } + return (handle_t)sess;
err_sess: @@ -589,7 +625,10 @@ static void fill_request_msg(struct wd_digest_msg *msg, msg->alg_type = WD_DIGEST; msg->alg = sess->alg; msg->mode = sess->mode; - msg->key = sess->key; + if (sess->blkpool) + msg->key = sess->pool_key; + else + msg->key = sess->key; msg->key_bytes = sess->key_bytes; msg->iv = req->iv; msg->in = req->in;
Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org> --- uadk_tool/benchmark/sec_uadk_benchmark.c | 153 +++++++++++++++++++++-- 1 file changed, 140 insertions(+), 13 deletions(-)
diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c index 7cc3f4a..4fca9f7 100644 --- a/uadk_tool/benchmark/sec_uadk_benchmark.c +++ b/uadk_tool/benchmark/sec_uadk_benchmark.c @@ -29,6 +29,9 @@ struct uadk_bd { u8 *src; u8 *dst; u8 mac[SEC_MAX_MAC_LEN]; + u8 *pool_src; + u8 *pool_dst; + u8 *pool_mac; };
struct bd_pool { @@ -40,8 +43,12 @@ struct thread_pool { u8 **iv; u8 **key; u8 **hash; + u8 **pool_iv; + u8 **pool_hash; } g_uadk_pool;
+void *g_blkpool; + typedef struct uadk_thread_res { u32 subtype; u32 alg; @@ -656,6 +663,27 @@ free_ctx: return ret; }
+static void init_blkpool(int subtype) +{ + struct wd_blkpool_setup setup; + + memset(&setup, 0, sizeof(setup)); + setup.block_size = DEFAULT_BLOCK_SIZE; + setup.block_num = DEFAULT_BLOCK_NM; + setup.align_size = DEFAULT_ALIGN_SIZE; + + switch (subtype) { + case CIPHER_TYPE: + g_blkpool = wd_cipher_setup_blkpool(&setup); + break; + case DIGEST_TYPE: + g_blkpool = wd_digest_setup_blkpool(&setup); + break; + default: + break; + } +} + static int init_ctx_config(struct acc_option *options) { struct sched_params param = {0}; @@ -732,6 +760,8 @@ static int init_ctx_config(struct acc_option *options) goto free_sched; }
+ init_blkpool(subtype); + return 0;
free_sched: @@ -849,6 +879,8 @@ static int init_ctx_config2(struct acc_option *options) ret = -EINVAL; }
+ init_blkpool(subtype); + free(ctx_set_num);
return ret; @@ -954,8 +986,38 @@ static int init_ivkey_source(void) memcpy(g_uadk_pool.hash[m], aead_key, SEC_PERF_KEY_LEN); }
+ if (!g_blkpool) + return 0; + + g_uadk_pool.pool_iv = malloc(sizeof(char *) * g_thread_num); + memset(g_uadk_pool.pool_iv, 0, sizeof(char *) * g_thread_num); + for (i = 0; i < g_thread_num; i++) { + g_uadk_pool.pool_iv[i] = wd_blkpool_alloc(g_blkpool, + MAX_IVK_LENTH * sizeof(char)); + if (!g_uadk_pool.pool_iv[i]) + goto free_pool_iv; + } + g_uadk_pool.pool_hash = malloc(sizeof(char *) * g_thread_num); + memset(g_uadk_pool.pool_hash, 0, sizeof(char *) * g_thread_num); + for (m = 0; m < g_thread_num; m++) { + g_uadk_pool.pool_hash[m] = wd_blkpool_alloc(g_blkpool, + MAX_IVK_LENTH * sizeof(char)); + if (!g_uadk_pool.pool_hash[m]) + goto free_pool_hash; + + memcpy(g_uadk_pool.pool_hash[m], aead_key, SEC_PERF_KEY_LEN); + } return 0;
+free_pool_hash: + for (idx = m - 1; idx >= 0; idx--) + wd_blkpool_free(g_blkpool, g_uadk_pool.pool_hash[idx]); + free(g_uadk_pool.pool_hash); +free_pool_iv: + for (idx = i - 1; idx >= 0; idx--) + wd_blkpool_free(g_blkpool, g_uadk_pool.pool_iv[idx]); + + free(g_uadk_pool.pool_iv); free_hash: for (idx = m - 1; idx >= 0; idx--) free(g_uadk_pool.hash[idx]); @@ -983,11 +1045,17 @@ static void free_ivkey_source(void) free(g_uadk_pool.hash[i]); free(g_uadk_pool.key[i]); free(g_uadk_pool.iv[i]); + if (g_blkpool) { + wd_blkpool_free(g_blkpool, g_uadk_pool.pool_hash[i]); + wd_blkpool_free(g_blkpool, g_uadk_pool.pool_iv[i]); + } }
free(g_uadk_pool.hash); free(g_uadk_pool.key); free(g_uadk_pool.iv); + free(g_uadk_pool.pool_hash); + free(g_uadk_pool.pool_iv); }
static int init_uadk_bd_pool(void) @@ -1043,6 +1111,23 @@ static int init_uadk_bd_pool(void) memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); } } + if (!g_blkpool) + continue; + + g_uadk_pool.pool[i].bds[j].pool_src = + wd_blkpool_alloc(g_blkpool, step); + g_uadk_pool.pool[i].bds[j].pool_dst = + wd_blkpool_alloc(g_blkpool, step); + g_uadk_pool.pool[i].bds[j].pool_mac = + wd_blkpool_alloc(g_blkpool, SEC_MAX_MAC_LEN); + if (!g_uadk_pool.pool[i].bds[j].pool_src || + !g_uadk_pool.pool[i].bds[j].pool_dst || + !g_uadk_pool.pool[i].bds[j].pool_mac) + goto malloc_error3; + memcpy(g_uadk_pool.pool[i].bds[j].pool_src, + g_uadk_pool.pool[i].bds[j].src, step); + memcpy(g_uadk_pool.pool[i].bds[j].pool_mac, + g_uadk_pool.pool[i].bds[j].mac, SEC_MAX_MAC_LEN); } } } @@ -1089,6 +1174,14 @@ static void free_uadk_bd_pool(void) for (j = 0; j < MAX_POOL_LENTH; j++) { free(g_uadk_pool.pool[i].bds[j].src); free(g_uadk_pool.pool[i].bds[j].dst); + if (g_blkpool) { + wd_blkpool_free(g_blkpool, + g_uadk_pool.pool[i].bds[j].pool_src); + wd_blkpool_free(g_blkpool, + g_uadk_pool.pool[i].bds[j].pool_dst); + wd_blkpool_free(g_blkpool, + g_uadk_pool.pool[i].bds[j].pool_mac); + } } } free(g_uadk_pool.pool[i].bds); @@ -1213,7 +1306,10 @@ static void *sec_uadk_cipher_async(void *arg) return NULL;
uadk_pool = &g_uadk_pool.pool[pdata->td_id]; - priv_iv = g_uadk_pool.iv[pdata->td_id]; + if (g_blkpool) + priv_iv = g_uadk_pool.pool_iv[pdata->td_id]; + else + priv_iv = g_uadk_pool.iv[pdata->td_id]; priv_key = g_uadk_pool.key[pdata->td_id];
memset(priv_iv, DEF_IVK_DATA, MAX_IVK_LENTH); @@ -1246,8 +1342,13 @@ static void *sec_uadk_cipher_async(void *arg) break; try_cnt = 0; i = count % MAX_POOL_LENTH; - creq.src = uadk_pool->bds[i].src; - creq.dst = uadk_pool->bds[i].dst; + if (g_blkpool) { + creq.src = uadk_pool->bds[i].pool_src; + creq.dst = uadk_pool->bds[i].pool_dst; + } else { + creq.src = uadk_pool->bds[i].src; + creq.dst = uadk_pool->bds[i].dst; + }
ret = wd_do_cipher_async(h_sess, &creq); if (ret < 0) { @@ -1391,7 +1492,10 @@ static void *sec_uadk_digest_async(void *arg) return NULL;
uadk_pool = &g_uadk_pool.pool[pdata->td_id]; - priv_iv = g_uadk_pool.iv[pdata->td_id]; + if (g_blkpool) + priv_iv = g_uadk_pool.pool_iv[pdata->td_id]; + else + priv_iv = g_uadk_pool.iv[pdata->td_id]; priv_key = g_uadk_pool.key[pdata->td_id];
memset(priv_iv, DEF_IVK_DATA, MAX_IVK_LENTH); @@ -1423,8 +1527,13 @@ static void *sec_uadk_digest_async(void *arg) break; try_cnt = 0; i = count % MAX_POOL_LENTH; - dreq.in = uadk_pool->bds[i].src; - dreq.out = uadk_pool->bds[i].dst; + if (g_blkpool) { + dreq.in = uadk_pool->bds[i].pool_src; + dreq.out = uadk_pool->bds[i].pool_dst; + } else { + dreq.in = uadk_pool->bds[i].src; + dreq.out = uadk_pool->bds[i].dst; + }
ret = wd_do_digest_async(h_sess, &dreq); if (ret < 0) { @@ -1460,9 +1569,13 @@ static void *sec_uadk_cipher_sync(void *arg) return NULL;
uadk_pool = &g_uadk_pool.pool[pdata->td_id]; - priv_iv = g_uadk_pool.iv[pdata->td_id]; - priv_key = g_uadk_pool.key[pdata->td_id];
+ if (g_blkpool) + priv_iv = g_uadk_pool.pool_iv[pdata->td_id]; + else + priv_iv = g_uadk_pool.iv[pdata->td_id]; + + priv_key = g_uadk_pool.key[pdata->td_id]; memset(priv_iv, DEF_IVK_DATA, MAX_IVK_LENTH); memset(priv_key, DEF_IVK_DATA, MAX_IVK_LENTH);
@@ -1489,8 +1602,14 @@ static void *sec_uadk_cipher_sync(void *arg)
while(1) { i = count % MAX_POOL_LENTH; - creq.src = uadk_pool->bds[i].src; - creq.dst = uadk_pool->bds[i].dst; + if (g_blkpool) { + creq.src = uadk_pool->bds[i].pool_src; + creq.dst = uadk_pool->bds[i].pool_dst; + } else { + creq.src = uadk_pool->bds[i].src; + creq.dst = uadk_pool->bds[i].dst; + } + ret = wd_do_cipher_sync(h_sess, &creq); if ((ret < 0 && ret != -WD_EBUSY) || creq.state) break; @@ -1611,7 +1730,10 @@ static void *sec_uadk_digest_sync(void *arg) return NULL;
uadk_pool = &g_uadk_pool.pool[pdata->td_id]; - priv_iv = g_uadk_pool.iv[pdata->td_id]; + if (g_blkpool) + priv_iv = g_uadk_pool.pool_iv[pdata->td_id]; + else + priv_iv = g_uadk_pool.iv[pdata->td_id]; priv_key = g_uadk_pool.key[pdata->td_id];
memset(priv_iv, DEF_IVK_DATA, MAX_IVK_LENTH); @@ -1639,8 +1761,13 @@ static void *sec_uadk_digest_sync(void *arg)
while(1) { i = count % MAX_POOL_LENTH; - dreq.in = uadk_pool->bds[i].src; - dreq.out = uadk_pool->bds[i].dst; + if (g_blkpool) { + dreq.in = uadk_pool->bds[i].pool_src; + dreq.out = uadk_pool->bds[i].pool_dst; + } else { + dreq.in = uadk_pool->bds[i].src; + dreq.out = uadk_pool->bds[i].dst; + } ret = wd_do_digest_sync(h_sess, &dreq); if (ret || dreq.state) break;