Add support for the DNN image processing accelerator found on Toshiba Visconti ARM SoCs. The accelerator performs DNN inference tasks.
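For illustration only (not part of the uapi added by this patch): a minimal userspace sketch of the intended start/poll/status flow. The device node path, include paths, the dma-buf fd and the configuration size are placeholders; IOC_IPA_START, IOC_IPA_GET_STATUS, struct drv_dnn_descriptor and the poll semantics are the ones provided by uapi/ipa.h, uapi/dnn.h and the dnn_ioctl()/dnn_poll() handlers below.

	/*
	 * Hypothetical example, not shipped with this patch. Assumes dmabuf_fd
	 * was obtained elsewhere (e.g. from a DMA heap) and already holds the
	 * network configuration at offset 0.
	 */
	#include <fcntl.h>
	#include <poll.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	#include "uapi/ipa.h"	/* include paths are placeholders */
	#include "uapi/dnn.h"

	int run_inference(int dmabuf_fd)
	{
		struct drv_dnn_descriptor desc;
		struct drv_dnn_status status;
		struct pollfd pfd;
		int fd, ret;

		fd = open("/dev/dnn0", O_RDWR);	/* misc device registered as "dnn0" */
		if (fd < 0)
			return -1;

		memset(&desc, 0, sizeof(desc));

		/* one bidirectional, non-coherent dma-buf holding all data */
		desc.buffer_info[0].fd = dmabuf_fd;
		desc.buffer_info[0].direction = DRV_IPA_DIR_BIDIRECTION;
		desc.buffer_info[0].coherent = 0;
		desc.buffer_info_num = 1;

		/* configuration (command stream) lives at offset 0 of buffer 0 */
		desc.configuration.buffer_index = 0;
		desc.configuration.offset = 0;
		desc.configuration_offset = 0;
		desc.configuration_size = 4096;	/* example size */

		/* no address lists; every base address is a fixed region */
		desc.list_num = 0;
		desc.base_addr_flag = 0;

		/* mark the descriptor as final so the driver accepts it */
		desc.config_done = DRV_DNN_BIT_CONFIG_DESC_FINAL;

		ret = ioctl(fd, IOC_IPA_START, &desc);
		if (ret)
			goto out;

		/* wait for the completion/error event signalled by the irq thread */
		pfd.fd = fd;
		pfd.events = POLLIN;
		poll(&pfd, 1, -1);

		ret = ioctl(fd, IOC_IPA_GET_STATUS, &status);
		if (!ret && status.eer)
			ret = -1;	/* execution error; inspect status.eer_flags */
	out:
		close(fd);
		return ret;
	}

A real client would additionally describe its input/output buffers through base_addr[] entries with DRV_DNN_BASE_ADDR_PURPOSE_INPUT/OUTPUT address lists; the sketch keeps every base address fixed (base_addr_flag = 0).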
Signed-off-by: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
Reviewed-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
---
 drivers/soc/visconti/Kconfig           |   6 +
 drivers/soc/visconti/Makefile          |   2 +
 drivers/soc/visconti/dnn/Makefile      |   6 +
 drivers/soc/visconti/dnn/dnn.c         | 533 +++++++++++++++++++++++++
 drivers/soc/visconti/dnn/hwd_dnn.c     | 183 +++++++++
 drivers/soc/visconti/dnn/hwd_dnn.h     |  68 ++++
 drivers/soc/visconti/dnn/hwd_dnn_reg.h | 228 +++++++++++
 drivers/soc/visconti/uapi/dnn.h        |  77 ++++
 8 files changed, 1103 insertions(+)
 create mode 100644 drivers/soc/visconti/dnn/Makefile
 create mode 100644 drivers/soc/visconti/dnn/dnn.c
 create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.c
 create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.h
 create mode 100644 drivers/soc/visconti/dnn/hwd_dnn_reg.h
 create mode 100644 drivers/soc/visconti/uapi/dnn.h
diff --git a/drivers/soc/visconti/Kconfig b/drivers/soc/visconti/Kconfig
index 8b1378917..a25287d0c 100644
--- a/drivers/soc/visconti/Kconfig
+++ b/drivers/soc/visconti/Kconfig
@@ -1 +1,7 @@
+if ARCH_VISCONTI
+
+config VISCONTI_DNN
+	bool "Visconti DNN driver"
+
+endif
diff --git a/drivers/soc/visconti/Makefile b/drivers/soc/visconti/Makefile
index 8d710da08..b9bd0f7e2 100644
--- a/drivers/soc/visconti/Makefile
+++ b/drivers/soc/visconti/Makefile
@@ -4,3 +4,5 @@ #
obj-y += ipa_common.o + +obj-$(CONFIG_VISCONTI_DNN) += dnn/ diff --git a/drivers/soc/visconti/dnn/Makefile b/drivers/soc/visconti/dnn/Makefile new file mode 100644 index 000000000..52d57b60d --- /dev/null +++ b/drivers/soc/visconti/dnn/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Visconti DNN driver +# + +obj-y += dnn.o hwd_dnn.o diff --git a/drivers/soc/visconti/dnn/dnn.c b/drivers/soc/visconti/dnn/dnn.c new file mode 100644 index 000000000..aeff92037 --- /dev/null +++ b/drivers/soc/visconti/dnn/dnn.c @@ -0,0 +1,533 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* Toshiba Visconti DNN Accelerator Support + * + * (C) Copyright 2022 TOSHIBA CORPORATION + * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation + */ + +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/poll.h> +#include <linux/wait.h> + +#include "../ipa_common.h" +#include "../uapi/dnn.h" +#include "hwd_dnn.h" + +struct dnn_priv { + struct device *dev; + struct miscdevice miscdev; + struct mutex lock; + void __iomem *regs; + int irq; + wait_queue_head_t waitq; + enum drv_ipa_state status; + unsigned int poll_event; + int id; + char name[16]; + bool dma_coherent; + struct hwd_dnn_status hwd_status; + + u32 *list_vaddr[DRV_DNN_BASE_ADDR_NUM]; + dma_addr_t list_paddr[DRV_DNN_BASE_ADDR_NUM]; + unsigned int err_flags[HWD_DNN_EER_FLAG_NUM]; + + struct dma_buf_attachment *dba[DRV_DNN_BUFFER_INDEX_MAX]; + struct sg_table *sgt[DRV_DNN_BUFFER_INDEX_MAX]; + enum dma_data_direction dma_dir[DRV_DNN_BUFFER_INDEX_MAX]; + unsigned int dma_count; + + dma_addr_t buffer_iova[DRV_DNN_BUFFER_INDEX_MAX]; + + struct drv_ipa_addr temp_list[HWD_DNN_LIST_NUM_MAX]; +}; + +static uint32_t dnn_ipa_addr_to_iova(struct dnn_priv *priv, struct drv_ipa_addr addr) +{ + u32 iova = 0; + + if (addr.buffer_index < priv->dma_count && + addr.offset < priv->dba[addr.buffer_index]->dmabuf->size) + iova = priv->buffer_iova[addr.buffer_index] + addr.offset; + return iova; +} + +static int dnn_attach_dma_buf(struct dnn_priv *priv, unsigned int buffer_index, + struct drv_ipa_buffer_info buffer_info[DRV_DNN_BUFFER_INDEX_MAX]) +{ + int ret = 0; + dma_addr_t addr; + + if (buffer_index >= DRV_DNN_BUFFER_INDEX_MAX) { + dev_err(priv->dev, "Buffer index invalid: index=%d\n", buffer_index); + return -EINVAL; + } + + switch (buffer_info[buffer_index].direction) { + case DRV_IPA_DIR_NONE: + priv->dma_dir[priv->dma_count] = DMA_NONE; + break; + case DRV_IPA_DIR_TO_DEVICE: + priv->dma_dir[priv->dma_count] = DMA_TO_DEVICE; + break; + case DRV_IPA_DIR_FROM_DEVICE: + priv->dma_dir[priv->dma_count] = DMA_FROM_DEVICE; + break; + case DRV_IPA_DIR_BIDIRECTION: + priv->dma_dir[priv->dma_count] = DMA_BIDIRECTIONAL; + break; + default: + dev_err(priv->dev, "DMA direction invalid: index=%d dir=%d\n", buffer_index, + buffer_info[buffer_index].direction); + return -EINVAL; + } + + if (!buffer_info[buffer_index].coherent) { + priv->dev->dma_coherent = false; + if (priv->dma_coherent) + priv->dma_dir[priv->dma_count] = DMA_NONE; + } + + ret = ipa_attach_dmabuf(priv->dev, buffer_info[buffer_index].fd, + &priv->dba[priv->dma_count], &priv->sgt[priv->dma_count], &addr, + priv->dma_dir[priv->dma_count]); + if (ret == 0) { + priv->dma_count++; + priv->buffer_iova[buffer_index] = addr; + } + + priv->dev->dma_coherent = 
priv->dma_coherent; + + return ret; +} + +static void dnn_detach_dma_buf(struct dnn_priv *priv) +{ + struct dma_buf *dmabuf; + int i; + + for (i = 0; i < priv->dma_count; i++) { + dmabuf = priv->dba[i]->dmabuf; + dma_buf_unmap_attachment(priv->dba[i], priv->sgt[i], priv->dma_dir[i]); + dma_buf_detach(dmabuf, priv->dba[i]); + dma_buf_put(dmabuf); + } +} + +static irqreturn_t dnn_irq(int irq, void *dev_id) +{ + struct dnn_priv *priv = dev_id; + unsigned int event; + + event = hwd_dnn_irq_handler(priv->id); + + if (event & HWD_DNN_EVENT_EXEC_DONE) { + disable_irq_nosync(priv->irq); + return IRQ_WAKE_THREAD; + } + + return IRQ_NONE; +} + +static irqreturn_t dnn_irq_thread(int irq, void *dev_id) +{ + struct dnn_priv *priv = dev_id; + unsigned long delay = 1; + + mutex_lock(&priv->lock); + dnn_detach_dma_buf(priv); + + hwd_dnn_get_status(priv->id, &priv->hwd_status); + + priv->status = DRV_IPA_STATE_IDLE; + + /* status should be updated before poll_event so that + * when poll() returns, user context must observe state as idle + */ + smp_wmb(); + + if (priv->hwd_status.eer) + priv->poll_event = IPA_POLL_EVENT_ERROR; + else + priv->poll_event = IPA_POLL_EVENT_DONE; + + /* General barrier to avoid re-ordering of priv->poll_event=N and + * waitqueue_active() + */ + smp_mb(); + + /* Threads going to sleep in poll() can miss wakeup, when wakeup is done + * between event check in ipa_poll() and sleeping. Wakeup repeatedly. + */ + while (waitqueue_active(&priv->waitq)) { + wake_up_interruptible(&priv->waitq); + + WARN_ON(delay > IPA_WAKEUP_RETRY_DELAY); + usleep_range(delay, delay + 1); + delay += delay; + } + + mutex_unlock(&priv->lock); + + return IRQ_HANDLED; +} + +static void dnn_start(struct dnn_priv *priv, struct hwd_dnn_descriptor *desc) +{ + hwd_dnn_start(priv->id, desc); +} + +static int dnn_parse_entry_addr(struct dnn_priv *priv, int idx, struct drv_dnn_descriptor *desc, + struct hwd_dnn_descriptor *hwd_desc) +{ + dma_addr_t addr; + struct drv_ipa_addr ipa_addr; + int j, ret = 0; + + switch (desc->base_addr[idx].purpose) { + case DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT: + case DRV_DNN_BASE_ADDR_PURPOSE_INPUT: + if (copy_from_user(priv->temp_list, + (void __user *)desc->base_addr[idx].addr.list_addr, + sizeof(struct drv_ipa_addr) * (desc->list_num + 1))) { + dev_err(priv->dev, "IPA address to iova conversion error: %d\n", __LINE__); + ret = -EFAULT; + break; + } + + hwd_desc->base_addr[idx] = priv->list_paddr[idx]; + for (j = 0; j < desc->list_num + 1; j++) { + ipa_addr = priv->temp_list[j]; + addr = dnn_ipa_addr_to_iova(priv, ipa_addr); + if (addr == 0) { + dev_err(priv->dev, + "@LIST@ IPA address to iova conversion error: %d\n", + __LINE__); + ret = -EINVAL; + break; + } + dev_dbg(priv->dev, "@LIST@ %d: fd=%d %llx %x\n", idx, + desc->buffer_info[ipa_addr.buffer_index].fd, + (u64)hwd_desc->base_addr[idx], (u32)addr); + priv->list_vaddr[idx][j] = addr; + } + break; + case DRV_DNN_BASE_ADDR_PURPOSE_AWB: + ipa_addr = desc->base_addr[idx].addr.ipa_addr; + addr = dnn_ipa_addr_to_iova(priv, ipa_addr); + if (addr == 0) { + dev_err(priv->dev, "@AWB@ IPA address to iova conversion error: %d\n", + __LINE__); + ret = -EINVAL; + break; + } + hwd_desc->base_addr[idx] = addr; + dev_dbg(priv->dev, "@AWB@ %d: fd=%d %llx\n", idx, + desc->buffer_info[ipa_addr.buffer_index].fd, (u64)hwd_desc->base_addr[idx]); + break; + case DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY: + ipa_addr = desc->base_addr[idx].addr.ipa_addr; + addr = dnn_ipa_addr_to_iova(priv, ipa_addr); + if (addr == 0) { + dev_err(priv->dev, "@TEMP@ IPA address to 
iova conversion error: %d\n", + __LINE__); + ret = -EINVAL; + break; + } + hwd_desc->base_addr[idx] = addr; + dev_dbg(priv->dev, "@TEMP@ %d: fd=%d %llx\n", idx, + desc->buffer_info[ipa_addr.buffer_index].fd, (u64)hwd_desc->base_addr[idx]); + break; + default: + hwd_desc->base_addr[idx] = 0; + break; + } + return ret; +} + +static int dnn_ioctl_start(struct dnn_priv *priv, unsigned long arg) +{ + struct drv_dnn_descriptor desc; + struct hwd_dnn_descriptor hwd_desc; + int i, ret = 0; + + ret = mutex_lock_interruptible(&priv->lock); + if (ret) + return ret; + + if (priv->status == DRV_IPA_STATE_BUSY) { + dev_dbg(priv->dev, "busy: %d\n", priv->status); + mutex_unlock(&priv->lock); + return -EBUSY; + } + + if (copy_from_user(&desc, (void __user *)arg, sizeof(struct drv_dnn_descriptor))) { + dev_err(priv->dev, "Descriptor memory access error\n"); + ret = -EFAULT; + goto err1; + } + + if (DRV_DNN_BIT_CONFIG_DESC_FINAL != (desc.config_done & DRV_DNN_BIT_CONFIG_DESC_FINAL)) { + dev_err(priv->dev, "Descriptor configuration not complete\n"); + ret = -EINVAL; + goto err1; + } + + priv->dma_count = 0; + + /* setup buffer */ + for (i = 0; i < desc.buffer_info_num; i++) { + ret = dnn_attach_dma_buf(priv, i, desc.buffer_info); + if (ret) { + dev_err(priv->dev, "dma buf attach error: index=%d\n", i); + goto err2; + } + dev_dbg(priv->dev, "@buffer[%d]@: fd=%d %s iova=%llx\n", i, desc.buffer_info[i].fd, + desc.buffer_info[i].coherent ? "coherent" : "non-coherent", + (uint64_t)priv->buffer_iova[i]); + } + + /* get iova address of configuration area */ + hwd_desc.configuration = dnn_ipa_addr_to_iova(priv, desc.configuration); + if (hwd_desc.configuration == 0) { + dev_err(priv->dev, "Invalid IPA Address: configuration %s: %d\n", __func__, + __LINE__); + ret = -EINVAL; + goto err2; + } + + dev_dbg(priv->dev, "@config@: fd=%d %llx\n", + desc.buffer_info[desc.configuration.buffer_index].fd, + (uint64_t)hwd_desc.configuration); + + for (i = 0; i < DRV_DNN_BASE_ADDR_NUM; i++) { + ret = dnn_parse_entry_addr(priv, i, &desc, &hwd_desc); + if (ret) + goto err2; + } + + hwd_desc.configuration_size = desc.configuration_size; + hwd_desc.list_num = desc.list_num; + hwd_desc.eer_flags_addr = priv->err_flags; + hwd_desc.base_addr_flag = desc.base_addr_flag; + hwd_desc.config_done = desc.config_done; + + hwd_desc.configuration += desc.configuration_offset; + + dnn_start(priv, &hwd_desc); + + priv->poll_event = IPA_POLL_EVENT_NONE; + priv->status = DRV_IPA_STATE_BUSY; + enable_irq(priv->irq); + + mutex_unlock(&priv->lock); + + return ret; + +err2: + dnn_detach_dma_buf(priv); +err1: + mutex_unlock(&priv->lock); + return ret; +} + +static int dnn_ioctl_get_status(struct dnn_priv *priv, unsigned long arg) +{ + struct drv_dnn_status status; + struct hwd_dnn_status hwd_status; + int ret = 0, i; + + ret = mutex_lock_interruptible(&priv->lock); + if (ret) + return ret; + + if (priv->status == DRV_IPA_STATE_BUSY) + hwd_dnn_get_status(priv->id, &hwd_status); + else + hwd_status = priv->hwd_status; + + status.state = priv->status; + mutex_unlock(&priv->lock); + + status.eer_cmd = hwd_status.eer_cmd; + status.eer = hwd_status.eer; + for (i = 0; i < HWD_DNN_EER_FLAG_NUM; i++) + status.eer_flags[i] = priv->err_flags[i]; + + if (copy_to_user((void __user *)arg, &status, sizeof(struct drv_dnn_status))) { + dev_err(priv->dev, "status memory access error\n"); + ret = -EFAULT; + } + + return ret; +} + +static long dnn_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + struct dnn_priv *priv = container_of(fp->private_data, struct 
dnn_priv, miscdev); + int ret = 0; + + switch (cmd) { + case IOC_IPA_START: + ret = dnn_ioctl_start(priv, arg); + break; + case IOC_IPA_GET_STATUS: + ret = dnn_ioctl_get_status(priv, arg); + break; + default: + ret = -ENOIOCTLCMD; + break; + } + + return ret; +} + +static __poll_t dnn_poll(struct file *fp, poll_table *wait) +{ + struct dnn_priv *priv = container_of(fp->private_data, struct dnn_priv, miscdev); + __poll_t mask = 0; + unsigned int poll_event; + + poll_wait(fp, &priv->waitq, wait); + + /* Barrier to avoid re-ordering of poll_wait() and event load + * Read barrier here and release barrier in poll_wait() together will + * prevent re-ordering + */ + smp_rmb(); + poll_event = priv->poll_event; + if (poll_event != IPA_POLL_EVENT_NONE) { + mask = EPOLLIN | EPOLLRDNORM; + if (poll_event == IPA_POLL_EVENT_ERROR) + mask |= EPOLLERR; + } + return mask; +} + +static const struct file_operations dnn_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = dnn_ioctl, + .poll = dnn_poll, +}; + +static int dnn_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dnn_priv *priv; + int i, ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + mutex_init(&priv->lock); + + /* update DMA mask */ + priv->dma_coherent = dev->dma_coherent; + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); + if (ret) + return ret; + + priv->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->regs)) + return PTR_ERR(priv->regs); + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) + return priv->irq; + + ret = devm_request_threaded_irq(dev, priv->irq, dnn_irq, dnn_irq_thread, 0, "dnn", priv); + if (ret) { + dev_err(dev, "irq request failed\n"); + return ret; + } + disable_irq(priv->irq); + + ret = of_property_read_u32(dev->of_node, "index", &priv->id); + if (ret) { + dev_err(dev, "failed to acquire irq resource\n"); + return ret; + } + + /* + * allocate uncached-memory to hold address-list for DNN operation + * to hold ptr to 32bit addr memory block + * for each base address (upto HWD_DNN_BASE_ADDR_NUM) + * for each iteration (up to HWD_DNN_LIST_NUM_MAX) + * uint32_t list_addr [HWD_DNN_BASE_ADDR_NUM] [HWD_DNN_LIST_NUM_MAX]; + */ + priv->list_vaddr[0] = + dma_alloc_wc(dev, sizeof(u32) * HWD_DNN_LIST_NUM_MAX * HWD_DNN_BASE_ADDR_NUM, + &priv->list_paddr[0], GFP_KERNEL); + if (!priv->list_vaddr[0]) { + dev_err(dev, "dma_alloc_wc failed\n"); + return -ENOMEM; + } + for (i = 1; i < HWD_DNN_BASE_ADDR_NUM; i++) { + priv->list_vaddr[i] = priv->list_vaddr[0] + HWD_DNN_LIST_NUM_MAX * i; + priv->list_paddr[i] = priv->list_paddr[0] + sizeof(u32) * HWD_DNN_LIST_NUM_MAX * i; + } + + hwd_dnn_initialize(priv->id, priv->regs); + + snprintf(priv->name, sizeof(priv->name), "dnn%d", priv->id); + priv->miscdev.minor = MISC_DYNAMIC_MINOR; + priv->miscdev.name = priv->name; + priv->miscdev.fops = &dnn_fops; + ret = misc_register(&priv->miscdev); + if (ret) { + dev_err(dev, "misc registration failed\n"); + hwd_dnn_uninitialize(priv->id); + dma_free_wc(dev, sizeof(u32) * HWD_DNN_LIST_NUM_MAX * HWD_DNN_BASE_ADDR_NUM, + priv->list_vaddr[0], priv->list_paddr[0]); + return ret; + } + + priv->dev = dev; + platform_set_drvdata(pdev, priv); + + init_waitqueue_head(&priv->waitq); + + priv->status = DRV_IPA_STATE_IDLE; + return 0; +} + +static int dnn_remove(struct platform_device *pdev) +{ + struct dnn_priv *priv = platform_get_drvdata(pdev); + + misc_deregister(&priv->miscdev); + hwd_dnn_uninitialize(priv->id); + dma_free_wc(&pdev->dev, sizeof(u32) * 
HWD_DNN_LIST_NUM_MAX * HWD_DNN_BASE_ADDR_NUM, + priv->list_vaddr[0], priv->list_paddr[0]); + return 0; +} + +static const struct of_device_id dnn_of_match[] = { + { + .compatible = "toshiba,visconti-dnn", + }, + {}, +}; +MODULE_DEVICE_TABLE(platform, dnn_of_match); + +static struct platform_driver dnn_driver = { + .probe = dnn_probe, + .remove = dnn_remove, + .driver = { + .name = "visconti_dnn", + .of_match_table = of_match_ptr(dnn_of_match), + }, +}; +module_platform_driver(dnn_driver); + +MODULE_AUTHOR("Yuji Ishikawa yuji2.ishikawa@toshiba.co.jp"); +MODULE_DESCRIPTION("Toshiba Visconti DNN driver"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/soc/visconti/dnn/hwd_dnn.c b/drivers/soc/visconti/dnn/hwd_dnn.c new file mode 100644 index 000000000..832040329 --- /dev/null +++ b/drivers/soc/visconti/dnn/hwd_dnn.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* Toshiba Visconti DNN Accelerator Support + * + * (C) Copyright 2022 TOSHIBA CORPORATION + * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation + */ + +#include <linux/io.h> +#include <linux/errno.h> +#include <linux/types.h> + +#include "hwd_dnn.h" +#include "hwd_dnn_reg.h" + +#define HWD_DNN_INT_END BIT(0) +#define HWD_DNN_INT_EER BIT(24) +#define HWD_DNN_INT_UNUSED (0x003f0100U) +#define HWD_DNN_INT_MASK_CONFIG (HWD_DNN_INT_UNUSED | HWD_DNN_INT_EER) +#define HWD_DNN_INT_CLEAR_ALL (HWD_DNN_INT_END | HWD_DNN_INT_EER) +#define HWD_DNN_EXE_CONFIG_MODE (0x3U) + +#define HWD_DNN_CGATE_ORD_CONFIG BIT(17) + +/** + * struct hwd_dnn_resources - HWD driver internal resource structure + * + * @reg: control register + * @status: driver status + * @eer_flags_addr: Address of execution error flags + */ +struct hwd_dnn_resources { + struct hwd_dnn_reg *reg; + struct hwd_dnn_status status; + u32 *eer_flags_addr; +}; + +/* HWD driver internal resource */ +static struct hwd_dnn_resources hwd_dnn_resources[HWD_DNN_DEVICE_MAX] = {}; + +/** + * hwd_dnn_initialize() - Initialize DNN device + * + * @module_id: @ref hwd_dnn_device "id" of the h/w module + * @vaddr: register base virtual address + * Return: 0 operation completed successfully + */ +void hwd_dnn_initialize(u32 module_id, void *vaddr) +{ + struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id]; + + /* Initialize the device */ + res->reg = (struct hwd_dnn_reg *)vaddr; +} + +/** + * hwd_dnn_uninitialize() - Uninitialize DNN device + * + * @module_id: @ref hwd_dnn_device "id" of the h/w module + */ +void hwd_dnn_uninitialize(u32 module_id) +{ + struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id]; + + res->reg = NULL; +} + +/** + * hwd_dnn_start() - Start DNN device + * + * @module_id: @ref hwd_dnn_device "id" of the h/w module + * @desc: Pointer to descriptor structure + */ +void hwd_dnn_start(u32 module_id, const struct hwd_dnn_descriptor *desc) +{ + struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id]; + struct hwd_dnn_reg *reg; + u32 base_addr_flag; + int i; + + reg = res->reg; + + /* Configure the registers with user provided values */ + writel(desc->configuration & GENMASK(31, 0), ®->cfg_addr); + writel(desc->configuration_size, ®->cfg_size); + writel(desc->list_num, ®->cfg_rpt); + + base_addr_flag = desc->base_addr_flag; + for (i = 0; i < HWD_DNN_BASE_ADDR_NUM; i++) { + if (base_addr_flag & BIT(i)) { + /* 1: address list is specified */ + writel(0U, ®->base_addr[i]); + writel(desc->base_addr[i] & GENMASK(31, 0), ®->base_list[i]); + } else { + /* 0: region address is specified */ + writel(desc->base_addr[i] & 
GENMASK(31, 0), ®->base_addr[i]); + writel(0U, ®->base_list[i]); + } + } + + writel(base_addr_flag, ®->base_list_cfg); + + /* clear and unmask interrupts */ + writel(HWD_DNN_INT_CLEAR_ALL, ®->intstat); + writel(HWD_DNN_INT_MASK_CONFIG, ®->intmask); + + res->status.eer_cmd = 0; + res->status.eer = 0; + res->eer_flags_addr = desc->eer_flags_addr; + + /*Invalidate load command in Clock Gating Control Register */ + writel(HWD_DNN_CGATE_ORD_CONFIG, ®->cgate_ord); + + /* Sync memory barrier */ + dsb(st); + + /* Start the device */ + writel(HWD_DNN_EXE_CONFIG_MODE, ®->exe); +} + +/** + * hwd_dnn_irq_handler() - HWD DNN interrupt handler + * + * @module_id: id of the h/w module + * Return: HWD_DNN_EVENT_EXEC_DONE Bit field saying Configuration processing is completed + */ +u32 hwd_dnn_irq_handler(u32 module_id) +{ + struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id]; + struct hwd_dnn_reg *reg; + u32 event = 0; + u32 int_stat; + u32 cause; + int i; + + reg = res->reg; + + /* Read the interrupt causes */ + cause = readl(®->mask_intstat); + + /* Read the interrupt status register */ + int_stat = readl(®->intstat); + + /* + * Read the error command. + * Clearing DNN_INTSTAT.EER interrupt cause initializes the error command register. + * Read error command before clearing the interrupt cause. + */ + res->status.eer_cmd = readl(®->int_eer_cmd); + + /* Clear the interrupt causes */ + writel(cause, ®->intstat); + + if (cause & HWD_DNN_INT_END) { + /* Get EER flags, if requestd */ + if (res->eer_flags_addr) + for (i = 0; i < HWD_DNN_EER_FLAG_NUM; i++) + res->eer_flags_addr[i] = readl(®->eer_flag[i]); + + event = HWD_DNN_EVENT_EXEC_DONE; + } + + if (int_stat & HWD_DNN_INT_EER) { + /* Operation error occurred */ + res->status.eer = 1; + } + + return event; +} + +/** + * hwd_dnn_get_status() - HWD DNN Get Status + * + * @module_id: @ref hwd_dnn_device "id" of the h/w module + * @status: Pointer to status structure + */ +void hwd_dnn_get_status(u32 module_id, struct hwd_dnn_status *status) +{ + const struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id]; + + /* Update device status */ + status->eer_cmd = res->status.eer_cmd; + status->eer = res->status.eer; +} diff --git a/drivers/soc/visconti/dnn/hwd_dnn.h b/drivers/soc/visconti/dnn/hwd_dnn.h new file mode 100644 index 000000000..84425990c --- /dev/null +++ b/drivers/soc/visconti/dnn/hwd_dnn.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* Toshiba Visconti DNN Accelerator Support + * + * (C) Copyright 2022 TOSHIBA CORPORATION + * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation + */ + +#ifndef HWD_DNN_H +#define HWD_DNN_H + +/** + * enum hwd_dnn_device_id - device ids for available DNN Accelerators + */ +enum hwd_dnn_device_id { HWD_DNN_DEVICE_0 = 0, HWD_DNN_DEVICE_1 = 1, HWD_DNN_DEVICE_MAX = 2 }; + +/* DNN events returned by irq handler */ +#define HWD_DNN_EVENT_EXEC_DONE (1U) + +/* number of base address available for DNN */ +#define HWD_DNN_BASE_ADDR_NUM (8U) + +/* number of error flag registers */ +#define HWD_DNN_EER_FLAG_NUM (32U) + +/* DNN hardware can execute 1024 iterations for different dataset at a time */ +#define HWD_DNN_LIST_NUM_MAX (1024U) + +/** + * struct hwd_dnn_status - DNN HWD Driver Status + * + * @eer_cmd: Error Command + * @eer: Execution Error + * @reserved: Padding + */ +struct hwd_dnn_status { + u32 eer_cmd; + u32 eer : 1; + u32 reserved : 31; +}; + +/** + * struct hwd_dnn_descriptor - HWD DNN Descriptor + * + * @configuration: Configuration data + * 
@configuration_size: Configuration data size + * @list_num: Number of input/output list + * @base_addr: Base Address + * @eer_flags_addr: Address of storing execution error flags + * @base_addr_flag: Bit-fields of Base Address list config. If 1, address list. If 0, fixed address + * @config_done: Flags of called configuration + */ +struct hwd_dnn_descriptor { + u32 configuration; + u32 configuration_size; + u32 list_num; + u32 base_addr[HWD_DNN_BASE_ADDR_NUM]; + u32 *eer_flags_addr; + u32 base_addr_flag; + u16 config_done; +}; + +void hwd_dnn_initialize(u32 module_id, void *vaddr); +void hwd_dnn_uninitialize(u32 module_id); +void hwd_dnn_start(u32 module_id, const struct hwd_dnn_descriptor *desc); +u32 hwd_dnn_irq_handler(u32 module_id); +void hwd_dnn_get_status(u32 module_id, struct hwd_dnn_status *status); + +#endif /* HWD_DNN_H */ diff --git a/drivers/soc/visconti/dnn/hwd_dnn_reg.h b/drivers/soc/visconti/dnn/hwd_dnn_reg.h new file mode 100644 index 000000000..c1271e092 --- /dev/null +++ b/drivers/soc/visconti/dnn/hwd_dnn_reg.h @@ -0,0 +1,228 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* Toshiba Visconti DNN Accelerator Support + * + * (C) Copyright 2022 TOSHIBA CORPORATION + * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation + */ + +#ifndef HWD_DNN_REG_H +#define HWD_DNN_REG_H + +/** + * struct hwd_dnn_reg - DNN register address struct + */ +struct hwd_dnn_reg { + u32 version; + u32 exe; + u32 reserved_a_1; + u32 intstat; + u32 intmask; + u32 mask_intstat; + u32 reserved_a_2; + u32 int_eer_cmd; + u32 reserved_b_1[8]; + u32 cfg_cid; + u32 cfg_cnm; + u32 reserved_b_2[10]; + u32 reserved_a_3; + u32 cgate_ord; + u32 reserved_b_3[2]; + u32 eer_flag[32]; + u32 cfg_addr; + u32 cfg_size; + u32 cfg_rpt; + u32 reserved_b_4[1]; + u32 base_list_cfg; + u32 reserved_b_5[11]; + u32 base_addr[8]; + u32 reserved_b_6[24]; + u32 base_list[8]; + u32 reserved_b_7[8]; + u32 cmd_load; + u32 ld_format; + u32 reserved_b_8[14]; + u32 ld_g_size0; + u32 ld_g_size1; + u32 reserved_b_9[2]; + u32 reserved_a_4; + u32 reserved_b_10[1]; + u32 ld_g_base_id; + u32 ld_g_offset; + u32 ld_g_lptch; + u32 ld_g_cptch; + u32 ld_g_vptch; + u32 reserved_b_11[5]; + u32 ld_l_size0; + u32 ld_l_size1; + u32 reserved_b_12[2]; + u32 ld_l_addr; + u32 reserved_b_13[3]; + u32 ld_l_lptch; + u32 ld_l_cptch; + u32 ld_l_vptch; + u32 reserved_b_14[17]; + u32 reserved_a_5; + u32 reserved_b_15[3]; + u32 cmd_store; + u32 st_format; + u32 reserved_b_16[14]; + u32 st_g_size0; + u32 st_g_size1; + u32 reserved_b_17[2]; + u32 reserved_a_6; + u32 reserved_b_18[1]; + u32 st_g_base_id; + u32 st_g_offset; + u32 st_g_lptch; + u32 st_g_cptch; + u32 st_g_vptch; + u32 reserved_b_19[5]; + u32 st_l_size0; + u32 st_l_size1; + u32 reserved_b_20[2]; + u32 st_l_addr; + u32 reserved_b_21[3]; + u32 st_l_lptch; + u32 st_l_cptch; + u32 st_l_vptch; + u32 reserved_b_22[17]; + u32 reserved_a_7; + u32 reserved_b_23[3]; + u32 cmd_copy; + u32 reserved_b_24[15]; + u32 cp_src_size0; + u32 cp_src_size1; + u32 reserved_b_25[2]; + u32 cp_src_addr; + u32 reserved_b_26[3]; + u32 cp_src_lptch; + u32 cp_src_cptch; + u32 reserved_a_8; + u32 reserved_b_27[5]; + u32 cp_dst_size0; + u32 cp_dst_size1; + u32 reserved_b_28[2]; + u32 cp_dst_addr; + u32 reserved_b_29[3]; + u32 cp_dst_lptch; + u32 cp_dst_cptch; + u32 reserved_a_9; + u32 reserved_b_30[17]; + u32 reserved_a_10; + u32 reserved_b_31[3]; + u32 cmd_fill; + u32 reserved_b_32[1]; + u32 fl_cfg; + u32 reserved_b_33[13]; + u32 fl_src_size0; + u32 fl_src_size1; + u32 reserved_b_34[2]; + u32 fl_src_addr; 
+ u32 reserved_b_35[3]; + u32 fl_src_lptch; + u32 fl_src_cptch; + u32 reserved_b_36[6]; + u32 fl_dst_size0; + u32 fl_dst_size1; + u32 reserved_b_37[2]; + u32 fl_dst_addr; + u32 reserved_b_38[3]; + u32 fl_dst_lptch; + u32 fl_dst_cptch; + u32 reserved_b_39[22]; + u32 cmd_sync; + u32 reserved_b_40[1]; + u32 cmd_srst; + u32 reserved_b_41[1]; + u32 cmd_halt; + u32 reserved_b_42[3]; + u32 cmd_finish; + u32 reserved_b_43[119]; + u32 cmd_exe; + u32 exe_mode; + u32 reserved_b_44[14]; + u32 exe_inf_size0; + u32 exe_inf_size1; + u32 reserved_b_45[2]; + u32 exe_inf_addr; + u32 reserved_b_46[3]; + u32 exe_inf_lptch; + u32 exe_inf_cptch; + u32 reserved_b_47[26]; + u32 exe_conv_bias_addr; + u32 reserved_b_48[27]; + u32 exe_conv_wt_size0; + u32 exe_conv_wt_size1; + u32 reserved_b_49[2]; + u32 exe_conv_wt_addr; + u32 reserved_b_50[3]; + u32 exe_conv_wt_lptch; + u32 exe_conv_wt_cptch; + u32 exe_conv_wt_vptch; + u32 reserved_b_51[9]; + u32 exe_conv_acv_addr; + u32 reserved_b_52[3]; + u32 exe_conv_acv_lptch; + u32 exe_conv_acv_cptch; + u32 reserved_b_53[18]; + u32 exe_conv_out; + u32 reserved_b_54[11]; + u32 actv_tbl_range; + u32 reserved_a_11; + u32 actv_tbl_prm; + u32 reserved_b_55[5]; + u32 exe_pool_size; + u32 reserved_b_56[43]; + u32 exe_pool_out; + u32 exe_pool_pe_crop; + u32 reserved_b_57[10]; + u32 exe_norm_g_prm; + u32 reserved_b_58[15]; + u32 exe_sav_max_prm; + u32 reserved_b_59[35]; + u32 exe_norm_out; + u32 reserved_b_60[23]; + u32 exe_outf_addr; + u32 reserved_b_61[3]; + u32 exe_outf_lptch; + u32 exe_outf_cptch; + u32 reserved_b_62[102]; + u32 reserved_a_12; + u32 reserved_b_63[8]; + u32 reserved_a_13; + u32 reserved_a_14; + u32 reserved_a_15; + u32 reserved_b_64[4]; + u32 reserved_a_16; + u32 reserved_a_17; + u32 reserved_b_65[46]; + u32 exe_pool_pa3_0; + u32 exe_pool_pa3_2; + u32 exe_pool_pa3_4; + u32 exe_pool_pa3_6; + u32 exe_pool_pa3_8; + u32 exe_pool_pa3_10; + u32 exe_pool_pa3_12; + u32 exe_pool_pa3_14; + u32 exe_pool_pa3_16; + u32 exe_pool_pa3_18; + u32 exe_pool_pa3_20; + u32 exe_pool_pa3_22; + u32 exe_pool_pa3_24; + u32 reserved_b_66[3]; + u32 exe_pool_pa2_0; + u32 exe_pool_pa2_2; + u32 exe_pool_pa2_4; + u32 exe_pool_pa2_6; + u32 exe_pool_pa2_8; + u32 exe_pool_pa2_10; + u32 exe_pool_pa2_12; + u32 exe_pool_pa2_14; + u32 exe_pool_pa2_16; + u32 exe_pool_pa2_18; + u32 exe_pool_pa2_20; + u32 exe_pool_pa2_22; + u32 exe_pool_pa2_24; +}; + +#endif /* HWD_DNN_REG_H */ diff --git a/drivers/soc/visconti/uapi/dnn.h b/drivers/soc/visconti/uapi/dnn.h new file mode 100644 index 000000000..972c39655 --- /dev/null +++ b/drivers/soc/visconti/uapi/dnn.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Toshiba Visconti DNN Accelerator Support + * + * (C) Copyright 2022 TOSHIBA CORPORATION + * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation + */ + +#ifndef _UAPI_LINUX_DNN_H +#define _UAPI_LINUX_DNN_H + +#include <linux/ioctl.h> +#include <linux/types.h> +#include "ipa.h" + +#define DRV_DNN_BIT_CONFIG_DESC_FINAL (0x8000U) +#define DRV_DNN_BUFFER_INDEX_MAX (15) + +#define DRV_DNN_BASE_ADDR_NUM (8U) /* DNN number of base address */ + +#define DRV_DNN_BASE_ADDR_PURPOSE_INPUT (1U) +#define DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT (2U) +#define DRV_DNN_BASE_ADDR_PURPOSE_AWB (3U) +#define DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY (4U) + +/** + * struct drv_dnn_status - DNN IPA status for IOC_IPA_GET_STATUS + * + * @state: State of driver + * @eer_cmd: Execution error command + * @eer: Execution error + * @reserved: Padding + * @eer_flags: Execution error flags + */ +struct 
drv_dnn_status { + enum drv_ipa_state state; + u32 eer_cmd; + u32 eer : 1; + u32 reserved : 31; + u32 eer_flags[32]; +}; + +struct drv_dnn_base_addr { + u32 purpose; + union { + struct drv_ipa_addr ipa_addr; + uintptr_t list_addr; + } addr; +}; + +/** + * struct drv_dnn_descriptor - DNN IPA Descriptor for IOC_IPA_START + * + * @configuration: Address of configuration data + * @configuration_offset: Configuration offset + * @configuration_size: Configuration data size + * @list_num: Number of input/output list + * @base_addr: Base addresses + * @base_addr_flag: Bit-fields of base_addr list config + * - if 0, fixed address + * - if 1, address list + * @config_done: Flags of called configuration + * @buffer_info: Table of buffer information + * @buffer_info_num: Number of buffer_info + */ +struct drv_dnn_descriptor { + struct drv_ipa_addr configuration; + u32 configuration_offset; + u32 configuration_size; + u32 list_num; + struct drv_dnn_base_addr base_addr[DRV_DNN_BASE_ADDR_NUM]; + u32 base_addr_flag; + u16 config_done; + struct drv_ipa_buffer_info buffer_info[DRV_DNN_BUFFER_INDEX_MAX]; + s32 buffer_info_num; +}; + +#endif /* _UAPI_LINUX_DNN_H */