Hello,
This patchset is a continuation of the work started by Andreas Herrmann to add support for dynamically resized bitmaps to the IOMMU-based DMA-mapping implementation for the ARM architecture. Further discussion and rationale can be found in the following thread: http://www.spinics.net/lists/arm-kernel/msg303732.html
The first patch adds support for extending the IO address space bitmap on demand. It is based on the original work by Andreas Herrmann, but I decided to drop the arm_iommu_create_mapping() API change part. The second patch removes the 'order' hack, which was used to reduce the size of a bitmap. The first patch solves the problem of too large IO address space bitmaps, so the 'order' hack is no longer needed. The parameters of the arm_iommu_create_mapping() function can then be simplified by dropping the 'order' parameter without any functional change to the whole subsystem. This parameter was already a bit misunderstood, so the overall result is also a small improvement of the API.
Best regards Marek Szyprowski, PhD Samsung R&D Institute Poland
Andreas Herrmann (1): arm: dma-mapping: Add support to extend DMA IOMMU mappings
Marek Szyprowski (1): arm: dma-mapping: remove order parameter from arm_iommu_create_mapping()
arch/arm/include/asm/dma-iommu.h | 12 ++- arch/arm/mm/dma-mapping.c | 144 +++++++++++++++++++++++------ drivers/gpu/drm/exynos/exynos_drm_drv.h | 2 - drivers/gpu/drm/exynos/exynos_drm_iommu.c | 6 +- drivers/gpu/drm/exynos/exynos_drm_iommu.h | 1 - drivers/iommu/shmobile-iommu.c | 2 +- 6 files changed, 124 insertions(+), 43 deletions(-)
From: Andreas Herrmann andreas.herrmann@calxeda.com
Instead of using just one bitmap to keep track of IO virtual addresses (handed out for IOMMU use), introduce an array of bitmaps. This allows us to extend existing mappings when running out of iova space in the initial mapping etc.
If there is not enough space in the mapping to service an IO virtual address allocation request, __alloc_iova() tries to extend the mapping -- by allocating another bitmap -- and makes another allocation attempt using the freshly allocated bitmap.
This allows arm iommu drivers to start with a decent initial size when a dma_iommu_mapping is created and still avoid running out of IO virtual addresses for the mapping.
Signed-off-by: Andreas Herrmann andreas.herrmann@calxeda.com [mszyprow: removed extensions parameter to arm_iommu_create_mapping() function, which will be modified in the next patch anyway, also some debug messages about extending bitmap] Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com --- arch/arm/include/asm/dma-iommu.h | 8 ++- arch/arm/mm/dma-mapping.c | 123 ++++++++++++++++++++++++++++++++------ 2 files changed, 110 insertions(+), 21 deletions(-)
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index a8c56ac..686797c 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -13,8 +13,12 @@ struct dma_iommu_mapping { /* iommu specific data */ struct iommu_domain *domain;
- void *bitmap; - size_t bits; + unsigned long **bitmaps; /* array of bitmaps */ + unsigned int nr_bitmaps; /* nr of elements in array */ + unsigned int extensions; + size_t bitmap_size; /* size of a single bitmap */ + size_t bits; /* per bitmap */ + unsigned int size; /* per bitmap */ unsigned int order; dma_addr_t base;
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c9c6acdf..cc42bc2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1066,6 +1066,8 @@ fs_initcall(dma_debug_do_init);
/* IOMMU */
+static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); + static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, size_t size) { @@ -1073,6 +1075,8 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, unsigned int align = 0; unsigned int count, start; unsigned long flags; + dma_addr_t iova; + int i;
if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; @@ -1084,30 +1088,78 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, align = (1 << (order - mapping->order)) - 1;
spin_lock_irqsave(&mapping->lock, flags); - start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, - count, align); - if (start > mapping->bits) { - spin_unlock_irqrestore(&mapping->lock, flags); - return DMA_ERROR_CODE; + for (i = 0; i < mapping->nr_bitmaps; i++) { + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) + continue; + + bitmap_set(mapping->bitmaps[i], start, count); + break; }
- bitmap_set(mapping->bitmap, start, count); + /* + * No unused range found. Try to extend the existing mapping + * and perform a second attempt to reserve an IO virtual + * address range of size bytes. + */ + if (i == mapping->nr_bitmaps) { + if (extend_iommu_mapping(mapping)) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + bitmap_set(mapping->bitmaps[i], start, count); + } spin_unlock_irqrestore(&mapping->lock, flags);
- return mapping->base + (start << (mapping->order + PAGE_SHIFT)); + iova = mapping->base + (mapping->size * i); + iova += start << (mapping->order + PAGE_SHIFT); + + return iova; }
static inline void __free_iova(struct dma_iommu_mapping *mapping, dma_addr_t addr, size_t size) { - unsigned int start = (addr - mapping->base) >> - (mapping->order + PAGE_SHIFT); - unsigned int count = ((size >> PAGE_SHIFT) + - (1 << mapping->order) - 1) >> mapping->order; + unsigned int start, count; unsigned long flags; + dma_addr_t bitmap_base; + u32 bitmap_index; + + if (!size) + return; + + bitmap_index = (u32) (addr - mapping->base) / (u32) mapping->size; + BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); + + bitmap_base = mapping->base + mapping->size * bitmap_index; + + start = (addr - bitmap_base) >> (mapping->order + PAGE_SHIFT); + + if (addr + size > bitmap_base + mapping->size) { + /* + * The address range to be freed reaches into the iova + * range of the next bitmap. This should not happen as + * we don't allow this in __alloc_iova (at the + * moment). + */ + BUG(); + } else + count = ((size >> PAGE_SHIFT) + + (1 << mapping->order) - 1) >> mapping->order;
spin_lock_irqsave(&mapping->lock, flags); - bitmap_clear(mapping->bitmap, start, count); + bitmap_clear(mapping->bitmaps[bitmap_index], start, count); spin_unlock_irqrestore(&mapping->lock, flags); }
@@ -1887,8 +1939,8 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, int order) { unsigned int count = size >> (PAGE_SHIFT + order); - unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); struct dma_iommu_mapping *mapping; + int extensions = 0; int err = -ENOMEM;
if (!count) @@ -1898,23 +1950,35 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, if (!mapping) goto err;
- mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!mapping->bitmap) + mapping->bitmap_size = BITS_TO_LONGS(count) * sizeof(long); + mapping->bitmaps = kzalloc((extensions + 1) * sizeof(unsigned long *), + GFP_KERNEL); + if (!mapping->bitmaps) goto err2;
+ mapping->bitmaps[0] = kzalloc(mapping->bitmap_size, GFP_KERNEL); + if (!mapping->bitmaps[0]) + goto err3; + + mapping->nr_bitmaps = 1; + mapping->extensions = extensions; mapping->base = base; - mapping->bits = BITS_PER_BYTE * bitmap_size; + mapping->size = size; mapping->order = order; + mapping->bits = BITS_PER_BYTE * mapping->bitmap_size; + spin_lock_init(&mapping->lock);
mapping->domain = iommu_domain_alloc(bus); if (!mapping->domain) - goto err3; + goto err4;
kref_init(&mapping->kref); return mapping; +err4: + kfree(mapping->bitmaps[0]); err3: - kfree(mapping->bitmap); + kfree(mapping->bitmaps); err2: kfree(mapping); err: @@ -1924,14 +1988,35 @@ EXPORT_SYMBOL_GPL(arm_iommu_create_mapping);
static void release_iommu_mapping(struct kref *kref) { + int i; struct dma_iommu_mapping *mapping = container_of(kref, struct dma_iommu_mapping, kref);
iommu_domain_free(mapping->domain); - kfree(mapping->bitmap); + for (i = 0; i < mapping->nr_bitmaps; i++) + kfree(mapping->bitmaps[i]); + kfree(mapping->bitmaps); kfree(mapping); }
+static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) +{ + int next_bitmap; + + if (mapping->nr_bitmaps > mapping->extensions) + return -EINVAL; + + next_bitmap = mapping->nr_bitmaps; + mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size, + GFP_ATOMIC); + if (!mapping->bitmaps[next_bitmap]) + return -ENOMEM; + + mapping->nr_bitmaps++; + + return 0; +} + void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) { if (mapping)
The 'order' parameter for the IOMMU-aware dma-mapping implementation was introduced mainly as a hack to reduce the size of the bitmap used for tracking IO virtual address space. Since it is now possible to dynamically resize the bitmap, this hack is not needed and can be removed without any impact on the client devices. This way the parameters for arm_iommu_create_mapping() become much easier to understand. The 'size' parameter now means the maximum supported IO address space size.
The code will allocate (resize) the bitmap in chunks, ensuring that a single chunk is not larger than a single memory page, to avoid unreliable allocations of size larger than PAGE_SIZE in atomic context.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com --- arch/arm/include/asm/dma-iommu.h | 4 +-- arch/arm/mm/dma-mapping.c | 43 ++++++++++++++--------------- drivers/gpu/drm/exynos/exynos_drm_drv.h | 2 -- drivers/gpu/drm/exynos/exynos_drm_iommu.c | 6 ++-- drivers/gpu/drm/exynos/exynos_drm_iommu.h | 1 - drivers/iommu/shmobile-iommu.c | 2 +- 6 files changed, 25 insertions(+), 33 deletions(-)
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 686797c..eec0a12 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -19,7 +19,6 @@ struct dma_iommu_mapping { size_t bitmap_size; /* size of a single bitmap */ size_t bits; /* per bitmap */ unsigned int size; /* per bitmap */ - unsigned int order; dma_addr_t base;
spinlock_t lock; @@ -27,8 +26,7 @@ struct dma_iommu_mapping { };
struct dma_iommu_mapping * -arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, - int order); +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size);
void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index cc42bc2..f62aa06 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1081,11 +1081,8 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT;
- count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) + - (1 << mapping->order) - 1) >> mapping->order; - - if (order > mapping->order) - align = (1 << (order - mapping->order)) - 1; + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + align = (1 << order) - 1;
spin_lock_irqsave(&mapping->lock, flags); for (i = 0; i < mapping->nr_bitmaps; i++) { @@ -1123,7 +1120,7 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, spin_unlock_irqrestore(&mapping->lock, flags);
iova = mapping->base + (mapping->size * i); - iova += start << (mapping->order + PAGE_SHIFT); + iova += start << PAGE_SHIFT;
return iova; } @@ -1144,7 +1141,7 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping,
bitmap_base = mapping->base + mapping->size * bitmap_index;
- start = (addr - bitmap_base) >> (mapping->order + PAGE_SHIFT); + start = (addr - bitmap_base) >> PAGE_SHIFT;
if (addr + size > bitmap_base + mapping->size) { /* @@ -1155,8 +1152,7 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping, */ BUG(); } else - count = ((size >> PAGE_SHIFT) + - (1 << mapping->order) - 1) >> mapping->order; + count = size >> PAGE_SHIFT;
spin_lock_irqsave(&mapping->lock, flags); bitmap_clear(mapping->bitmaps[bitmap_index], start, count); @@ -1924,8 +1920,7 @@ struct dma_map_ops iommu_coherent_ops = { * arm_iommu_create_mapping * @bus: pointer to the bus holding the client device (for IOMMU calls) * @base: start address of the valid IO address space - * @size: size of the valid IO address space - * @order: accuracy of the IO addresses allocations + * @size: maximum size of the valid IO address space * * Creates a mapping structure which holds information about used/unused * IO address ranges, which is required to perform memory allocation and @@ -1935,37 +1930,41 @@ struct dma_map_ops iommu_coherent_ops = { * arm_iommu_attach_device function. */ struct dma_iommu_mapping * -arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, - int order) +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) { - unsigned int count = size >> (PAGE_SHIFT + order); + unsigned int bits = size >> PAGE_SHIFT; + unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); struct dma_iommu_mapping *mapping; - int extensions = 0; + int extensions = 1; int err = -ENOMEM;
- if (!count) + if (!bitmap_size) return ERR_PTR(-EINVAL);
+ if (bitmap_size > PAGE_SIZE) { + extensions = bitmap_size / PAGE_SIZE; + bitmap_size = PAGE_SIZE; + } + mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); if (!mapping) goto err;
- mapping->bitmap_size = BITS_TO_LONGS(count) * sizeof(long); - mapping->bitmaps = kzalloc((extensions + 1) * sizeof(unsigned long *), + mapping->bitmap_size = bitmap_size; + mapping->bitmaps = kzalloc(extensions * sizeof(unsigned long *), GFP_KERNEL); if (!mapping->bitmaps) goto err2;
- mapping->bitmaps[0] = kzalloc(mapping->bitmap_size, GFP_KERNEL); + mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); if (!mapping->bitmaps[0]) goto err3;
mapping->nr_bitmaps = 1; mapping->extensions = extensions; mapping->base = base; - mapping->size = size; - mapping->order = order; - mapping->bits = BITS_PER_BYTE * mapping->bitmap_size; + mapping->size = bitmap_size << PAGE_SHIFT; + mapping->bits = BITS_PER_BYTE * bitmap_size;
spin_lock_init(&mapping->lock);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index 0eaf5a2..a8f9dba 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -237,7 +237,6 @@ struct drm_exynos_file_private { * otherwise default one. * @da_space_size: size of device address space. * if 0 then default value is used for it. - * @da_space_order: order to device address space. */ struct exynos_drm_private { struct drm_fb_helper *fb_helper; @@ -255,7 +254,6 @@ struct exynos_drm_private {
unsigned long da_start; unsigned long da_space_size; - unsigned long da_space_order; };
/* diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.c b/drivers/gpu/drm/exynos/exynos_drm_iommu.c index fb8db03..b32b291 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_iommu.c +++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.c @@ -36,12 +36,10 @@ int drm_create_iommu_mapping(struct drm_device *drm_dev) priv->da_start = EXYNOS_DEV_ADDR_START; if (!priv->da_space_size) priv->da_space_size = EXYNOS_DEV_ADDR_SIZE; - if (!priv->da_space_order) - priv->da_space_order = EXYNOS_DEV_ADDR_ORDER;
mapping = arm_iommu_create_mapping(&platform_bus_type, priv->da_start, - priv->da_space_size, - priv->da_space_order); + priv->da_space_size); + if (IS_ERR(mapping)) return PTR_ERR(mapping);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h index 598e60f..72376d4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h +++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h @@ -14,7 +14,6 @@
#define EXYNOS_DEV_ADDR_START 0x20000000 #define EXYNOS_DEV_ADDR_SIZE 0x40000000 -#define EXYNOS_DEV_ADDR_ORDER 0x0
#ifdef CONFIG_DRM_EXYNOS_IOMMU
diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index 7a3b928..464acda 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -343,7 +343,7 @@ static int shmobile_iommu_add_device(struct device *dev) mapping = archdata->iommu_mapping; if (!mapping) { mapping = arm_iommu_create_mapping(&platform_bus_type, 0, - L1_LEN << 20, 0); + L1_LEN << 20); if (IS_ERR(mapping)) return PTR_ERR(mapping); archdata->iommu_mapping = mapping;
linaro-mm-sig@lists.linaro.org