Marek,

Here is a patch with fixes to get the SDHC driver working as a DMA IOMMU client. An overview of the changes:
1. Converted the mutex to a spinlock so the mapping code can be called from atomic context, and took the lock where necessary.
2. Implemented arm_iommu_map_page and arm_iommu_unmap_page, which are used by the MMC host stack (see the usage sketch below).
3. Fixed bugs identified during testing with the SDHC driver.
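For context, this is roughly how the MMC/SDHC path ends up exercising the new ops. It is only an illustrative sketch, not part of the patch; the helper name and buffer handling below are made up. dma_map_page()/dma_unmap_page() dispatch through the device's dma_map_ops, so once iommu_ops is installed for the device they land in arm_iommu_map_page()/arm_iommu_unmap_page().

#include <linux/dma-mapping.h>
#include <linux/mm.h>

/* Illustrative only: maps one buffer that fits within a single page. */
static int sketch_single_page_dma(struct device *dev, void *buf, size_t len)
{
        struct page *page = virt_to_page(buf);
        unsigned long offset = offset_in_page(buf);
        dma_addr_t dma;

        /* Routed to arm_iommu_map_page() when dev uses iommu_ops. */
        dma = dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
        if (dma_mapping_error(dev, dma))
                return -ENOMEM;

        /* ... program the controller with 'dma' and run the transfer ... */

        /* Routed to arm_iommu_unmap_page(). */
        dma_unmap_page(dev, dma, len, DMA_TO_DEVICE);
        return 0;
}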
From: Krishna Reddy <vdumpa@nvidia.com>
Date: Fri, 7 Oct 2011 17:25:59 -0700
Subject: [PATCH] ARM: dma-mapping: Implement arm_iommu_map_page/unmap_page and fix issues.
Change-Id: I47a1a0065538fa0a161dd6d551b38079bd8f84fd
---
 arch/arm/include/asm/dma-iommu.h |   3 +-
 arch/arm/mm/dma-mapping.c        | 182 +++++++++++++++++++++-----------------
 2 files changed, 102 insertions(+), 83 deletions(-)
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
index 0b2677e..ad1a4d9 100644
--- a/arch/arm/include/asm/dma-iommu.h
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -7,6 +7,7 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-debug.h>
 #include <linux/kmemcheck.h>
+#include <linux/spinlock_types.h>
 #include <asm/memory.h>
@@ -19,7 +20,7 @@ struct dma_iommu_mapping {
         unsigned int order;
         dma_addr_t base;
-        struct mutex lock;
+        spinlock_t lock;
 };
 int arm_iommu_attach_device(struct device *dev, dma_addr_t base,

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 020bde1..0befd88 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -739,32 +739,42 @@ fs_initcall(dma_debug_do_init);
 /* IOMMU */
-static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, size_t size)
+static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
+                                      size_t size)
 {
-        unsigned int order = get_order(size);
         unsigned int align = 0;
         unsigned int count, start;
+        unsigned long flags;
-        if (order > mapping->order)
-                align = (1 << (order - mapping->order)) - 1;
+        count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
+                 (1 << mapping->order) - 1) >> mapping->order;
-        count = ((size >> PAGE_SHIFT) + (1 << mapping->order) - 1) >> mapping->order;
-
-        start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, count, align);
-        if (start > mapping->bits)
+        spin_lock_irqsave(&mapping->lock, flags);
+        start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits,
+                                           0, count, align);
+        if (start > mapping->bits) {
+                spin_unlock_irqrestore(&mapping->lock, flags);
                 return ~0;
+        }
         bitmap_set(mapping->bitmap, start, count);
+        spin_unlock_irqrestore(&mapping->lock, flags);
         return mapping->base + (start << (mapping->order + PAGE_SHIFT));
 }
-static inline void __free_iova(struct dma_iommu_mapping *mapping, dma_addr_t addr, size_t size)
+static inline void __free_iova(struct dma_iommu_mapping *mapping,
+                               dma_addr_t addr, size_t size)
 {
-        unsigned int start = (addr - mapping->base) >> (mapping->order + PAGE_SHIFT);
-        unsigned int count = ((size >> PAGE_SHIFT) + (1 << mapping->order) - 1) >> mapping->order;
+        unsigned int start = (addr - mapping->base) >>
+                             (mapping->order + PAGE_SHIFT);
+        unsigned int count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
+                              (1 << mapping->order) - 1) >> mapping->order;
+        unsigned long flags;
+        spin_lock_irqsave(&mapping->lock, flags);
         bitmap_clear(mapping->bitmap, start, count);
+        spin_unlock_irqrestore(&mapping->lock, flags);
 }
 static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
@@ -867,7 +877,7 @@ __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
 static dma_addr_t __iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
 {
         struct dma_iommu_mapping *mapping = dev->archdata.mapping;
-        unsigned int count = size >> PAGE_SHIFT;
+        unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
         dma_addr_t dma_addr, iova;
         int i, ret = ~0;
@@ -892,13 +902,12 @@ fail:
 static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
 {
         struct dma_iommu_mapping *mapping = dev->archdata.mapping;
-        unsigned int count = size >> PAGE_SHIFT;
+        unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
         int i;
-        for (i=0; i<count; i++) {
-                iommu_unmap(mapping->domain, iova, 0);
-                iova += PAGE_SIZE;
-        }
+        iova = iova & PAGE_MASK;
+        for (i=0; i<count; i++)
+                iommu_unmap(mapping->domain, iova + i * PAGE_SIZE, 0);
         __free_iova(mapping, iova, size);
         return 0;
 }
@@ -906,7 +915,6 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
             dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
 {
-        struct dma_iommu_mapping *mapping = dev->archdata.mapping;
         pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
         struct page **pages;
         void *addr = NULL;
@@ -914,11 +922,9 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
         *handle = ~0;
         size = PAGE_ALIGN(size);
-        mutex_lock(&mapping->lock);
-
         pages = __iommu_alloc_buffer(dev, size, gfp);
         if (!pages)
-                goto err_unlock;
+                goto exit;
         *handle = __iommu_create_mapping(dev, pages, size);
         if (*handle == ~0)
@@ -928,15 +934,13 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
         if (!addr)
                 goto err_mapping;
-        mutex_unlock(&mapping->lock);
         return addr;
 err_mapping:
         __iommu_remove_mapping(dev, *handle, size);
 err_buffer:
         __iommu_free_buffer(dev, pages, size);
-err_unlock:
-        mutex_unlock(&mapping->lock);
+exit:
         return NULL;
 }
@@ -944,11 +948,9 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
                     void *cpu_addr, dma_addr_t dma_addr, size_t size,
                     struct dma_attrs *attrs)
 {
-        unsigned long user_size;
         struct arm_vmregion *c;
         vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
-        user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
         c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
         if (c) {
@@ -981,11 +983,9 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
                           dma_addr_t handle, struct dma_attrs *attrs)
 {
-        struct dma_iommu_mapping *mapping = dev->archdata.mapping;
         struct arm_vmregion *c;
         size = PAGE_ALIGN(size);
-        mutex_lock(&mapping->lock);
         c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
         if (c) {
                 struct page **pages = c->priv;
@@ -993,7 +993,6 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
                 __iommu_remove_mapping(dev, handle, size);
                 __iommu_free_buffer(dev, pages, size);
         }
-        mutex_unlock(&mapping->lock);
 }
 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
@@ -1001,80 +1000,93 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
                           enum dma_data_direction dir)
 {
         struct dma_iommu_mapping *mapping = dev->archdata.mapping;
-        dma_addr_t dma_addr, iova;
+        dma_addr_t iova;
         int ret = 0;
+        unsigned long i;
+        phys_addr_t phys = page_to_phys(sg_page(sg));
+        size = PAGE_ALIGN(size);
         *handle = ~0;
-        mutex_lock(&mapping->lock);
-        iova = dma_addr = __alloc_iova(mapping, size);
-        if (dma_addr == 0)
-                goto fail;
-
-        while (size) {
-                unsigned int phys = page_to_phys(sg_page(sg));
-                unsigned int len = sg->offset + sg->length;
+        iova = __alloc_iova(mapping, size);
+        if (iova == 0)
+                return -ENOMEM;
-                if (!arch_is_coherent())
-                        __dma_page_cpu_to_dev(sg_page(sg), sg->offset, sg->length, dir);
-
-                while (len) {
-                        ret = iommu_map(mapping->domain, iova, phys, 0, 0);
-                        if (ret < 0)
-                                goto fail;
-                        iova += PAGE_SIZE;
-                        len -= PAGE_SIZE;
-                        size -= PAGE_SIZE;
-                }
-                sg = sg_next(sg);
+        if (!arch_is_coherent())
+                __dma_page_cpu_to_dev(sg_page(sg), sg->offset,
+                                      sg->length, dir);
+        for (i = 0; i < (size >> PAGE_SHIFT); i++) {
+                ret = iommu_map(mapping->domain, iova + i * PAGE_SIZE,
+                                phys + i * PAGE_SIZE, 0, 0);
+                if (ret < 0)
+                        goto fail;
         }
-
-        *handle = dma_addr;
-        mutex_unlock(&mapping->lock);
+        *handle = iova;
         return 0;
 fail:
+        while (i--)
+                iommu_unmap(mapping->domain, iova + i * PAGE_SIZE, 0);
+
         __iommu_remove_mapping(dev, iova, size);
-        mutex_unlock(&mapping->lock);
         return ret;
 }
+static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
+             unsigned long offset, size_t size, enum dma_data_direction dir,
+             struct dma_attrs *attrs)
+{
+        dma_addr_t dma_addr;
+
+        if (!arch_is_coherent())
+                __dma_page_cpu_to_dev(page, offset, size, dir);
+
+        BUG_ON((offset+size) > PAGE_SIZE);
+        dma_addr = __iommu_create_mapping(dev, &page, PAGE_SIZE);
+        return dma_addr + offset;
+}
+
+static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
+                size_t size, enum dma_data_direction dir,
+                struct dma_attrs *attrs)
+{
+        struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+        phys_addr_t phys;
+
+        phys = iommu_iova_to_phys(mapping->domain, handle);
+        __iommu_remove_mapping(dev, handle, size);
+        if (!arch_is_coherent())
+                __dma_page_dev_to_cpu(pfn_to_page(__phys_to_pfn(phys)),
+                                      phys & ~PAGE_MASK, size, dir);
+}
+
 int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                      enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-        struct scatterlist *s = sg, *dma = sg, *start = sg;
-        int i, count = 1;
-        unsigned int offset = s->offset;
-        unsigned int size = s->offset + s->length;
+        struct scatterlist *s;
+        unsigned int size;
+        int i, count = 0;
-        for (i = 1; i < nents; i++) {
+        for_each_sg(sg, s, nents, i) {
                 s->dma_address = ~0;
                 s->dma_length = 0;
+                size = s->offset + s->length;
-                s = sg_next(s);
-
-                if (s->offset || (size & (PAGE_SIZE - 1))) {
-                        if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
-                                goto bad_mapping;
-
-                        dma->dma_address += offset;
-                        dma->dma_length = size;
+                if (__map_sg_chunk(dev, s, size, &s->dma_address, dir) < 0)
+                        goto bad_mapping;
-                        size = offset = s->offset;
-                        start = s;
-                        dma = sg_next(dma);
-                        count += 1;
-                }
-                size += sg->length;
+                s->dma_address += s->offset;
+                s->dma_length = s->length;
+                count++;
         }
-        __map_sg_chunk(dev, start, size, &dma->dma_address, dir);
-        d->dma_address += offset;
         return count;
 bad_mapping:
-        for_each_sg(sg, s, count-1, i)
-                __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
+        for_each_sg(sg, s, count, i) {
+                __iommu_remove_mapping(dev, sg_dma_address(s),
+                                       PAGE_ALIGN(sg_dma_len(s)));
+        }
         return 0;
 }
@@ -1086,9 +1098,11 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
         for_each_sg(sg, s, nents, i) {
                 if (sg_dma_len(s))
-                        __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
+                        __iommu_remove_mapping(dev, sg_dma_address(s),
+                                               sg_dma_len(s));
                 if (!arch_is_coherent())
-                        __dma_page_dev_to_cpu(sg_page(sg), sg->offset, sg->length, dir);
+                        __dma_page_dev_to_cpu(sg_page(s), s->offset,
+                                              s->length, dir);
         }
 }
@@ -1108,7 +1122,8 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
         for_each_sg(sg, s, nents, i)
                 if (!arch_is_coherent())
-                        __dma_page_dev_to_cpu(sg_page(sg), sg->offset, sg->length, dir);
+                        __dma_page_dev_to_cpu(sg_page(s), s->offset,
+                                              s->length, dir);
 }
 /**
@@ -1126,13 +1141,16 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
         for_each_sg(sg, s, nents, i)
                 if (!arch_is_coherent())
-                        __dma_page_cpu_to_dev(sg_page(sg), sg->offset, sg->length, dir);
+                        __dma_page_cpu_to_dev(sg_page(s), s->offset,
+                                              s->length, dir);
 }
 struct dma_map_ops iommu_ops = {
         .alloc = arm_iommu_alloc_attrs,
         .free = arm_iommu_free_attrs,
         .mmap = arm_iommu_mmap_attrs,
+        .map_page = arm_iommu_map_page,
+        .unmap_page = arm_iommu_unmap_page,
         .map_sg = arm_iommu_map_sg,
         .unmap_sg = arm_iommu_unmap_sg,
         .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu,
@@ -1157,7 +1175,7 @@ int arm_iommu_attach_device(struct device *dev, dma_addr_t base, size_t size, in
         mapping->base = base;
         mapping->bits = bitmap_size;
         mapping->order = order;
-        mutex_init(&mapping->lock);
+        spin_lock_init(&mapping->lock);
         mapping->domain = iommu_domain_alloc();
         if (!mapping->domain)