Hello!
Recent changes to ioremap and unification of vmalloc regions on ARM significantly reduces the possible size of the consistent dma region. They are significantly limited allowed dma coherent/writecombine allocations.
This experimental patchset replaces custom consistent dma regions usage in dma-mapping framework in favour of generic vmalloc areas created on demand for each coherent and writecombine allocations. The main purpose for this patchset is to remove 2MiB limit of dma coherent/writecombine allocations.
Atomic allocations are served from special pool preallocated on boot, becasue vmalloc areas cannot be reliably created in atomic context.
This patch is based on vanilla v3.4-rc7 release.
Atomic allocations have been tested with s3c-sdhci driver on Samsung UniversalC210 board with dmabounce code enabled to force dma_alloc_coherent() use on each dma_map_* call (some of them are made from interrupts).
Best regards Marek Szyprowski Samsung Poland R&D Center
Changelog:
v2: - added support for atomic allocations (served from preallocated pool) - minor cleanup here and there - rebased onto v3.4-rc7
v1: http://thread.gmane.org/gmane.linux.kernel.mm/76703 - initial version
Patch summary:
Marek Szyprowski (4): mm: vmalloc: use const void * for caller argument mm: vmalloc: export find_vm_area() function mm: vmalloc: add VM_DMA flag to indicate areas used by dma-mapping framework ARM: dma-mapping: remove custom consistent dma region
Documentation/kernel-parameters.txt | 4 + arch/arm/include/asm/dma-mapping.h | 2 +- arch/arm/mm/dma-mapping.c | 360 ++++++++++++++++------------------- include/linux/vmalloc.h | 10 +- mm/vmalloc.c | 31 ++-- 5 files changed, 185 insertions(+), 196 deletions(-)
'const void *' is a safer type for caller function type. This patch updates all references to caller function type.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Reviewed-by: Kyungmin Park kyungmin.park@samsung.com --- include/linux/vmalloc.h | 8 ++++---- mm/vmalloc.c | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index dcdfc2b..2e28f4d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -32,7 +32,7 @@ struct vm_struct { struct page **pages; unsigned int nr_pages; phys_addr_t phys_addr; - void *caller; + const void *caller; };
/* @@ -62,7 +62,7 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, void *caller); + pgprot_t prot, int node, const void *caller); extern void vfree(const void *addr);
extern void *vmap(struct page **pages, unsigned int count, @@ -85,13 +85,13 @@ static inline size_t get_vm_area_size(const struct vm_struct *area)
extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *get_vm_area_caller(unsigned long size, - unsigned long flags, void *caller); + unsigned long flags, const void *caller); extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end); extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, - void *caller); + const void *caller); extern struct vm_struct *remove_vm_area(const void *addr);
extern int map_vm_area(struct vm_struct *area, pgprot_t prot, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 94dff88..8bc7f3ef 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1279,7 +1279,7 @@ DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist;
static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, - unsigned long flags, void *caller) + unsigned long flags, const void *caller) { vm->flags = flags; vm->addr = (void *)va->va_start; @@ -1305,7 +1305,7 @@ static void insert_vmalloc_vmlist(struct vm_struct *vm) }
static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, - unsigned long flags, void *caller) + unsigned long flags, const void *caller) { setup_vmalloc_vm(vm, va, flags, caller); insert_vmalloc_vmlist(vm); @@ -1313,7 +1313,7 @@ static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long flags, unsigned long start, - unsigned long end, int node, gfp_t gfp_mask, void *caller) + unsigned long end, int node, gfp_t gfp_mask, const void *caller) { struct vmap_area *va; struct vm_struct *area; @@ -1374,7 +1374,7 @@ EXPORT_SYMBOL_GPL(__get_vm_area);
struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, - void *caller) + const void *caller) { return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL, caller); @@ -1396,7 +1396,7 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) }
struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, - void *caller) + const void *caller) { return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, -1, GFP_KERNEL, caller); @@ -1567,9 +1567,9 @@ EXPORT_SYMBOL(vmap);
static void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, pgprot_t prot, - int node, void *caller); + int node, const void *caller); static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, - pgprot_t prot, int node, void *caller) + pgprot_t prot, int node, const void *caller) { const int order = 0; struct page **pages; @@ -1642,7 +1642,7 @@ fail: */ void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, void *caller) + pgprot_t prot, int node, const void *caller) { struct vm_struct *area; void *addr; @@ -1698,7 +1698,7 @@ fail: */ static void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, pgprot_t prot, - int node, void *caller) + int node, const void *caller) { return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, gfp_mask, prot, node, caller);
On 05/17/2012 07:54 PM, Marek Szyprowski wrote:
'const void *' is a safer type for caller function type. This patch updates all references to caller function type.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Reviewed-by: Kyungmin Park kyungmin.park@samsung.com
Reviewed-by: Minchan Kim minchan@kernel.org
find_vm_area() function is usefull for other core subsystems (like dma-mapping) to get access to vm_area internals.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Reviewed-by: Kyungmin Park kyungmin.park@samsung.com --- include/linux/vmalloc.h | 1 + mm/vmalloc.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 2e28f4d..6071e91 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -93,6 +93,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long start, unsigned long end, const void *caller); extern struct vm_struct *remove_vm_area(const void *addr); +extern struct vm_struct *find_vm_area(const void *addr);
extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8bc7f3ef..8cb7f22 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1402,7 +1402,15 @@ struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, -1, GFP_KERNEL, caller); }
-static struct vm_struct *find_vm_area(const void *addr) +/** + * find_vm_area - find a continuous kernel virtual area + * @addr: base address + * + * Search for the kernel VM area starting at @addr, and return it. + * It is up to the caller to do all required locking to keep the returned + * pointer valid. + */ +struct vm_struct *find_vm_area(const void *addr) { struct vmap_area *va;
On 05/17/2012 07:54 PM, Marek Szyprowski wrote:
find_vm_area() function is usefull for other core subsystems (like dma-mapping) to get access to vm_area internals.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Reviewed-by: Kyungmin Park kyungmin.park@samsung.com
We can't know how you want to use this function. It would be better to fold this patch into [4/4].
include/linux/vmalloc.h | 1 + mm/vmalloc.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 2e28f4d..6071e91 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -93,6 +93,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long start, unsigned long end, const void *caller); extern struct vm_struct *remove_vm_area(const void *addr); +extern struct vm_struct *find_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8bc7f3ef..8cb7f22 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1402,7 +1402,15 @@ struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, -1, GFP_KERNEL, caller); } -static struct vm_struct *find_vm_area(const void *addr) +/**
- find_vm_area - find a continuous kernel virtual area
- @addr: base address
- Search for the kernel VM area starting at @addr, and return it.
- It is up to the caller to do all required locking to keep the returned
- pointer valid.
- */
+struct vm_struct *find_vm_area(const void *addr) { struct vmap_area *va;
Add new type of vm_area intented to be used for consisten mappings created by dma-mapping framework.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Reviewed-by: Kyungmin Park kyungmin.park@samsung.com --- include/linux/vmalloc.h | 1 + mm/vmalloc.c | 3 +++ 2 files changed, 4 insertions(+)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 6071e91..8a9555a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -14,6 +14,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNLIST 0x00000020 /* vm_struct is not listed in vmlist */ +#define VM_DMA 0x00000040 /* used by dma-mapping framework */ /* bits [20..32] reserved for arch specific ioremap internals */
/* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8cb7f22..9c13bab 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2582,6 +2582,9 @@ static int s_show(struct seq_file *m, void *p) if (v->flags & VM_IOREMAP) seq_printf(m, " ioremap");
+ if (v->flags & VM_DMA) + seq_printf(m, " dma"); + if (v->flags & VM_ALLOC) seq_printf(m, " vmalloc");
On 05/17/2012 07:54 PM, Marek Szyprowski wrote:
Add new type of vm_area intented to be used for consisten mappings created by dma-mapping framework.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Reviewed-by: Kyungmin Park kyungmin.park@samsung.com
include/linux/vmalloc.h | 1 + mm/vmalloc.c | 3 +++ 2 files changed, 4 insertions(+)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 6071e91..8a9555a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -14,6 +14,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNLIST 0x00000020 /* vm_struct is not listed in vmlist */ +#define VM_DMA 0x00000040 /* used by dma-mapping framework */ /* bits [20..32] reserved for arch specific ioremap internals */
/* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8cb7f22..9c13bab 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2582,6 +2582,9 @@ static int s_show(struct seq_file *m, void *p) if (v->flags & VM_IOREMAP) seq_printf(m, " ioremap");
- if (v->flags & VM_DMA)
seq_printf(m, " dma");
Hmm, VM_DMA would become generic flag? AFAIU, maybe VM_DMA would be used only on ARM arch. Of course, it isn't performance sensitive part but there in no reason to check it, either in other architecture except ARM.
I suggest following as
#ifdef CONFIG_ARM #define VM_DMA 0x00000040 #else #define VM_DMA 0x0 #end
Maybe it could remove check code at compile time.
if (v->flags & VM_ALLOC) seq_printf(m, " vmalloc");
Hi Minchan,
On Tuesday, May 22, 2012 9:08 AM Minchan Kim wrote:
On 05/17/2012 07:54 PM, Marek Szyprowski wrote:
Add new type of vm_area intented to be used for consisten mappings created by dma-mapping framework.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Reviewed-by: Kyungmin Park kyungmin.park@samsung.com
include/linux/vmalloc.h | 1 + mm/vmalloc.c | 3 +++ 2 files changed, 4 insertions(+)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 6071e91..8a9555a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -14,6 +14,7 @@ struct vm_area_struct; /* vma defining user mapping in
mm_types.h */
#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNLIST 0x00000020 /* vm_struct is not listed in vmlist */ +#define VM_DMA 0x00000040 /* used by dma-mapping framework */ /* bits [20..32] reserved for arch specific ioremap internals */
/* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8cb7f22..9c13bab 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2582,6 +2582,9 @@ static int s_show(struct seq_file *m, void *p) if (v->flags & VM_IOREMAP) seq_printf(m, " ioremap");
- if (v->flags & VM_DMA)
seq_printf(m, " dma");
Hmm, VM_DMA would become generic flag? AFAIU, maybe VM_DMA would be used only on ARM arch.
Right now yes, it will be used only on ARM architecture, but maybe other architecture will start using it once it is available.
Of course, it isn't performance sensitive part but there in no reason to check it, either in other architecture except ARM.
I suggest following as
#ifdef CONFIG_ARM #define VM_DMA 0x00000040 #else #define VM_DMA 0x0 #end
Maybe it could remove check code at compile time.
I've been told to avoid such #ifdef construction if there is no really good reason for it. The only justification was significant impact on the performance, otherwise it would be just a good example of typical over-engineering.
if (v->flags & VM_ALLOC) seq_printf(m, " vmalloc");
Best regards
On Thu, May 24, 2012 at 02:26:12PM +0200, Marek Szyprowski wrote:
On Tuesday, May 22, 2012 9:08 AM Minchan Kim wrote:
Hmm, VM_DMA would become generic flag? AFAIU, maybe VM_DMA would be used only on ARM arch.
Right now yes, it will be used only on ARM architecture, but maybe other architecture will start using it once it is available.
There's very little about the code in question that is ARM-specific to begin with. I plan to adopt similar changes on SH once the work has settled one way or the other, so we'll probably use the VMA flag there, too.
On Thu, May 24, 2012 at 8:28 AM, Paul Mundt lethal@linux-sh.org wrote:
On Thu, May 24, 2012 at 02:26:12PM +0200, Marek Szyprowski wrote:
On Tuesday, May 22, 2012 9:08 AM Minchan Kim wrote:
Hmm, VM_DMA would become generic flag? AFAIU, maybe VM_DMA would be used only on ARM arch.
Right now yes, it will be used only on ARM architecture, but maybe other architecture will start using it once it is available.
There's very little about the code in question that is ARM-specific to begin with. I plan to adopt similar changes on SH once the work has settled one way or the other, so we'll probably use the VMA flag there, too.
I don't think VM_DMA is good idea because x86_64 has two dma zones. x86 unaware patches make no sense.
Hello,
On Sunday, May 27, 2012 2:35 PM KOSAKI Motohiro wrote:
On Thu, May 24, 2012 at 8:28 AM, Paul Mundt lethal@linux-sh.org wrote:
On Thu, May 24, 2012 at 02:26:12PM +0200, Marek Szyprowski wrote:
On Tuesday, May 22, 2012 9:08 AM Minchan Kim wrote:
Hmm, VM_DMA would become generic flag? AFAIU, maybe VM_DMA would be used only on ARM arch.
Right now yes, it will be used only on ARM architecture, but maybe other architecture will start using it once it is available.
There's very little about the code in question that is ARM-specific to begin with. I plan to adopt similar changes on SH once the work has settled one way or the other, so we'll probably use the VMA flag there, too.
I don't think VM_DMA is good idea because x86_64 has two dma zones. x86 unaware patches make no sense.
I see no problems to add VM_DMA64 later if x86_64 starts using vmalloc areas for creating kernel mappings for the dma buffers (I assume that there are 2 dma zones: one 32bit and one 64bit). Right now x86 and x86_64 don't use vmalloc areas for dma buffers, so I hardly see how this patch can be considered as 'x86 unaware'.
Best regards
On Mon, May 28, 2012 at 10:19:39AM +0200, Marek Szyprowski wrote:
Hello,
On Sunday, May 27, 2012 2:35 PM KOSAKI Motohiro wrote:
On Thu, May 24, 2012 at 8:28 AM, Paul Mundt lethal@linux-sh.org wrote:
On Thu, May 24, 2012 at 02:26:12PM +0200, Marek Szyprowski wrote:
On Tuesday, May 22, 2012 9:08 AM Minchan Kim wrote:
Hmm, VM_DMA would become generic flag? AFAIU, maybe VM_DMA would be used only on ARM arch.
Right now yes, it will be used only on ARM architecture, but maybe other architecture will start using it once it is available.
There's very little about the code in question that is ARM-specific to begin with. I plan to adopt similar changes on SH once the work has settled one way or the other, so we'll probably use the VMA flag there, too.
I don't think VM_DMA is good idea because x86_64 has two dma zones. x86 unaware patches make no sense.
I see no problems to add VM_DMA64 later if x86_64 starts using vmalloc areas for creating kernel mappings for the dma buffers (I assume that there are 2 dma zones: one 32bit and one 64bit). Right now x86 and x86_64 don't use vmalloc areas for dma buffers, so I hardly see how this patch can be considered as 'x86 unaware'.
Well they do - kind off. It is usually done by calling vmalloc_32 and then using the DMA API on top of those pages (or sometimes the non-portable virt_to_phys macro).
Introducing this and replacing the vmalloc_32 with this seems like a nice step in making those device drivers APIs more portable?
This patch changes dma-mapping subsystem to use generic vmalloc areas for all consistent dma allocations. This increases the total size limit of the consistent allocations and removes platform hacks and a lot of duplicated code.
Atomic allocations are served from special pool preallocated on boot, becasue vmalloc areas cannot be reliably created in atomic context.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com --- Documentation/kernel-parameters.txt | 4 + arch/arm/include/asm/dma-mapping.h | 2 +- arch/arm/mm/dma-mapping.c | 360 ++++++++++++++++------------------- 3 files changed, 171 insertions(+), 195 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c1601e5..ba58f50 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -515,6 +515,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a hypervisor. Default: yes
+ coherent_pool=nn[KMG] [ARM,KNL] + Sets the size of memory pool for coherent, atomic dma + allocations. + code_bytes [X86] How many bytes of object code to print in an oops report. Range: 0 - 8192 diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index cb3b7c9..92b0afb 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -210,7 +210,7 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *, * DMA region above it's default value of 2MB. It must be called before the * memory allocator is initialised, i.e. before any core_initcall. */ -extern void __init init_consistent_dma_size(unsigned long size); +static inline void init_consistent_dma_size(unsigned long size) { }
#ifdef CONFIG_DMABOUNCE diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index db23ae4..3be4de2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -19,6 +19,8 @@ #include <linux/dma-mapping.h> #include <linux/highmem.h> #include <linux/slab.h> +#include <linux/io.h> +#include <linux/vmalloc.h>
#include <asm/memory.h> #include <asm/highmem.h> @@ -119,210 +121,178 @@ static void __dma_free_buffer(struct page *page, size_t size) }
#ifdef CONFIG_MMU - -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT) - -/* - * These are the page tables (2MB each) covering uncached, DMA consistent allocations - */ -static pte_t **consistent_pte; - -#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M - -unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; - -void __init init_consistent_dma_size(unsigned long size) -{ - unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M); - - BUG_ON(consistent_pte); /* Check we're called before DMA region init */ - BUG_ON(base < VMALLOC_END); - - /* Grow region to accommodate specified size */ - if (base < consistent_base) - consistent_base = base; -} - -#include "vmregion.h" - -static struct arm_vmregion_head consistent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_end = CONSISTENT_END, -}; - #ifdef CONFIG_HUGETLB_PAGE #error ARM Coherent DMA allocator does not (yet) support huge TLB #endif
-/* - * Initialise the consistent memory allocation. - */ -static int __init consistent_init(void) -{ - int ret = 0; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int i = 0; - unsigned long base = consistent_base; - unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; - - consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); - if (!consistent_pte) { - pr_err("%s: no memory\n", __func__); - return -ENOMEM; - } - - pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END); - consistent_head.vm_start = base; - - do { - pgd = pgd_offset(&init_mm, base); - - pud = pud_alloc(&init_mm, pgd, base); - if (!pud) { - printk(KERN_ERR "%s: no pud tables\n", __func__); - ret = -ENOMEM; - break; - } - - pmd = pmd_alloc(&init_mm, pud, base); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - WARN_ON(!pmd_none(*pmd)); - - pte = pte_alloc_kernel(pmd, base); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte[i++] = pte; - base += PMD_SIZE; - } while (base < CONSISTENT_END); - - return ret; -} - -core_initcall(consistent_init); - static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) { - struct arm_vmregion *c; - size_t align; - int bit; + struct vm_struct *area; + unsigned long addr;
- if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); + area = get_vm_area_caller(size, VM_DMA | VM_USERMAP, caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); + + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { + vunmap((void *)addr); + return NULL; + } + return (void *)addr; +} + +static void __dma_free_remap(void *cpu_addr, size_t size) +{ + struct vm_struct *area; + + read_lock(&vmlist_lock); + area = find_vm_area(cpu_addr); + if (!area) { + pr_err("%s: trying to free invalid coherent area: %p\n", + __func__, cpu_addr); + dump_stack(); + read_unlock(&vmlist_lock); + return; + } + unmap_kernel_range((unsigned long)cpu_addr, size); + read_unlock(&vmlist_lock); + vunmap(cpu_addr); +} + +struct dma_pool { + size_t size; + spinlock_t lock; + unsigned long *bitmap; + unsigned long count; + void *vaddr; + struct page *page; +}; + +static struct dma_pool atomic_pool = { + .size = SZ_256K, +}; + +static int __init early_coherent_pool(char *p) +{ + atomic_pool.size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +/* + * Initialise the coherent pool for atomic allocations. + */ +static int __init atomic_pool_init(void) +{ + struct dma_pool *pool = &atomic_pool; + pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); + unsigned long count = pool->size >> PAGE_SHIFT; + gfp_t gfp = GFP_KERNEL | GFP_DMA; + unsigned long *bitmap; + struct page *page; + void *ptr; + int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); + + bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!bitmap) + goto no_bitmap; + + page = __dma_alloc_buffer(NULL, pool->size, gfp); + if (!page) + goto no_page; + + ptr = __dma_alloc_remap(page, pool->size, gfp, prot, NULL); + if (ptr) { + spin_lock_init(&pool->lock); + pool->vaddr = ptr; + pool->page = page; + pool->bitmap = bitmap; + pool->count = count; + pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", + (unsigned)pool->size / 1024); + return 0; + } + + __dma_free_buffer(page, pool->size); +no_page: + kfree(bitmap); +no_bitmap: + pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", + (unsigned)pool->size / 1024); + return -ENOMEM; +} +core_initcall(atomic_pool_init); + +static void *__alloc_from_pool(size_t size, struct page **ret_page) +{ + struct dma_pool *pool = &atomic_pool; + unsigned int count = size >> PAGE_SHIFT; + unsigned int pageno; + unsigned long flags; + void *ptr = NULL; + size_t align; + + if (!pool->vaddr) { + pr_err("%s: coherent pool not initialised!\n", __func__); dump_stack(); return NULL; }
/* - * Align the virtual region allocation - maximum alignment is - * a section size, minimum is a page size. This helps reduce - * fragmentation of the DMA space, and also prevents allocations - * smaller than a section from crossing a section boundary. + * Align the region allocation - allocations from pool are rather + * small, so align them to their order in pages, minimum is a page + * size. This helps reduce fragmentation of the DMA space. */ - bit = fls(size - 1); - if (bit > SECTION_SHIFT) - bit = SECTION_SHIFT; - align = 1 << bit; + align = PAGE_SIZE << get_order(size);
- /* - * Allocate a virtual address in the consistent mapping region. - */ - c = arm_vmregion_alloc(&consistent_head, align, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller); - if (c) { - pte_t *pte; - int idx = CONSISTENT_PTE_INDEX(c->vm_start); - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - - pte = consistent_pte[idx] + off; - c->vm_pages = page; - - do { - BUG_ON(!pte_none(*pte)); - - set_pte_ext(pte, mk_pte(page, prot), 0); - page++; - pte++; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - pte = consistent_pte[++idx]; - } - } while (size -= PAGE_SIZE); - - dsb(); - - return (void *)c->vm_start; + spin_lock_irqsave(&pool->lock, flags); + pageno = bitmap_find_next_zero_area(pool->bitmap, pool->count, + 0, count, (1 << align) - 1); + if (pageno < pool->count) { + bitmap_set(pool->bitmap, pageno, count); + ptr = pool->vaddr + PAGE_SIZE * pageno; + *ret_page = pool->page + pageno; } - return NULL; + spin_unlock_irqrestore(&pool->lock, flags); + + return ptr; }
-static void __dma_free_remap(void *cpu_addr, size_t size) +static int __free_from_pool(void *start, size_t size) { - struct arm_vmregion *c; - unsigned long addr; - pte_t *ptep; - int idx; - u32 off; + struct dma_pool *pool = &atomic_pool; + unsigned long pageno, count; + unsigned long flags;
- c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); - if (!c) { - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); + if (start < pool->vaddr || start > pool->vaddr + pool->size) + return 0; + + if (start + size > pool->vaddr + pool->size) { + pr_err("%s: freeing wrong coherent size from pool\n", __func__); dump_stack(); - return; + return 0; }
- if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } + pageno = (start - pool->vaddr) >> PAGE_SHIFT; + count = size >> PAGE_SHIFT;
- idx = CONSISTENT_PTE_INDEX(c->vm_start); - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - ptep = consistent_pte[idx] + off; - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + spin_lock_irqsave(&pool->lock, flags); + bitmap_clear(pool->bitmap, pageno, count); + spin_unlock_irqrestore(&pool->lock, flags);
- ptep++; - addr += PAGE_SIZE; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - ptep = consistent_pte[++idx]; - } - - if (pte_none(pte) || !pte_present(pte)) - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - arm_vmregion_free(&consistent_head, c); + return 1; }
#else /* !CONFIG_MMU */
#define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) #define __dma_free_remap(addr, size) do { } while (0) +#define __alloc_from_pool(size, ret_page) NULL +#define __free_from_pool(addr, size) 0
#endif /* CONFIG_MMU */
@@ -345,6 +315,16 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, *handle = ~0; size = PAGE_ALIGN(size);
+ /* + * Atomic allocations need special handling + */ + if (gfp & GFP_ATOMIC && !arch_is_coherent()) { + addr = __alloc_from_pool(size, &page); + if (addr) + *handle = pfn_to_dma(dev, page_to_pfn(page)); + return addr; + } + page = __dma_alloc_buffer(dev, size, gfp); if (!page) return NULL; @@ -398,24 +378,16 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU - unsigned long user_size, kern_size; - struct arm_vmregion *c; + unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long pfn = dma_to_pfn(dev, dma_addr); + unsigned long off = vma->vm_pgoff;
- user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - unsigned long off = vma->vm_pgoff; - - kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; - - if (off < kern_size && - user_size <= (kern_size - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(c->vm_pages) + off, - user_size << PAGE_SHIFT, - vma->vm_page_prot); - } + if (off < count && user_count <= (count - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + user_count << PAGE_SHIFT, + vma->vm_page_prot); } #endif /* CONFIG_MMU */
@@ -444,13 +416,16 @@ EXPORT_SYMBOL(dma_mmap_writecombine); */ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) { - WARN_ON(irqs_disabled()); - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return;
size = PAGE_ALIGN(size);
+ if (__free_from_pool(cpu_addr, size)) + return; + + WARN_ON(irqs_disabled()); + if (!arch_is_coherent()) __dma_free_remap(cpu_addr, size);
@@ -726,9 +701,6 @@ EXPORT_SYMBOL(dma_set_mask);
static int __init dma_debug_do_init(void) { -#ifdef CONFIG_MMU - arm_vmregion_create_proc("dma-mappings", &consistent_head); -#endif dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; }
On 05/17/2012 07:54 PM, Marek Szyprowski wrote:
This patch changes dma-mapping subsystem to use generic vmalloc areas for all consistent dma allocations. This increases the total size limit of the consistent allocations and removes platform hacks and a lot of duplicated code.
I like this patch very much! There are just small nitpicks below.
Atomic allocations are served from special pool preallocated on boot, becasue vmalloc areas cannot be reliably created in atomic context.
typo because
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com
Documentation/kernel-parameters.txt | 4 + arch/arm/include/asm/dma-mapping.h | 2 +- arch/arm/mm/dma-mapping.c | 360 ++++++++++++++++------------------- 3 files changed, 171 insertions(+), 195 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c1601e5..ba58f50 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -515,6 +515,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a hypervisor. Default: yes
- coherent_pool=nn[KMG] [ARM,KNL]
Sets the size of memory pool for coherent, atomic dma
allocations.
- code_bytes [X86] How many bytes of object code to print in an oops report. Range: 0 - 8192
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index cb3b7c9..92b0afb 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -210,7 +210,7 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
- DMA region above it's default value of 2MB. It must be called before the
- memory allocator is initialised, i.e. before any core_initcall.
*/ -extern void __init init_consistent_dma_size(unsigned long size); +static inline void init_consistent_dma_size(unsigned long size) { } #ifdef CONFIG_DMABOUNCE diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index db23ae4..3be4de2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -19,6 +19,8 @@ #include <linux/dma-mapping.h> #include <linux/highmem.h> #include <linux/slab.h> +#include <linux/io.h> +#include <linux/vmalloc.h> #include <asm/memory.h> #include <asm/highmem.h> @@ -119,210 +121,178 @@ static void __dma_free_buffer(struct page *page, size_t size) } #ifdef CONFIG_MMU
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT)
-/*
- These are the page tables (2MB each) covering uncached, DMA consistent allocations
- */
-static pte_t **consistent_pte;
-#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M
-unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE;
-void __init init_consistent_dma_size(unsigned long size) -{
- unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M);
- BUG_ON(consistent_pte); /* Check we're called before DMA region init */
- BUG_ON(base < VMALLOC_END);
- /* Grow region to accommodate specified size */
- if (base < consistent_base)
consistent_base = base;
-}
-#include "vmregion.h"
-static struct arm_vmregion_head consistent_head = {
- .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_end = CONSISTENT_END,
-};
#ifdef CONFIG_HUGETLB_PAGE #error ARM Coherent DMA allocator does not (yet) support huge TLB #endif -/*
- Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void) -{
- int ret = 0;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- int i = 0;
- unsigned long base = consistent_base;
- unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
- consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
- if (!consistent_pte) {
pr_err("%s: no memory\n", __func__);
return -ENOMEM;
- }
- pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END);
- consistent_head.vm_start = base;
- do {
pgd = pgd_offset(&init_mm, base);
pud = pud_alloc(&init_mm, pgd, base);
if (!pud) {
printk(KERN_ERR "%s: no pud tables\n", __func__);
ret = -ENOMEM;
break;
}
pmd = pmd_alloc(&init_mm, pud, base);
if (!pmd) {
printk(KERN_ERR "%s: no pmd tables\n", __func__);
ret = -ENOMEM;
break;
}
WARN_ON(!pmd_none(*pmd));
pte = pte_alloc_kernel(pmd, base);
if (!pte) {
printk(KERN_ERR "%s: no pte tables\n", __func__);
ret = -ENOMEM;
break;
}
consistent_pte[i++] = pte;
base += PMD_SIZE;
- } while (base < CONSISTENT_END);
- return ret;
-}
-core_initcall(consistent_init);
static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) {
- struct arm_vmregion *c;
- size_t align;
- int bit;
- struct vm_struct *area;
- unsigned long addr;
- if (!consistent_pte) {
printk(KERN_ERR "%s: not initialised\n", __func__);
- area = get_vm_area_caller(size, VM_DMA | VM_USERMAP, caller);
Out of curiosity. Do we always map dma area into user's address space?
- if (!area)
return NULL;
- addr = (unsigned long)area->addr;
- area->phys_addr = __pfn_to_phys(page_to_pfn(page));
- if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) {
vunmap((void *)addr);
return NULL;
- }
- return (void *)addr;
+}
+static void __dma_free_remap(void *cpu_addr, size_t size) +{
- struct vm_struct *area;
- read_lock(&vmlist_lock);
Why do we need vmlist_lock?
- area = find_vm_area(cpu_addr);
find_vm_area only checks vmalloced regions so we need more check.
if (!area || !(area->flags & VM_DMA))
- if (!area) {
pr_err("%s: trying to free invalid coherent area: %p\n",
__func__, cpu_addr);
dump_stack();
read_unlock(&vmlist_lock);
return;
- }
- unmap_kernel_range((unsigned long)cpu_addr, size);
- read_unlock(&vmlist_lock);
- vunmap(cpu_addr);
+}
+struct dma_pool {
- size_t size;
- spinlock_t lock;
- unsigned long *bitmap;
- unsigned long count;
Nitpick. What does count mean? nr_pages?
- void *vaddr;
- struct page *page;
+};
+static struct dma_pool atomic_pool = {
- .size = SZ_256K,
+};
AFAIUC, we could set it to 2M but you are reducing it to 256K. What's the justification for that default value?
+static int __init early_coherent_pool(char *p) +{
- atomic_pool.size = memparse(p, &p);
- return 0;
+} +early_param("coherent_pool", early_coherent_pool);
+/*
- Initialise the coherent pool for atomic allocations.
- */
+static int __init atomic_pool_init(void) +{
- struct dma_pool *pool = &atomic_pool;
- pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
- unsigned long count = pool->size >> PAGE_SHIFT;
- gfp_t gfp = GFP_KERNEL | GFP_DMA;
- unsigned long *bitmap;
- struct page *page;
- void *ptr;
- int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
- bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!bitmap)
goto no_bitmap;
- page = __dma_alloc_buffer(NULL, pool->size, gfp);
- if (!page)
goto no_page;
- ptr = __dma_alloc_remap(page, pool->size, gfp, prot, NULL);
- if (ptr) {
spin_lock_init(&pool->lock);
pool->vaddr = ptr;
pool->page = page;
pool->bitmap = bitmap;
pool->count = count;
pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
(unsigned)pool->size / 1024);
return 0;
- }
- __dma_free_buffer(page, pool->size);
+no_page:
- kfree(bitmap);
+no_bitmap:
- pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
(unsigned)pool->size / 1024);
- return -ENOMEM;
+} +core_initcall(atomic_pool_init);
+static void *__alloc_from_pool(size_t size, struct page **ret_page) +{
- struct dma_pool *pool = &atomic_pool;
- unsigned int count = size >> PAGE_SHIFT;
- unsigned int pageno;
- unsigned long flags;
- void *ptr = NULL;
- size_t align;
- if (!pool->vaddr) {
dump_stack(); return NULL; }pr_err("%s: coherent pool not initialised!\n", __func__);
/*
* Align the virtual region allocation - maximum alignment is
* a section size, minimum is a page size. This helps reduce
* fragmentation of the DMA space, and also prevents allocations
* smaller than a section from crossing a section boundary.
* Align the region allocation - allocations from pool are rather
* small, so align them to their order in pages, minimum is a page
*/* size. This helps reduce fragmentation of the DMA space.
- bit = fls(size - 1);
- if (bit > SECTION_SHIFT)
bit = SECTION_SHIFT;
- align = 1 << bit;
- align = PAGE_SIZE << get_order(size);
- /*
* Allocate a virtual address in the consistent mapping region.
*/
- c = arm_vmregion_alloc(&consistent_head, align, size,
gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
- if (c) {
pte_t *pte;
int idx = CONSISTENT_PTE_INDEX(c->vm_start);
u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
pte = consistent_pte[idx] + off;
c->vm_pages = page;
do {
BUG_ON(!pte_none(*pte));
set_pte_ext(pte, mk_pte(page, prot), 0);
page++;
pte++;
off++;
if (off >= PTRS_PER_PTE) {
off = 0;
pte = consistent_pte[++idx];
}
} while (size -= PAGE_SIZE);
dsb();
return (void *)c->vm_start;
- spin_lock_irqsave(&pool->lock, flags);
- pageno = bitmap_find_next_zero_area(pool->bitmap, pool->count,
0, count, (1 << align) - 1);
- if (pageno < pool->count) {
bitmap_set(pool->bitmap, pageno, count);
ptr = pool->vaddr + PAGE_SIZE * pageno;
}*ret_page = pool->page + pageno;
- return NULL;
- spin_unlock_irqrestore(&pool->lock, flags);
- return ptr;
} -static void __dma_free_remap(void *cpu_addr, size_t size) +static int __free_from_pool(void *start, size_t size) {
- struct arm_vmregion *c;
- unsigned long addr;
- pte_t *ptep;
- int idx;
- u32 off;
- struct dma_pool *pool = &atomic_pool;
- unsigned long pageno, count;
- unsigned long flags;
- c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
- if (!c) {
printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
__func__, cpu_addr);
- if (start < pool->vaddr || start > pool->vaddr + pool->size)
return 0;
- if (start + size > pool->vaddr + pool->size) {
dump_stack();pr_err("%s: freeing wrong coherent size from pool\n", __func__);
return;
}return 0;
- if ((c->vm_end - c->vm_start) != size) {
printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
__func__, c->vm_end - c->vm_start, size);
dump_stack();
size = c->vm_end - c->vm_start;
- }
- pageno = (start - pool->vaddr) >> PAGE_SHIFT;
- count = size >> PAGE_SHIFT;
- idx = CONSISTENT_PTE_INDEX(c->vm_start);
- off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
- ptep = consistent_pte[idx] + off;
- addr = c->vm_start;
- do {
pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
- spin_lock_irqsave(&pool->lock, flags);
- bitmap_clear(pool->bitmap, pageno, count);
- spin_unlock_irqrestore(&pool->lock, flags);
ptep++;
addr += PAGE_SIZE;
off++;
if (off >= PTRS_PER_PTE) {
off = 0;
ptep = consistent_pte[++idx];
}
if (pte_none(pte) || !pte_present(pte))
printk(KERN_CRIT "%s: bad page in kernel page table\n",
__func__);
- } while (size -= PAGE_SIZE);
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
- arm_vmregion_free(&consistent_head, c);
- return 1;
} #else /* !CONFIG_MMU */ #define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) #define __dma_free_remap(addr, size) do { } while (0) +#define __alloc_from_pool(size, ret_page) NULL +#define __free_from_pool(addr, size) 0 #endif /* CONFIG_MMU */ @@ -345,6 +315,16 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, *handle = ~0; size = PAGE_ALIGN(size);
- /*
* Atomic allocations need special handling
*/
- if (gfp & GFP_ATOMIC && !arch_is_coherent()) {
addr = __alloc_from_pool(size, &page);
if (addr)
*handle = pfn_to_dma(dev, page_to_pfn(page));
return addr;
- }
- page = __dma_alloc_buffer(dev, size, gfp); if (!page) return NULL;
@@ -398,24 +378,16 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU
- unsigned long user_size, kern_size;
- struct arm_vmregion *c;
- unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
- unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- unsigned long pfn = dma_to_pfn(dev, dma_addr);
- unsigned long off = vma->vm_pgoff;
- user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
- c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
- if (c) {
unsigned long off = vma->vm_pgoff;
kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
if (off < kern_size &&
user_size <= (kern_size - off)) {
ret = remap_pfn_range(vma, vma->vm_start,
page_to_pfn(c->vm_pages) + off,
user_size << PAGE_SHIFT,
vma->vm_page_prot);
}
- if (off < count && user_count <= (count - off)) {
ret = remap_pfn_range(vma, vma->vm_start,
pfn + off,
user_count << PAGE_SHIFT,
}vma->vm_page_prot);
#endif /* CONFIG_MMU */ @@ -444,13 +416,16 @@ EXPORT_SYMBOL(dma_mmap_writecombine); */ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) {
- WARN_ON(irqs_disabled());
- if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return;
size = PAGE_ALIGN(size);
- if (__free_from_pool(cpu_addr, size))
return;
- WARN_ON(irqs_disabled());
- if (!arch_is_coherent()) __dma_free_remap(cpu_addr, size);
@@ -726,9 +701,6 @@ EXPORT_SYMBOL(dma_set_mask); static int __init dma_debug_do_init(void) { -#ifdef CONFIG_MMU
- arm_vmregion_create_proc("dma-mappings", &consistent_head);
-#endif dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; }
Hello,
On Tuesday, May 22, 2012 9:33 AM Minchan Kim wrote:
On 05/17/2012 07:54 PM, Marek Szyprowski wrote:
This patch changes dma-mapping subsystem to use generic vmalloc areas for all consistent dma allocations. This increases the total size limit of the consistent allocations and removes platform hacks and a lot of duplicated code.
I like this patch very much!
Thanks!
There are just small nitpicks below.
Atomic allocations are served from special pool preallocated on boot, becasue vmalloc areas cannot be reliably created in atomic context.
typo because
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com
Documentation/kernel-parameters.txt | 4 + arch/arm/include/asm/dma-mapping.h | 2 +- arch/arm/mm/dma-mapping.c | 360 ++++++++++++++++------------------- 3 files changed, 171 insertions(+), 195 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c1601e5..ba58f50 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -515,6 +515,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a hypervisor. Default: yes
- coherent_pool=nn[KMG] [ARM,KNL]
Sets the size of memory pool for coherent, atomic dma
allocations.
- code_bytes [X86] How many bytes of object code to print in an oops report. Range: 0 - 8192
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index cb3b7c9..92b0afb 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -210,7 +210,7 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
- DMA region above it's default value of 2MB. It must be called before the
- memory allocator is initialised, i.e. before any core_initcall.
*/ -extern void __init init_consistent_dma_size(unsigned long size); +static inline void init_consistent_dma_size(unsigned long size) { }
#ifdef CONFIG_DMABOUNCE diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index db23ae4..3be4de2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -19,6 +19,8 @@ #include <linux/dma-mapping.h> #include <linux/highmem.h> #include <linux/slab.h> +#include <linux/io.h> +#include <linux/vmalloc.h>
#include <asm/memory.h> #include <asm/highmem.h> @@ -119,210 +121,178 @@ static void __dma_free_buffer(struct page *page, size_t size) }
#ifdef CONFIG_MMU
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT)
-/*
- These are the page tables (2MB each) covering uncached, DMA consistent allocations
- */
-static pte_t **consistent_pte;
-#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M
-unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE;
-void __init init_consistent_dma_size(unsigned long size) -{
- unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M);
- BUG_ON(consistent_pte); /* Check we're called before DMA region init */
- BUG_ON(base < VMALLOC_END);
- /* Grow region to accommodate specified size */
- if (base < consistent_base)
consistent_base = base;
-}
-#include "vmregion.h"
-static struct arm_vmregion_head consistent_head = {
- .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_end = CONSISTENT_END,
-};
#ifdef CONFIG_HUGETLB_PAGE #error ARM Coherent DMA allocator does not (yet) support huge TLB #endif
-/*
- Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void) -{
- int ret = 0;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- int i = 0;
- unsigned long base = consistent_base;
- unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
- consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
- if (!consistent_pte) {
pr_err("%s: no memory\n", __func__);
return -ENOMEM;
- }
- pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END);
- consistent_head.vm_start = base;
- do {
pgd = pgd_offset(&init_mm, base);
pud = pud_alloc(&init_mm, pgd, base);
if (!pud) {
printk(KERN_ERR "%s: no pud tables\n", __func__);
ret = -ENOMEM;
break;
}
pmd = pmd_alloc(&init_mm, pud, base);
if (!pmd) {
printk(KERN_ERR "%s: no pmd tables\n", __func__);
ret = -ENOMEM;
break;
}
WARN_ON(!pmd_none(*pmd));
pte = pte_alloc_kernel(pmd, base);
if (!pte) {
printk(KERN_ERR "%s: no pte tables\n", __func__);
ret = -ENOMEM;
break;
}
consistent_pte[i++] = pte;
base += PMD_SIZE;
- } while (base < CONSISTENT_END);
- return ret;
-}
-core_initcall(consistent_init);
static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) {
- struct arm_vmregion *c;
- size_t align;
- int bit;
- struct vm_struct *area;
- unsigned long addr;
- if (!consistent_pte) {
printk(KERN_ERR "%s: not initialised\n", __func__);
- area = get_vm_area_caller(size, VM_DMA | VM_USERMAP, caller);
Out of curiosity. Do we always map dma area into user's address space?
Nope, but there is always such possibility that the driver calls dma_mmap_*() and lets user space to access the memory allocated for dma.
- if (!area)
return NULL;
- addr = (unsigned long)area->addr;
- area->phys_addr = __pfn_to_phys(page_to_pfn(page));
- if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) {
vunmap((void *)addr);
return NULL;
- }
- return (void *)addr;
+}
+static void __dma_free_remap(void *cpu_addr, size_t size) +{
- struct vm_struct *area;
- read_lock(&vmlist_lock);
Why do we need vmlist_lock?
In fact, this one is not really needed here. I've copied it from arch/arm/mm/ioremap.c and then replaced search loop with find_vm_area(). Now I see that find_vm_area() uses internal locks in find_vmap_area(), so the global vmlist_lock is not needed here.
- area = find_vm_area(cpu_addr);
find_vm_area only checks vmalloced regions so we need more check.
if (!area || !(area->flags & VM_DMA))
- if (!area) {
pr_err("%s: trying to free invalid coherent area: %p\n",
__func__, cpu_addr);
dump_stack();
read_unlock(&vmlist_lock);
return;
- }
- unmap_kernel_range((unsigned long)cpu_addr, size);
- read_unlock(&vmlist_lock);
- vunmap(cpu_addr);
+}
+struct dma_pool {
- size_t size;
- spinlock_t lock;
- unsigned long *bitmap;
- unsigned long count;
Nitpick. What does count mean? nr_pages?
Right, I will rename it to nr_pages as it is much better name.
- void *vaddr;
- struct page *page;
+};
+static struct dma_pool atomic_pool = {
- .size = SZ_256K,
+};
AFAIUC, we could set it to 2M but you are reducing it to 256K. What's the justification for that default value?
I want to reduce memory waste. This atomic_pool is very rarely used (non-atomic allocations don't use this pool, kernel mappings are created on fly for them). The original consistent dma size on ARM was set to 2MiB, but it covered both atomic and non-atomic allocations. Some time ago (in the context of CMA/Contiguous Memory Allocator in Cambourne during Linaro MM meeting) we have discussed the idea of pool for the atomic allocations and the conclusion was that the 1/8 of the original consistent dma size is probably more than enough. This value can be adjusted later if really needed or set with kernel boot parameter for some rare systems that needs more.
+static int __init early_coherent_pool(char *p) +{
- atomic_pool.size = memparse(p, &p);
- return 0;
+} +early_param("coherent_pool", early_coherent_pool);
+/*
- Initialise the coherent pool for atomic allocations.
- */
+static int __init atomic_pool_init(void) +{
- struct dma_pool *pool = &atomic_pool;
- pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
- unsigned long count = pool->size >> PAGE_SHIFT;
- gfp_t gfp = GFP_KERNEL | GFP_DMA;
- unsigned long *bitmap;
- struct page *page;
- void *ptr;
- int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
- bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!bitmap)
goto no_bitmap;
- page = __dma_alloc_buffer(NULL, pool->size, gfp);
- if (!page)
goto no_page;
- ptr = __dma_alloc_remap(page, pool->size, gfp, prot, NULL);
- if (ptr) {
spin_lock_init(&pool->lock);
pool->vaddr = ptr;
pool->page = page;
pool->bitmap = bitmap;
pool->count = count;
pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
(unsigned)pool->size / 1024);
return 0;
- }
- __dma_free_buffer(page, pool->size);
+no_page:
- kfree(bitmap);
+no_bitmap:
- pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
(unsigned)pool->size / 1024);
- return -ENOMEM;
+} +core_initcall(atomic_pool_init);
+static void *__alloc_from_pool(size_t size, struct page **ret_page) +{
struct dma_pool *pool = &atomic_pool;
unsigned int count = size >> PAGE_SHIFT;
unsigned int pageno;
unsigned long flags;
void *ptr = NULL;
size_t align;
if (!pool->vaddr) {
pr_err("%s: coherent pool not initialised!\n", __func__);
dump_stack(); return NULL; }
/*
* Align the virtual region allocation - maximum alignment is
* a section size, minimum is a page size. This helps reduce
* fragmentation of the DMA space, and also prevents allocations
* smaller than a section from crossing a section boundary.
* Align the region allocation - allocations from pool are rather
* small, so align them to their order in pages, minimum is a page
*/* size. This helps reduce fragmentation of the DMA space.
- bit = fls(size - 1);
- if (bit > SECTION_SHIFT)
bit = SECTION_SHIFT;
- align = 1 << bit;
- align = PAGE_SIZE << get_order(size);
- /*
* Allocate a virtual address in the consistent mapping region.
*/
- c = arm_vmregion_alloc(&consistent_head, align, size,
gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
- if (c) {
pte_t *pte;
int idx = CONSISTENT_PTE_INDEX(c->vm_start);
u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
pte = consistent_pte[idx] + off;
c->vm_pages = page;
do {
BUG_ON(!pte_none(*pte));
set_pte_ext(pte, mk_pte(page, prot), 0);
page++;
pte++;
off++;
if (off >= PTRS_PER_PTE) {
off = 0;
pte = consistent_pte[++idx];
}
} while (size -= PAGE_SIZE);
dsb();
return (void *)c->vm_start;
- spin_lock_irqsave(&pool->lock, flags);
- pageno = bitmap_find_next_zero_area(pool->bitmap, pool->count,
0, count, (1 << align) - 1);
- if (pageno < pool->count) {
bitmap_set(pool->bitmap, pageno, count);
ptr = pool->vaddr + PAGE_SIZE * pageno;
}*ret_page = pool->page + pageno;
- return NULL;
- spin_unlock_irqrestore(&pool->lock, flags);
- return ptr;
}
-static void __dma_free_remap(void *cpu_addr, size_t size) +static int __free_from_pool(void *start, size_t size) {
- struct arm_vmregion *c;
- unsigned long addr;
- pte_t *ptep;
- int idx;
- u32 off;
- struct dma_pool *pool = &atomic_pool;
- unsigned long pageno, count;
- unsigned long flags;
- c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
- if (!c) {
printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
__func__, cpu_addr);
- if (start < pool->vaddr || start > pool->vaddr + pool->size)
return 0;
- if (start + size > pool->vaddr + pool->size) {
dump_stack();pr_err("%s: freeing wrong coherent size from pool\n", __func__);
return;
}return 0;
- if ((c->vm_end - c->vm_start) != size) {
printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
__func__, c->vm_end - c->vm_start, size);
dump_stack();
size = c->vm_end - c->vm_start;
- }
- pageno = (start - pool->vaddr) >> PAGE_SHIFT;
- count = size >> PAGE_SHIFT;
- idx = CONSISTENT_PTE_INDEX(c->vm_start);
- off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
- ptep = consistent_pte[idx] + off;
- addr = c->vm_start;
- do {
pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
- spin_lock_irqsave(&pool->lock, flags);
- bitmap_clear(pool->bitmap, pageno, count);
- spin_unlock_irqrestore(&pool->lock, flags);
ptep++;
addr += PAGE_SIZE;
off++;
if (off >= PTRS_PER_PTE) {
off = 0;
ptep = consistent_pte[++idx];
}
if (pte_none(pte) || !pte_present(pte))
printk(KERN_CRIT "%s: bad page in kernel page table\n",
__func__);
- } while (size -= PAGE_SIZE);
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
- arm_vmregion_free(&consistent_head, c);
- return 1;
}
#else /* !CONFIG_MMU */
#define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) #define __dma_free_remap(addr, size) do { } while (0) +#define __alloc_from_pool(size, ret_page) NULL +#define __free_from_pool(addr, size) 0
#endif /* CONFIG_MMU */
@@ -345,6 +315,16 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
gfp,
*handle = ~0; size = PAGE_ALIGN(size);
- /*
* Atomic allocations need special handling
*/
- if (gfp & GFP_ATOMIC && !arch_is_coherent()) {
addr = __alloc_from_pool(size, &page);
if (addr)
*handle = pfn_to_dma(dev, page_to_pfn(page));
return addr;
- }
- page = __dma_alloc_buffer(dev, size, gfp); if (!page) return NULL;
@@ -398,24 +378,16 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU
- unsigned long user_size, kern_size;
- struct arm_vmregion *c;
- unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
- unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- unsigned long pfn = dma_to_pfn(dev, dma_addr);
- unsigned long off = vma->vm_pgoff;
- user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
- c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
- if (c) {
unsigned long off = vma->vm_pgoff;
kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
if (off < kern_size &&
user_size <= (kern_size - off)) {
ret = remap_pfn_range(vma, vma->vm_start,
page_to_pfn(c->vm_pages) + off,
user_size << PAGE_SHIFT,
vma->vm_page_prot);
}
- if (off < count && user_count <= (count - off)) {
ret = remap_pfn_range(vma, vma->vm_start,
pfn + off,
user_count << PAGE_SHIFT,
}vma->vm_page_prot);
#endif /* CONFIG_MMU */
@@ -444,13 +416,16 @@ EXPORT_SYMBOL(dma_mmap_writecombine); */ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) {
WARN_ON(irqs_disabled());
if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return;
size = PAGE_ALIGN(size);
- if (__free_from_pool(cpu_addr, size))
return;
- WARN_ON(irqs_disabled());
- if (!arch_is_coherent()) __dma_free_remap(cpu_addr, size);
@@ -726,9 +701,6 @@ EXPORT_SYMBOL(dma_set_mask);
static int __init dma_debug_do_init(void) { -#ifdef CONFIG_MMU
- arm_vmregion_create_proc("dma-mappings", &consistent_head);
-#endif dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; }
Best regards
static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) {
- struct arm_vmregion *c;
- size_t align;
- int bit;
- struct vm_struct *area;
- unsigned long addr;
- if (!consistent_pte) {
printk(KERN_ERR "%s: not initialised\n", __func__);
- area = get_vm_area_caller(size, VM_DMA | VM_USERMAP, caller);
In this patch, VM_DMA is only used here. So, is this no effect?
Hello,
On Wednesday, May 30, 2012 2:11 AM KOSAKI Motohiro wrote:
static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) {
- struct arm_vmregion *c;
- size_t align;
- int bit;
- struct vm_struct *area;
- unsigned long addr;
- if (!consistent_pte) {
printk(KERN_ERR "%s: not initialised\n", __func__);
- area = get_vm_area_caller(size, VM_DMA | VM_USERMAP, caller);
In this patch, VM_DMA is only used here. So, is this no effect?
I introduced it mainly to let user know which areas have been allocated by the dma-mapping api.
I also plan to add a check suggested by Minchan Kim in __dma_free_remap() if the vmalloc area have been in fact allocated with VM_DMA set.
Best regards
(5/30/12 3:15 AM), Marek Szyprowski wrote:
Hello,
On Wednesday, May 30, 2012 2:11 AM KOSAKI Motohiro wrote:
static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) {
- struct arm_vmregion *c;
- size_t align;
- int bit;
- struct vm_struct *area;
- unsigned long addr;
- if (!consistent_pte) {
printk(KERN_ERR "%s: not initialised\n", __func__);
- area = get_vm_area_caller(size, VM_DMA | VM_USERMAP, caller);
In this patch, VM_DMA is only used here. So, is this no effect?
I introduced it mainly to let user know which areas have been allocated by the dma-mapping api.
vma->flags are limited resource, it has only 32 (or 64) bits. Please don't use it for such unimportant thing.
I also plan to add a check suggested by Minchan Kim in __dma_free_remap() if the vmalloc area have been in fact allocated with VM_DMA set.
linaro-mm-sig@lists.linaro.org