On Thu, 06 Oct 2011 15:54:46 +0200 Marek Szyprowski m.szyprowski@samsung.com wrote:
The Contiguous Memory Allocator is a set of helper functions for DMA mapping framework that improves allocations of contiguous memory chunks.
CMA grabs memory on system boot, marks it with CMA_MIGRATE_TYPE and gives back to the system. Kernel is allowed to allocate movable pages within CMA's managed memory so that it can be used for example for page cache when DMA mapping do not use it. On dma_alloc_from_contiguous() request such pages are migrated out of CMA area to free required contiguous block and fulfill the request. This allows to allocate large contiguous chunks of memory at any time assuming that there is enough free memory available in the system.
This code is heavily based on earlier works by Michal Nazarewicz.
...
+#ifdef phys_to_pfn +/* nothing to do */ +#elif defined __phys_to_pfn +# define phys_to_pfn __phys_to_pfn +#elif defined __va +# define phys_to_pfn(x) page_to_pfn(virt_to_page(__va(x))) +#else +# error phys_to_pfn implementation needed +#endif
Yikes!
This hackery should not be here, please. If we need a phys_to_pfn() then let's write a proper one which lives in core MM and arch, then get it suitably reviewed and integrated and then maintained.
+struct cma {
- unsigned long base_pfn;
- unsigned long count;
- unsigned long *bitmap;
+};
+struct cma *dma_contiguous_default_area;
+#ifndef CONFIG_CMA_SIZE_ABSOLUTE +#define CONFIG_CMA_SIZE_ABSOLUTE 0 +#endif
+#ifndef CONFIG_CMA_SIZE_PERCENTAGE +#define CONFIG_CMA_SIZE_PERCENTAGE 0 +#endif
No, .c files should not #define CONFIG_ variables like this.
One approach is
#ifdef CONFIG_FOO #define BAR CONFIG_FOO #else #define BAR 0 #endif
but that's merely cosmetic fluff. A superior fix is to get the Kconfig correct, so CONFIG_FOO cannot ever be undefined if we're compiling this .c file.
+static unsigned long size_abs = CONFIG_CMA_SIZE_ABSOLUTE * SZ_1M; +static unsigned long size_percent = CONFIG_CMA_SIZE_PERCENTAGE; +static long size_cmdline = -1;
Maybe a little documentation for these, explaining their role in everything?
+static int __init early_cma(char *p) +{
- pr_debug("%s(%s)\n", __func__, p);
- size_cmdline = memparse(p, &p);
- return 0;
+} +early_param("cma", early_cma);
Did this get added to Documentation/kernel-parameters.txt?
+static unsigned long __init __cma_early_get_total_pages(void)
The leading __ seems unnecessay for a static function.
+{
- struct memblock_region *reg;
- unsigned long total_pages = 0;
- /*
* We cannot use memblock_phys_mem_size() here, because
* memblock_analyze() has not been called yet.
*/
- for_each_memblock(memory, reg)
total_pages += memblock_region_memory_end_pfn(reg) -
memblock_region_memory_base_pfn(reg);
- return total_pages;
+}
+/**
- dma_contiguous_reserve() - reserve area for contiguous memory handling
- This funtion reserves memory from early allocator. It should be
- called by arch specific code once the early allocator (memblock or bootmem)
- has been activated and all other subsystems have already allocated/reserved
- memory.
- */
Forgot to document the argument.
+void __init dma_contiguous_reserve(phys_addr_t limit) +{
- unsigned long selected_size = 0;
- unsigned long total_pages;
- pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
- total_pages = __cma_early_get_total_pages();
- size_percent *= (total_pages << PAGE_SHIFT) / 100;
- pr_debug("%s: total available: %ld MiB, size absolute: %ld MiB, size percentage: %ld MiB\n",
__func__, (total_pages << PAGE_SHIFT) / SZ_1M,
size_abs / SZ_1M, size_percent / SZ_1M);
+#ifdef CONFIG_CMA_SIZE_SEL_ABSOLUTE
- selected_size = size_abs;
+#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
- selected_size = size_percent;
+#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
- selected_size = min(size_abs, size_percent);
+#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
- selected_size = max(size_abs, size_percent);
+#endif
geeze, what's all that stuff?
Whatever it's doing, it seems a bad idea to relegate these decisions to Kconfig-time. The vast majority of users don't have control of their kernel configuration! The code would be more flexible and generic if this was done at runtime somehow.
- if (size_cmdline != -1)
selected_size = size_cmdline;
- if (!selected_size)
return;
- pr_debug("%s: reserving %ld MiB for global area\n", __func__,
selected_size / SZ_1M);
- dma_declare_contiguous(NULL, selected_size, 0, limit);
+};
...
+static struct cma *__cma_create_area(unsigned long base_pfn,
s/__//?
unsigned long count)
+{
- int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
- struct cma *cma;
- pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count);
- cma = kmalloc(sizeof *cma, GFP_KERNEL);
- if (!cma)
return ERR_PTR(-ENOMEM);
- cma->base_pfn = base_pfn;
- cma->count = count;
- cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!cma->bitmap)
goto no_mem;
- __cma_activate_area(base_pfn, count);
- pr_debug("%s: returned %p\n", __func__, (void *)cma);
- return cma;
+no_mem:
- kfree(cma);
- return ERR_PTR(-ENOMEM);
+}
...
+int __init dma_declare_contiguous(struct device *dev, unsigned long size,
phys_addr_t base, phys_addr_t limit)
+{
- struct cma_reserved *r = &cma_reserved[cma_reserved_count];
- unsigned long alignment;
- pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
(unsigned long)size, (unsigned long)base,
(unsigned long)limit);
- /* Sanity checks */
- if (cma_reserved_count == ARRAY_SIZE(cma_reserved))
return -ENOSPC;
I think a loud printk() is appropriate if the kernel fails in this manner.
- if (!size)
return -EINVAL;
- /* Sanitise input arguments */
- alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order);
- base = ALIGN(base, alignment);
- size = ALIGN(size, alignment);
- limit = ALIGN(limit, alignment);
- /* Reserve memory */
- if (base) {
if (memblock_is_region_reserved(base, size) ||
memblock_reserve(base, size) < 0) {
base = -EBUSY;
goto err;
}
- } else {
/*
* Use __memblock_alloc_base() since
* memblock_alloc_base() panic()s.
*/
phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
if (!addr) {
base = -ENOMEM;
goto err;
} else if (addr + size > ~(unsigned long)0) {
memblock_free(addr, size);
base = -EOVERFLOW;
EOVERFLOW is a numeric/float thing. It seems inappropriate to use it here.
goto err;
} else {
base = addr;
}
- }
- /*
* Each reserved area must be initialised later, when more kernel
* subsystems (like slab allocator) are available.
*/
- r->start = base;
- r->size = size;
- r->dev = dev;
- cma_reserved_count++;
- printk(KERN_INFO "CMA: reserved %ld MiB at %08lx\n", size / SZ_1M,
(unsigned long)base);
- /*
* Architecture specific contiguous memory fixup.
*/
- dma_contiguous_early_fixup(base, size);
- return 0;
+err:
- printk(KERN_ERR "CMA: failed to reserve %ld MiB\n", size / SZ_1M);
- return base;
+}
...
+static inline struct cma *get_dev_cma_area(struct device *dev) +{
- if (dev && dev->cma_area)
return dev->cma_area;
- return dma_contiguous_default_area;
+}
+static inline void set_dev_cma_area(struct device *dev, struct cma *cma) +{
- if (dev)
dev->cma_area = cma;
- dma_contiguous_default_area = cma;
+}
dev_[get|set]_cma_area() would be better names.
+#endif +#endif +#endif
...
+#ifdef CONFIG_CMA
+#define MAX_CMA_AREAS (8)
What are the implications of this decision?
Should it be in Kconfig? Everything else is :)
...