Re: [Linaro-mm-sig] [RFCv2 PATCH 2/9 - 4/4] v4l: vb2-dma-contig: update and code refactoring

22 Mar 2012

Hi Tomasz,
Thanks for the patch.
On Thursday 22 March 2012 11:02:23 Laurent Pinchart wrote:
...
From: Tomasz Stanislawski t.stanislaws@samsung.com
This patch combines updates and fixes to dma-contig allocator.
Moreover the allocator code was refactored.
The most important changes are:

functions were reordered
move compression of scatterlist to separate function
add support for multichunk but contiguous scatterlists
simplified implementation of vb2-dma-contig context structure
let mmap method to use dma_mmap_writecombine
add support for scatterlist in userptr mode

Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com
   [mmap method]
Signed-off-by: Andrzej Pietrasiewicz andrzej.p@samsung.com
   [scatterlist in userptr mode]
Signed-off-by: Kamil Debski k.debski@samsung.com
   [bugfixing]
Signed-off-by: Tomasz Stanislawski t.stanislaws@samsung.com
   [core refactoring, helper functions]
Signed-off-by: Kyungmin Park kyungmin.park@samsung.com

drivers/media/video/videobuf2-dma-contig.c |  400
+++++++++++++++++++++++++--- 1 files changed, 365 insertions(+), 35
deletions(-)

diff --git a/drivers/media/video/videobuf2-dma-contig.c
b/drivers/media/video/videobuf2-dma-contig.c index c898e6f..9965465 100644
--- a/drivers/media/video/videobuf2-dma-contig.c
+++ b/drivers/media/video/videobuf2-dma-contig.c
@@ -10,9 +10,12 @@

the Free Software Foundation.

*/
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
 #include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/dma-mapping.h>
#include <media/videobuf2-core.h>
 #include <media/videobuf2-memops.h>
@@ -22,16 +25,115 @@ struct vb2_dc_buf {
   void				*vaddr;
   unsigned long			size;
   dma_addr_t			dma_addr;

struct sg_table			*dma_sgt;

enum dma_data_direction		dma_dir;
/* MMAP related */
struct vb2_vmarea_handler	handler;
atomic_t			refcount;

struct sg_table			*sgt_base;
/* USERPTR related */
struct vm_area_struct		*vma;


};
/*********************************************/
+/*        scatterlist table functions        */
+/*********************************************/



+static struct sg_table *vb2_dc_pages_to_sgt(struct page **pages,

unsigned long n_pages, size_t offset, size_t offset2)

+{

struct sg_table *sgt;
int i, j; /* loop counters */
int cur_page, chunks;
int ret;
struct scatterlist *s;

sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
if (!sgt)
return ERR_PTR(-ENOMEM);



/* compute number of chunks */
chunks = 1;
for (i = 1; i < n_pages; ++i)
if (pages[i] != pages[i - 1] + 1)


	++chunks;



ret = sg_alloc_table(sgt, chunks, GFP_KERNEL);
if (ret) {
kfree(sgt);


return ERR_PTR(-ENOMEM);


}

/* merging chunks and putting them into the scatterlist */
cur_page = 0;
for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
size_t size = PAGE_SIZE;



for (j = cur_page + 1; j < n_pages; ++j) {


	if (pages[j] != pages[j - 1] + 1)


		break;


	size += PAGE_SIZE;


}



/* cut offset if chunk starts at the first page */


if (cur_page == 0)


	size -= offset;


/* cut offset2 if chunk ends at the last page */


if (j == n_pages)


	size -= offset2;



sg_set_page(s, pages[cur_page], size, offset);


offset = 0;


cur_page = j;


}

return sgt;

+}



+static void vb2_dc_release_sgtable(struct sg_table *sgt)
+{

sg_free_table(sgt);
kfree(sgt);

+}



+static void vb2_dc_put_sgtable(struct sg_table *sgt, int dirty)
+{

struct scatterlist *s;
int i, j;

for_each_sg(sgt->sgl, s, sgt->nents, i) {
struct page *page = sg_page(s);


int n_pages = PAGE_ALIGN(s->offset + s->length) >> PAGE_SHIFT;



for (j = 0; j < n_pages; ++j, ++page) {


	if (dirty)


		set_page_dirty_lock(page);


	put_page(page);


}


}

vb2_dc_release_sgtable(sgt);

+}



+static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
+{

struct scatterlist *s;
dma_addr_t expected = sg_dma_address(sgt->sgl);
int i;
unsigned long size = 0;

for_each_sg(sgt->sgl, s, sgt->nents, i) {
if (sg_dma_address(s) != expected)


	break;


expected = sg_dma_address(s) + sg_dma_len(s);


size += sg_dma_len(s);


}
return size;

+}



+/*********************************************/
 /*         callbacks for all buffers         */
 /*********************************************/
@@ -45,8 +147,6 @@ static void *vb2_dc_cookie(void *buf_priv)
 static void *vb2_dc_vaddr(void *buf_priv)
 {
   struct vb2_dc_buf *buf = buf_priv;

if (!buf)

return 0;


return buf->vaddr;


}
@@ -58,6 +158,28 @@ static unsigned int vb2_dc_num_users(void *buf_priv)
   return atomic_read(&buf->refcount);
 }
+static void vb2_dc_prepare(void *buf_priv)
+{

struct vb2_dc_buf *buf = buf_priv;
struct sg_table *sgt = buf->dma_sgt;

if (!sgt)
return;



dma_sync_sg_for_device(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);

+}



+static void vb2_dc_finish(void *buf_priv)
+{

struct vb2_dc_buf *buf = buf_priv;
struct sg_table *sgt = buf->dma_sgt;

if (!sgt)
return;



dma_sync_sg_for_cpu(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);

+}



/*********************************************/
 /*        callbacks for MMAP buffers         */
 /*********************************************/
@@ -66,31 +188,70 @@ static void vb2_dc_put(void *buf_priv)
 {
   struct vb2_dc_buf *buf = buf_priv;

if (atomic_dec_and_test(&buf->refcount)) {
dma_free_coherent(buf->dev, buf->size, buf->vaddr,


		  buf->dma_addr);


kfree(buf);


}


if (!atomic_dec_and_test(&buf->refcount))
return;



vb2_dc_release_sgtable(buf->sgt_base);
dma_free_coherent(buf->dev, buf->size, buf->vaddr,
buf->dma_addr);


kfree(buf);

}
static void *vb2_dc_alloc(void *alloc_ctx, unsigned long size)
 {
   struct device *dev = alloc_ctx;
   struct vb2_dc_buf *buf;

int ret;

int n_pages;

struct page **pages = NULL;
buf = kzalloc(sizeof *buf, GFP_KERNEL);
if (!buf)
return ERR_PTR(-ENOMEM);



buf->vaddr = dma_alloc_coherent(dev, size, &buf->dma_addr, GFP_KERNEL);


buf->dev = dev;
buf->size = size;
buf->vaddr = dma_alloc_coherent(buf->dev, buf->size, &buf->dma_addr,
GFP_KERNEL);



ret = -ENOMEM;
if (!buf->vaddr) {


dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size);


kfree(buf);


return ERR_PTR(-ENOMEM);




dev_err(dev, "dma_alloc_coherent of size %ld failed\n",


	size);


goto fail_buf;

}


buf->dev = dev;
buf->size = size;


WARN_ON((unsigned long)buf->vaddr & ~PAGE_MASK);
WARN_ON(buf->dma_addr & ~PAGE_MASK);

n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;

pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
if (!pages) {
printk(KERN_ERR "failed to alloc page table\n");


goto fail_dma;


}

ret = dma_get_pages(dev, buf->vaddr, buf->dma_addr, pages, n_pages);

As the only purpose of this is to retrieve a list of pages that will be used
to create a single-entry sgt, wouldn't it be possible to shortcut the code and
get the physical address of the buffer directly?
...

if (ret < 0) {
printk(KERN_ERR "failed to get buffer pages from DMA API\n");


goto fail_pages;


}
if (ret != n_pages) {
ret = -EFAULT;


printk(KERN_ERR "failed to get all pages from DMA API\n");


goto fail_pages;


}

buf->sgt_base = vb2_dc_pages_to_sgt(pages, n_pages, 0, 0);
if (IS_ERR(buf->sgt_base)) {
ret = PTR_ERR(buf->sgt_base);


printk(KERN_ERR "failed to prepare sg table\n");


goto fail_pages;


}

buf->sgt_base isn't used in this patch. I would move the buf->sgt_base 
creation code to the patch that uses it then, or to its own patch just before 
the patch that uses it.
...


/* pages are no longer needed */

kfree(pages);
buf->handler.refcount = &buf->refcount;
buf->handler.put = vb2_dc_put;


@@ -99,59 +260,226 @@ static void *vb2_dc_alloc(void *alloc_ctx, unsigned
long size) atomic_inc(&buf->refcount);
return buf;



+fail_pages:

kfree(pages);


+fail_dma:

dma_free_coherent(buf->dev, buf->size, buf->vaddr, buf->dma_addr);


+fail_buf:

kfree(buf);

return ERR_PTR(ret);

}
static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma)
 {
   struct vb2_dc_buf *buf = buf_priv;

int ret;

/*
* dma_mmap_* uses vm_pgoff as in-buffer offset, but we want to


* map whole buffer


*/


vma->vm_pgoff = 0;

ret = dma_mmap_writecombine(buf->dev, vma, buf->vaddr,
buf->dma_addr, buf->size);





if (!buf) {
printk(KERN_ERR "No buffer to map\n");


return -EINVAL;




if (ret) {
printk(KERN_ERR "Remapping memory failed, error: %d\n", ret);


return ret;

}


return vb2_mmap_pfn_range(vma, buf->dma_addr, buf->size,
		  &vb2_common_vm_ops, &buf->handler);




vma->vm_flags		|= VM_DONTEXPAND | VM_RESERVED;
vma->vm_private_data	= &buf->handler;
vma->vm_ops		= &vb2_common_vm_ops;

vma->vm_ops->open(vma);

printk(KERN_DEBUG "%s: mapped dma addr 0x%08lx at 0x%08lx, size %ld\n",
__func__, (unsigned long)buf->dma_addr, vma->vm_start,


buf->size);



return 0;

}
/*********************************************/
 /*       callbacks for USERPTR buffers       */
 /*********************************************/
+static inline int vma_is_io(struct vm_area_struct *vma)
+{

return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));

Isn't VM_PFNMAP enough? Wouldn't it be possible (at least in theory) to get a
discontinuous physical range with VM_IO?
...
+}



+static int vb2_dc_get_pages(unsigned long start, struct page **pages,

int n_pages, struct vm_area_struct **copy_vma, int write)

+{

struct vm_area_struct *vma;
int n = 0; /* number of get pages */
int ret = -EFAULT;

/* entering critical section for mm access */
down_read(&current->mm->mmap_sem);

This will generate AB-BA deadlock warnings if lockdep is enabled. This 
function is called with the queue lock held, and the mmap() handler which 
takes the queue lock is called with current->mm->mmap_sem held.
This is a known issue with videobuf2, not specific to this patch. The warning 
is usually a false positive (which we still need to fix, as it worries users), 
but can become a real issue if an MMAP queue and a USERPTR queue are created 
by a driver with the same queue lock.
...

vma = find_vma(current->mm, start);
if (!vma) {
printk(KERN_ERR "no vma for address %lu\n", start);


goto cleanup;


}

if (vma_is_io(vma)) {
unsigned long pfn;



if (vma->vm_end - start < n_pages * PAGE_SIZE) {


	printk(KERN_ERR "vma is too small\n");


	goto cleanup;


}



for (n = 0; n < n_pages; ++n, start += PAGE_SIZE) {


	ret = follow_pfn(vma, start, &pfn);


	if (ret) {


		printk(KERN_ERR "no page for address %lu\n",


			start);


		goto cleanup;


	}


	pages[n] = pfn_to_page(pfn);


	get_page(pages[n]);



This worries me. When the VM_PFNMAP flag is set, the memory pages are not
backed by a struct page. Creating a struct page pointer out of it can be an
acceptable hack (for instance to store a page in a scatterlist with
sg_set_page() and then retrieve its physical address with sg_phys()), but you
should not expect the struct page to be valid for anything else. Calling
get_page() on it will likely crash.
...

}


} else {
n = get_user_pages(current, current->mm, start & PAGE_MASK,


	n_pages, write, 1, pages, NULL);


if (n != n_pages) {


	printk(KERN_ERR "got only %d of %d user pages\n",


		n, n_pages);


	goto cleanup;


}


}

*copy_vma = vb2_get_vma(vma);
if (!*copy_vma) {
printk(KERN_ERR "failed to copy vma\n");


ret = -ENOMEM;


goto cleanup;


}

Do we really need to make a copy of the VMA? The only reason why we store a
pointer to it is to check the flags in vb2_dc_put_userptr(). We could store
the flags instead and avoid the vb2_get_vma()/vb2_put_vma() calls altogether.
...


/* leaving critical section for mm access */
up_read(&current->mm->mmap_sem);

return 0;


+cleanup:

up_read(&current->mm->mmap_sem);

/* putting user pages if used, can be done wothout the lock */
while (n)
put_page(pages[--n]);



return ret;

+}



static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr,

			unsigned long size, int write)




unsigned long size, int write)

{
   struct vb2_dc_buf *buf;

struct vm_area_struct *vma;
dma_addr_t dma_addr = 0;
int ret;


unsigned long start, end, offset, offset2;

struct page **pages;

int n_pages;

int ret = 0;

struct sg_table *sgt;

unsigned long contig_size;
buf = kzalloc(sizeof *buf, GFP_KERNEL);
if (!buf)
return ERR_PTR(-ENOMEM);



ret = vb2_get_contig_userptr(vaddr, size, &vma, &dma_addr);


buf->dev = alloc_ctx;
buf->dma_dir = write ? DMA_FROM_DEVICE : DMA_TO_DEVICE;

start = (unsigned long)vaddr & PAGE_MASK;
offset = (unsigned long)vaddr & ~PAGE_MASK;
end = PAGE_ALIGN((unsigned long)vaddr + size);
offset2 = end - (unsigned long)vaddr - size;
n_pages = (end - start) >> PAGE_SHIFT;

pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
if (!pages) {
ret = -ENOMEM;


printk(KERN_ERR "failed to allocate pages table\n");


goto fail_buf;


}

/* extract page list from userspace mapping */
ret = vb2_dc_get_pages(start, pages, n_pages, &buf->vma, write);
if (ret) {


printk(KERN_ERR "Failed acquiring VMA for vaddr 0x%08lx\n",


		vaddr);


kfree(buf);


return ERR_PTR(ret);




printk(KERN_ERR "failed to get user pages\n");


goto fail_pages;


}

sgt = vb2_dc_pages_to_sgt(pages, n_pages, offset, offset2);
if (!sgt) {
printk(KERN_ERR "failed to create scatterlist table\n");


ret = -ENOMEM;


goto fail_get_pages;

}

This looks overly complex to me. You create a multi-chunk sgt out of the user
pointer address and map it completely, and then check if it starts with a big
enough contiguous chunk. Why don't you create an sgt with a single contiguous
chunk then? In the VM_PFNMAP case you could check whether the area is
contiguous when you follow the PFNs, stop at the first discontinuity, and
create an sgt with a single element right there. You would then need to call
vb2_dc_pages_to_sgt() in the normal case only, and stop at the first
discontinuity as well.
...

/* pages are no longer needed */
kfree(pages);
pages = NULL;

sgt->nents = dma_map_sg(buf->dev, sgt->sgl, sgt->orig_nents,
buf->dma_dir);


if (sgt->nents <= 0) {
printk(KERN_ERR "failed to map scatterlist\n");


ret = -EIO;


goto fail_sgt;


}

contig_size = vb2_dc_get_contiguous_size(sgt);
if (contig_size < size) {
printk(KERN_ERR "contiguous mapping is too small %lu/%lu\n",


	contig_size, size);


ret = -EFAULT;


goto fail_map_sg;


}

buf->dma_addr = sg_dma_address(sgt->sgl);
buf->size = size;


buf->dma_addr = dma_addr;
buf->vma = vma;


buf->dma_sgt = sgt;


atomic_inc(&buf->refcount);
return buf;



+fail_map_sg:

dma_unmap_sg(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);

I think this will break in the VM_PFNMAP case on non-coherent architectures.
arm_dma_unmap_page() will call __dma_page_dev_to_cpu() in that case, which can
dereference struct page. As explained above, the struct page isn't valid with
VM_PFNMAP. I haven't checked the dma_map_sg() and dma_sync_sg_*() calls, but
chances are they might break as well.
...



+fail_sgt:

vb2_dc_put_sgtable(sgt, 0);


+fail_get_pages:

while (pages && n_pages)
put_page(pages[--n_pages]);


vb2_put_vma(buf->vma);


+fail_pages:

kfree(pages); /* kfree is NULL-proof */


+fail_buf:

kfree(buf);

return ERR_PTR(ret);

}
-static void vb2_dc_put_userptr(void *mem_priv)
+static void vb2_dc_put_userptr(void *buf_priv)
 {

struct vb2_dc_buf *buf = mem_priv;

if (!buf)
return;




struct vb2_dc_buf *buf = buf_priv;

struct sg_table *sgt = buf->dma_sgt;

dma_unmap_sg(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir);

vb2_dc_put_sgtable(sgt, !vma_is_io(buf->vma));
vb2_put_vma(buf->vma);
kfree(buf);


}
@@ -168,6 +496,8 @@ const struct vb2_mem_ops vb2_dma_contig_memops = {
   .mmap		= vb2_dc_mmap,
   .get_userptr	= vb2_dc_get_userptr,
   .put_userptr	= vb2_dc_put_userptr,

.prepare	= vb2_dc_prepare,
.finish		= vb2_dc_finish,
.num_users	= vb2_dc_num_users,

};
 EXPORT_SYMBOL_GPL(vb2_dma_contig_memops);
-- 
Regards,

Laurent Pinchart

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

Re: [Linaro-mm-sig] [RFCv2 PATCH 2/9 - 4/4] v4l: vb2-dma-contig: update and code refactoring