This patch series implements my attempt at dma-buf synchronization. The core idea is that many devices have their own synchronization methods, but the more capable ones expose some form of fencing, so why not export those fences as dma-bufs?
This patchset implements dmabufmgr, which is based on ttm's code. The ttm code deals with a lot more than just reservation, however, so I stripped out almost all of the code that does not deal with reservations.
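To make the intended flow concrete, here is a rough sketch (not part of the patches themselves) of how a driver's submission path could drive the dmabufmgr_eu_* interface added below. my_submit_shared() and the my_sync_* arguments are placeholders for whatever fence storage the driver exports; error handling is reduced to the minimum.

#include <linux/dma-buf-mgr.h>
#include <linux/slab.h>

static int my_submit_shared(struct dma_buf *shared_buf,
			    struct dma_buf *my_sync_buf,
			    u32 my_sync_ofs, u32 my_seqno)
{
	struct dmabufmgr_validate *val;
	LIST_HEAD(list);
	int ret;

	val = kzalloc(sizeof(*val), GFP_KERNEL);
	if (!val)
		return -ENOMEM;
	val->bo = shared_buf;
	list_add_tail(&val->head, &list);

	/* take the reservations; deadlock avoidance is handled internally */
	ret = dmabufmgr_eu_reserve_buffers(&list);
	if (ret)
		goto out_free;

	/* wait for whatever fence the previous user attached */
	ret = dmabufmgr_eu_wait_completed_cpu(&list, true, true);
	if (ret) {
		dmabufmgr_eu_backoff_reservation(&list);
		goto out_free;
	}

	/* ... emit commands that touch shared_buf here ... */

	/* attach our own fence and drop the reservations */
	dmabufmgr_eu_fence_buffer_objects(my_sync_buf, my_sync_ofs,
					  my_seqno, &list);
out_free:
	kfree(val);
	return ret;
}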
I used the drm-intel-next-queued tree as a base; it contains some i915 flushing changes. I would rather have used linux-next, but the deferred fput code makes my system unbootable. That is unfortunate, since it would reduce the deadlocks that happen in dma_buf_put when two devices release each other's dma-buf.
The i915 changes implement a simple CPU wait only; the nouveau code imports the sync dma-buf read-only, maps it into the affected channels, and then waits on it in hardware. Since the hardware may still be processing other commands, it may turn out that no hardware wait has to be performed at all.
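For reference, "completed" in these waits boils down to a wrap-safe comparison of 32-bit sequence numbers, the same test the new CPU-wait helper performs internally (the function name here is illustrative only):

/* true once 'cur' has reached or passed 'wanted', treating the 32-bit
 * counter as wrapping */
static inline bool seqno_passed(u32 cur, u32 wanted)
{
	return (cur - wanted) < 0x80000000U;
}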
Only the nouveau nv84 code has been tested, but the nvc0 code should work as well.
From: Maarten Lankhorst maarten.lankhorst@canonical.com
Core code based on ttm_bo and ttm_execbuf_util
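As a minimal sketch of the single-buffer primitives this adds (my_inspect_fence() is a hypothetical caller; real users are expected to go through the dmabufmgr_eu_* helpers):

#include <linux/dma-buf-mgr.h>

static int my_inspect_fence(struct dma_buf *buf)
{
	/* interruptible, blocking, no multi-buffer sequence number */
	int ret = dmabufmgr_bo_reserve(buf, true, false, false, 0);
	if (ret)
		return ret;

	/* buf->sync_buf / sync_ofs / sync_val are stable while reserved */

	dmabufmgr_bo_unreserve(buf);
	return 0;
}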
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
---
 drivers/base/Makefile         |   2 +-
 drivers/base/dma-buf-mgr-eu.c | 263 +++++++++++++++++++++++++++++++++++++++++
 drivers/base/dma-buf-mgr.c    | 149 +++++++++++++++++++++++
 drivers/base/dma-buf.c        |   4 +
 include/linux/dma-buf-mgr.h   | 150 +++++++++++++++++++++++
 include/linux/dma-buf.h       |  24 ++++
 6 files changed, 591 insertions(+), 1 deletion(-)
 create mode 100644 drivers/base/dma-buf-mgr-eu.c
 create mode 100644 drivers/base/dma-buf-mgr.c
 create mode 100644 include/linux/dma-buf-mgr.h
diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 5aa2d70..86e7598 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o -obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf.o +obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf.o dma-buf-mgr.o dma-buf-mgr-eu.o obj-$(CONFIG_ISA) += isa.o obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o diff --git a/drivers/base/dma-buf-mgr-eu.c b/drivers/base/dma-buf-mgr-eu.c new file mode 100644 index 0000000..ed5e01c --- /dev/null +++ b/drivers/base/dma-buf-mgr-eu.c @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2012 Canonical Ltd + * + * Based on ttm_bo.c which bears the following copyright notice, + * but is dual licensed: + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <linux/dma-buf-mgr.h> +#include <linux/sched.h> +#include <linux/export.h> + +static void dmabufmgr_eu_backoff_reservation_locked(struct list_head *list) +{ + struct dmabufmgr_validate *entry; + + list_for_each_entry(entry, list, head) { + struct dma_buf *bo = entry->bo; + if (!entry->reserved) + continue; + entry->reserved = false; + + bo->sync_buf = entry->sync_buf; + entry->sync_buf = NULL; + + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); + } +} + +static int +dmabufmgr_eu_wait_unreserved_locked(struct list_head *list, + struct dma_buf *bo) +{ + int ret; + + spin_unlock(&dmabufmgr.lru_lock); + ret = dmabufmgr_bo_wait_unreserved(bo, true); + spin_lock(&dmabufmgr.lru_lock); + if (unlikely(ret != 0)) + dmabufmgr_eu_backoff_reservation_locked(list); + return ret; +} + +void +dmabufmgr_eu_backoff_reservation(struct list_head *list) +{ + if (list_empty(list)) + return; + + spin_lock(&dmabufmgr.lru_lock); + dmabufmgr_eu_backoff_reservation_locked(list); + spin_unlock(&dmabufmgr.lru_lock); +} +EXPORT_SYMBOL_GPL(dmabufmgr_eu_backoff_reservation); + +int +dmabufmgr_eu_reserve_buffers(struct list_head *list) +{ + struct dmabufmgr_validate *entry; + int ret; + u32 val_seq; + + if (list_empty(list)) + return 0; + + list_for_each_entry(entry, list, head) { + entry->reserved = false; + entry->sync_buf = NULL; + } + +retry: + spin_lock(&dmabufmgr.lru_lock); + val_seq = dmabufmgr.counter++; + + list_for_each_entry(entry, list, head) { + struct dma_buf *bo = entry->bo; + +retry_this_bo: + ret = dmabufmgr_bo_reserve_locked(bo, true, true, true, val_seq); + switch (ret) { + case 0: + break; + case -EBUSY: + ret = dmabufmgr_eu_wait_unreserved_locked(list, bo); + if (unlikely(ret != 0)) { + spin_unlock(&dmabufmgr.lru_lock); + return ret; + } + goto retry_this_bo; + case -EAGAIN: + dmabufmgr_eu_backoff_reservation_locked(list); + spin_unlock(&dmabufmgr.lru_lock); + ret = dmabufmgr_bo_wait_unreserved(bo, true); + if (unlikely(ret != 0)) + return ret; + goto retry; + default: + dmabufmgr_eu_backoff_reservation_locked(list); + spin_unlock(&dmabufmgr.lru_lock); + return ret; + } + + entry->reserved = true; + entry->sync_buf = bo->sync_buf; + entry->sync_ofs = bo->sync_ofs; + entry->sync_val = bo->sync_val; + bo->sync_buf = NULL; + } + spin_unlock(&dmabufmgr.lru_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(dmabufmgr_eu_reserve_buffers); + +static int +dmabufmgr_eu_wait_single(struct dmabufmgr_validate *val, bool intr, bool lazy, unsigned long timeout) +{ + uint32_t *map, *seq, ofs; + unsigned long sleep_time = NSEC_PER_MSEC / 1000; + size_t start; + int ret = 0; + + if (!val->sync_buf) + return 0; + + start = val->sync_ofs & PAGE_MASK; + ofs = val->sync_ofs & ~PAGE_MASK; + + ret = dma_buf_begin_cpu_access(val->sync_buf, start, + start + PAGE_SIZE, + DMA_FROM_DEVICE); + if (ret) + return ret; + + map = dma_buf_kmap(val->sync_buf, val->sync_ofs >> PAGE_SHIFT); + seq = &map[ofs/4]; + + while (1) { + val->retval = *seq; + if (val->retval - val->sync_val < 0x80000000U) + break; + + if (time_after_eq(jiffies, timeout)) { + ret = -EBUSY; + break; + } + + set_current_state(intr ? 
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + + if (lazy) { + ktime_t t = ktime_set(0, sleep_time); + schedule_hrtimeout(&t, HRTIMER_MODE_REL); + if (sleep_time < NSEC_PER_MSEC) + sleep_time *= 2; + } else + cpu_relax(); + + if (intr && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } + + set_current_state(TASK_RUNNING); + + dma_buf_kunmap(val->sync_buf, val->sync_ofs >> PAGE_SHIFT, map); + dma_buf_end_cpu_access(val->sync_buf, start, + start + PAGE_SIZE, + DMA_FROM_DEVICE); + + val->waited = !ret; + if (!ret) { + dma_buf_put(val->sync_buf); + val->sync_buf = NULL; + } + return ret; +} + +int +dmabufmgr_eu_wait_completed_cpu(struct list_head *list, bool intr, bool lazy) +{ + struct dmabufmgr_validate *entry; + unsigned long timeout = jiffies + 4 * HZ; + int ret; + + list_for_each_entry(entry, list, head) { + ret = dmabufmgr_eu_wait_single(entry, intr, lazy, timeout); + if (ret && ret != -ERESTARTSYS) + pr_err("waiting returns %i %08x(exp %08x)\n", + ret, entry->retval, entry->sync_val); + if (ret) + goto err; + } + return 0; + +err: + list_for_each_entry_continue(entry, list, head) { + entry->waited = false; + entry->retval = -1; + } + return ret; +} +EXPORT_SYMBOL_GPL(dmabufmgr_eu_wait_completed_cpu); + +void +dmabufmgr_eu_fence_buffer_objects(struct dma_buf *sync_buf, u32 ofs, u32 seq, struct list_head *list) +{ + struct dmabufmgr_validate *entry; + struct dma_buf *bo; + + if (list_empty(list) || WARN_ON(!sync_buf)) + return; + + /* Don't use put with lock held, since the free function can + * deadlock, this might be alleviated whendeferred fput hits mainline + */ + list_for_each_entry(entry, list, head) { + if (entry->sync_buf) + dma_buf_put(entry->sync_buf); + entry->sync_buf = NULL; + entry->reserved = false; + } + + spin_lock(&dmabufmgr.lru_lock); + + list_for_each_entry(entry, list, head) { + bo = entry->bo; + + get_dma_buf(sync_buf); + bo->sync_buf = sync_buf; + bo->sync_ofs = ofs; + bo->sync_val = seq; + + dmabufmgr_bo_unreserve_locked(bo); + } + + spin_unlock(&dmabufmgr.lru_lock); +} +EXPORT_SYMBOL_GPL(dmabufmgr_eu_fence_buffer_objects); diff --git a/drivers/base/dma-buf-mgr.c b/drivers/base/dma-buf-mgr.c new file mode 100644 index 0000000..14756ff --- /dev/null +++ b/drivers/base/dma-buf-mgr.c @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2012 Canonical Ltd + * + * Based on ttm_bo.c which bears the following copyright notice, + * but is dual licensed: + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + */ + + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/dma-buf-mgr.h> +#include <linux/anon_inodes.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/list.h> + +/* Based on ttm_bo.c with vm_lock and fence_lock removed + * lru_lock takes care of fence_lock as well + */ +struct dmabufmgr dmabufmgr = { + .lru_lock = __SPIN_LOCK_UNLOCKED(dmabufmgr.lru_lock), + .counter = 1, +}; + +int +dmabufmgr_bo_reserve_locked(struct dma_buf *bo, + bool interruptible, bool no_wait, + bool use_sequence, u32 sequence) +{ + int ret; + + while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) { + /** + * Deadlock avoidance for multi-bo reserving. + */ + if (use_sequence && bo->seq_valid) { + /** + * We've already reserved this one. + */ + if (unlikely(sequence == bo->val_seq)) + return -EDEADLK; + /** + * Already reserved by a thread that will not back + * off for us. We need to back off. + */ + if (unlikely(sequence - bo->val_seq < (1 << 31))) + return -EAGAIN; + } + + if (no_wait) + return -EBUSY; + + spin_unlock(&dmabufmgr.lru_lock); + ret = dmabufmgr_bo_wait_unreserved(bo, interruptible); + spin_lock(&dmabufmgr.lru_lock); + + if (unlikely(ret)) + return ret; + } + + if (use_sequence) { + /** + * Wake up waiters that may need to recheck for deadlock, + * if we decreased the sequence number. + */ + if (unlikely((bo->val_seq - sequence < (1 << 31)) + || !bo->seq_valid)) + wake_up_all(&bo->event_queue); + + bo->val_seq = sequence; + bo->seq_valid = true; + } else { + bo->seq_valid = false; + } + + return 0; +} +EXPORT_SYMBOL_GPL(dmabufmgr_bo_reserve_locked); + +int +dmabufmgr_bo_reserve(struct dma_buf *bo, + bool interruptible, bool no_wait, + bool use_sequence, u32 sequence) +{ + int ret; + + spin_lock(&dmabufmgr.lru_lock); + ret = dmabufmgr_bo_reserve_locked(bo, interruptible, no_wait, + use_sequence, sequence); + spin_unlock(&dmabufmgr.lru_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dmabufmgr_bo_reserve); + +int +dmabufmgr_bo_wait_unreserved(struct dma_buf *bo, bool interruptible) +{ + if (interruptible) { + return wait_event_interruptible(bo->event_queue, + atomic_read(&bo->reserved) == 0); + } else { + wait_event(bo->event_queue, atomic_read(&bo->reserved) == 0); + return 0; + } +} +EXPORT_SYMBOL_GPL(dmabufmgr_bo_wait_unreserved); + +void dmabufmgr_bo_unreserve_locked(struct dma_buf *bo) +{ + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); +} +EXPORT_SYMBOL_GPL(dmabufmgr_bo_unreserve_locked); + +void dmabufmgr_bo_unreserve(struct dma_buf *bo) +{ + spin_lock(&dmabufmgr.lru_lock); + dmabufmgr_bo_unreserve_locked(bo); + spin_unlock(&dmabufmgr.lru_lock); +} +EXPORT_SYMBOL_GPL(dmabufmgr_bo_unreserve); diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index 24e88fe..01c4f71 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -40,6 +40,9 @@ static int dma_buf_release(struct inode *inode, struct file *file) dmabuf = file->private_data;
dmabuf->ops->release(dmabuf); + BUG_ON(waitqueue_active(&dmabuf->event_queue)); + if (dmabuf->sync_buf) + dma_buf_put(dmabuf->sync_buf); kfree(dmabuf); return 0; } @@ -119,6 +122,7 @@ struct dma_buf *dma_buf_export(void *priv, const struct dma_buf_ops *ops,
mutex_init(&dmabuf->lock); INIT_LIST_HEAD(&dmabuf->attachments); + init_waitqueue_head(&dmabuf->event_queue);
return dmabuf; } diff --git a/include/linux/dma-buf-mgr.h b/include/linux/dma-buf-mgr.h new file mode 100644 index 0000000..8caadc8 --- /dev/null +++ b/include/linux/dma-buf-mgr.h @@ -0,0 +1,150 @@ +/* + * Header file for dma buffer sharing framework. + * + * Copyright(C) 2011 Linaro Limited. All rights reserved. + * Author: Sumit Semwal sumit.semwal@ti.com + * + * Many thanks to linaro-mm-sig list, and specially + * Arnd Bergmann arnd@arndb.de, Rob Clark rob@ti.com and + * Daniel Vetter daniel@ffwll.ch for their support in creation and + * refining of this idea. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see http://www.gnu.org/licenses/. + */ +#ifndef __DMA_BUF_MGR_H__ +#define __DMA_BUF_MGR_H__ + +#include <linux/dma-buf.h> +#include <linux/list.h> + +struct dmabufmgr { + spinlock_t lru_lock; + + u32 counter; +}; +extern struct dmabufmgr dmabufmgr; + +/** execbuf util support for reservations + * based on ttm_execbuf_util + */ +struct dmabufmgr_validate { + struct list_head head; + struct dma_buf *bo; + bool reserved; + void *priv; + + /** synchronization dma_buf + ofs/val to wait on */ + struct dma_buf *sync_buf; + u32 sync_ofs, sync_val; + + /** status returned from dmabufmgr_eu_wait_completed_cpu */ + bool waited; + u32 retval; +}; + +#ifdef CONFIG_DMA_SHARED_BUFFER + +extern int +dmabufmgr_bo_reserve_locked(struct dma_buf *bo, + bool interruptible, bool no_wait, + bool use_sequence, u32 sequence); + +extern int +dmabufmgr_bo_reserve(struct dma_buf *bo, + bool interruptible, bool no_wait, + bool use_sequence, u32 sequence); + +extern void +dmabufmgr_bo_unreserve_locked(struct dma_buf *bo); + +extern void +dmabufmgr_bo_unreserve(struct dma_buf *bo); + +extern int +dmabufmgr_bo_wait_unreserved(struct dma_buf *bo, bool interruptible); + +/** reserve a linked list of struct dmabufmgr_validate entries */ +extern int +dmabufmgr_eu_reserve_buffers(struct list_head *list); + +/** Undo reservation */ +extern void +dmabufmgr_eu_backoff_reservation(struct list_head *list); + +/** Commit reservation */ +extern void +dmabufmgr_eu_fence_buffer_objects(struct dma_buf *sync_buf, u32 ofs, u32 val, struct list_head *list); + +/** Wait for completion on cpu + * intr: interruptible wait + * lazy: try once every tick instead of busywait + */ +extern int +dmabufmgr_eu_wait_completed_cpu(struct list_head *list, bool intr, bool lazy); + +#else /* CONFIG_DMA_SHARED_BUFFER */ + +static inline int +dmabufmgr_bo_reserve_locked(struct dma_buf *bo, + bool interruptible, bool no_wait, + bool use_sequence, u32 sequence) +{ + return -ENODEV; +} + +static inline int +dmabufmgr_bo_reserve(struct dma_buf *bo, + bool interruptible, bool no_wait, + bool use_sequence, u32 sequence) +{ + return -ENODEV; +} + +static inline void +dmabufmgr_bo_unreserve_locked(struct dma_buf *bo) +{} + +static inline void +dmabufmgr_bo_unreserve(struct dma_buf *bo) +{} + +static inline int +dmabufmgr_bo_wait_unreserved(struct dma_buf *bo, bool interruptible) +{} + +/** reserve a linked list of struct dmabufmgr_validate entries */ +static 
inline int +dmabufmgr_eu_reserve_buffers(struct list_head *list) +{ + return list_empty(list) ? 0 : -ENODEV; +} + +/** Undo reservation */ +static inline void +dmabufmgr_eu_backoff_reservation(struct list_head *list) +{} + +/** Commit reservation */ +static inline void +dmabufmgr_eu_fence_buffer_objects(struct dma_buf *sync_buf, u32 ofs, u32 val, struct list_head *list) +{} + +static inline int +dmabufmgr_eu_wait_completed_cpu(struct list_head *list, bool intr, bool lazy) +{ + return list_empty(list) ? 0 : -ENODEV; +} + +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +#endif /* __DMA_BUF_MGR_H__ */ diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index eb48f38..544644d 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -113,6 +113,8 @@ struct dma_buf_ops { * @attachments: list of dma_buf_attachment that denotes all devices attached. * @ops: dma_buf_ops associated with this buffer object. * @priv: exporter specific private data for this buffer object. + * @bufmgr_entry: used by dmabufmgr + * @bufdev: used by dmabufmgr */ struct dma_buf { size_t size; @@ -122,6 +124,28 @@ struct dma_buf { /* mutex to serialize list manipulation and attach/detach */ struct mutex lock; void *priv; + + /** dmabufmgr members */ + wait_queue_head_t event_queue; + + /** + * dmabufmgr members protected by the dmabufmgr::lru_lock. + */ + u32 val_seq; + bool seq_valid; + + /** sync_buf can be set to 0 with just dmabufmgr::lru_lock held, + * but can only be set to non-null when unreserving with + * dmabufmgr::lru_lock held + */ + struct dma_buf *sync_buf; + u32 sync_ofs, sync_val; + + /** + * dmabufmgr members protected by the dmabufmgr::lru_lock + * only when written to. + */ + atomic_t reserved; };
/**
From: Maarten Lankhorst maarten.lankhorst@canonical.com
Export the hardware status page so that other devices can read the seqno.
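A rough sketch of how an importer could peek at that seqno through the exported dma-buf, mirroring what dmabufmgr_eu_wait_completed_cpu does internally (read_exported_seqno() is a made-up name, not an API added by this patch):

#include <linux/dma-buf.h>

static int read_exported_seqno(struct dma_buf *sync_buf, u32 sync_ofs,
			       u32 *seqno)
{
	size_t start = sync_ofs & PAGE_MASK;
	u32 *page;
	int ret;

	ret = dma_buf_begin_cpu_access(sync_buf, start, PAGE_SIZE,
				       DMA_FROM_DEVICE);
	if (ret)
		return ret;

	page = dma_buf_kmap(sync_buf, sync_ofs >> PAGE_SHIFT);
	*seqno = page[(sync_ofs & ~PAGE_MASK) / 4];
	dma_buf_kunmap(sync_buf, sync_ofs >> PAGE_SHIFT, page);

	dma_buf_end_cpu_access(sync_buf, start, PAGE_SIZE, DMA_FROM_DEVICE);
	return 0;
}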
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
---
 drivers/gpu/drm/i915/i915_gem_dmabuf.c     | 29 ++++++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 87 ++++++++++++++++++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 42 ++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  3 +
 4 files changed, 145 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index aa308e1..d6bcfdc 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -66,12 +66,25 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, static void i915_gem_dmabuf_release(struct dma_buf *dma_buf) { struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_device *dev = obj->base.dev; + + mutex_lock(&dev->struct_mutex);
if (obj->base.export_dma_buf == dma_buf) { - /* drop the reference on the export fd holds */ obj->base.export_dma_buf = NULL; - drm_gem_object_unreference_unlocked(&obj->base); + } else { + drm_i915_private_t *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + int i; + + for_each_ring(ring, dev_priv, i) + WARN_ON(ring->sync_buf == dma_buf); } + + /* drop the reference on the export fd holds */ + drm_gem_object_unreference(&obj->base); + + mutex_unlock(&dev->struct_mutex); }
static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) @@ -129,21 +142,25 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
static void *i915_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) { - return NULL; + struct drm_i915_gem_object *obj = dma_buf->priv; + return kmap_atomic(obj->pages[page_num]); }
static void i915_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long page_num, void *addr) { - + kunmap_atomic(addr); } + static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) { - return NULL; + struct drm_i915_gem_object *obj = dma_buf->priv; + return kmap(obj->pages[page_num]); }
static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) { - + struct drm_i915_gem_object *obj = dma_buf->priv; + kunmap(obj->pages[page_num]); }
static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 88e2e11..245340e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -33,6 +33,7 @@ #include "i915_trace.h" #include "intel_drv.h" #include <linux/dma_remapping.h> +#include <linux/dma-buf-mgr.h>
struct change_domains { uint32_t invalidate_domains; @@ -556,7 +557,8 @@ err_unpin: static int i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct drm_file *file, - struct list_head *objects) + struct list_head *objects, + struct list_head *prime_val) { drm_i915_private_t *dev_priv = ring->dev->dev_private; struct drm_i915_gem_object *obj; @@ -564,6 +566,31 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; struct list_head ordered_objects;
+ list_for_each_entry(obj, objects, exec_list) { + struct dmabufmgr_validate *val; + + if (!(obj->base.import_attach || + obj->base.export_dma_buf)) + continue; + + val = kzalloc(sizeof(*val), GFP_KERNEL); + if (!val) + return -ENOMEM; + + if (obj->base.export_dma_buf) + val->bo = obj->base.export_dma_buf; + else + val->bo = obj->base.import_attach->dmabuf; + val->priv = obj; + list_add_tail(&val->head, prime_val); + } + + if (!list_empty(prime_val)) { + ret = dmabufmgr_eu_reserve_buffers(prime_val); + if (ret) + return ret; + } + INIT_LIST_HEAD(&ordered_objects); while (!list_empty(objects)) { struct drm_i915_gem_exec_object2 *entry; @@ -712,6 +739,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, struct drm_file *file, struct intel_ring_buffer *ring, struct list_head *objects, + struct list_head *prime_val, struct eb_objects *eb, struct drm_i915_gem_exec_object2 *exec, int count) @@ -722,6 +750,16 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, int i, total, ret;
/* We may process another execbuffer during the unlock... */ + + if (!list_empty(prime_val)) + dmabufmgr_eu_backoff_reservation(prime_val); + while (!list_empty(prime_val)) { + struct dmabufmgr_validate *val; + val = list_first_entry(prime_val, typeof(*val), head); + list_del(&val->head); + kfree(val); + } + while (!list_empty(objects)) { obj = list_first_entry(objects, struct drm_i915_gem_object, @@ -786,7 +824,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, eb_add_object(eb, obj); }
- ret = i915_gem_execbuffer_reserve(ring, file, objects); + ret = i915_gem_execbuffer_reserve(ring, file, objects, prime_val); if (ret) goto err;
@@ -854,10 +892,10 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) return 0; }
- static int i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, - struct list_head *objects) + struct list_head *objects, + struct list_head *prime_val) { struct drm_i915_gem_object *obj; struct change_domains cd; @@ -941,7 +979,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain;
- obj->base.read_domains = obj->base.pending_read_domains; obj->base.write_domain = obj->base.pending_write_domain; obj->fenced_gpu_access = obj->pending_fenced_gpu_access; @@ -1012,6 +1049,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, { drm_i915_private_t *dev_priv = dev->dev_private; struct list_head objects; + struct list_head prime_val; struct eb_objects *eb; struct drm_i915_gem_object *batch_obj; struct drm_clip_rect *cliprects = NULL; @@ -1145,6 +1183,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* Look up object handles */ INIT_LIST_HEAD(&objects); + INIT_LIST_HEAD(&prime_val); for (i = 0; i < args->buffer_count; i++) { struct drm_i915_gem_object *obj;
@@ -1176,8 +1215,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_i915_gem_object, exec_list);
+ if (batch_obj->base.export_dma_buf || batch_obj->base.import_attach) { + DRM_DEBUG("Batch buffer should really not be prime..\n"); + ret = -EINVAL; + goto err; + } + /* Move the objects en-masse into the GTT, evicting if necessary. */ - ret = i915_gem_execbuffer_reserve(ring, file, &objects); + ret = i915_gem_execbuffer_reserve(ring, file, &objects, &prime_val); if (ret) goto err;
@@ -1186,8 +1231,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) { if (ret == -EFAULT) { ret = i915_gem_execbuffer_relocate_slow(dev, file, ring, - &objects, eb, - exec, + &objects, + &prime_val, + eb, exec, args->buffer_count); BUG_ON(!mutex_is_locked(&dev->struct_mutex)); } @@ -1203,7 +1249,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
- ret = i915_gem_execbuffer_move_to_gpu(ring, &objects); + ret = i915_gem_execbuffer_move_to_gpu(ring, &objects, &prime_val); if (ret) goto err;
@@ -1227,7 +1273,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, mode != dev_priv->relative_constants_mode) { ret = intel_ring_begin(ring, 4); if (ret) - goto err; + goto err;
intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); @@ -1248,6 +1294,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto err;
+ ret = dmabufmgr_eu_wait_completed_cpu(&prime_val, true, true); + if (ret) + goto err; + trace_i915_gem_ring_dispatch(ring, seqno);
exec_start = batch_obj->gtt_offset + args->batch_start_offset; @@ -1272,8 +1322,25 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
i915_gem_execbuffer_move_to_active(&objects, ring, seqno); i915_gem_execbuffer_retire_commands(dev, file, ring); + if (!list_empty(&prime_val)) { + BUG_ON(!ring->sync_buf); + WARN_ON_ONCE(seqno == ring->outstanding_lazy_request); + + dmabufmgr_eu_fence_buffer_objects(ring->sync_buf, + ring->sync_seqno_ofs, + seqno, &prime_val); + } + goto out;
err: + dmabufmgr_eu_backoff_reservation(&prime_val); +out: + while (!list_empty(&prime_val)) { + struct dmabufmgr_validate *val; + val = list_first_entry(&prime_val, typeof(*val), head); + list_del(&val->head); + kfree(val); + } eb_destroy(eb); while (!list_empty(&objects)) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d42d821..24795e1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,6 +33,7 @@ #include "i915_drm.h" #include "i915_trace.h" #include "intel_drv.h" +#include <linux/dma-buf.h>
/* * 965+ support PIPE_CONTROL commands, which provide finer grained control @@ -383,6 +384,22 @@ init_pipe_control(struct intel_ring_buffer *ring) if (pc->cpu_page == NULL) goto err_unpin;
+#ifdef CONFIG_DMA_SHARED_BUFFER + if (IS_GEN5(ring->dev)) { + struct dma_buf *dmabuf; + dmabuf = i915_gem_prime_export(ring->dev, &obj->base, 0); + if (IS_ERR(dmabuf)) { + ret = PTR_ERR(dmabuf); + kunmap(obj->pages[0]); + pc->cpu_page = NULL; + goto err_unpin; + } + drm_gem_object_reference(&obj->base); + ring->sync_buf = dmabuf; + ring->sync_seqno_ofs = 0; + } +#endif + pc->obj = obj; ring->private = pc; return 0; @@ -419,6 +436,8 @@ static int init_render_ring(struct intel_ring_buffer *ring) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; int ret = init_ring_common(ring); + if (ret) + return ret;
if (INTEL_INFO(dev)->gen > 3) { I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); @@ -943,6 +962,14 @@ static void cleanup_status_page(struct intel_ring_buffer *ring) if (obj == NULL) return;
+ if (ring->sync_buf) { + struct dma_buf *dmabuf; + + dmabuf = ring->sync_buf; + ring->sync_buf = NULL; + dma_buf_put(dmabuf); + } + kunmap(obj->pages[0]); i915_gem_object_unpin(obj); drm_gem_object_unreference(&obj->base); @@ -974,6 +1001,21 @@ static int init_status_page(struct intel_ring_buffer *ring) if (ring->status_page.page_addr == NULL) { goto err_unpin; } + +#ifdef CONFIG_DMA_SHARED_BUFFER + if (!IS_GEN5(ring->dev) || ring->init == init_ring_common) { + struct dma_buf *dmabuf; + dmabuf = i915_gem_prime_export(dev, &obj->base, 0); + if (IS_ERR(dmabuf)) { + ret = PTR_ERR(dmabuf); + kunmap(obj->pages[0]); + goto err_unpin; + } + drm_gem_object_reference(&obj->base); + ring->sync_buf = dmabuf; + ring->sync_seqno_ofs = I915_GEM_HWS_INDEX * 4; + } +#endif ring->status_page.obj = obj; memset(ring->status_page.page_addr, 0, PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1d3c81f..c878b14 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -124,6 +124,9 @@ struct intel_ring_buffer { struct i915_hw_context *default_context; struct drm_i915_gem_object *last_context_obj;
+ struct dma_buf *sync_buf; + u32 sync_seqno_ofs; + void *private; };
From: Maarten Lankhorst maarten.lankhorst@canonical.com
The prime code no longer requires the bo to be backed by a GEM object, and the CPU access calls have been implemented. This will be needed for exporting fence bos.
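A hedged sketch of what this enables, along the lines of how the later fence patch exports its semaphore buffer (my_export_fence_bo() is illustrative; the 0400 flags keep importers read-only):

static int my_export_fence_bo(struct drm_device *dev, struct dma_buf **buf)
{
	struct nouveau_bo *nvbo = NULL;
	int ret;

	/* plain TTM bo, no GEM wrapper */
	ret = nouveau_bo_new(dev, PAGE_SIZE, 0, TTM_PL_FLAG_TT,
			     0, 0, NULL, &nvbo);
	if (ret)
		return ret;

	ret = nouveau_gem_prime_export_bo(nvbo, 0400, PAGE_SIZE, buf);
	if (ret)
		nouveau_bo_ref(NULL, &nvbo);
	return ret;
}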
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
---
 drivers/gpu/drm/nouveau/nouveau_drv.h   |   6 +-
 drivers/gpu/drm/nouveau/nouveau_prime.c | 106 +++++++++++++++++++++----------
 2 files changed, 79 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 8613cb2..7c52eba 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -1374,11 +1374,15 @@ extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *, extern int nouveau_gem_ioctl_info(struct drm_device *, void *, struct drm_file *);
+extern int nouveau_gem_prime_export_bo(struct nouveau_bo *nvbo, int flags, + u32 size, struct dma_buf **ret); extern struct dma_buf *nouveau_gem_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags); extern struct drm_gem_object *nouveau_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); - +extern int nouveau_prime_import_bo(struct drm_device *dev, + struct dma_buf *dma_buf, + struct nouveau_bo **pnvbo, bool gem); /* nouveau_display.c */ int nouveau_display_create(struct drm_device *dev); void nouveau_display_destroy(struct drm_device *dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index a25cf2c..537154d3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -35,7 +35,8 @@ static struct sg_table *nouveau_gem_map_dma_buf(struct dma_buf_attachment *attac enum dma_data_direction dir) { struct nouveau_bo *nvbo = attachment->dmabuf->priv; - struct drm_device *dev = nvbo->gem->dev; + struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev); + struct drm_device *dev = dev_priv->dev; int npages = nvbo->bo.num_pages; struct sg_table *sg; int nents; @@ -59,29 +60,37 @@ static void nouveau_gem_dmabuf_release(struct dma_buf *dma_buf) { struct nouveau_bo *nvbo = dma_buf->priv;
- if (nvbo->gem->export_dma_buf == dma_buf) { - nvbo->gem->export_dma_buf = NULL; + nouveau_bo_unpin(nvbo); + if (!nvbo->gem) + nouveau_bo_ref(NULL, &nvbo); + else { + if (nvbo->gem->export_dma_buf == dma_buf) + nvbo->gem->export_dma_buf = NULL; drm_gem_object_unreference_unlocked(nvbo->gem); } }
static void *nouveau_gem_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) { - return NULL; + struct nouveau_bo *nvbo = dma_buf->priv; + return kmap_atomic(nvbo->bo.ttm->pages[page_num]); }
static void nouveau_gem_kunmap_atomic(struct dma_buf *dma_buf, unsigned long page_num, void *addr) { - + kunmap_atomic(addr); } + static void *nouveau_gem_kmap(struct dma_buf *dma_buf, unsigned long page_num) { - return NULL; + struct nouveau_bo *nvbo = dma_buf->priv; + return kmap(nvbo->bo.ttm->pages[page_num]); }
static void nouveau_gem_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) { - + struct nouveau_bo *nvbo = dma_buf->priv; + return kunmap(nvbo->bo.ttm->pages[page_num]); }
static int nouveau_gem_prime_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) @@ -92,7 +101,8 @@ static int nouveau_gem_prime_mmap(struct dma_buf *dma_buf, struct vm_area_struct static void *nouveau_gem_prime_vmap(struct dma_buf *dma_buf) { struct nouveau_bo *nvbo = dma_buf->priv; - struct drm_device *dev = nvbo->gem->dev; + struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev); + struct drm_device *dev = dev_priv->dev; int ret;
mutex_lock(&dev->struct_mutex); @@ -116,7 +126,8 @@ out_unlock: static void nouveau_gem_prime_vunmap(struct dma_buf *dma_buf, void *vaddr) { struct nouveau_bo *nvbo = dma_buf->priv; - struct drm_device *dev = nvbo->gem->dev; + struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev); + struct drm_device *dev = dev_priv->dev;
mutex_lock(&dev->struct_mutex); nvbo->vmapping_count--; @@ -140,10 +151,9 @@ static const struct dma_buf_ops nouveau_dmabuf_ops = { };
static int -nouveau_prime_new(struct drm_device *dev, - size_t size, +nouveau_prime_new(struct drm_device *dev, size_t size, struct sg_table *sg, - struct nouveau_bo **pnvbo) + struct nouveau_bo **pnvbo, bool gem) { struct nouveau_bo *nvbo; u32 flags = 0; @@ -156,12 +166,10 @@ nouveau_prime_new(struct drm_device *dev, if (ret) return ret; nvbo = *pnvbo; - - /* we restrict allowed domains on nv50+ to only the types - * that were requested at creation time. not possibly on - * earlier chips without busting the ABI. - */ nvbo->valid_domains = NOUVEAU_GEM_DOMAIN_GART; + if (!gem) + return 0; + nvbo->gem = drm_gem_object_alloc(dev, nvbo->bo.mem.size); if (!nvbo->gem) { nouveau_bo_ref(NULL, pnvbo); @@ -172,22 +180,37 @@ nouveau_prime_new(struct drm_device *dev, return 0; }
-struct dma_buf *nouveau_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *obj, int flags) +int nouveau_gem_prime_export_bo(struct nouveau_bo *nvbo, int flags, + u32 size, struct dma_buf **buf) { - struct nouveau_bo *nvbo = nouveau_gem_object(obj); int ret = 0; + *buf = NULL;
/* pin buffer into GTT */ ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_TT); if (ret) - return ERR_PTR(-EINVAL); + return -EINVAL; + + *buf = dma_buf_export(nvbo, &nouveau_dmabuf_ops, size, flags); + if (!IS_ERR(*buf)) + return 0;
- return dma_buf_export(nvbo, &nouveau_dmabuf_ops, obj->size, flags); + nouveau_bo_unpin(nvbo); + return PTR_ERR(*buf); +} + +struct dma_buf *nouveau_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *obj, int flags) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(obj); + struct dma_buf *buf; + nouveau_gem_prime_export_bo(nvbo, flags, obj->size, &buf); + return buf; }
-struct drm_gem_object *nouveau_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf) +int nouveau_prime_import_bo(struct drm_device *dev, + struct dma_buf *dma_buf, + struct nouveau_bo **pnvbo, bool gem) { struct dma_buf_attachment *attach; struct sg_table *sg; @@ -196,17 +219,22 @@ struct drm_gem_object *nouveau_gem_prime_import(struct drm_device *dev,
if (dma_buf->ops == &nouveau_dmabuf_ops) { nvbo = dma_buf->priv; - if (nvbo->gem) { + if (!gem) { + nouveau_bo_ref(nvbo, pnvbo); + return 0; + } + else if (nvbo->gem) { if (nvbo->gem->dev == dev) { drm_gem_object_reference(nvbo->gem); - return nvbo->gem; + *pnvbo = nvbo; + return 0; } } } /* need to attach */ attach = dma_buf_attach(dma_buf, dev->dev); if (IS_ERR(attach)) - return ERR_PTR(PTR_ERR(attach)); + return PTR_ERR(attach);
sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); if (IS_ERR(sg)) { @@ -214,18 +242,32 @@ struct drm_gem_object *nouveau_gem_prime_import(struct drm_device *dev, goto fail_detach; }
- ret = nouveau_prime_new(dev, dma_buf->size, sg, &nvbo); + ret = nouveau_prime_new(dev, dma_buf->size, sg, pnvbo, gem); if (ret) goto fail_unmap;
- nvbo->gem->import_attach = attach; - - return nvbo->gem; + if (gem) + (*pnvbo)->gem->import_attach = attach; + BUG_ON(attach->priv); + attach->priv = *pnvbo; + return 0;
fail_unmap: dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); fail_detach: dma_buf_detach(dma_buf, attach); - return ERR_PTR(ret); + return ret; +} + +struct drm_gem_object * +nouveau_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) +{ + struct nouveau_bo *nvbo = NULL; + int ret; + + ret = nouveau_prime_import_bo(dev, dma_buf, &nvbo, true); + if (ret) + return ERR_PTR(ret); + return nvbo->gem; }
From: Maarten Lankhorst maarten.lankhorst@canonical.com
This is needed to allow creating read-only VM mappings of fence objects.
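For illustration, the nv84 import path added later in the series is expected to map an imported sync bo roughly like this (my_map_sync_bo_ro() and its arguments are placeholders; NV_MEM_ACCESS_RO is nouveau's existing access flag):

static int my_map_sync_bo_ro(struct nouveau_channel *chan,
			     struct nouveau_bo *sync_bo,
			     struct nouveau_vma *vma)
{
	/* read-only: the importing channel must never write into
	 * another device's fence page */
	return nouveau_bo_vma_add_access(sync_bo, chan->vm, vma,
					 NV_MEM_ACCESS_RO);
}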
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  | 6 +++---
 drivers/gpu/drm/nouveau/nouveau_drv.h | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 7f80ed5..4318320 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1443,15 +1443,15 @@ nouveau_bo_vma_find(struct nouveau_bo *nvbo, struct nouveau_vm *vm) }
int -nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm, - struct nouveau_vma *vma) +nouveau_bo_vma_add_access(struct nouveau_bo *nvbo, struct nouveau_vm *vm, + struct nouveau_vma *vma, u32 access) { const u32 size = nvbo->bo.mem.num_pages << PAGE_SHIFT; struct nouveau_mem *node = nvbo->bo.mem.mm_node; int ret;
ret = nouveau_vm_get(vm, size, nvbo->page_shift, - NV_MEM_ACCESS_RW, vma); + access, vma); if (ret) return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 7c52eba..2c17989 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -1350,8 +1350,10 @@ extern int nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
extern struct nouveau_vma * nouveau_bo_vma_find(struct nouveau_bo *, struct nouveau_vm *); -extern int nouveau_bo_vma_add(struct nouveau_bo *, struct nouveau_vm *, - struct nouveau_vma *); +#define nouveau_bo_vma_add(nvbo, vm, vma) \ + nouveau_bo_vma_add_access((nvbo), (vm), (vma), NV_MEM_ACCESS_RW) +extern int nouveau_bo_vma_add_access(struct nouveau_bo *, struct nouveau_vm *, + struct nouveau_vma *, u32 access); extern void nouveau_bo_vma_del(struct nouveau_bo *, struct nouveau_vma *);
/* nouveau_gem.c */
From: Maarten Lankhorst maarten.lankhorst@canonical.com
This can be used by nv84 and nvc0 to implement hardware fencing; earlier systems will require more thought, but they can fall back to a software wait for now.
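A sketch of the per-submission decision this allows, assuming a list of dmabufmgr_validate entries has already been reserved (my_sync_shared_bufs() is a hypothetical wrapper around the calls added in this patch):

static int my_sync_shared_bufs(struct nouveau_channel *chan,
			       struct list_head *prime_list)
{
	struct dmabufmgr_validate *val;
	int ret;

	list_for_each_entry(val, prime_list, head) {
		/* hardware semaphore wait where the chipset supports it */
		ret = nouveau_fence_sync_prime(chan, val);
		if (ret == -ENODEV)
			/* no hardware support: fall back to a CPU wait */
			return dmabufmgr_eu_wait_completed_cpu(prime_list,
							       true, true);
		if (ret)
			return ret;
	}
	return 0;
}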
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c      |   6 +-
 drivers/gpu/drm/nouveau/nouveau_channel.c |   2 +-
 drivers/gpu/drm/nouveau/nouveau_display.c |   2 +-
 drivers/gpu/drm/nouveau/nouveau_dma.h     |   1 +
 drivers/gpu/drm/nouveau/nouveau_drv.h     |   5 +
 drivers/gpu/drm/nouveau/nouveau_fence.c   | 242 ++++++++++++++++++++++++++++-
 drivers/gpu/drm/nouveau/nouveau_fence.h   |  44 +++++-
 drivers/gpu/drm/nouveau/nouveau_gem.c     |   6 +-
 drivers/gpu/drm/nouveau/nouveau_prime.c   |   2 +
 drivers/gpu/drm/nouveau/nv04_fence.c      |   4 +-
 drivers/gpu/drm/nouveau/nv10_fence.c      |   4 +-
 drivers/gpu/drm/nouveau/nv84_fence.c      |   4 +-
 drivers/gpu/drm/nouveau/nvc0_fence.c      |   4 +-
 13 files changed, 304 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 4318320..a97025a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -52,6 +52,9 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) DRM_ERROR("bo %p still attached to GEM object\n", bo);
nv10_mem_put_tile_region(dev, nvbo->tile, NULL); + + if (nvbo->fence_import_attach) + nouveau_fence_prime_del_bo(nvbo); kfree(nvbo); }
@@ -109,6 +112,7 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, INIT_LIST_HEAD(&nvbo->head); INIT_LIST_HEAD(&nvbo->entry); INIT_LIST_HEAD(&nvbo->vma_list); + INIT_LIST_HEAD(&nvbo->prime_chan_entries); nvbo->tile_mode = tile_mode; nvbo->tile_flags = tile_flags; nvbo->bo.bdev = &dev_priv->ttm.bdev; @@ -480,7 +484,7 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan, struct nouveau_fence *fence = NULL; int ret;
- ret = nouveau_fence_new(chan, &fence); + ret = nouveau_fence_new(chan, &fence, false); if (ret) return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c index 629d8a2..85a8556 100644 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c @@ -362,7 +362,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) struct nouveau_fence *fence = NULL; int ret;
- ret = nouveau_fence_new(chan, &fence); + ret = nouveau_fence_new(chan, &fence, false); if (!ret) { ret = nouveau_fence_wait(fence, false, false); nouveau_fence_unref(&fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 69688ef..7c76776 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -466,7 +466,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, } FIRE_RING (chan);
- ret = nouveau_fence_new(chan, pfence); + ret = nouveau_fence_new(chan, pfence, false); if (ret) goto fail;
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index 8db68be..d02ffd3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -74,6 +74,7 @@ enum { NvEvoSema0 = 0x80000010, NvEvoSema1 = 0x80000011, NvNotify1 = 0x80000012, + NvSemaPrime = 0x8000001f,
/* G80+ display objects */ NvEvoVRAM = 0x01000000, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 2c17989..ad49594 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -126,6 +126,11 @@ struct nouveau_bo {
struct ttm_bo_kmap_obj dma_buf_vmap; int vmapping_count; + + /* fence related stuff */ + struct nouveau_bo *sync_bo; + struct list_head prime_chan_entries; + struct dma_buf_attachment *fence_import_attach; };
#define nouveau_bo_tile_layout(nvbo) \ diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 3c18049..d4c9c40 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -29,17 +29,64 @@
#include <linux/ktime.h> #include <linux/hrtimer.h> +#include <linux/dma-buf.h>
#include "nouveau_drv.h" #include "nouveau_ramht.h" #include "nouveau_fence.h" #include "nouveau_software.h" #include "nouveau_dma.h" +#include "nouveau_fifo.h" + +int nouveau_fence_prime_init(struct drm_device *dev, + struct nouveau_fence_priv *priv, u32 align) +{ + int ret = 0; +#ifdef CONFIG_DMA_SHARED_BUFFER + struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO); + u32 size = PAGE_ALIGN(pfifo->channels * align); + + mutex_init(&priv->prime_lock); + priv->prime_align = align; + ret = nouveau_bo_new(dev, size, 0, TTM_PL_FLAG_TT, + 0, 0, NULL, &priv->prime_bo); + if (ret) + return ret; + ret = nouveau_bo_map(priv->prime_bo); + if (ret) + goto err; + + ret = nouveau_gem_prime_export_bo(priv->prime_bo, 0400, size, + &priv->prime_buf); + if (ret) { + priv->prime_buf = NULL; + nouveau_bo_unmap(priv->prime_bo); + goto err; + } + return 0; + +err: + nouveau_bo_ref(NULL, &priv->prime_bo); +#endif + return ret; +} + +void nouveau_fence_prime_del(struct nouveau_fence_priv *priv) +{ + /* Our reference to prime_bo is released by freeing prime_buf */ + if (priv->prime_buf) + dma_buf_put(priv->prime_buf); + priv->prime_bo = NULL; + +}
void -nouveau_fence_context_del(struct nouveau_fence_chan *fctx) +nouveau_fence_context_del(struct drm_device *dev, + struct nouveau_fence_chan *fctx) { struct nouveau_fence *fence, *fnext; + struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE); + spin_lock(&fctx->lock); list_for_each_entry_safe(fence, fnext, &fctx->pending, head) { if (fence->work) @@ -49,6 +96,21 @@ nouveau_fence_context_del(struct nouveau_fence_chan *fctx) nouveau_fence_unref(&fence); } spin_unlock(&fctx->lock); + if (list_empty(&fctx->prime_sync_list)) + return; + + mutex_lock(&priv->prime_lock); + while (!list_empty(&fctx->prime_sync_list)) { + struct nouveau_fence_prime_bo_entry *entry; + entry = list_first_entry(&fctx->prime_sync_list, + struct nouveau_fence_prime_bo_entry, + chan_entry); + + list_del(&entry->chan_entry); + list_del(&entry->bo_entry); + kfree(entry); + } + mutex_unlock(&priv->prime_lock); }
void @@ -56,6 +118,7 @@ nouveau_fence_context_new(struct nouveau_fence_chan *fctx) { INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); + INIT_LIST_HEAD(&fctx->prime_sync_list); }
void @@ -81,7 +144,8 @@ nouveau_fence_update(struct nouveau_channel *chan) }
int -nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) +nouveau_fence_emit(struct nouveau_fence *fence, + struct nouveau_channel *chan, bool prime) { struct drm_device *dev = chan->dev; struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE); @@ -92,7 +156,7 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) fence->timeout = jiffies + (3 * DRM_HZ); fence->sequence = ++fctx->sequence;
- ret = priv->emit(fence); + ret = priv->emit(fence, prime); if (!ret) { kref_get(&fence->kref); spin_lock(&fctx->lock); @@ -165,6 +229,173 @@ nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan) return ret; }
+static int +nouveau_fence_prime_attach_sync(struct drm_device *dev, + struct nouveau_fence_priv *priv, + struct nouveau_bo *bo, + struct dma_buf *sync_buf) +{ + struct dma_buf_attachment *attach; + int ret; + + if (bo->sync_bo && + sync_buf == bo->sync_bo->fence_import_attach->dmabuf) + return 0; + + mutex_lock(&sync_buf->lock); + list_for_each_entry(attach, &sync_buf->attachments, node) { + if (attach->dev == dev->dev) { + nouveau_bo_ref(attach->priv, &bo->sync_bo); + mutex_unlock(&sync_buf->lock); + return 0; + } + } + mutex_unlock(&sync_buf->lock); + + nouveau_bo_ref(NULL, &bo->sync_bo); + get_dma_buf(sync_buf); + ret = nouveau_prime_import_bo(dev, sync_buf, &bo->sync_bo, 0); + if (ret) + dma_buf_put(sync_buf); + return ret; +} + +static int +nouveau_fence_prime_attach(struct nouveau_channel *chan, + struct nouveau_bo *bo, + struct dma_buf *sync_buf, + struct nouveau_fence_prime_bo_entry **pentry) +{ + struct nouveau_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE]; + struct nouveau_fence_priv *priv; + struct nouveau_fence_prime_bo_entry *entry; + struct nouveau_bo *sync; + int ret; + + /* new to chan or already existing */ + priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE); + ret = nouveau_fence_prime_attach_sync(chan->dev, priv, bo, sync_buf); + if (ret) + return ret; + + sync = bo->sync_bo; + list_for_each_entry (entry, &sync->prime_chan_entries, bo_entry) { + if (entry->chan == chan) { + *pentry = entry; + return 0; + } + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->chan = chan; + entry->bo = sync; + ret = priv->prime_add_import(entry); + if (!ret) { + list_add_tail(&entry->chan_entry, &fctx->prime_sync_list); + list_add_tail(&entry->bo_entry, &sync->prime_chan_entries); + *pentry = entry; + } else + kfree(entry); + return ret; +} + +int nouveau_fence_sync_prime(struct nouveau_channel *chan, + struct dmabufmgr_validate *val) +{ + struct drm_device *dev = chan->dev; + struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE); + struct nouveau_fence_prime_bo_entry *e = NULL; + int ret; + + if (!val->sync_buf) + return 0; + if (!priv || !priv->prime_sync || + !priv->prime_add_import || !priv->prime_del_import) + return -ENODEV; + + if (priv->prime_buf == val->sync_buf) + return priv->prime_sync(chan, val->sync_buf->priv, val->sync_ofs, val->sync_val, 0); + + mutex_lock(&priv->prime_lock); + ret = nouveau_fence_prime_attach(chan, val->priv, + val->sync_buf, &e); + if (!ret) + ret = priv->prime_sync(chan, e->bo, val->sync_ofs, + val->sync_val, e->sema_start); + mutex_unlock(&priv->prime_lock); + return ret; +} + +int nouveau_fence_prime_get(struct nouveau_fence *fence, + struct dma_buf **sync_buf, u32 *ofs, u32 *val) +{ + struct drm_device *dev = fence->channel->dev; + struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE); + + if (!priv->prime_sync) + return -ENODEV; + + get_dma_buf(priv->prime_buf); + *sync_buf = priv->prime_buf; + *ofs = priv->prime_align * fence->channel->id; + *val = fence->sequence; + return 0; +} + +static void +nouveau_fence_prime_del_import(struct nouveau_bo *nvbo) +{ + struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev); + struct dma_buf_attachment *attach = nvbo->fence_import_attach; + struct nouveau_fence_priv *priv; + struct dma_buf *dma_buf; + + priv = (struct nouveau_fence_priv *)dev_priv->eng[NVOBJ_ENGINE_FENCE]; + + while (!list_empty(&nvbo->prime_chan_entries)) { + struct nouveau_fence_prime_bo_entry *entry; + entry = list_first_entry(&nvbo->prime_chan_entries, + 
struct nouveau_fence_prime_bo_entry, + bo_entry); + + priv->prime_del_import(entry); + list_del(&entry->chan_entry); + list_del(&entry->bo_entry); + kfree(entry); + } + + dma_buf_unmap_attachment(attach, nvbo->bo.sg, DMA_BIDIRECTIONAL); + dma_buf = attach->dmabuf; + dma_buf_detach(attach->dmabuf, attach); + dma_buf_put(dma_buf); +} + + +void nouveau_fence_prime_del_bo(struct nouveau_bo *nvbo) +{ + struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev); + struct nouveau_fence_priv *priv; + priv = (struct nouveau_fence_priv *)dev_priv->eng[NVOBJ_ENGINE_FENCE]; + + BUG_ON(!priv->prime_del_import); + + /* Impossible situation: we are a sync_bo synced to another + * sync bo? + */ + BUG_ON(nvbo->sync_bo && nvbo->fence_import_attach); + + if (nvbo->sync_bo) { + mutex_lock(&priv->prime_lock); + nouveau_bo_ref(NULL, &nvbo->sync_bo); + mutex_unlock(&priv->prime_lock); + } + else if (nvbo->fence_import_attach) + nouveau_fence_prime_del_import(nvbo); +} + static void nouveau_fence_del(struct kref *kref) { @@ -188,7 +419,8 @@ nouveau_fence_ref(struct nouveau_fence *fence) }
int -nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence) +nouveau_fence_new(struct nouveau_channel *chan, + struct nouveau_fence **pfence, bool prime) { struct nouveau_fence *fence; int ret = 0; @@ -202,7 +434,7 @@ nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence) kref_init(&fence->kref);
if (chan) { - ret = nouveau_fence_emit(fence, chan); + ret = nouveau_fence_emit(fence, chan, prime); if (ret) nouveau_fence_unref(&fence); } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 82ba733..016502e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -1,6 +1,8 @@ #ifndef __NOUVEAU_FENCE_H__ #define __NOUVEAU_FENCE_H__
+#include <linux/dma-buf-mgr.h> + struct nouveau_fence { struct list_head head; struct kref kref; @@ -13,34 +15,68 @@ struct nouveau_fence { void *priv; };
-int nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **); +int nouveau_fence_new(struct nouveau_channel *, + struct nouveau_fence **, + bool prime); + struct nouveau_fence * nouveau_fence_ref(struct nouveau_fence *); void nouveau_fence_unref(struct nouveau_fence **);
-int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); +int nouveau_fence_emit(struct nouveau_fence *, + struct nouveau_channel *, bool prime); bool nouveau_fence_done(struct nouveau_fence *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_fence *, struct nouveau_channel *); +int nouveau_fence_sync_prime(struct nouveau_channel *, + struct dmabufmgr_validate *); void nouveau_fence_idle(struct nouveau_channel *); void nouveau_fence_update(struct nouveau_channel *); +int nouveau_fence_prime_get(struct nouveau_fence *fence, + struct dma_buf **sync_buf, u32 *ofs, u32 *val); +void nouveau_fence_prime_del_bo(struct nouveau_bo *bo);
struct nouveau_fence_chan { struct list_head pending; spinlock_t lock; u32 sequence; + struct list_head prime_sync_list; +}; + +struct nouveau_fence_prime_bo_entry { + struct list_head bo_entry; + struct list_head chan_entry; + struct nouveau_bo *bo; + struct nouveau_channel *chan; + + u64 sema_start, sema_len; + struct nouveau_vma vma; };
struct nouveau_fence_priv { struct nouveau_exec_engine engine; - int (*emit)(struct nouveau_fence *); + int (*emit)(struct nouveau_fence *, bool prime); int (*sync)(struct nouveau_fence *, struct nouveau_channel *, struct nouveau_channel *); u32 (*read)(struct nouveau_channel *); + int (*prime_sync)(struct nouveau_channel *chan, struct nouveau_bo *bo, + u32 ofs, u32 val, u64 sema_start); + int (*prime_add_import)(struct nouveau_fence_prime_bo_entry *); + void (*prime_del_import)(struct nouveau_fence_prime_bo_entry *); + + struct mutex prime_lock; + struct dma_buf *prime_buf; + struct nouveau_bo *prime_bo; + u32 prime_align; };
+int nouveau_fence_prime_init(struct drm_device *, + struct nouveau_fence_priv *, u32 align); +void nouveau_fence_prime_del(struct nouveau_fence_priv *priv); + void nouveau_fence_context_new(struct nouveau_fence_chan *); -void nouveau_fence_context_del(struct nouveau_fence_chan *); +void nouveau_fence_context_del(struct drm_device *, + struct nouveau_fence_chan *);
int nv04_fence_create(struct drm_device *dev); int nv04_fence_mthd(struct nouveau_channel *, u32, u32, u32); diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 34d0bc5..11c9c2a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -55,8 +55,10 @@ nouveau_gem_object_del(struct drm_gem_object *gem) nouveau_bo_unpin(nvbo); }
- if (gem->import_attach) + if (gem->import_attach) { + nouveau_fence_prime_del_bo(nvbo); drm_prime_gem_destroy(gem, nvbo->bo.sg); + }
ttm_bo_unref(&bo);
@@ -780,7 +782,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } }
- ret = nouveau_fence_new(chan, &fence); + ret = nouveau_fence_new(chan, &fence, false); if (ret) { NV_ERROR(dev, "error fencing pushbuf: %d\n", ret); WIND_RING(chan); diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 537154d3..3b6be0e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -248,6 +248,8 @@ int nouveau_prime_import_bo(struct drm_device *dev,
if (gem) (*pnvbo)->gem->import_attach = attach; + else + (*pnvbo)->fence_import_attach = attach; BUG_ON(attach->priv); attach->priv = *pnvbo; return 0; diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c index abe89db..959d072 100644 --- a/drivers/gpu/drm/nouveau/nv04_fence.c +++ b/drivers/gpu/drm/nouveau/nv04_fence.c @@ -38,7 +38,7 @@ struct nv04_fence_priv { };
static int -nv04_fence_emit(struct nouveau_fence *fence) +nv04_fence_emit(struct nouveau_fence *fence, bool prime) { struct nouveau_channel *chan = fence->channel; int ret = RING_SPACE(chan, 2); @@ -76,7 +76,7 @@ static void nv04_fence_context_del(struct nouveau_channel *chan, int engine) { struct nv04_fence_chan *fctx = chan->engctx[engine]; - nouveau_fence_context_del(&fctx->base); + nouveau_fence_context_del(chan->dev, &fctx->base); chan->engctx[engine] = NULL; kfree(fctx); } diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c index 8a1b750..b7742e7 100644 --- a/drivers/gpu/drm/nouveau/nv10_fence.c +++ b/drivers/gpu/drm/nouveau/nv10_fence.c @@ -40,7 +40,7 @@ struct nv10_fence_priv { };
static int -nv10_fence_emit(struct nouveau_fence *fence) +nv10_fence_emit(struct nouveau_fence *fence, bool prime) { struct nouveau_channel *chan = fence->channel; int ret = RING_SPACE(chan, 2); @@ -109,7 +109,7 @@ static void nv10_fence_context_del(struct nouveau_channel *chan, int engine) { struct nv10_fence_chan *fctx = chan->engctx[engine]; - nouveau_fence_context_del(&fctx->base); + nouveau_fence_context_del(chan->dev, &fctx->base); chan->engctx[engine] = NULL; kfree(fctx); } diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index c2f889b..b5cfbcb 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -39,7 +39,7 @@ struct nv84_fence_priv { };
static int -nv84_fence_emit(struct nouveau_fence *fence) +nv84_fence_emit(struct nouveau_fence *fence, bool prime) { struct nouveau_channel *chan = fence->channel; int ret = RING_SPACE(chan, 7); @@ -86,7 +86,7 @@ static void nv84_fence_context_del(struct nouveau_channel *chan, int engine) { struct nv84_fence_chan *fctx = chan->engctx[engine]; - nouveau_fence_context_del(&fctx->base); + nouveau_fence_context_del(chan->dev, &fctx->base); chan->engctx[engine] = NULL; kfree(fctx); } diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c index 47ab388..198e31f 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fence.c +++ b/drivers/gpu/drm/nouveau/nvc0_fence.c @@ -40,7 +40,7 @@ struct nvc0_fence_chan { };
static int -nvc0_fence_emit(struct nouveau_fence *fence) +nvc0_fence_emit(struct nouveau_fence *fence, bool prime) { struct nouveau_channel *chan = fence->channel; struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE]; @@ -96,7 +96,7 @@ nvc0_fence_context_del(struct nouveau_channel *chan, int engine) struct nvc0_fence_chan *fctx = chan->engctx[engine];
nouveau_bo_vma_del(priv->bo, &fctx->vma); - nouveau_fence_context_del(&fctx->base); + nouveau_fence_context_del(chan->dev, &fctx->base); chan->engctx[engine] = NULL; kfree(fctx); }
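A minimal caller-side sketch of the changed fence interface, assuming a hypothetical wrapper; only nouveau_fence_new()/nouveau_fence_unref() and the new bool argument come from the series itself, the surrounding function and flag name are illustrative.

static int submit_and_fence(struct nouveau_channel *chan,
			    bool touches_shared_buffers)
{
	struct nouveau_fence *fence = NULL;
	int ret;

	/* the new third argument selects whether the engine's emit() hook
	 * also writes the sequence number into the exported sync buffer,
	 * so other devices can wait on it */
	ret = nouveau_fence_new(chan, &fence, touches_shared_buffers);
	if (ret)
		return ret;

	/* ... attach the fence to the buffers that were validated ... */
	nouveau_fence_unref(&fence);
	return 0;
}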
From: Maarten Lankhorst maarten.lankhorst@canonical.com
Create a dma object for the prime semaphore and every imported sync bo.
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com --- drivers/gpu/drm/nouveau/nv84_fence.c | 121 ++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index b5cfbcb..f739dfc 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -31,6 +31,7 @@
struct nv84_fence_chan { struct nouveau_fence_chan base; + u32 sema_start; };
struct nv84_fence_priv { @@ -42,21 +43,25 @@ static int nv84_fence_emit(struct nouveau_fence *fence, bool prime) { struct nouveau_channel *chan = fence->channel; - int ret = RING_SPACE(chan, 7); - if (ret == 0) { + int i, ret; + + ret = RING_SPACE(chan, prime ? 14 : 7); + if (ret) + return ret; + + for (i = 0; i < (prime ? 2 : 1); ++i) { BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); - OUT_RING (chan, NvSema); + OUT_RING (chan, i ? NvSemaPrime : NvSema); BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); OUT_RING (chan, upper_32_bits(chan->id * 16)); OUT_RING (chan, lower_32_bits(chan->id * 16)); OUT_RING (chan, fence->sequence); OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG); - FIRE_RING (chan); } + FIRE_RING (chan); return ret; }
- static int nv84_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *prev, struct nouveau_channel *chan) @@ -82,12 +87,94 @@ nv84_fence_read(struct nouveau_channel *chan) return nv_ro32(priv->mem, chan->id * 16); }
+static int +nv84_fence_prime_sync(struct nouveau_channel *chan, + struct nouveau_bo *bo, + u32 ofs, u32 val, u64 sema_start) +{ + struct nv84_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE); + int ret = RING_SPACE(chan, 7); + u32 sema = 0; + if (ret < 0) + return ret; + + if (bo == priv->base.prime_bo) { + sema = NvSema; + } else { + struct sg_table *sgt = bo->bo.sg; + struct scatterlist *sg; + u32 i; + sema = sema_start; + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + if (ofs < sg->offset + sg->length) { + ofs -= sg->offset; + break; + } + sema++; + } + } + + BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); + OUT_RING (chan, sema); + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, 0); + OUT_RING (chan, ofs); + OUT_RING (chan, val); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL); + FIRE_RING (chan); + return ret; +} + +static void +nv84_fence_prime_del_import(struct nouveau_fence_prime_bo_entry *entry) { + u32 i; + for (i = entry->sema_start; i < entry->sema_start + entry->sema_len; ++i) + nouveau_ramht_remove(entry->chan, i); +} + +static int +nv84_fence_prime_add_import(struct nouveau_fence_prime_bo_entry *entry) { + struct sg_table *sgt = entry->bo->bo.sg; + struct nouveau_channel *chan = entry->chan; + struct nv84_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE]; + struct scatterlist *sg; + u32 i, sema; + int ret; + + sema = entry->sema_start = fctx->sema_start; + entry->sema_len = 0; + + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + struct nouveau_gpuobj *obj; + ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY, + sg_dma_address(sg), PAGE_SIZE, + NV_MEM_ACCESS_RO, + NV_MEM_TARGET_PCI, &obj); + if (ret) + goto err; + + ret = nouveau_ramht_insert(chan, sema, obj); + nouveau_gpuobj_ref(NULL, &obj); + if (ret) + goto err; + entry->sema_len++; + sema++; + } + fctx->sema_start += (entry->sema_len + 0xff) & ~0xff; + return 0; + +err: + nv84_fence_prime_del_import(entry); + return ret; +} + static void nv84_fence_context_del(struct nouveau_channel *chan, int engine) { struct nv84_fence_chan *fctx = chan->engctx[engine]; nouveau_fence_context_del(chan->dev, &fctx->base); chan->engctx[engine] = NULL; + kfree(fctx); }
@@ -104,6 +191,7 @@ nv84_fence_context_new(struct nouveau_channel *chan, int engine) return -ENOMEM;
nouveau_fence_context_new(&fctx->base); + fctx->sema_start = 0xc0000000 | (chan->id << 20);
ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY, priv->mem->vinst, priv->mem->size, @@ -112,7 +200,21 @@ nv84_fence_context_new(struct nouveau_channel *chan, int engine) if (ret == 0) { ret = nouveau_ramht_insert(chan, NvSema, obj); nouveau_gpuobj_ref(NULL, &obj); - nv_wo32(priv->mem, chan->id * 16, 0x00000000); + fctx->base.sequence = nv_ro32(priv->mem, chan->id * 16); + } + + if (priv->base.prime_bo) { + struct nouveau_mem *mem = priv->base.prime_bo->bo.mem.mm_node; + ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY, + mem->pages[0], PAGE_SIZE, + NV_MEM_ACCESS_RW, + NV_MEM_TARGET_PCI, &obj); + if (ret == 0) { + ret = nouveau_ramht_insert(chan, NvSemaPrime, obj); + nouveau_gpuobj_ref(NULL, &obj); + nouveau_bo_wr32(priv->base.prime_bo, chan->id * 4, + fctx->base.sequence); + } }
if (ret) @@ -138,6 +240,7 @@ nv84_fence_destroy(struct drm_device *dev, int engine) struct drm_nouveau_private *dev_priv = dev->dev_private; struct nv84_fence_priv *priv = nv_engine(dev, engine);
+ nouveau_fence_prime_del(&priv->base); nouveau_gpuobj_ref(NULL, &priv->mem); dev_priv->eng[engine] = NULL; kfree(priv); @@ -163,6 +266,10 @@ nv84_fence_create(struct drm_device *dev) priv->base.emit = nv84_fence_emit; priv->base.sync = nv84_fence_sync; priv->base.read = nv84_fence_read; + + priv->base.prime_sync = nv84_fence_prime_sync; + priv->base.prime_add_import = nv84_fence_prime_add_import; + priv->base.prime_del_import = nv84_fence_prime_del_import; dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
ret = nouveau_gpuobj_new(dev, NULL, 16 * pfifo->channels, @@ -170,6 +277,8 @@ nv84_fence_create(struct drm_device *dev) if (ret) goto out;
+ ret = nouveau_fence_prime_init(dev, &priv->base, 16); + out: if (ret) nv84_fence_destroy(dev, NVOBJ_ENGINE_FENCE);
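To make the handle bookkeeping in nv84_fence_prime_add_import() easier to follow, here is a stand-alone model (plain userspace C, not kernel code) of the arithmetic: every imported sync bo consumes one DMA-object handle per scatterlist entry, and the per-channel cursor starts at 0xc0000000 | (chan_id << 20) and advances in 0x100-handle blocks. The channel id and entry counts below are made up.

#include <stdio.h>
#include <stdint.h>

static uint32_t sema_base(int chan_id)
{
	/* per-channel handle namespace, as set up in nv84_fence_context_new() */
	return 0xc0000000u | ((uint32_t)chan_id << 20);
}

static uint32_t alloc_handles(uint32_t *cursor, uint32_t nents)
{
	uint32_t start = *cursor;

	/* one handle per scatterlist entry; the cursor is rounded up to
	 * the next 0x100-handle block for the following import */
	*cursor += (nents + 0xff) & ~0xffu;
	return start;
}

int main(void)
{
	uint32_t cursor = sema_base(3);	/* hypothetical channel id 3 */

	printf("import A starts at 0x%08x\n", (unsigned)alloc_handles(&cursor, 4));
	printf("import B starts at 0x%08x\n", (unsigned)alloc_handles(&cursor, 260));
	printf("import C starts at 0x%08x\n", (unsigned)alloc_handles(&cursor, 1));
	return 0;
}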
From: Maarten Lankhorst maarten.lankhorst@canonical.com
Create a read-only mapping for every imported bo, and create a prime bo in system memory.

Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com --- drivers/gpu/drm/nouveau/nvc0_fence.c | 104 +++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c index 198e31f..dc6ccab 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fence.c +++ b/drivers/gpu/drm/nouveau/nvc0_fence.c @@ -37,6 +37,7 @@ struct nvc0_fence_priv { struct nvc0_fence_chan { struct nouveau_fence_chan base; struct nouveau_vma vma; + struct nouveau_vma prime_vma; };
static int @@ -45,19 +46,23 @@ nvc0_fence_emit(struct nouveau_fence *fence, bool prime) struct nouveau_channel *chan = fence->channel; struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE]; u64 addr = fctx->vma.offset + chan->id * 16; - int ret; + int ret, i;
- ret = RING_SPACE(chan, 5); - if (ret == 0) { + ret = RING_SPACE(chan, prime ? 10 : 5); + if (ret) + return ret; + + for (i = 0; i < (prime ? 2 : 1); ++i) { + if (i) + addr = fctx->prime_vma.offset + chan->id * 16; BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); OUT_RING (chan, upper_32_bits(addr)); OUT_RING (chan, lower_32_bits(addr)); OUT_RING (chan, fence->sequence); OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG); - FIRE_RING (chan); } - - return ret; + FIRE_RING(chan); + return 0; }
static int @@ -95,6 +100,8 @@ nvc0_fence_context_del(struct nouveau_channel *chan, int engine) struct nvc0_fence_priv *priv = nv_engine(chan->dev, engine); struct nvc0_fence_chan *fctx = chan->engctx[engine];
+ if (priv->base.prime_bo) + nouveau_bo_vma_del(priv->base.prime_bo, &fctx->prime_vma); nouveau_bo_vma_del(priv->bo, &fctx->vma); nouveau_fence_context_del(chan->dev, &fctx->base); chan->engctx[engine] = NULL; @@ -115,10 +122,16 @@ nvc0_fence_context_new(struct nouveau_channel *chan, int engine) nouveau_fence_context_new(&fctx->base);
ret = nouveau_bo_vma_add(priv->bo, chan->vm, &fctx->vma); + if (!ret && priv->base.prime_bo) + ret = nouveau_bo_vma_add(priv->base.prime_bo, chan->vm, + &fctx->prime_vma); if (ret) nvc0_fence_context_del(chan, engine);
- nouveau_bo_wr32(priv->bo, chan->id * 16/4, 0x00000000); + fctx->base.sequence = nouveau_bo_rd32(priv->bo, chan->id * 16/4); + if (priv->base.prime_bo) + nouveau_bo_wr32(priv->base.prime_bo, chan->id * 16/4, + fctx->base.sequence); return ret; }
@@ -140,12 +153,55 @@ nvc0_fence_destroy(struct drm_device *dev, int engine) struct drm_nouveau_private *dev_priv = dev->dev_private; struct nvc0_fence_priv *priv = nv_engine(dev, engine);
+ nouveau_fence_prime_del(&priv->base); nouveau_bo_unmap(priv->bo); + nouveau_bo_unpin(priv->bo); nouveau_bo_ref(NULL, &priv->bo); dev_priv->eng[engine] = NULL; kfree(priv); }
+static int +nvc0_fence_prime_sync(struct nouveau_channel *chan, + struct nouveau_bo *bo, + u32 ofs, u32 val, u64 sema_start) +{ + struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE]; + struct nvc0_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE); + int ret = RING_SPACE(chan, 5); + if (ret) + return ret; + + if (bo == priv->base.prime_bo) + sema_start = fctx->prime_vma.offset; + else + NV_ERROR(chan->dev, "syncing with %08Lx + %08x >= %08x\n", + sema_start, ofs, val); + sema_start += ofs; + + BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(sema_start)); + OUT_RING (chan, lower_32_bits(sema_start)); + OUT_RING (chan, val); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL | + NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD); + FIRE_RING (chan); + return ret; +} + +static void +nvc0_fence_prime_del_import(struct nouveau_fence_prime_bo_entry *entry) { + nouveau_bo_vma_del(entry->bo, &entry->vma); +} + +static int +nvc0_fence_prime_add_import(struct nouveau_fence_prime_bo_entry *entry) { + int ret = nouveau_bo_vma_add_access(entry->bo, entry->chan->vm, + &entry->vma, NV_MEM_ACCESS_RO); + entry->sema_start = entry->vma.offset; + return ret; +} + int nvc0_fence_create(struct drm_device *dev) { @@ -168,17 +224,35 @@ nvc0_fence_create(struct drm_device *dev) priv->base.read = nvc0_fence_read; dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
+ priv->base.prime_sync = nvc0_fence_prime_sync; + priv->base.prime_add_import = nvc0_fence_prime_add_import; + priv->base.prime_del_import = nvc0_fence_prime_del_import; + ret = nouveau_bo_new(dev, 16 * pfifo->channels, 0, TTM_PL_FLAG_VRAM, 0, 0, NULL, &priv->bo); - if (ret == 0) { - ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM); - if (ret == 0) - ret = nouveau_bo_map(priv->bo); - if (ret) - nouveau_bo_ref(NULL, &priv->bo); - } + if (ret) + goto err; + ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM); + if (ret) + goto err_ref;
+ ret = nouveau_bo_map(priv->bo); if (ret) - nvc0_fence_destroy(dev, NVOBJ_ENGINE_FENCE); + goto err_unpin; + + ret = nouveau_fence_prime_init(dev, &priv->base, 16); + if (ret) + goto err_unmap; + return 0; + +err_unmap: + nouveau_bo_unmap(priv->bo); +err_unpin: + nouveau_bo_unpin(priv->bo); +err_ref: + nouveau_bo_ref(NULL, &priv->bo); +err: + dev_priv->eng[NVOBJ_ENGINE_FENCE] = NULL; + kfree(priv); return ret; }
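The nvc0 variant avoids per-entry DMA objects entirely: the imported bo is mapped read-only into the channel VM and the wait is issued against a 64-bit virtual address. A stand-alone illustration (userspace C) of that address math, with a made-up mapping base and channel slot:

#include <stdio.h>
#include <stdint.h>

static void emit_wait(uint64_t vma_offset, uint32_t ofs, uint32_t val)
{
	/* mapping base of the imported sync bo plus the byte offset of the
	 * producer's semaphore slot gives the address ACQUIRE_GEQUAL waits on */
	uint64_t addr = vma_offset + ofs;

	/* the real code pushes these as four ring words */
	printf("SEMAPHORE_ADDRESS_HIGH = 0x%08x\n", (unsigned)(addr >> 32));
	printf("SEMAPHORE_ADDRESS_LOW  = 0x%08x\n", (unsigned)addr);
	printf("SEMAPHORE_SEQUENCE     = 0x%08x\n", (unsigned)val);
	printf("TRIGGER                = ACQUIRE_GEQUAL | YIELD\n");
}

int main(void)
{
	/* hypothetical mapping base; channel id 5 with 16 bytes per slot */
	emit_wait(0x100000000ull, 5 * 16, 42);
	return 0;
}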
From: Maarten Lankhorst maarten.lankhorst@canonical.com
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com --- drivers/gpu/drm/nouveau/nouveau_gem.c | 121 +++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 11c9c2a..e5d36bb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -31,6 +31,7 @@ #include "nouveau_drm.h" #include "nouveau_dma.h" #include "nouveau_fence.h" +#include <linux/dma-buf-mgr.h>
#define nouveau_gem_pushbuf_sync(chan) 0
@@ -277,6 +278,7 @@ struct validate_op { struct list_head vram_list; struct list_head gart_list; struct list_head both_list; + struct list_head prime_list; };
static void @@ -305,9 +307,36 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence) static void validate_fini(struct validate_op *op, struct nouveau_fence* fence) { + struct list_head *entry, *tmp; + struct nouveau_bo *nvbo; + struct dma_buf *sync_buf; + u32 ofs, val; + validate_fini_list(&op->vram_list, fence); validate_fini_list(&op->gart_list, fence); validate_fini_list(&op->both_list, fence); + + if (list_empty(&op->prime_list)) + return; + + if (fence && + !nouveau_fence_prime_get(fence, &sync_buf, &ofs, &val)) { + dmabufmgr_eu_fence_buffer_objects(sync_buf, ofs, val, + &op->prime_list); + dma_buf_put(sync_buf); + } else + dmabufmgr_eu_backoff_reservation(&op->prime_list); + + list_for_each_safe(entry, tmp, &op->prime_list) { + struct dmabufmgr_validate *val; + val = list_entry(entry, struct dmabufmgr_validate, head); + nvbo = val->priv; + + list_del(&val->head); + nvbo->reserved_by = NULL; + drm_gem_object_unreference_unlocked(nvbo->gem); + kfree(val); + } }
static int @@ -319,9 +348,9 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv, struct drm_nouveau_private *dev_priv = dev->dev_private; uint32_t sequence; int trycnt = 0; - int ret, i; + int i;
- sequence = atomic_add_return(1, &dev_priv->ttm.validate_sequence); + sequence = atomic_inc_return(&dev_priv->ttm.validate_sequence); retry: if (++trycnt > 100000) { NV_ERROR(dev, "%s failed and gave up.\n", __func__); @@ -332,6 +361,8 @@ retry: struct drm_nouveau_gem_pushbuf_bo *b = &pbbo[i]; struct drm_gem_object *gem; struct nouveau_bo *nvbo; + int ret = 0, is_prime; + struct dmabufmgr_validate *validate = NULL;
gem = drm_gem_object_lookup(dev, file_priv, b->handle); if (!gem) { @@ -340,6 +371,7 @@ retry: return -ENOENT; } nvbo = gem->driver_private; + is_prime = gem->export_dma_buf || gem->import_attach;
if (nvbo->reserved_by && nvbo->reserved_by == file_priv) { NV_ERROR(dev, "multiple instances of buffer %d on " @@ -349,7 +381,21 @@ retry: return -EINVAL; }
- ret = ttm_bo_reserve(&nvbo->bo, true, false, true, sequence); + if (likely(!is_prime)) + ret = ttm_bo_reserve(&nvbo->bo, true, false, + true, sequence); + else { + validate = kzalloc(sizeof(*validate), GFP_KERNEL); + if (validate) { + if (gem->import_attach) + validate->bo = + gem->import_attach->dmabuf; + else + validate->bo = gem->export_dma_buf; + validate->priv = nvbo; + } else + ret = -ENOMEM; + } if (ret) { validate_fini(op, NULL); if (unlikely(ret == -EAGAIN)) @@ -366,6 +412,9 @@ retry: b->user_priv = (uint64_t)(unsigned long)nvbo; nvbo->reserved_by = file_priv; nvbo->pbbo_index = i; + if (is_prime) { + list_add_tail(&validate->head, &op->prime_list); + } else if ((b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM) && (b->valid_domains & NOUVEAU_GEM_DOMAIN_GART)) list_add_tail(&nvbo->entry, &op->both_list); @@ -473,6 +522,60 @@ validate_list(struct nouveau_channel *chan, struct list_head *list, }
static int +validate_prime(struct nouveau_channel *chan, struct list_head *list, + struct drm_nouveau_gem_pushbuf_bo *pbbo, uint64_t user_pbbo_ptr) +{ + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; + struct drm_nouveau_gem_pushbuf_bo __user *upbbo = + (void __force __user *)(uintptr_t)user_pbbo_ptr; + struct drm_device *dev = chan->dev; + struct dmabufmgr_validate *validate; + int ret, relocs = 0; + bool cpu_validate = false; + + ret = dmabufmgr_eu_reserve_buffers(list); + if (ret < 0) { + if (ret != -ERESTARTSYS) + NV_ERROR(dev, "failed to reserve prime: %d\n", ret); + return ret; + } + + list_for_each_entry(validate, list, head) { + struct nouveau_bo *nvbo = validate->priv; + struct drm_nouveau_gem_pushbuf_bo *b = &pbbo[nvbo->pbbo_index]; + + if (!cpu_validate) + ret = nouveau_fence_sync_prime(chan, validate); + if (unlikely(ret == -ENODEV)) { + ret = dmabufmgr_eu_wait_completed_cpu(list, 1, 1); + cpu_validate = true; + } + if (unlikely(ret)) { + if (ret != -ERESTARTSYS) + NV_ERROR(dev, "failed prime sync: %d\n", ret); + return ret; + } + + if (dev_priv->card_type < NV_50) { + if (nvbo->bo.offset == b->presumed.offset && + b->presumed.domain & NOUVEAU_GEM_DOMAIN_GART) + continue; + + b->presumed.domain = NOUVEAU_GEM_DOMAIN_GART; + b->presumed.offset = nvbo->bo.offset; + b->presumed.valid = 0; + relocs++; + + if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index].presumed, + &b->presumed, sizeof(b->presumed))) + return -EFAULT; + } + } + + return relocs; +} + +static int nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, struct drm_file *file_priv, struct drm_nouveau_gem_pushbuf_bo *pbbo, @@ -485,6 +588,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, INIT_LIST_HEAD(&op->vram_list); INIT_LIST_HEAD(&op->gart_list); INIT_LIST_HEAD(&op->both_list); + INIT_LIST_HEAD(&op->prime_list);
if (nr_buffers == 0) return 0; @@ -523,6 +627,13 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, } relocs += ret;
+ ret = validate_prime(chan, &op->prime_list, pbbo, user_buffers); + if (unlikely(ret < 0)) { + validate_fini(op, NULL); + return ret; + } + relocs += ret; + *apply_relocs = relocs; return 0; } @@ -782,11 +893,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } }
- ret = nouveau_fence_new(chan, &fence, false); + ret = nouveau_fence_new(chan, &fence, !list_empty(&op.prime_list)); if (ret) { NV_ERROR(dev, "error fencing pushbuf: %d\n", ret); WIND_RING(chan); - goto out; + nouveau_fence_unref(&fence); }
out:
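Putting the gem changes together, the shared-buffer path boils down to reserve, sync, then fence-or-backoff. A condensed sketch of that flow, using only calls visible in this patch; the wrapper name and the trimmed error handling are illustrative:

static int pushbuf_handle_prime(struct nouveau_channel *chan,
				struct list_head *prime_list,
				struct nouveau_fence *fence)
{
	struct dma_buf *sync_buf;
	u32 ofs, val;
	int ret;

	/* lock all foreign dma-bufs in a deadlock-avoiding order */
	ret = dmabufmgr_eu_reserve_buffers(prime_list);
	if (ret)
		return ret;

	/* emit hardware waits for each entry; validate_prime() falls back
	 * to dmabufmgr_eu_wait_completed_cpu() when the engine reports
	 * -ENODEV for a buffer it cannot sync against in hardware */

	/* on success, publish the new fence (sync dma-buf + offset + seqno)
	 * on every reserved buffer; otherwise drop the reservations */
	if (fence && !nouveau_fence_prime_get(fence, &sync_buf, &ofs, &val)) {
		dmabufmgr_eu_fence_buffer_objects(sync_buf, ofs, val, prime_list);
		dma_buf_put(sync_buf);
	} else {
		dmabufmgr_eu_backoff_reservation(prime_list);
	}
	return 0;
}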