Hi guys,
we are currently working an Freesync and direct scan out from system
memory on AMD APUs in A+A laptops.
On problem we stumbled over is that our display hardware needs to scan
out from uncached system memory and we currently don't have a way to
communicate that through DMA-buf.
For our specific use case at hand we are going to implement something
driver specific, but the question is should we have something more
generic for this?
After all the system memory access pattern is a PCIe extension and as
such something generic.
Regards,
Christian.
From: Michel Dänzer <mdaenzer(a)redhat.com>
This makes sure we don't hit the
BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
in dma_buf_release, which could be triggered by user space closing the
dma-buf file description while there are outstanding fence callbacks
from dma_buf_poll.
Cc: stable(a)vger.kernel.org
Signed-off-by: Michel Dänzer <mdaenzer(a)redhat.com>
---
drivers/dma-buf/dma-buf.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 6c520c9bd93c..ec25498a971f 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -65,12 +65,9 @@ static void dma_buf_release(struct dentry *dentry)
BUG_ON(dmabuf->vmapping_counter);
/*
- * Any fences that a dma-buf poll can wait on should be signaled
- * before releasing dma-buf. This is the responsibility of each
- * driver that uses the reservation objects.
- *
- * If you hit this BUG() it means someone dropped their ref to the
- * dma-buf while still having pending operation to the buffer.
+ * If you hit this BUG() it could mean:
+ * * There's a file reference imbalance in dma_buf_poll / dma_buf_poll_cb or somewhere else
+ * * dmabuf->cb_in/out.active are non-0 despite no pending fence callback
*/
BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
@@ -196,6 +193,7 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb;
+ struct dma_buf *dmabuf = container_of(dcb->poll, struct dma_buf, poll);
unsigned long flags;
spin_lock_irqsave(&dcb->poll->lock, flags);
@@ -203,6 +201,8 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
dcb->active = 0;
spin_unlock_irqrestore(&dcb->poll->lock, flags);
dma_fence_put(fence);
+ /* Paired with get_file in dma_buf_poll */
+ fput(dmabuf->file);
}
static bool dma_buf_poll_shared(struct dma_resv *resv,
@@ -278,6 +278,9 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
spin_unlock_irq(&dmabuf->poll.lock);
if (events & EPOLLOUT) {
+ /* Paired with fput in dma_buf_poll_cb */
+ get_file(dmabuf->file);
+
if (!dma_buf_poll_shared(resv, dcb) &&
!dma_buf_poll_excl(resv, dcb))
/* No callback queued, wake up any other waiters */
@@ -299,6 +302,9 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
spin_unlock_irq(&dmabuf->poll.lock);
if (events & EPOLLIN) {
+ /* Paired with fput in dma_buf_poll_cb */
+ get_file(dmabuf->file);
+
if (!dma_buf_poll_excl(resv, dcb))
/* No callback queued, wake up any other waiters */
dma_buf_poll_cb(NULL, &dcb->cb);
--
2.32.0
This patch limits the size of total memory that can be requested in a
single allocation from the system heap. This would prevent a
buggy/malicious client from depleting system memory by requesting for an
extremely large allocation which might destabilize the system.
The limit is set to half the size of the device's total RAM which is the
same as what was set by the deprecated ION system heap.
Signed-off-by: Hridya Valsaraju <hridya(a)google.com>
---
drivers/dma-buf/heaps/system_heap.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index b7fbce66bcc0..099f5a8304b4 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -371,6 +371,12 @@ static struct dma_buf *system_heap_do_allocate(struct dma_heap *heap,
struct page *page, *tmp_page;
int i, ret = -ENOMEM;
+ if (len / PAGE_SIZE > totalram_pages() / 2) {
+ pr_err("pid %d requested too large an allocation(size %lu) from system heap\n",
+ current->pid, len);
+ return ERR_PTR(ret);
+ }
+
buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
if (!buffer)
return ERR_PTR(-ENOMEM);
--
2.32.0.432.gabb21c7263-goog
Entirely unused.
Signed-off-by: Christian König <christian.koenig(a)amd.com>
---
drivers/dma-buf/Makefile | 2 +-
drivers/dma-buf/seqno-fence.c | 71 ----------------------
include/linux/seqno-fence.h | 109 ----------------------------------
3 files changed, 1 insertion(+), 181 deletions(-)
delete mode 100644 drivers/dma-buf/seqno-fence.c
delete mode 100644 include/linux/seqno-fence.h
diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 40d81f23cacf..1ef021273a06 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
- dma-resv.o seqno-fence.o
+ dma-resv.o
obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o
obj-$(CONFIG_DMABUF_HEAPS) += heaps/
obj-$(CONFIG_SYNC_FILE) += sync_file.o
diff --git a/drivers/dma-buf/seqno-fence.c b/drivers/dma-buf/seqno-fence.c
deleted file mode 100644
index bfe14e94c488..000000000000
--- a/drivers/dma-buf/seqno-fence.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * seqno-fence, using a dma-buf to synchronize fencing
- *
- * Copyright (C) 2012 Texas Instruments
- * Copyright (C) 2012-2014 Canonical Ltd
- * Authors:
- * Rob Clark <robdclark(a)gmail.com>
- * Maarten Lankhorst <maarten.lankhorst(a)canonical.com>
- */
-
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/seqno-fence.h>
-
-static const char *seqno_fence_get_driver_name(struct dma_fence *fence)
-{
- struct seqno_fence *seqno_fence = to_seqno_fence(fence);
-
- return seqno_fence->ops->get_driver_name(fence);
-}
-
-static const char *seqno_fence_get_timeline_name(struct dma_fence *fence)
-{
- struct seqno_fence *seqno_fence = to_seqno_fence(fence);
-
- return seqno_fence->ops->get_timeline_name(fence);
-}
-
-static bool seqno_enable_signaling(struct dma_fence *fence)
-{
- struct seqno_fence *seqno_fence = to_seqno_fence(fence);
-
- return seqno_fence->ops->enable_signaling(fence);
-}
-
-static bool seqno_signaled(struct dma_fence *fence)
-{
- struct seqno_fence *seqno_fence = to_seqno_fence(fence);
-
- return seqno_fence->ops->signaled && seqno_fence->ops->signaled(fence);
-}
-
-static void seqno_release(struct dma_fence *fence)
-{
- struct seqno_fence *f = to_seqno_fence(fence);
-
- dma_buf_put(f->sync_buf);
- if (f->ops->release)
- f->ops->release(fence);
- else
- dma_fence_free(&f->base);
-}
-
-static signed long seqno_wait(struct dma_fence *fence, bool intr,
- signed long timeout)
-{
- struct seqno_fence *f = to_seqno_fence(fence);
-
- return f->ops->wait(fence, intr, timeout);
-}
-
-const struct dma_fence_ops seqno_fence_ops = {
- .get_driver_name = seqno_fence_get_driver_name,
- .get_timeline_name = seqno_fence_get_timeline_name,
- .enable_signaling = seqno_enable_signaling,
- .signaled = seqno_signaled,
- .wait = seqno_wait,
- .release = seqno_release,
-};
-EXPORT_SYMBOL(seqno_fence_ops);
diff --git a/include/linux/seqno-fence.h b/include/linux/seqno-fence.h
deleted file mode 100644
index 3cca2b8fac43..000000000000
--- a/include/linux/seqno-fence.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * seqno-fence, using a dma-buf to synchronize fencing
- *
- * Copyright (C) 2012 Texas Instruments
- * Copyright (C) 2012 Canonical Ltd
- * Authors:
- * Rob Clark <robdclark(a)gmail.com>
- * Maarten Lankhorst <maarten.lankhorst(a)canonical.com>
- */
-
-#ifndef __LINUX_SEQNO_FENCE_H
-#define __LINUX_SEQNO_FENCE_H
-
-#include <linux/dma-fence.h>
-#include <linux/dma-buf.h>
-
-enum seqno_fence_condition {
- SEQNO_FENCE_WAIT_GEQUAL,
- SEQNO_FENCE_WAIT_NONZERO
-};
-
-struct seqno_fence {
- struct dma_fence base;
-
- const struct dma_fence_ops *ops;
- struct dma_buf *sync_buf;
- uint32_t seqno_ofs;
- enum seqno_fence_condition condition;
-};
-
-extern const struct dma_fence_ops seqno_fence_ops;
-
-/**
- * to_seqno_fence - cast a fence to a seqno_fence
- * @fence: fence to cast to a seqno_fence
- *
- * Returns NULL if the fence is not a seqno_fence,
- * or the seqno_fence otherwise.
- */
-static inline struct seqno_fence *
-to_seqno_fence(struct dma_fence *fence)
-{
- if (fence->ops != &seqno_fence_ops)
- return NULL;
- return container_of(fence, struct seqno_fence, base);
-}
-
-/**
- * seqno_fence_init - initialize a seqno fence
- * @fence: seqno_fence to initialize
- * @lock: pointer to spinlock to use for fence
- * @sync_buf: buffer containing the memory location to signal on
- * @context: the execution context this fence is a part of
- * @seqno_ofs: the offset within @sync_buf
- * @seqno: the sequence # to signal on
- * @cond: fence wait condition
- * @ops: the fence_ops for operations on this seqno fence
- *
- * This function initializes a struct seqno_fence with passed parameters,
- * and takes a reference on sync_buf which is released on fence destruction.
- *
- * A seqno_fence is a dma_fence which can complete in software when
- * enable_signaling is called, but it also completes when
- * (s32)((sync_buf)[seqno_ofs] - seqno) >= 0 is true
- *
- * The seqno_fence will take a refcount on the sync_buf until it's
- * destroyed, but actual lifetime of sync_buf may be longer if one of the
- * callers take a reference to it.
- *
- * Certain hardware have instructions to insert this type of wait condition
- * in the command stream, so no intervention from software would be needed.
- * This type of fence can be destroyed before completed, however a reference
- * on the sync_buf dma-buf can be taken. It is encouraged to re-use the same
- * dma-buf for sync_buf, since mapping or unmapping the sync_buf to the
- * device's vm can be expensive.
- *
- * It is recommended for creators of seqno_fence to call dma_fence_signal()
- * before destruction. This will prevent possible issues from wraparound at
- * time of issue vs time of check, since users can check dma_fence_is_signaled()
- * before submitting instructions for the hardware to wait on the fence.
- * However, when ops.enable_signaling is not called, it doesn't have to be
- * done as soon as possible, just before there's any real danger of seqno
- * wraparound.
- */
-static inline void
-seqno_fence_init(struct seqno_fence *fence, spinlock_t *lock,
- struct dma_buf *sync_buf, uint32_t context,
- uint32_t seqno_ofs, uint32_t seqno,
- enum seqno_fence_condition cond,
- const struct dma_fence_ops *ops)
-{
- BUG_ON(!fence || !sync_buf || !ops);
- BUG_ON(!ops->wait || !ops->enable_signaling ||
- !ops->get_driver_name || !ops->get_timeline_name);
-
- /*
- * ops is used in dma_fence_init for get_driver_name, so needs to be
- * initialized first
- */
- fence->ops = ops;
- dma_fence_init(&fence->base, &seqno_fence_ops, lock, context, seqno);
- get_dma_buf(sync_buf);
- fence->sync_buf = sync_buf;
- fence->seqno_ofs = seqno_ofs;
- fence->condition = cond;
-}
-
-#endif /* __LINUX_SEQNO_FENCE_H */
--
2.25.1
From: Rob Clark <robdclark(a)chromium.org>
Based on discussion from a previous series[1] to add a "boost" mechanism
when, for example, vblank deadlines are missed. Instead of a boost
callback, this approach adds a way to set a deadline on the fence, by
which the waiter would like to see the fence signalled.
I've not yet had a chance to re-work the drm/msm part of this, but
wanted to send this out as an RFC in case I don't have a chance to
finish the drm/msm part this week.
Original description:
In some cases, like double-buffered rendering, missing vblanks can
trick the GPU into running at a lower frequence, when really we
want to be running at a higher frequency to not miss the vblanks
in the first place.
This is partially inspired by a trick i915 does, but implemented
via dma-fence for a couple of reasons:
1) To continue to be able to use the atomic helpers
2) To support cases where display and gpu are different drivers
[1] https://patchwork.freedesktop.org/series/90331/
Rob Clark (4):
dma-fence: Add deadline awareness
drm/vblank: Add helper to get next vblank time
drm/atomic-helper: Set fence deadline for vblank
drm/scheduler: Add fence deadline support
drivers/dma-buf/dma-fence.c | 39 +++++++++++++++++++++++++
drivers/gpu/drm/drm_atomic_helper.c | 36 +++++++++++++++++++++++
drivers/gpu/drm/drm_vblank.c | 31 ++++++++++++++++++++
drivers/gpu/drm/scheduler/sched_fence.c | 10 +++++++
drivers/gpu/drm/scheduler/sched_main.c | 3 ++
include/drm/drm_vblank.h | 1 +
include/linux/dma-fence.h | 17 +++++++++++
7 files changed, 137 insertions(+)
--
2.31.1
Am 27.07.21 um 17:37 schrieb Rob Clark:
> On Tue, Jul 27, 2021 at 8:19 AM Michel Dänzer <michel(a)daenzer.net> wrote:
>> On 2021-07-27 5:12 p.m., Rob Clark wrote:
>>> On Tue, Jul 27, 2021 at 7:50 AM Michel Dänzer <michel(a)daenzer.net> wrote:
>>>> On 2021-07-27 1:38 a.m., Rob Clark wrote:
>>>>> From: Rob Clark <robdclark(a)chromium.org>
>>>>>
>>>>> Based on discussion from a previous series[1] to add a "boost" mechanism
>>>>> when, for example, vblank deadlines are missed. Instead of a boost
>>>>> callback, this approach adds a way to set a deadline on the fence, by
>>>>> which the waiter would like to see the fence signalled.
>>>>>
>>>>> I've not yet had a chance to re-work the drm/msm part of this, but
>>>>> wanted to send this out as an RFC in case I don't have a chance to
>>>>> finish the drm/msm part this week.
>>>>>
>>>>> Original description:
>>>>>
>>>>> In some cases, like double-buffered rendering, missing vblanks can
>>>>> trick the GPU into running at a lower frequence, when really we
>>>>> want to be running at a higher frequency to not miss the vblanks
>>>>> in the first place.
>>>>>
>>>>> This is partially inspired by a trick i915 does, but implemented
>>>>> via dma-fence for a couple of reasons:
>>>>>
>>>>> 1) To continue to be able to use the atomic helpers
>>>>> 2) To support cases where display and gpu are different drivers
>>>>>
>>>>> [1] https://patchwork.freedesktop.org/series/90331/
>>>> Unfortunately, none of these approaches will have the full intended effect once Wayland compositors start waiting for client buffers to become idle before using them for an output frame (to prevent output frames from getting delayed by client work). See https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/1880 (shameless plug :) for a proof of concept of this for mutter. The boost will only affect the compositor's own GPU work, not the client work (which means no effect at all for fullscreen apps where the compositor can scan out the client buffers directly).
>>>>
>>> I guess you mean "no effect at all *except* for fullscreen..."?
>> I meant what I wrote: The compositor will wait for the next buffer to become idle, so there's no boost from this mechanism for the client drawing to that buffer. And since the compositor does no drawing of its own in this case, there's no boost from that either.
>>
>>
>>> I'd perhaps recommend that wayland compositors, in cases where only a
>>> single layer is changing, not try to be clever and just push the
>>> update down to the kernel.
>> Even just for the fullscreen direct scanout case, that would require some kind of atomic KMS API extension to allow queuing multiple page flips for the same CRTC.
>>
>> For other cases, this would also require a mechanism to cancel a pending atomic commit, for when another surface update comes in before the compositor's deadline, which affects the previously single updating surface as well.
>>
> Well, in the end, there is more than one compositor out there.. and if
> some wayland compositors are going this route, they can also implement
> the same mechanism in userspace using the sysfs that devfreq exports.
>
> But it sounds simpler to me for the compositor to have a sort of "game
> mode" for fullscreen games.. I'm less worried about UI interactive
> workloads, boosting the GPU freq upon sudden activity after a period
> of inactivity seems to work reasonably well there.
At least AMD hardware is already capable of flipping frames on GPU
events like finishing rendering (or uploading etc).
By waiting in userspace on the CPU before send the frame to the hardware
you are completely killing of such features.
For composing use cases that makes sense, but certainly not for full
screen applications as far as I can see.
Regards,
Christian.
>
> BR,
> -R
>
>> --
>> Earthling Michel Dänzer | https://redhat.com
>> Libre software enthusiast | Mesa and X developer
Instead of just a callback we can just glue in the gem helpers that
panfrost, v3d and lima currently use. There's really not that many
ways to skin this cat.
On the naming bikeshed: The idea for using _await_ to denote adding
dependencies to a job comes from i915, where that's used quite
extensively all over the place, in lots of datastructures.
v2/3: Rebased.
Reviewed-by: Steven Price <steven.price(a)arm.com> (v1)
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
Cc: David Airlie <airlied(a)linux.ie>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: Andrey Grodzovsky <andrey.grodzovsky(a)amd.com>
Cc: Lee Jones <lee.jones(a)linaro.org>
Cc: Nirmoy Das <nirmoy.aiemd(a)gmail.com>
Cc: Boris Brezillon <boris.brezillon(a)collabora.com>
Cc: Luben Tuikov <luben.tuikov(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Jack Zhang <Jack.Zhang1(a)amd.com>
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
---
drivers/gpu/drm/scheduler/sched_entity.c | 18 +++-
drivers/gpu/drm/scheduler/sched_main.c | 103 +++++++++++++++++++++++
include/drm/gpu_scheduler.h | 31 ++++++-
3 files changed, 146 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 89e3f6eaf519..381fbf462ea7 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -211,6 +211,19 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
job->sched->ops->free_job(job);
}
+static struct dma_fence *
+drm_sched_job_dependency(struct drm_sched_job *job,
+ struct drm_sched_entity *entity)
+{
+ if (!xa_empty(&job->dependencies))
+ return xa_erase(&job->dependencies, job->last_dependency++);
+
+ if (job->sched->ops->dependency)
+ return job->sched->ops->dependency(job, entity);
+
+ return NULL;
+}
+
/**
* drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed
*
@@ -229,7 +242,7 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
struct drm_sched_fence *s_fence = job->s_fence;
/* Wait for all dependencies to avoid data corruptions */
- while ((f = job->sched->ops->dependency(job, entity)))
+ while ((f = drm_sched_job_dependency(job, entity)))
dma_fence_wait(f, false);
drm_sched_fence_scheduled(s_fence);
@@ -419,7 +432,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
*/
struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
- struct drm_gpu_scheduler *sched = entity->rq->sched;
struct drm_sched_job *sched_job;
sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
@@ -427,7 +439,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
return NULL;
while ((entity->dependency =
- sched->ops->dependency(sched_job, entity))) {
+ drm_sched_job_dependency(sched_job, entity))) {
trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
if (drm_sched_entity_add_dependency_cb(entity))
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 454cb6164bdc..84c30badb78e 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -603,6 +603,8 @@ int drm_sched_job_init(struct drm_sched_job *job,
INIT_LIST_HEAD(&job->list);
+ xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);
+
return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);
@@ -637,6 +639,98 @@ void drm_sched_job_arm(struct drm_sched_job *job)
}
EXPORT_SYMBOL(drm_sched_job_arm);
+/**
+ * drm_sched_job_await_fence - adds the fence as a job dependency
+ * @job: scheduler job to add the dependencies to
+ * @fence: the dma_fence to add to the list of dependencies.
+ *
+ * Note that @fence is consumed in both the success and error cases.
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_sched_job_await_fence(struct drm_sched_job *job,
+ struct dma_fence *fence)
+{
+ struct dma_fence *entry;
+ unsigned long index;
+ u32 id = 0;
+ int ret;
+
+ if (!fence)
+ return 0;
+
+ /* Deduplicate if we already depend on a fence from the same context.
+ * This lets the size of the array of deps scale with the number of
+ * engines involved, rather than the number of BOs.
+ */
+ xa_for_each(&job->dependencies, index, entry) {
+ if (entry->context != fence->context)
+ continue;
+
+ if (dma_fence_is_later(fence, entry)) {
+ dma_fence_put(entry);
+ xa_store(&job->dependencies, index, fence, GFP_KERNEL);
+ } else {
+ dma_fence_put(fence);
+ }
+ return 0;
+ }
+
+ ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
+ if (ret != 0)
+ dma_fence_put(fence);
+
+ return ret;
+}
+EXPORT_SYMBOL(drm_sched_job_await_fence);
+
+/**
+ * drm_sched_job_await_implicit - adds implicit dependencies as job dependencies
+ * @job: scheduler job to add the dependencies to
+ * @obj: the gem object to add new dependencies from.
+ * @write: whether the job might write the object (so we need to depend on
+ * shared fences in the reservation object).
+ *
+ * This should be called after drm_gem_lock_reservations() on your array of
+ * GEM objects used in the job but before updating the reservations with your
+ * own fences.
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_sched_job_await_implicit(struct drm_sched_job *job,
+ struct drm_gem_object *obj,
+ bool write)
+{
+ int ret;
+ struct dma_fence **fences;
+ unsigned int i, fence_count;
+
+ if (!write) {
+ struct dma_fence *fence = dma_resv_get_excl_unlocked(obj->resv);
+
+ return drm_sched_job_await_fence(job, fence);
+ }
+
+ ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences);
+ if (ret || !fence_count)
+ return ret;
+
+ for (i = 0; i < fence_count; i++) {
+ ret = drm_sched_job_await_fence(job, fences[i]);
+ if (ret)
+ break;
+ }
+
+ for (; i < fence_count; i++)
+ dma_fence_put(fences[i]);
+ kfree(fences);
+ return ret;
+}
+EXPORT_SYMBOL(drm_sched_job_await_implicit);
+
+
/**
* drm_sched_job_cleanup - clean up scheduler job resources
* @job: scheduler job to clean up
@@ -652,6 +746,9 @@ EXPORT_SYMBOL(drm_sched_job_arm);
*/
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
+ struct dma_fence *fence;
+ unsigned long index;
+
if (kref_read(&job->s_fence->finished.refcount)) {
/* drm_sched_job_arm() has been called */
dma_fence_put(&job->s_fence->finished);
@@ -661,6 +758,12 @@ void drm_sched_job_cleanup(struct drm_sched_job *job)
}
job->s_fence = NULL;
+
+ xa_for_each(&job->dependencies, index, fence) {
+ dma_fence_put(fence);
+ }
+ xa_destroy(&job->dependencies);
+
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 83afc3aa8e2f..74fb321dbc44 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -27,9 +27,12 @@
#include <drm/spsc_queue.h>
#include <linux/dma-fence.h>
#include <linux/completion.h>
+#include <linux/xarray.h>
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
+struct drm_gem_object;
+
struct drm_gpu_scheduler;
struct drm_sched_rq;
@@ -198,6 +201,16 @@ struct drm_sched_job {
enum drm_sched_priority s_priority;
struct drm_sched_entity *entity;
struct dma_fence_cb cb;
+ /**
+ * @dependencies:
+ *
+ * Contains the dependencies as struct dma_fence for this job, see
+ * drm_sched_job_await_fence() and drm_sched_job_await_implicit().
+ */
+ struct xarray dependencies;
+
+ /** @last_dependency: tracks @dependencies as they signal */
+ unsigned long last_dependency;
};
static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
@@ -220,9 +233,14 @@ enum drm_gpu_sched_stat {
*/
struct drm_sched_backend_ops {
/**
- * @dependency: Called when the scheduler is considering scheduling
- * this job next, to get another struct dma_fence for this job to
- * block on. Once it returns NULL, run_job() may be called.
+ * @dependency:
+ *
+ * Called when the scheduler is considering scheduling this job next, to
+ * get another struct dma_fence for this job to block on. Once it
+ * returns NULL, run_job() may be called.
+ *
+ * If a driver exclusively uses drm_sched_job_await_fence() and
+ * drm_sched_job_await_implicit() this can be ommitted and left as NULL.
*/
struct dma_fence *(*dependency)(struct drm_sched_job *sched_job,
struct drm_sched_entity *s_entity);
@@ -349,6 +367,13 @@ int drm_sched_job_init(struct drm_sched_job *job,
struct drm_sched_entity *entity,
void *owner);
void drm_sched_job_arm(struct drm_sched_job *job);
+int drm_sched_job_await_fence(struct drm_sched_job *job,
+ struct dma_fence *fence);
+int drm_sched_job_await_implicit(struct drm_sched_job *job,
+ struct drm_gem_object *obj,
+ bool write);
+
+
void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
struct drm_gpu_scheduler **sched_list,
unsigned int num_sched_list);
--
2.32.0
From: Rob Clark <robdclark(a)chromium.org>
Conversion to gpu_scheduler, and bonus removal of
drm_gem_object_put_locked()
v2: Fix priority mixup (msm UAPI has lower numeric priority value as
higher priority, inverse of drm/scheduler) and add some comments
in the UAPI header to clarify.
Now that we move active refcnt get into msm_gem_submit, add a
patch to mark all bos busy before pinning, to avoid evicting bos
used in same batch.
Fix bo locking for cmdstream dumping ($debugfs/n/{rd,hangrd})
v3: Add a patch to drop submit bo_list and instead use -EALREADY
to detect errors with same obj appearing multiple times in the
submit ioctl bos table. Otherwise, with struct_mutex locking
dropped, we'd need to move insertion into and removal from
bo_list under the obj lock.
v4: One last small tweak, drop unused wait_queue_head_t in
msm_fence_context
Rob Clark (13):
drm/msm: Docs and misc cleanup
drm/msm: Small submitqueue creation cleanup
drm/msm: drop drm_gem_object_put_locked()
drm: Drop drm_gem_object_put_locked()
drm/msm/submit: Simplify out-fence-fd handling
drm/msm: Consolidate submit bo state
drm/msm: Track "seqno" fences by idr
drm/msm: Return ERR_PTR() from submit_create()
drm/msm: Conversion to drm scheduler
drm/msm: Drop submit bo_list
drm/msm: Drop struct_mutex in submit path
drm/msm: Utilize gpu scheduler priorities
drm/msm/gem: Mark active before pinning
drivers/gpu/drm/drm_gem.c | 22 --
drivers/gpu/drm/msm/Kconfig | 1 +
drivers/gpu/drm/msm/adreno/a5xx_debugfs.c | 4 +-
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 6 +-
drivers/gpu/drm/msm/adreno/a5xx_power.c | 2 +-
drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 7 +-
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 12 +-
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +-
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 4 +-
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 6 +-
drivers/gpu/drm/msm/msm_drv.c | 30 +-
drivers/gpu/drm/msm/msm_fence.c | 42 ---
drivers/gpu/drm/msm/msm_fence.h | 3 -
drivers/gpu/drm/msm/msm_gem.c | 94 +-----
drivers/gpu/drm/msm/msm_gem.h | 47 +--
drivers/gpu/drm/msm/msm_gem_submit.c | 344 ++++++++++++--------
drivers/gpu/drm/msm/msm_gpu.c | 46 +--
drivers/gpu/drm/msm/msm_gpu.h | 78 ++++-
drivers/gpu/drm/msm/msm_rd.c | 6 +-
drivers/gpu/drm/msm/msm_ringbuffer.c | 70 +++-
drivers/gpu/drm/msm/msm_ringbuffer.h | 12 +
drivers/gpu/drm/msm/msm_submitqueue.c | 53 ++-
include/drm/drm_gem.h | 2 -
include/uapi/drm/msm_drm.h | 14 +-
24 files changed, 516 insertions(+), 391 deletions(-)
--
2.31.1
From: Rob Clark <robdclark(a)chromium.org>
Conversion to gpu_scheduler, and bonus removal of
drm_gem_object_put_locked()
v2: Fix priority mixup (msm UAPI has lower numeric priority value as
higher priority, inverse of drm/scheduler) and add some comments
in the UAPI header to clarify.
Now that we move active refcnt get into msm_gem_submit, add a
patch to mark all bos busy before pinning, to avoid evicting bos
used in same batch.
Fix bo locking for cmdstream dumping ($debugfs/n/{rd,hangrd})
v3: Add a patch to drop submit bo_list and instead use -EALREADY
to detect errors with same obj appearing multiple times in the
submit ioctl bos table. Otherwise, with struct_mutex locking
dropped, we'd need to move insertion into and removal from
bo_list under the obj lock.
Rob Clark (13):
drm/msm: Docs and misc cleanup
drm/msm: Small submitqueue creation cleanup
drm/msm: drop drm_gem_object_put_locked()
drm: Drop drm_gem_object_put_locked()
drm/msm/submit: Simplify out-fence-fd handling
drm/msm: Consolidate submit bo state
drm/msm: Track "seqno" fences by idr
drm/msm: Return ERR_PTR() from submit_create()
drm/msm: Conversion to drm scheduler
drm/msm: Drop submit bo_list
drm/msm: Drop struct_mutex in submit path
drm/msm: Utilize gpu scheduler priorities
drm/msm/gem: Mark active before pinning
drivers/gpu/drm/drm_gem.c | 22 --
drivers/gpu/drm/msm/Kconfig | 1 +
drivers/gpu/drm/msm/adreno/a5xx_debugfs.c | 4 +-
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 6 +-
drivers/gpu/drm/msm/adreno/a5xx_power.c | 2 +-
drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 7 +-
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 12 +-
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +-
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 4 +-
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 6 +-
drivers/gpu/drm/msm/msm_drv.c | 30 +-
drivers/gpu/drm/msm/msm_fence.c | 39 ---
drivers/gpu/drm/msm/msm_fence.h | 2 -
drivers/gpu/drm/msm/msm_gem.c | 94 +-----
drivers/gpu/drm/msm/msm_gem.h | 47 +--
drivers/gpu/drm/msm/msm_gem_submit.c | 344 ++++++++++++--------
drivers/gpu/drm/msm/msm_gpu.c | 46 +--
drivers/gpu/drm/msm/msm_gpu.h | 78 ++++-
drivers/gpu/drm/msm/msm_rd.c | 6 +-
drivers/gpu/drm/msm/msm_ringbuffer.c | 70 +++-
drivers/gpu/drm/msm/msm_ringbuffer.h | 12 +
drivers/gpu/drm/msm/msm_submitqueue.c | 53 ++-
include/drm/drm_gem.h | 2 -
include/uapi/drm/msm_drm.h | 14 +-
24 files changed, 516 insertions(+), 387 deletions(-)
--
2.31.1