Hi guys,
we are currently working on FreeSync and direct scan-out from system
memory on AMD APUs in A+A laptops.
One problem we stumbled over is that our display hardware needs to scan
out from uncached system memory, and we currently don't have a way to
communicate that through DMA-buf.
For the specific use case at hand we are going to implement something
driver-specific, but the question is: should we have something more
generic for this?
After all, the system memory access pattern is a PCIe-level property and
as such something generic.
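To make the question concrete, here is a purely hypothetical sketch of what
such a generic hint could look like; none of these names exist in the
current dma-buf API, they are only meant to illustrate the kind of
information we would need to communicate:

/*
 * Hypothetical only: a way for the importer (the display hardware in our
 * case) to tell the exporter that it will scan out directly from uncached
 * system memory, so the exporter can place and flush the backing storage
 * accordingly.  Neither the enum nor the struct exist today.
 */
enum dma_buf_access_hint {
	DMA_BUF_ACCESS_CACHED,		/* default: normal cached access */
	DMA_BUF_ACCESS_UNCACHED,	/* importer reads uncached system memory */
};

struct dma_buf_attach_hints {
	enum dma_buf_access_hint access;	/* requested access pattern */
};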
Regards,
Christian.
On Wed, 30 Jun 2021 04:28:39 -0700
Joe Perches <joe(a)perches.com> wrote:
> On Sat, 2021-06-12 at 08:42 -0700, Joe Perches wrote:
> > The __assign_str macro has an unusual ending semicolon but the vast
> > majority of uses of the macro already have semicolon termination.
>
> ping?
>
I wasn't sure I was the one to take this. I can, though, since I can run
tests on it as well. I have some last-minute fixes that were sent to me
for something else, and I can apply this along with them.
-- Steve
Instead of just a callback, we can glue in the GEM helpers that
panfrost, v3d and lima currently use. There really aren't that many
ways to skin this cat.
On the naming bikeshed: the idea of using _await_ to denote adding
dependencies to a job comes from i915, where it is used quite
extensively all over the place, in lots of data structures.
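As a rough usage sketch (the foo_* names are invented for illustration and
not taken from any driver), a driver that collects its dependencies at
submit time instead of implementing the ->dependency callback would do
something like this:

static int foo_job_add_deps(struct drm_sched_job *job,
			    struct dma_fence *in_fence,
			    struct drm_gem_object **bos,
			    unsigned int bo_count, bool write)
{
	unsigned int i;
	int ret;

	/* Explicit sync: the helper consumes the fence reference. */
	ret = drm_sched_job_await_fence(job, in_fence);
	if (ret)
		return ret;

	/*
	 * Implicit sync: pull the fences out of each BO's reservation
	 * object.  The reservations must already be locked, e.g. with
	 * drm_gem_lock_reservations().
	 */
	for (i = 0; i < bo_count; i++) {
		ret = drm_sched_job_await_implicit(job, bos[i], write);
		if (ret)
			return ret;
	}

	/* ...then arm and push the job as before. */
	return 0;
}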
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
Cc: David Airlie <airlied(a)linux.ie>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: Andrey Grodzovsky <andrey.grodzovsky(a)amd.com>
Cc: Lee Jones <lee.jones(a)linaro.org>
Cc: Nirmoy Das <nirmoy.aiemd(a)gmail.com>
Cc: Boris Brezillon <boris.brezillon(a)collabora.com>
Cc: Luben Tuikov <luben.tuikov(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Jack Zhang <Jack.Zhang1(a)amd.com>
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
---
drivers/gpu/drm/scheduler/sched_entity.c | 18 +++-
drivers/gpu/drm/scheduler/sched_main.c | 103 +++++++++++++++++++++++
include/drm/gpu_scheduler.h | 31 ++++++-
3 files changed, 146 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index f7347c284886..b6f72fafd504 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -211,6 +211,19 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
job->sched->ops->free_job(job);
}
+static struct dma_fence *
+drm_sched_job_dependency(struct drm_sched_job *job,
+ struct drm_sched_entity *entity)
+{
+ if (!xa_empty(&job->dependencies))
+ return xa_erase(&job->dependencies, job->last_dependency++);
+
+ if (job->sched->ops->dependency)
+ return job->sched->ops->dependency(job, entity);
+
+ return NULL;
+}
+
/**
* drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed
*
@@ -229,7 +242,7 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
struct drm_sched_fence *s_fence = job->s_fence;
/* Wait for all dependencies to avoid data corruptions */
- while ((f = job->sched->ops->dependency(job, entity)))
+ while ((f = drm_sched_job_dependency(job, entity)))
dma_fence_wait(f, false);
drm_sched_fence_scheduled(s_fence);
@@ -419,7 +432,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
*/
struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
- struct drm_gpu_scheduler *sched = entity->rq->sched;
struct drm_sched_job *sched_job;
sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
@@ -427,7 +439,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
return NULL;
while ((entity->dependency =
- sched->ops->dependency(sched_job, entity))) {
+ drm_sched_job_dependency(sched_job, entity))) {
trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
if (drm_sched_entity_add_dependency_cb(entity))
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 70eefed17e06..370c336d383f 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -603,6 +603,8 @@ int drm_sched_job_init(struct drm_sched_job *job,
INIT_LIST_HEAD(&job->list);
+ xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);
+
return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);
@@ -626,6 +628,98 @@ void drm_sched_job_arm(struct drm_sched_job *job)
}
EXPORT_SYMBOL(drm_sched_job_arm);
+/**
+ * drm_sched_job_await_fence - adds the fence as a job dependency
+ * @job: scheduler job to add the dependencies to
+ * @fence: the dma_fence to add to the list of dependencies.
+ *
+ * Note that @fence is consumed in both the success and error cases.
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_sched_job_await_fence(struct drm_sched_job *job,
+ struct dma_fence *fence)
+{
+ struct dma_fence *entry;
+ unsigned long index;
+ u32 id = 0;
+ int ret;
+
+ if (!fence)
+ return 0;
+
+ /* Deduplicate if we already depend on a fence from the same context.
+ * This lets the size of the array of deps scale with the number of
+ * engines involved, rather than the number of BOs.
+ */
+ xa_for_each(&job->dependencies, index, entry) {
+ if (entry->context != fence->context)
+ continue;
+
+ if (dma_fence_is_later(fence, entry)) {
+ dma_fence_put(entry);
+ xa_store(&job->dependencies, index, fence, GFP_KERNEL);
+ } else {
+ dma_fence_put(fence);
+ }
+ return 0;
+ }
+
+ ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
+ if (ret != 0)
+ dma_fence_put(fence);
+
+ return ret;
+}
+EXPORT_SYMBOL(drm_sched_job_await_fence);
+
+/**
+ * drm_sched_job_await_implicit - adds implicit dependencies as job dependencies
+ * @job: scheduler job to add the dependencies to
+ * @obj: the gem object to add new dependencies from.
+ * @write: whether the job might write the object (so we need to depend on
+ * shared fences in the reservation object).
+ *
+ * This should be called after drm_gem_lock_reservations() on your array of
+ * GEM objects used in the job but before updating the reservations with your
+ * own fences.
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_sched_job_await_implicit(struct drm_sched_job *job,
+ struct drm_gem_object *obj,
+ bool write)
+{
+ int ret;
+ struct dma_fence **fences;
+ unsigned int i, fence_count;
+
+ if (!write) {
+ struct dma_fence *fence = dma_resv_get_excl_unlocked(obj->resv);
+
+ return drm_sched_job_await_fence(job, fence);
+ }
+
+ ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences);
+ if (ret || !fence_count)
+ return ret;
+
+ for (i = 0; i < fence_count; i++) {
+ ret = drm_sched_job_await_fence(job, fences[i]);
+ if (ret)
+ break;
+ }
+
+ for (; i < fence_count; i++)
+ dma_fence_put(fences[i]);
+ kfree(fences);
+ return ret;
+}
+EXPORT_SYMBOL(drm_sched_job_await_implicit);
+
+
/**
* drm_sched_job_cleanup - clean up scheduler job resources
*
@@ -633,8 +727,17 @@ EXPORT_SYMBOL(drm_sched_job_arm);
*/
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
+ struct dma_fence *fence;
+ unsigned long index;
+
dma_fence_put(&job->s_fence->finished);
job->s_fence = NULL;
+
+ xa_for_each(&job->dependencies, index, fence) {
+ dma_fence_put(fence);
+ }
+ xa_destroy(&job->dependencies);
+
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 80438d126c9d..e4d7e1496296 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -27,9 +27,12 @@
#include <drm/spsc_queue.h>
#include <linux/dma-fence.h>
#include <linux/completion.h>
+#include <linux/xarray.h>
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
+struct drm_gem_object;
+
struct drm_gpu_scheduler;
struct drm_sched_rq;
@@ -198,6 +201,16 @@ struct drm_sched_job {
enum drm_sched_priority s_priority;
struct drm_sched_entity *entity;
struct dma_fence_cb cb;
+ /**
+ * @dependencies:
+ *
+ * Contains the dependencies as struct dma_fence for this job, see
+ * drm_sched_job_await_fence() and drm_sched_job_await_implicit().
+ */
+ struct xarray dependencies;
+
+ /** @last_dependency: tracks @dependencies as they signal */
+ unsigned long last_dependency;
};
static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
@@ -220,9 +233,14 @@ enum drm_gpu_sched_stat {
*/
struct drm_sched_backend_ops {
/**
- * @dependency: Called when the scheduler is considering scheduling
- * this job next, to get another struct dma_fence for this job to
- * block on. Once it returns NULL, run_job() may be called.
+ * @dependency:
+ *
+ * Called when the scheduler is considering scheduling this job next, to
+ * get another struct dma_fence for this job to block on. Once it
+ * returns NULL, run_job() may be called.
+ *
+ * If a driver exclusively uses drm_sched_job_await_fence() and
+ * drm_sched_job_await_implicit(), this can be omitted and left as NULL.
*/
struct dma_fence *(*dependency)(struct drm_sched_job *sched_job,
struct drm_sched_entity *s_entity);
@@ -314,6 +332,13 @@ int drm_sched_job_init(struct drm_sched_job *job,
struct drm_sched_entity *entity,
void *owner);
void drm_sched_job_arm(struct drm_sched_job *job);
+int drm_sched_job_await_fence(struct drm_sched_job *job,
+ struct dma_fence *fence);
+int drm_sched_job_await_implicit(struct drm_sched_job *job,
+ struct drm_gem_object *obj,
+ bool write);
+
+
void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
struct drm_gpu_scheduler **sched_list,
unsigned int num_sched_list);
--
2.32.0.rc2
These helpers are now integrated into the scheduler and all users have
been converted over.
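For reference, a rough sketch of what the conversion in the callers looks
like (the job structure fields are made up here; the real drivers differ
in detail), matching the helpers added earlier in the series:

	/* Before: drivers kept their own xarray of dependencies. */
	ret = drm_gem_fence_array_add_implicit(&job->deps, bo, true);

	/* After: the dependencies live in the drm_sched_job itself. */
	ret = drm_sched_job_await_implicit(&job->base, bo, true);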
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Maxime Ripard <mripard(a)kernel.org>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: David Airlie <airlied(a)linux.ie>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
---
drivers/gpu/drm/drm_gem.c | 96 ---------------------------------------
include/drm/drm_gem.h | 5 --
2 files changed, 101 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 68deb1de8235..24d49a2636e0 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1294,99 +1294,3 @@ drm_gem_unlock_reservations(struct drm_gem_object **objs, int count,
ww_acquire_fini(acquire_ctx);
}
EXPORT_SYMBOL(drm_gem_unlock_reservations);
-
-/**
- * drm_gem_fence_array_add - Adds the fence to an array of fences to be
- * waited on, deduplicating fences from the same context.
- *
- * @fence_array: array of dma_fence * for the job to block on.
- * @fence: the dma_fence to add to the list of dependencies.
- *
- * This functions consumes the reference for @fence both on success and error
- * cases.
- *
- * Returns:
- * 0 on success, or an error on failing to expand the array.
- */
-int drm_gem_fence_array_add(struct xarray *fence_array,
- struct dma_fence *fence)
-{
- struct dma_fence *entry;
- unsigned long index;
- u32 id = 0;
- int ret;
-
- if (!fence)
- return 0;
-
- /* Deduplicate if we already depend on a fence from the same context.
- * This lets the size of the array of deps scale with the number of
- * engines involved, rather than the number of BOs.
- */
- xa_for_each(fence_array, index, entry) {
- if (entry->context != fence->context)
- continue;
-
- if (dma_fence_is_later(fence, entry)) {
- dma_fence_put(entry);
- xa_store(fence_array, index, fence, GFP_KERNEL);
- } else {
- dma_fence_put(fence);
- }
- return 0;
- }
-
- ret = xa_alloc(fence_array, &id, fence, xa_limit_32b, GFP_KERNEL);
- if (ret != 0)
- dma_fence_put(fence);
-
- return ret;
-}
-EXPORT_SYMBOL(drm_gem_fence_array_add);
-
-/**
- * drm_gem_fence_array_add_implicit - Adds the implicit dependencies tracked
- * in the GEM object's reservation object to an array of dma_fences for use in
- * scheduling a rendering job.
- *
- * This should be called after drm_gem_lock_reservations() on your array of
- * GEM objects used in the job but before updating the reservations with your
- * own fences.
- *
- * @fence_array: array of dma_fence * for the job to block on.
- * @obj: the gem object to add new dependencies from.
- * @write: whether the job might write the object (so we need to depend on
- * shared fences in the reservation object).
- */
-int drm_gem_fence_array_add_implicit(struct xarray *fence_array,
- struct drm_gem_object *obj,
- bool write)
-{
- int ret;
- struct dma_fence **fences;
- unsigned int i, fence_count;
-
- if (!write) {
- struct dma_fence *fence =
- dma_resv_get_excl_unlocked(obj->resv);
-
- return drm_gem_fence_array_add(fence_array, fence);
- }
-
- ret = dma_resv_get_fences(obj->resv, NULL,
- &fence_count, &fences);
- if (ret || !fence_count)
- return ret;
-
- for (i = 0; i < fence_count; i++) {
- ret = drm_gem_fence_array_add(fence_array, fences[i]);
- if (ret)
- break;
- }
-
- for (; i < fence_count; i++)
- dma_fence_put(fences[i]);
- kfree(fences);
- return ret;
-}
-EXPORT_SYMBOL(drm_gem_fence_array_add_implicit);
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 240049566592..6d5e33b89074 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -409,11 +409,6 @@ int drm_gem_lock_reservations(struct drm_gem_object **objs, int count,
struct ww_acquire_ctx *acquire_ctx);
void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count,
struct ww_acquire_ctx *acquire_ctx);
-int drm_gem_fence_array_add(struct xarray *fence_array,
- struct dma_fence *fence);
-int drm_gem_fence_array_add_implicit(struct xarray *fence_array,
- struct drm_gem_object *obj,
- bool write);
int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
u32 handle, u64 *offset);
--
2.32.0.rc2
On Wed, Jun 23, 2021 at 10:00:29PM +0300, Oded Gabbay wrote:
> I understand the argument and I agree that for the generic case, the
> top of the stack can't assume anything.
> Having said that, in this case the SGL is encapsulated inside a dma-buf object.
But the scatterlist is defined to have a valid page. If you can't
guarantee that for dma-bufs, then dma-bufs are completely broken.
Apparently the GPU folks can somehow live with that and deal with the
pitfalls, but for dma-buf users outside of their little fiefdom, where
they arbitrarily break the rules, it simply is not acceptable.
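To illustrate the point (this snippet is not from the thread, just a
generic sketch of an sg_table consumer), code outside the GPU drivers
typically assumes that every scatterlist entry carries a valid struct page:

#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/minmax.h>
#include <linux/string.h>

/* Generic consumer: crashes if an entry has no backing struct page. */
static void copy_from_sgt(struct sg_table *sgt, char *dst, size_t len)
{
	struct scatterlist *sg;
	size_t copied = 0;
	int i;

	for_each_sgtable_sg(sgt, sg, i) {
		size_t n = min_t(size_t, sg->length, len - copied);
		void *vaddr = kmap_local_page(sg_page(sg)); /* NULL page -> oops */

		memcpy(dst + copied, vaddr, n);
		kunmap_local(vaddr);
		copied += n;
		if (copied >= len)
			break;
	}
}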