Consider the following call sequence:
/* Upper layer */
dma_fence_begin_signalling();
lock(tainted_shared_lock);
/* Driver callback */
dma_fence_begin_signalling();
...
Here the driver might use a utility that is annotated as intended for the
dma-fence signalling critical path. Now, if the upper layer isn't correctly
annotated yet for whatever reason, we end up with
/* Upper layer */
lock(tainted_shared_lock);
/* Driver callback */
dma_fence_begin_signalling();
We will receive a false lockdep locking-order violation report from
dma_fence_begin_signalling(). However, entering a dma-fence signalling
critical section does not itself block and cannot cause a deadlock.
So instead, use a successful read_trylock() annotation for
dma_fence_begin_signalling(). That makes sure the locking order is
correctly registered in the first case, and that no locking order is
registered at all in the second case.
The alternative is of course to make sure that the "Upper layer" is always
correctly annotated. But experience shows that's not easily achievable
in all cases.
Signed-off-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
---
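For reference, the lockdep primitive being tweaked below; the only change
is flipping the trylock argument from 0 to 1 (arguments spelled out here
purely for illustration, this is not part of the patch):

  /* From include/linux/lockdep.h: */
  void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
                    int trylock, int read, int check,
                    struct lockdep_map *nest_lock, unsigned long ip);

  /*
   * Before: a plain non-recursive read lock. Lockdep validates the
   * ordering against all currently-held locks at acquire time, which
   * is what produces the false positive in the second scenario above.
   */
  lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_);

  /*
   * After: a successful read_trylock. The map is still pushed onto the
   * held-lock stack, so locks taken inside the critical section are
   * still checked against it, but no ordering is validated or recorded
   * for this acquisition itself, since a trylock cannot block.
   */
  lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _RET_IP_);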
drivers/dma-buf/dma-fence.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index f177c56269bb..17f632768ef9 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -308,8 +308,8 @@ bool dma_fence_begin_signalling(void)
if (in_atomic())
return true;
- /* ... and non-recursive readlock */
- lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_);
+ /* ... and non-recursive successful read_trylock */
+ lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _RET_IP_);
return false;
}
@@ -340,7 +340,7 @@ void __dma_fence_might_wait(void)
lock_map_acquire(&dma_fence_lockdep_map);
lock_map_release(&dma_fence_lockdep_map);
if (tmp)
- lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _THIS_IP_);
+ lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _THIS_IP_);
}
#endif
--
2.39.2
From: Rob Clark <robdclark(a)chromium.org>
Container fences have burner contexts, which makes the trick of storing at
most one fence per context somewhat useless if we don't unwrap array or
chain fences.
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
---
tbh, I'm not sure why we weren't doing this already, unless there is
something I'm overlooking
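For context, dma_fence_unwrap_for_each() from <linux/dma-fence-unwrap.h>
walks all leaf fences behind a possibly-nested fence, looking through
dma_fence_array and dma_fence_chain containers. A rough illustration of
the iteration (the helper name and its use are mine, not part of the
patch):

  #include <linux/dma-fence-unwrap.h>

  /* Check whether every leaf fence behind @fence belongs to @context.
   * Containers themselves are skipped; only their contents are visited.
   */
  static bool all_leaves_share_context(struct dma_fence *fence, u64 context)
  {
          struct dma_fence_unwrap iter;
          struct dma_fence *f;

          dma_fence_unwrap_for_each(f, &iter, fence) {
                  if (f->context != context)
                          return false;
          }
          return true;
  }

With each container fence carrying its own burner context, the
per-context dedup in the dependency array never matches unless we iterate
the leaves like this.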
drivers/gpu/drm/scheduler/sched_main.c | 43 +++++++++++++++++---------
1 file changed, 28 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index c2ee44d6224b..f59e5335afbb 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -41,20 +41,21 @@
* 4. Entities themselves maintain a queue of jobs that will be scheduled on
* the hardware.
*
* The jobs in a entity are always scheduled in the order that they were pushed.
*/
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
+#include <linux/dma-fence-unwrap.h>
#include <linux/dma-resv.h>
#include <uapi/linux/sched/types.h>
#include <drm/drm_print.h>
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>
#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"
@@ -665,41 +666,27 @@ void drm_sched_job_arm(struct drm_sched_job *job)
sched = entity->rq->sched;
job->sched = sched;
job->s_priority = entity->rq - sched->sched_rq;
job->id = atomic64_inc_return(&sched->job_id_count);
drm_sched_fence_init(job->s_fence, job->entity);
}
EXPORT_SYMBOL(drm_sched_job_arm);
-/**
- * drm_sched_job_add_dependency - adds the fence as a job dependency
- * @job: scheduler job to add the dependencies to
- * @fence: the dma_fence to add to the list of dependencies.
- *
- * Note that @fence is consumed in both the success and error cases.
- *
- * Returns:
- * 0 on success, or an error on failing to expand the array.
- */
-int drm_sched_job_add_dependency(struct drm_sched_job *job,
- struct dma_fence *fence)
+static int _add_dependency(struct drm_sched_job *job, struct dma_fence *fence)
{
struct dma_fence *entry;
unsigned long index;
u32 id = 0;
int ret;
- if (!fence)
- return 0;
-
/* Deduplicate if we already depend on a fence from the same context.
* This lets the size of the array of deps scale with the number of
* engines involved, rather than the number of BOs.
*/
xa_for_each(&job->dependencies, index, entry) {
if (entry->context != fence->context)
continue;
if (dma_fence_is_later(fence, entry)) {
dma_fence_put(entry);
@@ -709,20 +696,46 @@ int drm_sched_job_add_dependency(struct drm_sched_job *job,
}
return 0;
}
ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
if (ret != 0)
dma_fence_put(fence);
return ret;
}
+
+/**
+ * drm_sched_job_add_dependency - adds the fence as a job dependency
+ * @job: scheduler job to add the dependencies to
+ * @fence: the dma_fence to add to the list of dependencies.
+ *
+ * Note that @fence is consumed in both the success and error cases.
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_sched_job_add_dependency(struct drm_sched_job *job,
+ struct dma_fence *fence)
+{
+ struct dma_fence_unwrap iter;
+ struct dma_fence *f;
+ int ret = 0;
+
+ dma_fence_unwrap_for_each (f, &iter, fence) {
+ ret = _add_dependency(job, f);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
EXPORT_SYMBOL(drm_sched_job_add_dependency);
/**
* drm_sched_job_add_resv_dependencies - add all fences from the resv to the job
* @job: scheduler job to add the dependencies to
* @resv: the dma_resv object to get the fences from
* @usage: the dma_resv_usage to use to filter the fences
*
* This adds all fences matching the given usage from @resv to @job.
* Must be called with the @resv lock held.
--
2.39.2
Hi Jonathan,
Here's the v3 of my patchset that introduces a new interface based on
DMABUF objects to complement the fileio API, and adds write() support to
the existing fileio API.
It changed quite a lot since V2; the IIO subsystem is now just a DMABUF
importer, and all the complexity related to handling creation, deletion
and export of DMABUFs (including DMA mapping etc.) is gone.
This new interface will be used by Libiio. The code is ready[1] and will
be merged to the main branch as soon as the kernel bits are accepted
upstream.
Note that Libiio (and its server counterpart, iiod) use this new
interface in two different ways:
- by memory-mapping the DMABUFs to access the sample data directly,
  which is much faster than using the existing fileio API as the sample
  data does not need to be copied (a rough sketch of this mode follows
  below);
- by passing the DMABUFs around directly to the USB stack, in a
  device-to-device zero-copy fashion, using a new DMABUF interface for
  the USB (FunctionFS to be exact) stack, which is being upstreamed in
  parallel with this patchset [2].
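Here is the sketch mentioned above for the mmap mode. It only uses the
standard DMABUF character-device uAPI (DMA_BUF_IOCTL_SYNC from
<linux/dma-buf.h>); how the DMABUF fd is attached to the IIO buffer and
enqueued goes through the new ioctls of this series and is omitted here:

  #include <linux/dma-buf.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>

  /* Read the first sample word of a DMABUF that the hardware has
   * finished filling, bracketing the CPU access with sync ioctls.
   */
  static int dump_first_sample(int dmabuf_fd, size_t len)
  {
          struct dma_buf_sync sync = {
                  .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ,
          };
          uint32_t *samples;

          samples = mmap(NULL, len, PROT_READ, MAP_SHARED, dmabuf_fd, 0);
          if (samples == MAP_FAILED)
                  return -1;

          ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
          printf("first sample word: 0x%08x\n", samples[0]);

          sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ;
          ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

          munmap(samples, len);
          return 0;
  }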
As for write() support, Nuno (Cc'd) said he will work on upstreaming the
DAC counterpart of adc/adi-axi-adc.c in the next few weeks, so there
will be a user for the buffer write() support. I hope you are okay with
this - otherwise, we can just wait until that work is done; either way,
I benefit from sending this patchset early to get feedback.
Finally, the dmaengine implementation for this new interface requires a
new dmaengine API function, since dmaengine_prep_slave_sg() always
transfers the full scatterlist, while we want to be able to transfer an
arbitrary number of bytes from/to the DMABUF. Since scatterlists seem to
be going away soon, the new API function will take an array of DMA
addresses + lengths. I am open to suggestions if anybody (especially
Vinod) has a better design in mind.
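For illustration, the prototype could look roughly like this (a sketch
only; the exact naming and argument types are exactly what is open for
discussion):

  struct dma_async_tx_descriptor *
  dmaengine_prep_slave_dma_array(struct dma_chan *chan,
                                 dma_addr_t *addrs, size_t *lengths,
                                 size_t nents,
                                 enum dma_transfer_direction direction,
                                 unsigned long flags);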
Cheers,
-Paul
[1]: https://github.com/analogdevicesinc/libiio/pull/928
[2]: https://lore.kernel.org/linux-usb/425c1b8ea20002c6344a574cd094b4c715c67ba6.…
---
Changelog:
* Patches 01-02 are new;
* Patches [03/11], [05/11] didn't change;
* Patch [04/11]:
- Reorganize arguments to iio_dma_buffer_io()
- Change 'is_write' argument to 'is_from_user'
- Change (__force char *) to (__force __user char *), in
iio_dma_buffer_write(), since we only want to drop the "const".
* Patch [07/11]:
- Get rid of the old IOCTLs. The IIO subsystem does not create or
manage DMABUFs anymore, and only attaches/detaches externally
created DMABUFs.
- Add IIO_BUFFER_DMABUF_CYCLIC to the supported flags.
* Patch [09/11]:
Update code to provide the functions that will be used as callbacks
for the new IOCTLs.
* Patch [10/11]:
Use the new dmaengine_prep_slave_dma_array(), and adapt the code to
work with the new functions introduced in industrialio-buffer-dma.c.
* Patch [11/11]:
Update the documentation to reflect the new API.
---
Alexandru Ardelean (1):
iio: buffer-dma: split iio_dma_buffer_fileio_free() function
Paul Cercueil (10):
dmaengine: Add API function dmaengine_prep_slave_dma_array()
dmaengine: dma-axi-dmac: Implement device_prep_slave_dma_array
iio: buffer-dma: Get rid of outgoing queue
iio: buffer-dma: Enable buffer write support
iio: buffer-dmaengine: Support specifying buffer direction
iio: buffer-dmaengine: Enable write support
iio: core: Add new DMABUF interface infrastructure
iio: buffer-dma: Enable support for DMABUFs
iio: buffer-dmaengine: Support new DMABUF based userspace API
Documentation: iio: Document high-speed DMABUF based API
Documentation/iio/dmabuf_api.rst | 59 +++
Documentation/iio/index.rst | 2 +
drivers/dma/dma-axi-dmac.c | 41 ++
drivers/iio/adc/adi-axi-adc.c | 3 +-
drivers/iio/buffer/industrialio-buffer-dma.c | 331 +++++++++++---
.../buffer/industrialio-buffer-dmaengine.c | 77 +++-
drivers/iio/industrialio-buffer.c | 402 ++++++++++++++++++
include/linux/dmaengine.h | 16 +
include/linux/iio/buffer-dma.h | 40 +-
include/linux/iio/buffer-dmaengine.h | 5 +-
include/linux/iio/buffer_impl.h | 22 +
include/uapi/linux/iio/buffer.h | 22 +
12 files changed, 939 insertions(+), 81 deletions(-)
create mode 100644 Documentation/iio/dmabuf_api.rst
--
2.39.2
The type of size is unsigned int. If size is 0x40000000, then
size *= sizeof(uint32_t) will overflow and size will become zero, which
will cause uninitialized memory to be referenced later.
Signed-off-by: hackyzh002 <hackyzh002(a)gmail.com>
---
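A minimal userspace demonstration of the truncation (illustrative only;
assumes the usual 32-bit unsigned int):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          unsigned int size = 0x40000000;

          /* 0x40000000 * 4 == 0x100000000, which wraps to 0 in 32 bits */
          size *= sizeof(uint32_t);
          printf("size = %u\n", size); /* prints 0 */
          return 0;
  }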
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 08eced097..89bcacc65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -192,7 +192,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
uint64_t *chunk_array_user;
uint64_t *chunk_array;
uint32_t uf_offset = 0;
- unsigned int size;
+ size_t size;
int ret;
int i;
--
2.34.1
The type of size is unsigned. If size is 0x40000000, then
size *= sizeof(uint32_t) will overflow and size will become zero, which
will cause uninitialized memory to be referenced later.
Signed-off-by: hackyzh002 <hackyzh002(a)gmail.com>
---
drivers/gpu/drm/radeon/radeon_cs.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 46a27ebf4..a6700d727 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -270,7 +270,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
struct drm_radeon_cs *cs = data;
uint64_t *chunk_array_ptr;
- unsigned size, i;
+ u64 size;
+ unsigned i;
u32 ring = RADEON_CS_RING_GFX;
s32 priority = 0;
--
2.34.1