From: Rob Clark <robdclark(a)chromium.org>
Container fences have burner contexts, which makes the trick to store at
most one fence per context somewhat useless if we don't unwrap array or
chain fences.
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
---
tbh, I'm not sure why we weren't doing this already, unless there is
something I'm overlooking
drivers/gpu/drm/scheduler/sched_main.c | 43 +++++++++++++++++---------
1 file changed, 28 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index c2ee44d6224b..f59e5335afbb 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -41,20 +41,21 @@
* 4. Entities themselves maintain a queue of jobs that will be scheduled on
* the hardware.
*
* The jobs in a entity are always scheduled in the order that they were pushed.
*/
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
+#include <linux/dma-fence-unwrap.h>
#include <linux/dma-resv.h>
#include <uapi/linux/sched/types.h>
#include <drm/drm_print.h>
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>
#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"
@@ -665,41 +666,27 @@ void drm_sched_job_arm(struct drm_sched_job *job)
sched = entity->rq->sched;
job->sched = sched;
job->s_priority = entity->rq - sched->sched_rq;
job->id = atomic64_inc_return(&sched->job_id_count);
drm_sched_fence_init(job->s_fence, job->entity);
}
EXPORT_SYMBOL(drm_sched_job_arm);
-/**
- * drm_sched_job_add_dependency - adds the fence as a job dependency
- * @job: scheduler job to add the dependencies to
- * @fence: the dma_fence to add to the list of dependencies.
- *
- * Note that @fence is consumed in both the success and error cases.
- *
- * Returns:
- * 0 on success, or an error on failing to expand the array.
- */
-int drm_sched_job_add_dependency(struct drm_sched_job *job,
- struct dma_fence *fence)
+static int _add_dependency(struct drm_sched_job *job, struct dma_fence *fence)
{
struct dma_fence *entry;
unsigned long index;
u32 id = 0;
int ret;
- if (!fence)
- return 0;
-
/* Deduplicate if we already depend on a fence from the same context.
* This lets the size of the array of deps scale with the number of
* engines involved, rather than the number of BOs.
*/
xa_for_each(&job->dependencies, index, entry) {
if (entry->context != fence->context)
continue;
if (dma_fence_is_later(fence, entry)) {
dma_fence_put(entry);
@@ -709,20 +696,46 @@ int drm_sched_job_add_dependency(struct drm_sched_job *job,
}
return 0;
}
ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
if (ret != 0)
dma_fence_put(fence);
return ret;
}
+
+/**
+ * drm_sched_job_add_dependency - adds the fence as a job dependency
+ * @job: scheduler job to add the dependencies to
+ * @fence: the dma_fence to add to the list of dependencies.
+ *
+ * Note that @fence is consumed in both the success and error cases.
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_sched_job_add_dependency(struct drm_sched_job *job,
+ struct dma_fence *fence)
+{
+ struct dma_fence_unwrap iter;
+ struct dma_fence *f;
+ int ret = 0;
+
+ dma_fence_unwrap_for_each (f, &iter, fence) {
+ ret = _add_dependency(job, f);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
EXPORT_SYMBOL(drm_sched_job_add_dependency);
/**
* drm_sched_job_add_resv_dependencies - add all fences from the resv to the job
* @job: scheduler job to add the dependencies to
* @resv: the dma_resv object to get the fences from
* @usage: the dma_resv_usage to use to filter the fences
*
* This adds all fences matching the given usage from @resv to @job.
* Must be called with the @resv lock held.
--
2.39.2
Hello,
Let me introduce the new ioctls, which are intended to allow gntdev to
map scatter-gather table on top of the existing dmabuf, referenced by
file descriptor.
When using the dma-buf exporter to create a dma-buf with backing storage and
map it to the grant refs provided by the domain, we ran into a problem:
some hardware (the i.MX8 GPU in our case) does not support external buffers
and requires the backing storage to be created using its native tools.
That's why new ioctls were added to be able to pass existing dma-buffer
fd as input parameter and use it as backing storage to export to refs.
Following calls were added:
IOCTL_GNTDEV_DMABUF_MAP_REFS_TO_BUF - map existing buffer as the backing
storage and export it to the provided grant refs;
IOCTL_GNTDEV_DMABUF_MAP_RELEASE - detach buffer from the grant table and
set notification to unmap grant refs before releasing the external
buffer. After this call the external buffer should be destroyed.
IOCTL_GNTDEV_DMABUF_MAP_WAIT_RELEASED - wait for timeout until buffer is
completely destroyed and gnt refs unmapped so domain could free grant
pages. Should be called after the buffer was destroyed.
Our setup is based on IMX8QM board. We're trying to implement zero-copy
support for DomU graphics using Wayland zwp_linux_dmabuf_v1_interface
implementation.
For dma-buf exporter we used i.MX8 gpu native tools to create backing
storage grant-refs, received from DomU. Buffer for the backing storage was
allocated using the gbm_bo_create call because the GPU does not support external
buffers and requires backing storage to be created using its native tools
(eglCreateImageKHR returns EGL_NO_IMAGE_KHR for buffers, which were not
created using gbm_bo_create).
This behaviour was also tested on Qemu setup using
DRM_IOCTL_MODE_CREATE_DUMB call to create backing storage buffer.
---
Oleksii Moisieiev (3):
xen/grant-table: save page_count on map and use if during async
unmapping
dma-buf: add dma buffer release notifier callback
xen/grant-table: add new ioctls to map dmabuf to existing fd
drivers/dma-buf/dma-buf.c | 44 ++++
drivers/xen/gntdev-common.h | 8 +-
drivers/xen/gntdev-dmabuf.c | 416 +++++++++++++++++++++++++++++++++++-
drivers/xen/gntdev-dmabuf.h | 7 +
drivers/xen/gntdev.c | 101 ++++++++-
drivers/xen/grant-table.c | 73 +++++--
include/linux/dma-buf.h | 15 ++
include/uapi/xen/gntdev.h | 62 ++++++
include/xen/grant_table.h | 8 +
9 files changed, 703 insertions(+), 31 deletions(-)
--
2.25.1
Hi everyone!
This is my first take on the Rust abstractions for the DRM
subsystem. It includes the abstractions themselves, some minor
prerequisite changes to the C side, as well as the drm-asahi GPU driver
(for reference on how the abstractions are used, but not necessarily
intended to land together).
These patches apply on top of the tree at [1], which is based on
6.3-rc1 with a large number of Rust abstraction/support commits added on
top. Most of these are not prerequisites for the DRM abstractions
themselves, but rather only of the driver.
* #1-12 introduce the abstractions, module by module, with minor C
changes before the dependent abstraction.
* Patch 10 is a little addition to drm_sched that I ended up needing,
but I can pull it out of the abstraction into its own patch if
needed.
* #13-14 add a minor feature to drm/gem and its abstraction used
by the driver.
* #15-16 introduce the (unstable) asahi UAPI. This is obviously not
ready for merge yet, but comments are welcome!
* #17 adds a Rust helper macro to handle GPU core/firmware differences.
This probably belongs in the driver at this point, but right now it
has to live in rust/macros since there is no mechanism for per-driver
proc macros.
* #18 adds the driver proper, in one big commit, for reference purposes.
I've been working since mid last year on an Apple AGX GPU driver for
Linux, using the (at the time) out-of-tree Rust support. As part of this
effort, I've been writing safe Rust abstractions for portions of the DRM
subsystem.
Now that Rust itself is upstream, I'd like to get all the abstractions
upstreamed so we can eventually get the driver upstreamed!
These abstractions have been used by the driver since our release in
December [2], in a simpler synchronous-submission form:
* drm::ioctl
* drm::device
* drm::drv
* drm::file
* drm::{gem, gem::shmem}
* drm::mm
This series adds these too, which are used by the explicit sync refactor
of the driver (the version in this series):
* drm::syncobj
* drm::sched
* dma_fence
The major dependencies for the DRM abstractions themselves are:
* [3] rust: error: Add missing wrappers to convert to/from kernel error codes
* [4] rust: Miscellaneous macro improvements
* [5] rust: Add a Sealed trait
* [6] rust: device: Add a minimal RawDevice trait
* [7] rust: Enable the new_uninit feature for kernel and driver crates
* [8] rust: ioctl: Add ioctl number manipulation functions
* [9] rust: sync: Arc: Any downcasting and assume_init()
* rust: Add `container_of` and `offset_of` macros
* kernel::sync::mutex and dependencies
Most of these (the ones with links) have already been submitted, and I
expect all of them to land for 6.4 (the mutex one will likely be last,
since there is some refactoring that will happen over the current state
to make it more ergonomic to use). The mutex dep is only necessary for
drm::mm and dma_fence, and transitively drm::syncobj and drm::sched.
Things work! We've had most of the abstractions in production edge
kernels with the driver, and the new explicit sync stuff has passed
quite a few torture tests (this is how we found the drm_sched issue,
patch 11).
The abstractions are intended to be safe (safety review very welcome!).
While writing them, I tried to avoid making any changes to the C side
unless absolutely necessary. I understand that it will probably make
sense to adjust the C side to make some things easier, but I wanted to
start from this as a baseline.
Known issues:
- The existing Rust integration does not currently allow building
abstractions as modules, so the Rust abstractions are only available
for DRM components that are built in. I added some extra Kconfig
symbols to deal with this, so a driver built as a module can depend
on having those built in. This should go away in the future (but may
not be ready in time for submission... I understand this probably
shouldn't be a blocker though?).
- DRM relies heavily on the "subclassing" pattern for driver objects,
and this doesn't map well to Rust. I tried several approaches for
various bits, so we can see how they work out. In particular, whether
wrapper types should pretend to be smart pointers and Deref to their
inner driver-specific types, and whether they should be marked as
method receivers (Yuck, internal rustc implementation hacks! But
Arc<T> already does the same thing and it makes usage in
driver-implemented callbacks as `self` possible) are things I'd love
to discuss ^^.
- Only what I need for my driver is implemented (plus a small amount of
obvious extras where better API completeness makes sense). I think the
general idea with Rust abstractions is that we add things as they
become necessary.
- The plain GEM vs. GEM-shmem duality ended up with quite a hairy type
hierarchy. I'd love to figure out how to make this simpler...
- drm::mm ends up requiring a built-in mutex in the abstraction, instead
of delegating that to the user with the usual Rust mutability rules.
This is because nodes can be dropped at any time, and those operations
need to be synchronized. We could try to avoid forbidding those drops
or mark the node type !Send, but that would make it a lot less
ergonomic to use...
I'm looking for feedback on the abstractions of all kinds, so we can
move towards an upstreamable version. Optimistically, I'd love to get
this upstream for 6.5, and the driver for 6.6.
Please feel free to ask any questions about the Rust bits, since I know
a lot of this is new to many of the C folks!
This is a fairly complete driver for Apple AGX G13 and G14 series GPUs.
The driver today supports the Apple M1, M1 Pro, M1 Max, M1 Ultra, and M2
SoCs, across two firmware revisions each. It has an explicit sync UAPI
heavily inspired by the upcoming Intel Xe UAPI, designed with Vulkan
support in mind. On the Mesa side we currently have a Gallium driver
that is mostly already upstream (missing the UAPI bits mostly) and
passes the dEQP GLES2/EGL tests, with most of GLES3.0 passing in
downstream work-in-progress branches. This is a reverse engineered
community driver (we have no hardware documentation of any kind, other
than some hints from aspects shared with PowerVR).
While developing the driver, I tried to make use of Rust's safety and
lifetime features to provide not just CPU-side safety, but also
partial firmware-ABI safety. Thanks to this, it has turned out to be
a very stable driver even though GPU firmware crashes are fatal (no
restart capability, need to reboot!) and the FW/driver interface is a
huge mess of unsafe shared memory structures with complex pointer
chains. There are over 70 ABI types and 3000+ lines of firmware ABI type
definitions that vary between firmware builds and GPU cores...
In a simpler blocking-submission form, it has been shipping in Asahi
Linux edge kernels since December [2], with lots of users and zero (!)
reported oopses (and only a couple reports of GPU firmware crashes,
though that issue should now be fixed). It has survived OOM scenarios
(Rust makes error cleanup easy!), UAPI-level fuzzing, countless broken
Mesa builds, uptimes of 40+ days, and more.
The explicit sync refactor significantly increases performance (and
potential problems), but this version has survived a lot of torture
with dEQP/piglit tests and some manual corner case testing.
In other words, Rust works! ^^
There are some design notes on the driver and further links at [10].
[1] https://github.com/AsahiLinux/linux.git drm-rfc-base-20230307
[2] https://asahilinux.org/2022/12/gpu-drivers-now-in-asahi-linux/
[3] https://lore.kernel.org/rust-for-linux/20230224-rust-error-v1-0-f8f9a9a8730…
[4] https://lore.kernel.org/rust-for-linux/20230224-rust-macros-v1-0-b39fae46e1…
[5] https://lore.kernel.org/rust-for-linux/20230224-rust-iopt-rtkit-v1-0-49ced3…
[6] https://lore.kernel.org/rust-for-linux/20230224-rust-iopt-rtkit-v1-0-49ced3…
[7] https://lore.kernel.org/rust-for-linux/CQV7ZNT6LMXI.1XG4YXSH8I7JK@vincent-a…
[8] https://lore.kernel.org/rust-for-linux/61f734d6-1497-755f-3632-3f261b890846…
[9] https://lore.kernel.org/rust-for-linux/20230224-rust-arc-v1-0-568eea613a41@…
[10] https://github.com/AsahiLinux/docs/wiki/SW:AGX-driver-notes
Signed-off-by: Asahi Lina <lina(a)asahilina.net>
---
Asahi Lina (18):
rust: drm: ioctl: Add DRM ioctl abstraction
rust: drm: Add Device and Driver abstractions
rust: drm: file: Add File abstraction
rust: drm: gem: Add GEM object abstraction
drm/gem-shmem: Export VM ops functions
rust: drm: gem: shmem: Add DRM shmem helper abstraction
rust: drm: mm: Add DRM MM Range Allocator abstraction
rust: dma_fence: Add DMA Fence abstraction
rust: drm: syncobj: Add DRM Sync Object abstraction
drm/scheduler: Add can_run_job callback
drm/scheduler: Clean up jobs when the scheduler is torn down
rust: drm: sched: Add GPU scheduler abstraction
drm/gem: Add a flag to control whether objects can be exported
rust: drm: gem: Add set_exportable() method
drm/asahi: Add the Asahi driver UAPI [DO NOT MERGE]
rust: bindings: Bind the Asahi DRM UAPI
rust: macros: Add versions macro
drm/asahi: Add the Asahi driver for Apple AGX GPUs
drivers/gpu/drm/Kconfig | 19 +
drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/asahi/Kconfig | 35 +
drivers/gpu/drm/asahi/Makefile | 3 +
drivers/gpu/drm/asahi/alloc.rs | 1046 ++++++++++++++++++++++++++
drivers/gpu/drm/asahi/asahi.rs | 53 ++
drivers/gpu/drm/asahi/buffer.rs | 694 ++++++++++++++++++
drivers/gpu/drm/asahi/channel.rs | 542 ++++++++++++++
drivers/gpu/drm/asahi/debug.rs | 129 ++++
drivers/gpu/drm/asahi/driver.rs | 166 +++++
drivers/gpu/drm/asahi/event.rs | 229 ++++++
drivers/gpu/drm/asahi/file.rs | 718 ++++++++++++++++++
drivers/gpu/drm/asahi/float.rs | 381 ++++++++++
drivers/gpu/drm/asahi/fw/buffer.rs | 170 +++++
drivers/gpu/drm/asahi/fw/channels.rs | 385 ++++++++++
drivers/gpu/drm/asahi/fw/compute.rs | 107 +++
drivers/gpu/drm/asahi/fw/event.rs | 100 +++
drivers/gpu/drm/asahi/fw/fragment.rs | 276 +++++++
drivers/gpu/drm/asahi/fw/initdata.rs | 1264 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/asahi/fw/job.rs | 56 ++
drivers/gpu/drm/asahi/fw/microseq.rs | 384 ++++++++++
drivers/gpu/drm/asahi/fw/mod.rs | 15 +
drivers/gpu/drm/asahi/fw/types.rs | 233 ++++++
drivers/gpu/drm/asahi/fw/vertex.rs | 177 +++++
drivers/gpu/drm/asahi/fw/workqueue.rs | 168 +++++
drivers/gpu/drm/asahi/gem.rs | 301 ++++++++
drivers/gpu/drm/asahi/gpu.rs | 1088 +++++++++++++++++++++++++++
drivers/gpu/drm/asahi/hw/mod.rs | 522 +++++++++++++
drivers/gpu/drm/asahi/hw/t600x.rs | 140 ++++
drivers/gpu/drm/asahi/hw/t8103.rs | 80 ++
drivers/gpu/drm/asahi/hw/t8112.rs | 82 +++
drivers/gpu/drm/asahi/initdata.rs | 777 ++++++++++++++++++++
drivers/gpu/drm/asahi/mem.rs | 133 ++++
drivers/gpu/drm/asahi/microseq.rs | 61 ++
drivers/gpu/drm/asahi/mmu.rs | 1249 +++++++++++++++++++++++++++++++
drivers/gpu/drm/asahi/object.rs | 704 ++++++++++++++++++
drivers/gpu/drm/asahi/place.rs | 343 +++++++++
drivers/gpu/drm/asahi/queue/common.rs | 52 ++
drivers/gpu/drm/asahi/queue/compute.rs | 371 ++++++++++
drivers/gpu/drm/asahi/queue/mod.rs | 725 ++++++++++++++++++
drivers/gpu/drm/asahi/queue/render.rs | 1173 +++++++++++++++++++++++++++++
drivers/gpu/drm/asahi/regs.rs | 387 ++++++++++
drivers/gpu/drm/asahi/slotalloc.rs | 292 ++++++++
drivers/gpu/drm/asahi/util.rs | 44 ++
drivers/gpu/drm/asahi/workqueue.rs | 880 ++++++++++++++++++++++
drivers/gpu/drm/drm_gem.c | 1 +
drivers/gpu/drm/drm_gem_shmem_helper.c | 9 +-
drivers/gpu/drm/drm_prime.c | 5 +
drivers/gpu/drm/scheduler/sched_main.c | 37 +-
include/drm/drm_gem.h | 8 +
include/drm/drm_gem_shmem_helper.h | 3 +
include/drm/gpu_scheduler.h | 8 +
include/uapi/drm/asahi_drm.h | 556 ++++++++++++++
rust/bindings/bindings_helper.h | 14 +
rust/helpers.c | 168 +++++
rust/kernel/dma_fence.rs | 532 ++++++++++++++
rust/kernel/drm/device.rs | 76 ++
rust/kernel/drm/drv.rs | 342 +++++++++
rust/kernel/drm/file.rs | 113 +++
rust/kernel/drm/gem/mod.rs | 384 ++++++++++
rust/kernel/drm/gem/shmem.rs | 381 ++++++++++
rust/kernel/drm/ioctl.rs | 147 ++++
rust/kernel/drm/mm.rs | 309 ++++++++
rust/kernel/drm/mod.rs | 13 +
rust/kernel/drm/sched.rs | 358 +++++++++
rust/kernel/drm/syncobj.rs | 77 ++
rust/kernel/lib.rs | 4 +
rust/macros/lib.rs | 7 +
rust/macros/versions.rs | 267 +++++++
69 files changed, 20569 insertions(+), 5 deletions(-)
---
base-commit: c9eb15274c9861026682a6b3e645891fccf88e07
change-id: 20230307-rust-drm-b5af3c2a9e55
Thank you,
~~ Lina
From: Rob Clark <robdclark(a)chromium.org>
Inspired by https://lore.kernel.org/dri-devel/20200604081224.863494-10-daniel.vetter@ff…
it seemed like a good idea to get rid of memory allocation in job_run()
fence signaling path, and use lockdep annotations to yell at us about
anything that could deadlock against shrinker/reclaim. Anything that
can trigger reclaim, or block on any other thread that has triggered
reclaim, can block the GPU shrinker from releasing memory if it is
waiting for the job to complete, causing deadlock.
The first patch pre-allocates the hw_fence, splitting allocation and
initialization, to avoid allocation in the job_run() path. The next
eight decouple the obj lock from job_run(), as the obj lock is required
to pin/unpin backing pages (ie. holding an obj lock in job_run() could
deadlock the shrinker by blocking forward progress towards pinned buffers
becoming idle). Followed by two so that we could idr_preload() in order
to avoid memory allocations under locks indirectly connected to the
shrinker path.
Next are three patches to decouple initialization (where allocations are
needed) from GPU runpm and devfreq, to avoid allocations in the fence
signaling path. Followed by various PM devfreq/qos and interconnect
locking fixes to decouple initialization (allocation) from runtime.
And finally, the last patch is a modified version of danvet's patch to
add lockdep annotations to gpu scheduler, but does so conditionally so
that drivers can opt-in.
v2: Switch from embedding hw_fence in submit/job object to preallocating
the hw_fence. Rework "fenced unpin" locking to drop obj lock from
fence signaling path (ie. the part that was still WIP in the first
iteration of the patchset). Adds the final patch to enable fence
signaling annotations now that job_run() and job_free() are safe.
The PM devfreq/QoS and interconnect patches are unchanged.
Rob Clark (23):
drm/msm: Pre-allocate hw_fence
drm/msm: Move submit bo flags update from obj lock
drm/msm/gem: Tidy up VMA API
drm/msm: Decouple vma tracking from obj lock
drm/msm/gem: Simplify vmap vs LRU tracking
drm/gem: Export drm_gem_lru_move_tail_locked()
drm/msm/gem: Move update_lru()
drm/msm/gem: Protect pin_count/madv by LRU lock
drm/msm/gem: Avoid obj lock in job_run()
drm/msm: Switch idr_lock to spinlock
drm/msm: Use idr_preload()
drm/msm/gpu: Move fw loading out of hw_init() path
drm/msm/gpu: Move BO allocation out of hw_init
drm/msm/a6xx: Move ioremap out of hw_init path
PM / devfreq: Drop unneed locking to appease lockdep
PM / devfreq: Teach lockdep about locking order
PM / QoS: Fix constraints alloc vs reclaim locking
PM / QoS: Decouple request alloc from dev_pm_qos_mtx
PM / QoS: Teach lockdep about dev_pm_qos_mtx locking order
soc: qcom: smd-rpm: Use GFP_ATOMIC in write path
interconnect: Fix locking for runpm vs reclaim
interconnect: Teach lockdep about icc_bw_lock order
drm/sched: Add (optional) fence signaling annotation
drivers/base/power/qos.c | 83 +++++++++---
drivers/devfreq/devfreq.c | 52 ++++----
drivers/gpu/drm/drm_gem.c | 11 +-
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 48 ++++---
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 18 ++-
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 46 ++++---
drivers/gpu/drm/msm/adreno/adreno_device.c | 6 +
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 9 +-
drivers/gpu/drm/msm/msm_drv.c | 6 +-
drivers/gpu/drm/msm/msm_fence.c | 12 +-
drivers/gpu/drm/msm/msm_fence.h | 3 +-
drivers/gpu/drm/msm/msm_gem.c | 145 ++++++++++++++-------
drivers/gpu/drm/msm/msm_gem.h | 29 +++--
drivers/gpu/drm/msm/msm_gem_submit.c | 27 ++--
drivers/gpu/drm/msm/msm_gem_vma.c | 91 ++++++++++---
drivers/gpu/drm/msm/msm_gpu.h | 8 +-
drivers/gpu/drm/msm/msm_ringbuffer.c | 9 +-
drivers/gpu/drm/msm/msm_submitqueue.c | 2 +-
drivers/gpu/drm/scheduler/sched_main.c | 9 ++
drivers/interconnect/core.c | 18 ++-
drivers/soc/qcom/smd-rpm.c | 2 +-
include/drm/drm_gem.h | 1 +
include/drm/gpu_scheduler.h | 2 +
23 files changed, 416 insertions(+), 221 deletions(-)
--
2.39.2
Hi,
This small patchset adds three new IOCTLs that can be used to attach,
detach, or transfer from/to a DMABUF object.
Changes since v1:
- patch [2/3] is new. I had to reuse a piece of code that was already
duplicated in the driver, so I factorized the code.
- Make ffs_dma_resv_lock() static
- Add MODULE_IMPORT_NS(DMA_BUF);
- The attach/detach functions are now performed without locking the
eps_lock spinlock. The transfer function starts with the spinlock
unlocked, then locks it before allocating and queueing the USB
transfer.
Cheers,
-Paul
Paul Cercueil (3):
usb: gadget: Support already-mapped DMA SGs
usb: gadget: functionfs: Factorize wait-for-endpoint code
usb: gadget: functionfs: Add DMABUF import interface
drivers/usb/gadget/function/f_fs.c | 467 ++++++++++++++++++++++++++--
drivers/usb/gadget/udc/core.c | 7 +-
include/linux/usb/gadget.h | 2 +
include/uapi/linux/usb/functionfs.h | 14 +-
4 files changed, 468 insertions(+), 22 deletions(-)
--
2.39.2
From: Sui Jingfeng <suijingfeng(a)loongson.cn>
Loongson display controller IP has been integrated in both Loongson
North Bridge chipset(ls7a1000 and ls7a2000) and Loongson SoCs(ls2k1000
and ls2k2000 etc), it even has been included in Loongson BMC products.
This display controller is a PCI device, it has two display pipes. For
the DC in LS7A1000 and LS2K1000 each way has a DVO output interface
which provide RGB888 signals, vertical & horizontal synchronisations,
and the pixel clock. Each CRTC is able to support 1920x1080@60Hz,
the maximum resolution is 2048x2048 according to the hardware spec.
For the DC in LS7A2000, each display pipe is equipped with a built-in
HDMI encoder which is compliant with HDMI 1.4 specification, thus it
supports 3840x2160@30Hz. The first display pipe is also equipped with
a transparent vga encoder which is parallel with the HDMI encoder.
The DC in LS7A2000 is more complete, besides above feature, it has
two hardware cursors, two hardware vblank counter and two scanout
position recorders.
v1 -> v2:
1) Use hpd status reg when polling for ls7a2000
2) Fix all warnings emerged when compile with W=1
v2 -> v3:
1) Add COMPILE_TEST in Kconfig and make the driver off by default
2) Alphabetical sorting headers (Thomas)
3) Untangle register access functions as much as possible (Thomas)
4) Switch to TTM based memory manager and prefer cached mapping
for Loongson SoC (Thomas)
5) Add chip id detection method, now all models are distinguishable.
6) Revise builtin HDMI phy driver, nearly all main stream mode
below 4K@30Hz is tested, this driver supported these mode very
well including clone display mode and extend display mode.
v3 -> v4:
1) Quickly fix a small mistake.
v4 -> v5:
1) Drop potential support for Loongson 2K series SoC temporary,
this part should be resent with the DT binding patch in the future.
2) Add per display pipe debugfs support to the builtin HDMI encoder.
3) Rewrite atomic_update() for hardware cursors plane(Thomas)
4) Rewrite encoder and connector initialization part, untangle it
according to the chip(Thomas).
v5 -> v6:
1) Remove stray code which didn't get used, say lsdc_of_get_reserved_ram
2) Fix all typos I could find, make sentences and code more readable
3) Untangle lsdc_hdmi*_connector_detect() function according to the pipe
4) After a serious consideration, we rename this driver as loongson.
Because we also have drivers toward the LoongGPU IP in LS7A2000 and
LS2K2000. Besides, there are also drivers about the external encoder,
HDMI audio driver and vbios support etc. This patch only provide DC
driver part, my teammate Li Yi believes that loongson will be more
suitable for loongson graphics than lsdc in the long run.
loongson.ko = LSDC + LoongGPU + encoders driver + vbios/DT ...
v6 -> v7:
1) Add prime support; self-sharing works. Sharing buffers with etnaviv
is also tested, and it works with limitations.
2) Implement buffer objects tracking with list_head.
3) S3(sleep to RAM) is tested on ls3a5000+ls7a2000 evb and it works.
4) Rewrite lsdc_bo_move, since ttm core stop allocating resources
during BO creation. Patch V1 ~ V6 of this series no longer works
on latest kernel. Thus, we send V7 to revive them.
v7 -> v8:
1) Fix compile warnings on 32-bit platforms when compiling with W=1
2) Revise lsdc_bo_gpu_offset() and minor cleanup
3) Pageflip tested on the virtual terminal with following commands
modetest -M loongson -s 32:1920x1080 -v
modetest -M loongson -s 34:1920x1080 -v -F tiles
It works like a charm, when running pageflip test with dual screen
configuration, another two additional bo created by the modetest
emerged, VRAM usage up to 40+MB, well we have at least 64MB, still
enough.
# cat bos
bo[0000]: size: 8112kB VRAM
bo[0001]: size: 16kB VRAM
bo[0002]: size: 16kB VRAM
bo[0003]: size: 16208kB VRAM
bo[0004]: size: 8112kB VRAM
bo[0005]: size: 8112kB VRAM
v8 -> v9:
1) Select I2C and I2C_ALGOBIT in Kconfig and should depend on MMU.
2) Using pci_get_domain_bus_and_slot to get the GPU device.
3) Other minor improvements.
Sui Jingfeng (2):
MAINTAINERS: add maintainers for DRM LOONGSON driver
drm: add kms driver for loongson display controller
MAINTAINERS | 7 +
drivers/gpu/drm/Kconfig | 2 +
drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/loongson/Kconfig | 17 +
drivers/gpu/drm/loongson/Makefile | 16 +
drivers/gpu/drm/loongson/lsdc_crtc.c | 381 ++++++++++++++++
drivers/gpu/drm/loongson/lsdc_debugfs.c | 261 +++++++++++
drivers/gpu/drm/loongson/lsdc_drv.c | 508 +++++++++++++++++++++
drivers/gpu/drm/loongson/lsdc_drv.h | 324 ++++++++++++++
drivers/gpu/drm/loongson/lsdc_gem.c | 294 +++++++++++++
drivers/gpu/drm/loongson/lsdc_gem.h | 26 ++
drivers/gpu/drm/loongson/lsdc_i2c.c | 171 +++++++
drivers/gpu/drm/loongson/lsdc_irq.c | 86 ++++
drivers/gpu/drm/loongson/lsdc_irq.h | 12 +
drivers/gpu/drm/loongson/lsdc_output.c | 563 ++++++++++++++++++++++++
drivers/gpu/drm/loongson/lsdc_output.h | 14 +
drivers/gpu/drm/loongson/lsdc_plane.c | 432 ++++++++++++++++++
drivers/gpu/drm/loongson/lsdc_pll.c | 338 ++++++++++++++
drivers/gpu/drm/loongson/lsdc_pll.h | 76 ++++
drivers/gpu/drm/loongson/lsdc_probe.c | 86 ++++
drivers/gpu/drm/loongson/lsdc_probe.h | 11 +
drivers/gpu/drm/loongson/lsdc_regs.h | 370 ++++++++++++++++
drivers/gpu/drm/loongson/lsdc_ttm.c | 426 ++++++++++++++++++
drivers/gpu/drm/loongson/lsdc_ttm.h | 71 +++
24 files changed, 4493 insertions(+)
create mode 100644 drivers/gpu/drm/loongson/Kconfig
create mode 100644 drivers/gpu/drm/loongson/Makefile
create mode 100644 drivers/gpu/drm/loongson/lsdc_crtc.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_debugfs.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_drv.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_drv.h
create mode 100644 drivers/gpu/drm/loongson/lsdc_gem.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_gem.h
create mode 100644 drivers/gpu/drm/loongson/lsdc_i2c.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_irq.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_irq.h
create mode 100644 drivers/gpu/drm/loongson/lsdc_output.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_output.h
create mode 100644 drivers/gpu/drm/loongson/lsdc_plane.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_pll.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_pll.h
create mode 100644 drivers/gpu/drm/loongson/lsdc_probe.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_probe.h
create mode 100644 drivers/gpu/drm/loongson/lsdc_regs.h
create mode 100644 drivers/gpu/drm/loongson/lsdc_ttm.c
create mode 100644 drivers/gpu/drm/loongson/lsdc_ttm.h
--
2.25.1
Fixes the following W=1 kernel build warning(s):
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c:248: warning: Function parameter or member 'job' not described in 'sdma_v6_0_ring_emit_ib'
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c:248: warning: Function parameter or member 'flags' not described in 'sdma_v6_0_ring_emit_ib'
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c:945: warning: Function parameter or member 'timeout' not described in 'sdma_v6_0_ring_test_ib'
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c:1124: warning: Function parameter or member 'ring' not described in 'sdma_v6_0_ring_pad_ib'
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c:1175: warning: Function parameter or member 'vmid' not described in 'sdma_v6_0_ring_emit_vm_flush'
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c:1175: warning: Function parameter or member 'pd_addr' not described in 'sdma_v6_0_ring_emit_vm_flush'
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: "Pan, Xinhui" <Xinhui.Pan(a)amd.com>
Cc: David Airlie <airlied(a)gmail.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: Stanley Yang <Stanley.Yang(a)amd.com>
Cc: amd-gfx(a)lists.freedesktop.org
Cc: dri-devel(a)lists.freedesktop.org
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
Signed-off-by: Lee Jones <lee(a)kernel.org>
---
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 40e6b22daa226..efea4ef30a787 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -233,7 +233,7 @@ static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
amdgpu_ring_write(ring, ring->funcs->nop);
}
-/**
+/*
* sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine
*
* @ring: amdgpu ring pointer
@@ -933,7 +933,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
-/**
+/*
* sdma_v6_0_ring_test_ib - test an IB on the DMA engine
*
* @ring: amdgpu_ring structure holding ring information
@@ -1114,7 +1114,7 @@ static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib,
ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
}
-/**
+/*
* sdma_v6_0_ring_pad_ib - pad the IB
* @ib: indirect buffer to fill with padding
*
@@ -1162,7 +1162,7 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}
-/**
+/*
* sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
*
* @ring: amdgpu_ring pointer
--
2.40.0.348.gf938b09366-goog
Fixes the following W=1 kernel build warning(s):
drivers/gpu/drm/i915/i915_vma.c:756: warning: Function parameter or member 'ww' not described in 'i915_vma_insert'
drivers/gpu/drm/i915/i915_vma.c:1744: warning: Function parameter or member 'vma' not described in 'i915_vma_destroy_locked'
Cc: Jani Nikula <jani.nikula(a)linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Cc: David Airlie <airlied(a)gmail.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: intel-gfx(a)lists.freedesktop.org
Cc: dri-devel(a)lists.freedesktop.org
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
Signed-off-by: Lee Jones <lee(a)kernel.org>
---
drivers/gpu/drm/i915/i915_vma.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f51fd9fd4c89c..20a44788999e5 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -739,6 +739,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
/**
* i915_vma_insert - finds a slot for the vma in its address space
* @vma: the vma
+ * @ww: An optional struct i915_gem_ww_ctx
* @size: requested size in bytes (can be larger than the VMA)
* @alignment: required alignment
* @flags: mask of PIN_* flags to use
@@ -1714,7 +1715,7 @@ static void release_references(struct i915_vma *vma, struct intel_gt *gt,
i915_vma_free(vma);
}
-/**
+/*
* i915_vma_destroy_locked - Remove all weak reference to the vma and put
* the initial reference.
*
--
2.40.0.348.gf938b09366-goog