On Mon, 2026-06-01 at 10:36 +0000, Alice Ryhl wrote:
On Sat, May 30, 2026 at 04:35:11PM +0200, Philipp Stanner wrote:
[…]
+use pin_init::pin_init_from_closure;
+use core::{ + marker::PhantomData, // + ops::Deref, + ptr, + ptr::{ + drop_in_place, + NonNull, // + }, + sync::atomic::{ + AtomicU64, + Ordering, // + },
Use atomics from the kernel crate instead.
OK.
+};
+use bindings::ECANCELED;
+use kernel::str::CString; +use kernel::sync::{ + aref::{ + ARef, + AlwaysRefCounted, // + }, + Arc, + ArcBorrow, // +};
+/// VTable for dma_fence backend_ops callbacks. +// +// Mandatory dma_fence backend_ops are implemented implicitly through +// [`FenceCtx`]. Additional ones shall get implemented on this trait, which then +// shall be demanded for the fence context data. +pub trait FenceCtxOps {}
This empty trait is unused.
(discussed in the other thread with Boris)
+/// A dma-fence context. A fence context takes care of associating related fences with each other, +/// providing each with raising sequence numbers and a common identifier. +#[pin_data(PinnedDrop)] +pub struct FenceCtx<F: Send + Sync, C: Send + Sync> {
No need to list any trait bounds here. You can list them on `impl` blocks only.
ACK.
[…]
+ { + // Uses `pin_init_from_closure` instead of `try_pin_init!` so that on + // `-ENOENT` (already signaled) the callback can be read back from the + // partially-initialized slot and returned through the error. + // + // SAFETY: `pin_init_from_closure` requires: + // - On `Ok(())`: the slot is fully initialized and valid for `Drop`. + // - On `Err(_)`: the slot is clean, i.e.: no partially-initialized fields + // remain, and the slot can be deallocated without dropping. + // + // We uphold this as follows: + // - On success: all three fields are initialized. Ok(()) is returned. + // - On ENOENT (already signaled): `callback` and `fence` are read back + // from the slot via `ptr::read`, leaving the slot clean. `cb` was + // initialized by `dma_fence_add_callback` (it calls + // `INIT_LIST_HEAD(&cb->node)` even on error), but `cb` is + // `Opaque<dma_fence_cb>` which has no `Drop`, so not dropping it is + // fine. The callback is returned through `AlreadySignaled(T)`. + // - On other errors: same cleanup as ENOENT, error returned as + // `Other(e)`. + unsafe { + pin_init_from_closure(move |slot: *mut Self| { + let slot_callback = &raw mut (*slot).callback; + let slot_fence = &raw mut (*slot).fence; + let slot_cb = &raw mut (*slot).cb;
+ // Write callback and fence first — must be visible before + // dma_fence_add_callback makes the registration live. + core::ptr::write(slot_callback, callback); + core::ptr::write(slot_fence, ARef::from(fence));
Here you are incrementing the fence refcount. It's better to change the function argument to ARef<Fence> so that the user can avoid this increment if they happen to own a refcount they're willing to give up.
Agreed, will do
+ let ret = to_result(bindings::dma_fence_add_callback( + fence.inner.get(), + Opaque::cast_into(slot_cb), + Some(Self::dma_fence_callback), + ));
+ match ret { + Ok(()) => Ok(()), + Err(e) => { + // Read back what we wrote to leave the slot clean. + let cb_back = core::ptr::read(slot_callback); + let _fence_back = core::ptr::read(slot_fence);
This can be drop_in_place().
+ if e.to_errno() == ENOENT.to_errno() { + Err(CallbackError::AlreadySignaled(cb_back)) + } else { + Err(CallbackError::Other(e)) + } + } + } + }) + } + }
+ /// Raw dma fence callback that is called by the C code. + /// + /// # Safety + /// + /// This is only called by the dma_fence subsystem with valid pointers. + unsafe extern "C" fn dma_fence_callback( + _fence: *mut bindings::dma_fence, + cb: *mut bindings::dma_fence_cb, + ) { + let ptr = Opaque::cast_from(cb).cast_mut();
+ // SAFETY: All `cb` we can receive here have been created in such a way + // that they are embedded into a `FenceCbRegistration`. The backend + // ensures synchronisation so whoever holds the registration object + // cannot drop it while this code is running. See `FenceCbRegistration::drop`. + unsafe { + let reg: *mut Self = container_of!(ptr, Self, cb);
+ (*reg).callback.called(); + } + }
+ /// Returns a reference to the fence this callback is registered on. + pub fn fence(self: Pin<&Self>) -> &Fence {
Can be simplified to `fn fence(&self) -> &Fence`.
+ &self.get_ref().fence + } +}
+#[pinned_drop] +impl<T: FenceCb> PinnedDrop for FenceCbRegistration<T> { + fn drop(self: Pin<&mut Self>) { + // Always call dma_fence_remove_callback, even if `callback` has already + // been taken by `dma_fence_callback`. This is necessary for + // synchronization: `dma_fence_remove_callback` acquires `fence->lock`, + // which ensures that any in-flight `dma_fence_signal` (which calls our + // callback while holding the same lock) has completed before we free + // the struct. + // + // Without this, Drop can race with a concurrent signal: + // CPU0 (signal, lock held): take() -> signaled(fence_ref) (in progress) + // CPU1 (drop): sees is_some()==false -> skips lock -> frees struct + // CPU0: accesses fence_ref -> use-after-free + // + // When the callback has already fired, the signal path detached the + // list node via INIT_LIST_HEAD, so dma_fence_remove_callback just sees + // an empty node and returns false — the lock acquisition is the only + // thing that matters. + // + // SAFETY: The fence pointer is valid and the cb was initialized by + // dma_fence_add_callback during construction. + unsafe { + bindings::dma_fence_remove_callback(self.fence.as_raw(), self.cb.get()); + }
Formatting nit: Usually the ; goes outside the unsafe block.
I could have sworn that it was rustfmt who did that? Maybe because the ; was inside to begin with.
+ } +}
+// SAFETY: FenceCbRegistration can be sent between threads +unsafe impl<T: FenceCb> Send for FenceCbRegistration<T> {}
+// SAFETY: &FenceCbRegistration can be shared between threads if &T can. +unsafe impl<T: FenceCb> Sync for FenceCbRegistration<T> where T: Sync {}
There's no &FenceCbRegistration<T> -> &T accessor, so I don't think this bound is required.
unsafe impl<T: FenceCb> Sync for FenceCbRegistration<T> {}
There also can't be such an accessor in the future because the closure takes a &mut T.
Hm, very correct. The entire design only allows serial access.
+/// The receiving counterpart of a [`DriverFence`], designed to register callbacks +/// on, check the signalled state etc. A [`Fence`] cannot be signalled. +/// A [`Fence`] is always refcounted. +pub struct Fence { + /// The actual dma_fence passed to C. + inner: Opaque<bindings::dma_fence>, +}
+// SAFETY: Fences are literally designed to be shared between threads. +unsafe impl Send for Fence {} +// SAFETY: Fences are literally designed to be shared between threads. +unsafe impl Sync for Fence {}
+impl Fence { + /// Check whether the fence was signalled at the moment of the function call. + pub fn is_signaled(&self) -> bool { + // SAFETY: self is by definition still valid. The backend ensures proper + // locking. + unsafe { bindings::dma_fence_is_signaled(self.as_raw()) } + }
+ fn as_raw(&self) -> *mut bindings::dma_fence { + self.inner.get() + }
+ /// Create a [`Fence`] from a raw C [`bindings::dma_fence`]. + /// + /// # Safety + /// + /// `ptr` must point to an initialized fence that is embedded into a [`Fence`]. + pub unsafe fn from_raw<'a>(ptr: *mut bindings::dma_fence) -> &'a Self { + // SAFETY: Safe as per the function's overall safety requirements. + unsafe { &*ptr.cast() } + } +}
+// SAFETY: These implement the C backends refcounting methods which are proven to work correctly. +unsafe impl AlwaysRefCounted for Fence { + fn inc_ref(&self) { + // SAFETY: `self.as_raw()` is a pointer to a valid `struct dma_fence`. + unsafe { bindings::dma_fence_get(self.as_raw()) } + }
+ /// # Safety + /// + /// `ptr` must be a valid pointer to a [`DriverFence`]. + unsafe fn dec_ref(ptr: NonNull<Self>) { + // SAFETY: `ptr` is never a NULL pointer; and when `dec_ref()` is called + // the fence is by definition still valid. + let fence = unsafe { (*ptr.as_ptr()).inner.get() };
+ // SAFETY: Valid because `fence` was created validly above. + unsafe { bindings::dma_fence_put(fence) } + } +}
+#[repr(C)] // Necessary to guarantee that `inner` always comes first so that we can cast. +#[pin_data] +struct DriverFenceData<F: Send + Sync, C: Send + Sync> {
Ditto here about trait bounds. (And everywhere else.)
+ #[pin] + /// The inner fence. + inner: Fence, + /// Pointer to access the FenceCtx. Useful for obtaining name parameters. + // The FenceCtx lives as long as at least all its fences, hence this is safe. + fctx: Arc<FenceCtx<F, C>>, + /// The API user's data. As required by [`DriverFenceAllowedData`], this either + /// does not need drop, or must live in a [`rcu::RcuBox`]. It is essential + /// that the data only performs operations legal in atomic context in its + /// [`Drop`] implementation. + #[pin] + data: F, +}
+/// A trait to enforce that all data in a [`DriverFence`] either does not need +/// drop, or lives in a [`RcuBox`]. +pub trait DriverFenceAllowedData: private::Sealed {}
+mod private { + pub trait Sealed {} +}
+impl<F: Copy> DriverFenceAllowedData for F {} +impl<F: Send> DriverFenceAllowedData for RcuBox<F> {}
+impl<F: Copy> private::Sealed for F {} +impl<F: Send> private::Sealed for RcuBox<F> {}
Why sealed? Just make the trait unsafe and require the things you require from the user.
This is far better. We definitely only allow the user to pass A or B, and only then it compiles.
The unsafe implementation could be messed up.
I thought that's what Sealed is for. Or isn't it?
+/// A synchronization primitive mainly for GPU drivers. +/// +/// Fences are always reference counted. The typical use case is that one side registers +/// callbacks on the fence which will perform a certain action (such as queueing work) once the +/// other side signals the fence. +/// +/// # Examples +/// +/// ``` +/// use kernel::dma_buf::{DriverFence, FenceCtx, FenceCb, FenceCbRegistration}; +/// use kernel::str::CString; +/// use kernel::sync::{ +/// aref::ARef, +/// rcu::RcuBox, // +/// }; +/// use core::ops::Deref; +/// use core::fmt::Display;
Use fmt traits from kernel instead. (Actually, I don't think you use Display at all here?)
I tried, see a few lines below:
+/// struct CallbackData { } +/// +/// impl FenceCb for CallbackData { +/// fn called(&mut self) { +/// pr_info!("DmaFence callback executed.\n"); +/// } +/// } +/// +/// let driver_name = CString::try_from_fmt(fmt!("dummy_driver"))?; +/// let timeline_name = CString::try_from_fmt(fmt!("dummy_timeline"))?; +/// +/// let fctx = FenceCtx::new(driver_name, timeline_name, ())?; +/// +/// let fence_data = CString::try_from_fmt(fmt!("dummy_data"))?; +/// // DriverFence::data must either not need drop, or live in an RcuBox. +/// let fence_data = RcuBox::new(fence_data, GFP_KERNEL)?; +/// +/// let fence_alloc = fctx.as_arc_borrow().new_fence_allocation(fence_data)?; +/// let mut fence = fctx.new_fence(fence_alloc); +/// +/// let cb_data = CallbackData { }; +/// let waiting_fence = ARef::from(fence.as_fence()); +/// let cb_reg = FenceCbRegistration::new(&waiting_fence, cb_data); +/// let cb_reg = KBox::pin_init(cb_reg, GFP_KERNEL)?; +/// +/// // DriverFence implements Deref. +/// // FIXME: unit test claims that CString does not implement Display. Why? +/// // pr_info!("Fence's inner data is: {}", fence.deref().deref());
Lazily, I was hoping that someone here would tell me how that is supposed to be done correctly 8-)
+/// +/// // TODO begin_signalling +/// fence.signal(Ok(())); +/// assert_eq!(waiting_fence.is_signaled(), true); +/// +/// Ok::<(), Error>(()) +/// ``` +pub struct DriverFence<F: Send + Sync, C: Send + Sync> { + /// The actual content of the fence. Lives in a raw pointer so that its + /// memory can be managed independently. Valid until both the [`DriverFence`] + /// and all associated [`Fence`]s have disappeared. + data: NonNull<DriverFenceData<F, C>>, +}
+/// A pre-prepared DMA fence, carrying the user's data and the memory it and the +/// fence reside in. Only useful for creating a [`DriverFence`]. Splitting +/// allocation and full initialization is necessary because fences cannot be +/// allocated dynamically in some circumstances (deadlock). +pub struct DriverFenceAllocation<F: Send + Sync, C: Send + Sync> { + /// The memory for the actual content of the fence. + /// Handed over to a [`DriverFence`], or deallocated once the + /// [`DriverFenceAllocation`] drops. + data: KBox<DriverFenceData<F, C>>, +}
+impl<F: Send + Sync + DriverFenceAllowedData, C: Send + Sync> DriverFenceAllocation<F, C> { + /// Create a new allocation slot that can later be used to create a fully + /// initialized [`DriverFence`] without the need to allocate. + pub fn new(fctx: Arc<FenceCtx<F, C>>, data: F) -> Result<Self> { + let fence_data = DriverFenceData { + // `inner` remains uninitialized until a [`DriverFence`] takes over. + inner: Fence { + inner: Opaque::uninit(), + }, + fctx, + data, + };
+ // In order to support the C dma_fence callbacks, it is necessary for + // a `Fence` and a `DriverFence` to live in the same allocation, + // because the C backend passes a dma_fence, from which the driver most + // likely wants to be able to access its `data` in `DriverFence`. + // + // Hence, we need to manage the memory manually. It will be freed by the + // C backend automatically once the refcount within `Fence` drops to 0. + let data = KBox::new(fence_data, GFP_KERNEL | __GFP_ZERO)?;
+ Ok(Self { data }) + }
+ fn as_raw(&self) -> *mut bindings::dma_fence { + self.data.inner.inner.get() + } +}
+impl<F: Send + Sync, C: Send + Sync> DriverFence<F, C> { + fn as_raw(&self) -> *mut bindings::dma_fence { + // SAFETY: Valid because `self` is valid. + let fence_data = unsafe { &mut *self.data.as_ptr() };
+ fence_data.inner.inner.get() + }
+ /// Create a [`DriverFence`] from a raw pointer to a [`bindings::dma_fence`]. + /// + /// # Safety + /// + /// `ptr` must be a valid pointer to a `dma_fence` that was obtained through + /// a [`DriverFence`] with matching generic data for both fence and associated + /// [`FenceCtx`]. + unsafe fn from_raw(ptr: *mut bindings::dma_fence) -> Self { + let opaque_fence = Opaque::cast_from(ptr);
+ // SAFETY: Safe due to the function's overall safety requirements. + let fence_ptr = unsafe { container_of!(opaque_fence, Fence, inner) };
+ // DriverFenceData is repr(C) and a Fence is its first member. + let fence_data_ptr = fence_ptr as *mut DriverFenceData<F, C>;
+ // SAFETY: `fence_data_ptr` was created validly above. + let data = unsafe { NonNull::new_unchecked(fence_data_ptr) };
+ Self { data } + }
+ /// Return the underlying [`Fence`]. + pub fn as_fence(&self) -> &Fence { + // SAFETY: `self` is by definition still valid, and it cannot drop until + // this new reference is gone. + unsafe { Fence::from_raw(self.as_raw()) } + }
+ /// Signal the fence. This will invoke all registered callbacks. + pub fn signal(self, res: Result) { + let fence = self.as_raw(); + let mut fence_flags: usize = 0; + let flag_ptr = &raw mut fence_flags;
+ // SAFETY: Once a `DriverFence` is initialized, the inner `fence` is + // valid and initialized. It is valid until the refcount drops + // to 0, which can earliest happen once the `DriverFence` has been dropped. + unsafe { + bindings::dma_fence_lock_irqsave(fence, flag_ptr); + if !bindings::dma_fence_is_signaled_locked(fence) { + if let Err(err) = res { + bindings::dma_fence_set_error(fence, err.to_errno()); + } + bindings::dma_fence_signal_locked(fence); + } + bindings::dma_fence_unlock_irqrestore(fence, flag_ptr); + }
This single unsafe block spans five different unsafe operations.
Same discussion with Danilo. I'd prefer it this way, but I guess separate blocks also have some advantages.
+ } +}
+// SAFETY: Fences are literally designed to be shared between threads. +unsafe impl<F: Send + Sync, C: Send + Sync> Send for DriverFence<F, C> {}
+impl<F: Send + Sync, C: Send + Sync> Deref for DriverFence<F, C> { + type Target = F;
+ fn deref(&self) -> &Self::Target { + // SAFETY: Thanks to refcounting, `data` is always valid as long as `self` is. + let data = unsafe { &*self.data.as_ptr() };
+ &data.data + } +}
+/// A borrowed [`DriverFence`]. All you can do with it is access your user data +/// and obtain a [`Fence`]. +pub struct DriverFenceBorrow<F: Send + Sync, C: Send + Sync> { + /// The actual content of the fence. Lives in a raw pointer so that its + /// memory can be managed independently. Valid until both the [`DriverFence`] + /// and all associated [`Fence`]s have disappeared. + data: NonNull<DriverFenceData<F, C>>, +}
+impl<F: Send + Sync, C: Send + Sync> Deref for DriverFenceBorrow<F, C> { + type Target = F;
+ fn deref(&self) -> &Self::Target { + // SAFETY: Thanks to refcounting, `data` is always valid as long as `self` is. + let data = unsafe { &*self.data.as_ptr() };
+ &data.data + } +}
+impl<F: Send + Sync, C: Send + Sync> DriverFenceBorrow<F, C> { + fn as_raw(&self) -> *mut bindings::dma_fence { + // SAFETY: Valid because `self` is valid. + let fence_data = unsafe { &mut *self.data.as_ptr() };
+ fence_data.inner.inner.get() + }
+ /// Return the underlying [`Fence`]. + pub fn as_fence(&self) -> &Fence { + // SAFETY: `self` is by definition still valid, and it cannot drop until + // this new reference is gone. + unsafe { Fence::from_raw(self.as_raw()) } + }
+ /// Get a [`DriverFenceBorrow`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must point to a raw dma_fence within a [`Fence`] within a [`DriverFenceData`]. + unsafe fn from_raw(ptr: *mut bindings::dma_fence) -> Self { + let opaque_fence = Opaque::cast_from(ptr);
+ // SAFETY: Safe due to the function's overall safety requirements. + let fence_ptr = unsafe { container_of!(opaque_fence, Fence, inner) };
+ // DriverFenceData is repr(C) and a Fence is its first member. + let fence_data_ptr = fence_ptr as *mut DriverFenceData<F, C>;
+ // SAFETY: `fence_data_ptr` was created validly above. + let data = unsafe { NonNull::new_unchecked(fence_data_ptr) };
+ Self { data } + } +}
+// SAFETY: The Rust dma_fence abstractions are already designed around the inner +// C `dma_fence`, which can serve safely as the identification point when being +// owned by C. Moreover, safety is ensured by not dropping `DriverFence` and by +// only allowing operations without side effects on the Borrowed type. +unsafe impl<F: Send + Sync + 'static, C: Send + Sync + 'static> ForeignOwnable + for DriverFence<F, C> +{ + // `DriverFence` is merely a wrapper around a raw pointer. Thus, we can just + // use it directly. + type Borrowed<'a> = DriverFenceBorrow<F, C>; + type BorrowedMut<'a> = DriverFenceBorrow<F, C>;
+ const FOREIGN_ALIGN: usize = core::mem::align_of::<bindings::dma_fence>();
+ fn into_foreign(self) -> *mut c_void { + let fence = self;
+ let ptr = fence.as_raw();
+ // DriverFence must not drop. + core::mem::forget(fence);
Nit: Modern Rust uses ManuallyDrop instead of forget().
You mean still take `self` here, then stuff it into ManuallyDrop and let it go out of scope, aye?
Thx for the review, P.