The following series implements small updates to the fence API. I've found them useful when implementing the fence API in TTM and i915.
The last patch enables RCU on top of the API. I've found this less useful, but it was the condition on which Thomas Hellstrom was OK with converting TTM to fence, so I had to keep it in.
If nobody objects I'll probably merge that patch through drm, because some care is needed in TTM before it can flip the switch on RCU.
---
Maarten Lankhorst (2):
  reservation: update api and add some helpers
  [RFC] reservation: add support for read-only access using rcu
Move the list of shared fences to a struct, and return it in reservation_object_get_list().
Add reservation_object_reserve_shared(), which reserves space in the reservation_object for 1 more shared fence.
reservation_object_add_shared_fence() and reservation_object_add_excl_fence() are used to assign a new fence to a reservation_object pointer, to complete a reservation.
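A typical driver call sequence then looks roughly like this (minimal sketch; the wrapper name and error handling are just for illustration):

	/* attach a new shared fence to a buffer after command submission */
	static int driver_add_shared_fence(struct reservation_object *resv,
					   struct fence *fence)
	{
		int ret;

		ww_mutex_lock(&resv->lock, NULL);

		/* make sure a slot is available before publishing the fence */
		ret = reservation_object_reserve_shared(resv);
		if (!ret)
			reservation_object_add_shared_fence(resv, fence);

		ww_mutex_unlock(&resv->lock);
		return ret;
	}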
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/base/dma-buf.c      |  35 +++++++---
 drivers/base/fence.c        |   4 +
 drivers/base/reservation.c  | 154 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/fence.h       |   6 ++
 include/linux/reservation.h |  48 +++++++++++--
 kernel/sched/core.c         |   1
 6 files changed, 228 insertions(+), 20 deletions(-)
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index 96338bf7f457..d89a98d2c37b 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -134,7 +134,10 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) { struct dma_buf *dmabuf; struct reservation_object *resv; + struct reservation_object_list *fobj; + struct fence *fence_excl; unsigned long events; + unsigned shared_count;
dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -150,12 +153,18 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll)
ww_mutex_lock(&resv->lock, NULL);
- if (resv->fence_excl && (!(events & POLLOUT) || - resv->fence_shared_count == 0)) { + fobj = resv->fence; + if (!fobj) + goto out; + + shared_count = fobj->shared_count; + fence_excl = resv->fence_excl; + + if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; unsigned long pevents = POLLIN;
- if (resv->fence_shared_count == 0) + if (shared_count == 0) pevents |= POLLOUT;
spin_lock_irq(&dmabuf->poll.lock); @@ -167,19 +176,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock);
if (events & pevents) { - if (!fence_add_callback(resv->fence_excl, - &dcb->cb, dma_buf_poll_cb)) + if (!fence_add_callback(fence_excl, &dcb->cb, + dma_buf_poll_cb)) { events &= ~pevents; - else + } else { /* * No callback queued, wake up any additional * waiters. */ dma_buf_poll_cb(NULL, &dcb->cb); + } } }
- if ((events & POLLOUT) && resv->fence_shared_count > 0) { + if ((events & POLLOUT) && shared_count > 0) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared; int i;
@@ -194,15 +204,18 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) if (!(events & POLLOUT)) goto out;
- for (i = 0; i < resv->fence_shared_count; ++i) - if (!fence_add_callback(resv->fence_shared[i], - &dcb->cb, dma_buf_poll_cb)) { + for (i = 0; i < shared_count; ++i) { + struct fence *fence = fobj->shared[i]; + + if (!fence_add_callback(fence, &dcb->cb, + dma_buf_poll_cb)) { events &= ~POLLOUT; break; } + }
/* No callback queued, wake up any additional waiters. */ - if (i == resv->fence_shared_count) + if (i == shared_count) dma_buf_poll_cb(NULL, &dcb->cb); }
diff --git a/drivers/base/fence.c b/drivers/base/fence.c index 8fff13fb86cf..f780f9b3d418 100644 --- a/drivers/base/fence.c +++ b/drivers/base/fence.c @@ -170,7 +170,7 @@ void release_fence(struct kref *kref) if (fence->ops->release) fence->ops->release(fence); else - kfree(fence); + free_fence(fence); } EXPORT_SYMBOL(release_fence);
@@ -448,7 +448,7 @@ static void seqno_release(struct fence *fence) if (f->ops->release) f->ops->release(fence); else - kfree(f); + free_fence(fence); }
static long seqno_wait(struct fence *fence, bool intr, signed long timeout) diff --git a/drivers/base/reservation.c b/drivers/base/reservation.c index a73fbf3b8e56..b82a5b630a8e 100644 --- a/drivers/base/reservation.c +++ b/drivers/base/reservation.c @@ -37,3 +37,157 @@
DEFINE_WW_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); + +/* + * Reserve space to add a shared fence to a reservation_object, + * must be called with obj->lock held. + */ +int reservation_object_reserve_shared(struct reservation_object *obj) +{ + struct reservation_object_list *fobj, *old; + u32 max; + + old = reservation_object_get_list(obj); + + if (old && old->shared_max) { + if (old->shared_count < old->shared_max) { + /* perform an in-place update */ + kfree(obj->staged); + obj->staged = NULL; + return 0; + } + max = old->shared_max * 2; + } else + max = 4; + + /* + * resize obj->staged or allocate if it doesn't exist, + * noop if already correct size + */ + fobj = krealloc(obj->staged, offsetof(typeof(*fobj), shared[max]), + GFP_KERNEL); + if (!fobj) + return -ENOMEM; + + obj->staged = fobj; + fobj->shared_max = max; + return 0; +} +EXPORT_SYMBOL(reservation_object_reserve_shared); + +static void +reservation_object_add_shared_inplace(struct reservation_object *obj, + struct reservation_object_list *fobj, + struct fence *fence) +{ + u32 i; + + for (i = 0; i < fobj->shared_count; ++i) { + if (fobj->shared[i]->context == fence->context) { + struct fence *old_fence = fobj->shared[i]; + + fence_get(fence); + + fobj->shared[i] = fence; + + fence_put(old_fence); + return; + } + } + + fence_get(fence); + fobj->shared[fobj->shared_count] = fence; + /* + * make the new fence visible before incrementing + * fobj->shared_count + */ + smp_wmb(); + fobj->shared_count++; +} + +static void +reservation_object_add_shared_replace(struct reservation_object *obj, + struct reservation_object_list *old, + struct reservation_object_list *fobj, + struct fence *fence) +{ + unsigned i; + + fence_get(fence); + + if (!old) { + fobj->shared[0] = fence; + fobj->shared_count = 1; + goto done; + } + + /* + * no need to bump fence refcounts, rcu_read access + * requires the use of kref_get_unless_zero, and the + * references from the old struct are carried over to + * the new. + */ + fobj->shared_count = old->shared_count; + + for (i = 0; i < old->shared_count; ++i) { + if (fence && old->shared[i]->context == fence->context) { + fence_put(old->shared[i]); + fobj->shared[i] = fence; + fence = NULL; + } else + fobj->shared[i] = old->shared[i]; + } + if (fence) + fobj->shared[fobj->shared_count++] = fence; + +done: + obj->fence = fobj; + kfree(old); +} + +/* + * Add a fence to a shared slot, obj->lock must be held, and + * reservation_object_reserve_shared_fence has been called. 
+ */ +void reservation_object_add_shared_fence(struct reservation_object *obj, + struct fence *fence) +{ + struct reservation_object_list *old, *fobj = obj->staged; + + old = reservation_object_get_list(obj); + obj->staged = NULL; + + if (!fobj) { + BUG_ON(old->shared_count == old->shared_max); + reservation_object_add_shared_inplace(obj, old, fence); + } else + reservation_object_add_shared_replace(obj, old, fobj, fence); +} +EXPORT_SYMBOL(reservation_object_add_shared_fence); + +void reservation_object_add_excl_fence(struct reservation_object *obj, + struct fence *fence) +{ + struct fence *old_fence = obj->fence_excl; + struct reservation_object_list *old; + u32 i = 0; + + old = reservation_object_get_list(obj); + if (old) { + i = old->shared_count; + old->shared_count = 0; + } + + if (fence) + fence_get(fence); + + obj->fence_excl = fence; + + /* inplace update, no shared fences */ + while (i--) + fence_put(old->shared[i]); + + if (old_fence) + fence_put(old_fence); +} +EXPORT_SYMBOL(reservation_object_add_excl_fence); diff --git a/include/linux/fence.h b/include/linux/fence.h index 65f2a01ee7e4..d13b5ab61726 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -31,6 +31,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/printk.h> +#include <linux/slab.h>
struct fence; struct fence_ops; @@ -191,6 +192,11 @@ static inline void fence_get(struct fence *fence)
extern void release_fence(struct kref *kref);
+static inline void free_fence(struct fence *fence) +{ + kfree(fence); +} + /** * fence_put - decreases refcount of the fence * @fence: [in] fence to reduce refcount of diff --git a/include/linux/reservation.h b/include/linux/reservation.h index f3f57460a205..b602365c87f9 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -45,36 +45,70 @@
extern struct ww_class reservation_ww_class;
+struct reservation_object_list { + u32 shared_count, shared_max; + struct fence *shared[]; +}; + struct reservation_object { struct ww_mutex lock;
struct fence *fence_excl; - struct fence **fence_shared; - u32 fence_shared_count, fence_shared_max; + struct reservation_object_list *fence; + struct reservation_object_list *staged; };
+#define reservation_object_assert_held(obj) \ + lockdep_assert_held(&(obj)->lock.base) + static inline void reservation_object_init(struct reservation_object *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class);
- obj->fence_shared_count = obj->fence_shared_max = 0; - obj->fence_shared = NULL; obj->fence_excl = NULL; + obj->fence = NULL; + obj->staged = NULL; }
static inline void reservation_object_fini(struct reservation_object *obj) { int i; + struct reservation_object_list *fobj;
+ /* + * This object should be dead and all references must have + * been released to it. + */ if (obj->fence_excl) fence_put(obj->fence_excl); - for (i = 0; i < obj->fence_shared_count; ++i) - fence_put(obj->fence_shared[i]); - kfree(obj->fence_shared); + + fobj = obj->fence; + if (fobj) { + for (i = 0; i < fobj->shared_count; ++i) + fence_put(fobj->shared[i]); + + kfree(fobj); + } + kfree(obj->staged);
ww_mutex_destroy(&obj->lock); }
+static inline struct reservation_object_list * +reservation_object_get_list(struct reservation_object *obj) +{ + reservation_object_assert_held(obj); + + return obj->fence; +} + +int reservation_object_reserve_shared(struct reservation_object *obj); +void reservation_object_add_shared_fence(struct reservation_object *obj, + struct fence *fence); + +void reservation_object_add_excl_fence(struct reservation_object *obj, + struct fence *fence); + #endif /* _LINUX_RESERVATION_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f5c6635b806c..b8cfb790e831 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1694,6 +1694,7 @@ int wake_up_state(struct task_struct *p, unsigned int state) { return try_to_wake_up(p, state, 0); } +EXPORT_SYMBOL(wake_up_state);
/* * Perform scheduler related setup for a newly forked process p.
This adds 3 more functions to deal with rcu.
reservation_object_wait_timeout_rcu() will wait on all fences of the reservation_object, without obtaining the ww_mutex.
reservation_object_test_signaled_rcu() will test if all fences of the reservation_object are signaled without using the ww_mutex.
reservation_object_get_excl() is added because touching the fence_excl member directly will trigger a sparse warning.
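As a usage sketch (not part of the patch; the wrapper and the 5 second timeout are made up), waiting for a buffer to go idle without taking the ww_mutex then becomes:

	static int driver_wait_bo_idle(struct reservation_object *resv, bool intr)
	{
		long ret;

		/* wait_all = true: wait for the exclusive and all shared fences */
		ret = reservation_object_wait_timeout_rcu(resv, true, intr,
							  msecs_to_jiffies(5000));
		if (ret < 0)
			return ret;		/* interrupted or other error */

		return ret ? 0 : -EBUSY;	/* 0 means the wait timed out */
	}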
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/base/dma-buf.c      |  46 +++++++++++--
 drivers/base/reservation.c  | 147 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/fence.h       |  22 ++++++
 include/linux/reservation.h |  40 ++++++++----
 4 files changed, 224 insertions(+), 31 deletions(-)
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index d89a98d2c37b..fc2d7546b8b0 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -151,14 +151,22 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
- ww_mutex_lock(&resv->lock, NULL); + rcu_read_lock();
- fobj = resv->fence; - if (!fobj) - goto out; + fobj = rcu_dereference(resv->fence); + if (fobj) { + shared_count = ACCESS_ONCE(fobj->shared_count); + smp_mb(); /* shared_count needs transitivity wrt fence_excl */ + } else + shared_count = 0; + fence_excl = rcu_dereference(resv->fence_excl);
- shared_count = fobj->shared_count; - fence_excl = resv->fence_excl; + /* + * This would have needed a smp_read_barrier_depends() + * because shared_count needs to be read before shared[i], but + * spin_lock_irq and spin_unlock_irq provide even stronger + * guarantees. + */
if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; @@ -176,14 +184,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock);
if (events & pevents) { - if (!fence_add_callback(fence_excl, &dcb->cb, + if (!fence_get_rcu(fence_excl)) { + /* force a recheck */ + events &= ~pevents; + dma_buf_poll_cb(NULL, &dcb->cb); + } else if (!fence_add_callback(fence_excl, &dcb->cb, dma_buf_poll_cb)) { events &= ~pevents; + fence_put(fence_excl); } else { /* * No callback queued, wake up any additional * waiters. */ + fence_put(fence_excl); dma_buf_poll_cb(NULL, &dcb->cb); } } @@ -205,13 +219,25 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) goto out;
for (i = 0; i < shared_count; ++i) { - struct fence *fence = fobj->shared[i]; - + struct fence *fence = fence_get_rcu(fobj->shared[i]); + if (!fence) { + /* + * fence refcount dropped to zero, this means + * that fobj has been freed + * + * call dma_buf_poll_cb and force a recheck! + */ + events &= ~POLLOUT; + dma_buf_poll_cb(NULL, &dcb->cb); + break; + } if (!fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb)) { + fence_put(fence); events &= ~POLLOUT; break; } + fence_put(fence); }
/* No callback queued, wake up any additional waiters. */ @@ -220,7 +246,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) }
out: - ww_mutex_unlock(&resv->lock); + rcu_read_unlock(); return events; }
diff --git a/drivers/base/reservation.c b/drivers/base/reservation.c index b82a5b630a8e..4cdce63140b8 100644 --- a/drivers/base/reservation.c +++ b/drivers/base/reservation.c @@ -87,9 +87,13 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, struct fence *old_fence = fobj->shared[i];
fence_get(fence); + /* for the fence_get() */ + smp_mb__after_atomic_inc();
fobj->shared[i] = fence;
+ /* for the fence_put() */ + smp_mb__before_atomic_dec(); fence_put(old_fence); return; } @@ -141,8 +145,9 @@ reservation_object_add_shared_replace(struct reservation_object *obj, fobj->shared[fobj->shared_count++] = fence;
done: - obj->fence = fobj; - kfree(old); + rcu_assign_pointer(obj->fence, fobj); + if (old) + kfree_rcu(old, rcu); }
/* @@ -168,20 +173,25 @@ EXPORT_SYMBOL(reservation_object_add_shared_fence); void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence) { - struct fence *old_fence = obj->fence_excl; + struct fence *old_fence = reservation_object_get_excl(obj); struct reservation_object_list *old; u32 i = 0;
old = reservation_object_get_list(obj); - if (old) { + if (old) i = old->shared_count; - old->shared_count = 0; - }
if (fence) fence_get(fence);
- obj->fence_excl = fence; + rcu_assign_pointer(obj->fence_excl, fence); + + if (old) { + smp_wmb(); + old->shared_count = 0; + } + + smp_mb__before_atomic_dec();
/* inplace update, no shared fences */ while (i--) @@ -191,3 +201,126 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, fence_put(old_fence); } EXPORT_SYMBOL(reservation_object_add_excl_fence); + +long reservation_object_wait_timeout_rcu(struct reservation_object *obj, + bool wait_all, bool intr, + unsigned long timeout) +{ + unsigned shared_count = 0; + + while (1) { + struct fence *fence = NULL, *fence_excl; + struct reservation_object_list *fobj; + long ret; + + rcu_read_lock(); + +retry: + if (wait_all) { + unsigned i; + + fobj = rcu_dereference(obj->fence); + if (fobj) + shared_count = ACCESS_ONCE(fobj->shared_count); + else + shared_count = 0; + smp_mb(); /* shared_count needs transitivity wrt fence_excl */ + + for (i = 0; i < shared_count; ++i) { + fence = ACCESS_ONCE(fobj->shared[i]); + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; + + if (!fence_get_rcu(fence)) + goto retry; + + if (fence_is_signaled(fence)) + fence_put(fence); + else + break; + } + if (i == shared_count) + fence = NULL; + } + + if (shared_count == 0) { + fence_excl = rcu_dereference(obj->fence_excl); + if (fence_excl && !fence_get_rcu(fence_excl)) + goto retry; + + if (fence_excl && fence_is_signaled(fence_excl)) + fence_put(fence_excl); + else + fence = fence_excl; + } + + rcu_read_unlock(); + + if (fence == NULL) + return timeout; + + ret = fence_wait_timeout(fence, intr, timeout); + fence_put(fence); + + if (ret <= 0) + return ret; + else + timeout = ret; + } +} +EXPORT_SYMBOL_GPL(reservation_object_wait_timeout_rcu); + +bool reservation_object_test_signaled_rcu(struct reservation_object *obj, bool test_all) +{ + struct fence *fence, *fence_excl; + struct reservation_object_list *fobj; + unsigned shared_count = 0; + bool ret = true; + + rcu_read_lock(); + +retry: + if (test_all) { + unsigned i; + + fobj = rcu_dereference(obj->fence); + if (fobj) + shared_count = ACCESS_ONCE(fobj->shared_count); + else + shared_count = 0; + smp_mb(); /* shared_count needs transitivity wrt fence_excl */ + + for (i = 0; i < shared_count; ++i) { + fence = ACCESS_ONCE(fobj->shared[i]); + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; + + if (!fence_get_rcu(fence)) + goto retry; + + ret = fence_is_signaled(fence); + fence_put(fence); + if (!ret) + break; + } + } + + if (shared_count == 0) { + fence_excl = rcu_dereference(obj->fence_excl); + + if (fence_excl && !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) { + fence = fence_get_rcu(fence_excl); + if (!fence) + goto retry; + + ret = fence_is_signaled(fence); + fence_put(fence); + } + } + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu); diff --git a/include/linux/fence.h b/include/linux/fence.h index d13b5ab61726..8499ace045da 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -31,7 +31,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/printk.h> -#include <linux/slab.h> +#include <linux/rcupdate.h>
struct fence; struct fence_ops; @@ -41,6 +41,7 @@ struct fence_cb; * struct fence - software synchronization primitive * @refcount: refcount for this fence * @ops: fence_ops associated with this fence + * @rcu: used for releasing fence with kfree_rcu * @cb_list: list of all callbacks to call * @lock: spin_lock_irqsave used for locking * @context: execution context this fence belongs to, returned by @@ -74,6 +75,7 @@ struct fence_cb; struct fence { struct kref refcount; const struct fence_ops *ops; + struct rcu_head rcu; struct list_head cb_list; spinlock_t *lock; unsigned context, seqno; @@ -190,11 +192,27 @@ static inline void fence_get(struct fence *fence) kref_get(&fence->refcount); }
+/** + * fence_get_rcu - get a fence from a reservation_object_list with rcu read lock + * @fence: [in] fence to increase refcount of + * + * Function returns NULL if no refcount could be obtained, or the fence. + */ +static inline struct fence *fence_get_rcu(struct fence *fence) +{ + struct fence *f = ACCESS_ONCE(fence); + + if (kref_get_unless_zero(&f->refcount)) + return f; + else + return NULL; +} + extern void release_fence(struct kref *kref);
static inline void free_fence(struct fence *fence) { - kfree(fence); + kfree_rcu(fence, rcu); }
/** diff --git a/include/linux/reservation.h b/include/linux/reservation.h index b602365c87f9..2a53bfbec6a1 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -42,10 +42,12 @@ #include <linux/ww_mutex.h> #include <linux/fence.h> #include <linux/slab.h> +#include <linux/rcupdate.h>
extern struct ww_class reservation_ww_class;
struct reservation_object_list { + struct rcu_head rcu; u32 shared_count, shared_max; struct fence *shared[]; }; @@ -53,21 +55,20 @@ struct reservation_object_list { struct reservation_object { struct ww_mutex lock;
- struct fence *fence_excl; - struct reservation_object_list *fence; + struct fence __rcu *fence_excl; + struct reservation_object_list __rcu *fence; struct reservation_object_list *staged; };
-#define reservation_object_assert_held(obj) \ - lockdep_assert_held(&(obj)->lock.base) +#define reservation_object_held(obj) (lockdep_is_held(&(obj)->lock.base))
static inline void reservation_object_init(struct reservation_object *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class);
- obj->fence_excl = NULL; - obj->fence = NULL; + RCU_INIT_POINTER(obj->fence, NULL); + RCU_INIT_POINTER(obj->fence_excl, NULL); obj->staged = NULL; }
@@ -76,15 +77,17 @@ reservation_object_fini(struct reservation_object *obj) { int i; struct reservation_object_list *fobj; + struct fence *excl;
/* * This object should be dead and all references must have - * been released to it. + * been released to it, so no need to be protected with rcu. */ - if (obj->fence_excl) - fence_put(obj->fence_excl); + excl = rcu_dereference_protected(obj->fence_excl, 1); + if (excl) + fence_put(excl);
- fobj = obj->fence; + fobj = rcu_dereference_protected(obj->fence, 1); if (fobj) { for (i = 0; i < fobj->shared_count; ++i) fence_put(fobj->shared[i]); @@ -99,9 +102,15 @@ reservation_object_fini(struct reservation_object *obj) static inline struct reservation_object_list * reservation_object_get_list(struct reservation_object *obj) { - reservation_object_assert_held(obj); + return rcu_dereference_protected(obj->fence, + reservation_object_held(obj)); +}
- return obj->fence; +static inline struct fence * +reservation_object_get_excl(struct reservation_object *obj) +{ + return rcu_dereference_protected(obj->fence_excl, + reservation_object_held(obj)); }
int reservation_object_reserve_shared(struct reservation_object *obj); @@ -111,4 +120,11 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence);
+long reservation_object_wait_timeout_rcu(struct reservation_object *obj, + bool wait_all, bool intr, + unsigned long timeout); + +bool reservation_object_test_signaled_rcu(struct reservation_object *obj, + bool test_all); + #endif /* _LINUX_RESERVATION_H */
Hi!
Ugh. This became more complicated than I thought, but I'm OK with moving TTM over to fence while we sort out how / if we're going to use this.
While reviewing, it struck me that this is kind of error-prone, and hard to follow since we're operating on a structure that may be continually updated under us, needing a lot of RCU-specific macros and barriers.
Also the rcu wait appears to not complete until there are no busy fences left (new ones can be added while we wait) rather than waiting on a snapshot of busy fences.
I wonder if these issues can be addressed by having a function that provides a snapshot of all busy fences. One way to accomplish this is to include the exclusive fence in the fence_list structure and allocate a new such structure each time it is updated. The RCU reader could then just make a copy of the current fence_list structure pointed to by &obj->fence, but I'm not sure we want to reallocate *each* time we update the fence pointer.
The other approach uses a seqlock to obtain a consistent snapshot; I've attached an incomplete outline, and I'm not 100% sure whether it's OK to combine RCU and seqlocks in this way...
Both these approaches have the benefit of hiding the RCU snapshotting in a single function, that can then be used by any waiting or polling function.
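Interface-wise I'm thinking of something along these lines (only an illustration of the idea, with invented names; not the attached outline):

	struct reservation_fence_snapshot {
		struct fence *excl;		/* may be NULL */
		unsigned shared_count;
		struct fence *shared[];		/* snapshot holds a reference on each */
	};

	/* return a consistent snapshot of all fences, with references taken */
	struct reservation_fence_snapshot *
	reservation_object_get_snapshot(struct reservation_object *obj);

	/* drop the references and free the snapshot */
	void reservation_fence_snapshot_put(struct reservation_fence_snapshot *snap);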
/Thomas
On 04/09/2014 04:49 PM, Maarten Lankhorst wrote:
This adds 3 more functions to deal with rcu.
reservation_object_wait_timeout_rcu() will wait on all fences of the reservation_object, without obtaining the ww_mutex.
reservation_object_test_signaled_rcu() will test if all fences of the reservation_object are signaled without using the ww_mutex.
reservation_object_get_excl() is added because touching the fence_excl member directly will trigger a sparse warning.
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
 drivers/base/dma-buf.c      |  46 +++++++++++--
 drivers/base/reservation.c  | 147 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/fence.h       |  22 ++++++
 include/linux/reservation.h |  40 ++++++++----
 4 files changed, 224 insertions(+), 31 deletions(-)
Hey,
On 10-04-14 10:46, Thomas Hellstrom wrote:
Hi!
Ugh. This became more complicated than I thought, but I'm OK with moving TTM over to fence while we sort out how / if we're going to use this.
While reviewing, it struck me that this is kind of error-prone, and hard to follow since we're operating on a structure that may be continually updated under us, needing a lot of RCU-specific macros and barriers.
Yeah, but with the exception of dma_buf_poll I don't think there is anything else outside drivers/base/reservation.c that has to deal with rcu.
Also the rcu wait appears to not complete until there are no busy fences left (new ones can be added while we wait) rather than waiting on a snapshot of busy fences.
This is by design, because 'wait for bo idle' type functions only care whether the bo is completely idle or not.
It would be easy to make a snapshot even without seqlocks: just copy reservation_object_test_signaled_rcu to return a shared list if test_all is set, or return a pointer to the exclusive fence otherwise.
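Something with a prototype along these lines would be enough (sketch; the caller would drop the fence references and kfree() the returned array when done):

	int reservation_object_get_fences_rcu(struct reservation_object *obj,
					      struct fence **pfence_excl,
					      unsigned *pshared_count,
					      struct fence ***pshared);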
I wonder if these issues can be addressed by having a function that provides a snapshot of all busy fences: This can be accomplished either by including the exclusive fence in the fence_list structure and allocate a new such structure each time it is updated. The RCU reader could then just make a copy of the current fence_list structure pointed to by &obj->fence, but I'm not sure we want to reallocate *each* time we update the fence pointer.
No, the most common operation is updating fence pointers, which is why the current design makes that cheap. It's also why doing rcu reads is more expensive.
The other approach uses a seqlock to obtain a consistent snapshot, and I've attached an incomplete outline, and I'm not 100% whether it's OK to combine RCU and seqlocks in this way...
Both these approaches have the benefit of hiding the RCU snapshotting in a single function, that can then be used by any waiting or polling function.
I think the middle way of using seqlocks to protect the fence_excl pointer and shared list combination, and using RCU to protect the fence refcounts and the availability of the list, could work for our use case and might remove a bunch of memory barriers. But yeah, that depends on layering RCU and seqlocks; no idea if that is allowed, but I suppose it is.
Also, you're being overly paranoid with seqlock reading; we would only need something like this:
rcu_read_lock()
preempt_disable()
seq = read_seqcount_begin()
read fence_excl, shared_count = ACCESS_ONCE(fence->shared_count)
copy shared to a struct
if (read_seqcount_retry()) { unlock and retry }
preempt_enable()
use fence_get_rcu() to bump refcount on everything,
    if that fails unlock, put, and retry
rcu_read_unlock()
But the shared list would still need to be RCU'd, to make sure we're not reading freed garbage.
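Fleshed out a bit for just the exclusive fence, the read side would look roughly like this (rough sketch, assuming reservation_object grows a seqcount_t 'seq' member and the matching __rcu annotations):

	static struct fence *get_excl_fence_rcu(struct reservation_object *obj)
	{
		struct fence *excl;
		unsigned seq;

	retry:
		rcu_read_lock();
		seq = read_seqcount_begin(&obj->seq);

		excl = rcu_dereference(obj->fence_excl);
		if (read_seqcount_retry(&obj->seq, seq))
			goto unlock_retry;

		/* the kref may already be zero if the fence is being freed */
		if (excl && !fence_get_rcu(excl))
			goto unlock_retry;

		rcu_read_unlock();
		return excl;	/* caller does fence_put() when done */

	unlock_retry:
		rcu_read_unlock();
		goto retry;
	}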
~Maarten
On 04/10/2014 12:07 PM, Maarten Lankhorst wrote:
Hey,
On 10-04-14 10:46, Thomas Hellstrom wrote:
Hi!
Ugh. This became more complicated than I thought, but I'm OK with moving TTM over to fence while we sort out how / if we're going to use this.
While reviewing, it struck me that this is kind of error-prone, and hard to follow since we're operating on a structure that may be continually updated under us, needing a lot of RCU-specific macros and barriers.
Yeah, but with the exception of dma_buf_poll I don't think there is anything else outside drivers/base/reservation.c has to deal with rcu.
Also the rcu wait appears to not complete until there are no busy fences left (new ones can be added while we wait) rather than waiting on a snapshot of busy fences.
This has been by design, because 'wait for bo idle' type of functions only care if the bo is completely idle or not.
No, not when using RCU, because the bo may be busy again before the function returns :) Complete idleness can only be guaranteed if holding the reservation, or otherwise making sure that no new rendering is submitted to the buffer, so it's overkill to wait for complete idleness here.
It would be easy to make a snapshot even without seqlocks, just copy reservation_object_test_signaled_rcu to return a shared list if test_all is set, or return pointer to exclusive otherwise.
I wonder if these issues can be addressed by having a function that provides a snapshot of all busy fences: This can be accomplished either by including the exclusive fence in the fence_list structure and allocate a new such structure each time it is updated. The RCU reader could then just make a copy of the current fence_list structure pointed to by &obj->fence, but I'm not sure we want to reallocate *each* time we update the fence pointer.
No, the most common operation is updating fence pointers, which is why the current design makes that cheap. It's also why doing rcu reads is more expensive.
The other approach uses a seqlock to obtain a consistent snapshot, and I've attached an incomplete outline, and I'm not 100% whether it's OK to combine RCU and seqlocks in this way...
Both these approaches have the benefit of hiding the RCU snapshotting in a single function, that can then be used by any waiting or polling function.
I think the middle way with using seqlocks to protect the fence_excl pointer and shared list combination, and using RCU to protect the refcounts for fences and the availability of the list could work for our usecase and might remove a bunch of memory barriers. But yeah that depends on layering rcu and seqlocks. No idea if that is allowed. But I suppose it is.
Also, you're being overly paranoid with seqlock reading, we would only need something like this:
rcu_read_lock()
preempt_disable()
seq = read_seqcount_begin()
read fence_excl, shared_count = ACCESS_ONCE(fence->shared_count)
copy shared to a struct
if (read_seqcount_retry()) { unlock and retry }
preempt_enable()
use fence_get_rcu() to bump refcount on everything,
    if that fails unlock, put, and retry
rcu_read_unlock()
But the shared list would still need to be RCU'd, to make sure we're not reading freed garbage.
Ah, OK. But I think we should use rcu inside seqcount, because read_seqcount_begin() may spin for a long time if there are many writers. Also I don't think the preempt_disable() is needed for read_seq critical sections, other than that it might decrease the risk of retries.
Thanks, Thomas
~Maarten
On 04/10/2014 01:08 PM, Thomas Hellstrom wrote:
On 04/10/2014 12:07 PM, Maarten Lankhorst wrote:
Hey,
On 10-04-14 10:46, Thomas Hellstrom wrote:
Hi!
Ugh. This became more complicated than I thought, but I'm OK with moving TTM over to fence while we sort out how / if we're going to use this.
While reviewing, it struck me that this is kind of error-prone, and hard to follow since we're operating on a structure that may be continually updated under us, needing a lot of RCU-specific macros and barriers.
Yeah, but with the exception of dma_buf_poll I don't think there is anything else outside drivers/base/reservation.c has to deal with rcu.
Also the rcu wait appears to not complete until there are no busy fences left (new ones can be added while we wait) rather than waiting on a snapshot of busy fences.
This has been by design, because 'wait for bo idle' type of functions only care if the bo is completely idle or not.
No, not when using RCU, because the bo may be busy again before the function returns :) Complete idleness can only be guaranteed if holding the reservation, or otherwise making sure that no new rendering is submitted to the buffer, so it's an overkill to wait for complete idleness here.
Although, if we fail to get a refcount for a fence and it's still busy, we need to do a seq retry, because the fence might have been replaced by another fence from the same context without being idle. That check is not present in the snapshot code I sent.
/Thomas
On 10-04-14 13:08, Thomas Hellstrom wrote:
On 04/10/2014 12:07 PM, Maarten Lankhorst wrote:
Hey,
On 10-04-14 10:46, Thomas Hellstrom wrote:
Hi!
Ugh. This became more complicated than I thought, but I'm OK with moving TTM over to fence while we sort out how / if we're going to use this.
While reviewing, it struck me that this is kind of error-prone, and hard to follow since we're operating on a structure that may be continually updated under us, needing a lot of RCU-specific macros and barriers.
Yeah, but with the exception of dma_buf_poll I don't think there is anything else outside drivers/base/reservation.c has to deal with rcu.
Also the rcu wait appears to not complete until there are no busy fences left (new ones can be added while we wait) rather than waiting on a snapshot of busy fences.
This has been by design, because 'wait for bo idle' type of functions only care if the bo is completely idle or not.
No, not when using RCU, because the bo may be busy again before the function returns :) Complete idleness can only be guaranteed if holding the reservation, or otherwise making sure that no new rendering is submitted to the buffer, so it's an overkill to wait for complete idleness here.
You're probably right, but it makes waiting a lot easier if I don't have to deal with memory allocations. :P
It would be easy to make a snapshot even without seqlocks, just copy reservation_object_test_signaled_rcu to return a shared list if test_all is set, or return pointer to exclusive otherwise.
I wonder if these issues can be addressed by having a function that provides a snapshot of all busy fences: This can be accomplished either by including the exclusive fence in the fence_list structure and allocate a new such structure each time it is updated. The RCU reader could then just make a copy of the current fence_list structure pointed to by &obj->fence, but I'm not sure we want to reallocate *each* time we update the fence pointer.
No, the most common operation is updating fence pointers, which is why the current design makes that cheap. It's also why doing rcu reads is more expensive.
The other approach uses a seqlock to obtain a consistent snapshot, and I've attached an incomplete outline, and I'm not 100% whether it's OK to combine RCU and seqlocks in this way...
Both these approaches have the benefit of hiding the RCU snapshotting in a single function, that can then be used by any waiting or polling function.
I think the middle way with using seqlocks to protect the fence_excl pointer and shared list combination, and using RCU to protect the refcounts for fences and the availability of the list could work for our usecase and might remove a bunch of memory barriers. But yeah that depends on layering rcu and seqlocks. No idea if that is allowed. But I suppose it is.
Also, you're being overly paranoid with seqlock reading, we would only need something like this:
rcu_read_lock()
preempt_disable()
seq = read_seqcount_begin()
read fence_excl, shared_count = ACCESS_ONCE(fence->shared_count)
copy shared to a struct
if (read_seqcount_retry()) { unlock and retry }
preempt_enable()
use fence_get_rcu() to bump refcount on everything,
    if that fails unlock, put, and retry
rcu_read_unlock()
But the shared list would still need to be RCU'd, to make sure we're not reading freed garbage.
Ah, OK. But I think we should use rcu inside seqcount, because read_seqcount_begin() may spin for a long time if there are many writers. Also I don't think the preempt_disable() is needed for read_seq critical sections, other than that it might decrease the risk of retries.
Reading the seqlock code makes me suspect that's the case too. The lockdep code calls local_irq_disable, so it's probably safe without preemption disabled.
~Maarten
I like not having to allocate memory, so I kept reservation_object_wait_timeout_rcu mostly the way it was. This code appears to fail on nouveau when using the shared members, but I'm not completely sure whether the error is in nouveau or in this code yet.
--8<--------
[RFC v2] reservation: add support for read-only access using rcu
This adds 4 more functions to deal with rcu.
reservation_object_get_fences_rcu() will obtain the list of shared and exclusive fences without obtaining the ww_mutex.
reservation_object_wait_timeout_rcu() will wait on all fences of the reservation_object, without obtaining the ww_mutex.
reservation_object_test_signaled_rcu() will test if all fences of the reservation_object are signaled without using the ww_mutex.
reservation_object_get_excl() is added because touching the fence_excl member directly will trigger a sparse warning.
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index d89a98d2c37b..ca6ef0c4b358 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -137,7 +137,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) struct reservation_object_list *fobj; struct fence *fence_excl; unsigned long events; - unsigned shared_count; + unsigned shared_count, seq;
dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -151,14 +151,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
- ww_mutex_lock(&resv->lock, NULL); +retry: + seq = read_seqcount_begin(&resv->seq); + rcu_read_lock();
- fobj = resv->fence; - if (!fobj) - goto out; - - shared_count = fobj->shared_count; - fence_excl = resv->fence_excl; + fobj = ACCESS_ONCE(resv->fence); + if (fobj) + shared_count = ACCESS_ONCE(fobj->shared_count); + else + shared_count = 0; + fence_excl = ACCESS_ONCE(resv->fence_excl); + if (read_seqcount_retry(&resv->seq, seq)) { + rcu_read_unlock(); + goto retry; + }
if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; @@ -176,14 +182,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock);
if (events & pevents) { - if (!fence_add_callback(fence_excl, &dcb->cb, + if (!fence_get_rcu(fence_excl)) { + /* force a recheck */ + events &= ~pevents; + dma_buf_poll_cb(NULL, &dcb->cb); + } else if (!fence_add_callback(fence_excl, &dcb->cb, dma_buf_poll_cb)) { events &= ~pevents; + fence_put(fence_excl); } else { /* * No callback queued, wake up any additional * waiters. */ + fence_put(fence_excl); dma_buf_poll_cb(NULL, &dcb->cb); } } @@ -205,13 +217,25 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) goto out;
for (i = 0; i < shared_count; ++i) { - struct fence *fence = fobj->shared[i]; - + struct fence *fence = fence_get_rcu(fobj->shared[i]); + if (!fence) { + /* + * fence refcount dropped to zero, this means + * that fobj has been freed + * + * call dma_buf_poll_cb and force a recheck! + */ + events &= ~POLLOUT; + dma_buf_poll_cb(NULL, &dcb->cb); + break; + } if (!fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb)) { + fence_put(fence); events &= ~POLLOUT; break; } + fence_put(fence); }
/* No callback queued, wake up any additional waiters. */ @@ -220,7 +244,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) }
out: - ww_mutex_unlock(&resv->lock); + rcu_read_unlock(); return events; }
diff --git a/drivers/base/reservation.c b/drivers/base/reservation.c index b82a5b630a8e..3417382643df 100644 --- a/drivers/base/reservation.c +++ b/drivers/base/reservation.c @@ -38,6 +38,9 @@ DEFINE_WW_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class);
+struct lock_class_key reservation_seqcount_class; +EXPORT_SYMBOL(reservation_seqcount_class); + /* * Reserve space to add a shared fence to a reservation_object, * must be called with obj->lock held. @@ -82,27 +85,28 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, { u32 i;
+ fence_get(fence); + + preempt_disable(); + write_seqcount_begin(&obj->seq); + for (i = 0; i < fobj->shared_count; ++i) { if (fobj->shared[i]->context == fence->context) { struct fence *old_fence = fobj->shared[i];
- fence_get(fence); - fobj->shared[i] = fence; + write_seqcount_end(&obj->seq); + preempt_enable();
fence_put(old_fence); return; } }
- fence_get(fence); - fobj->shared[fobj->shared_count] = fence; - /* - * make the new fence visible before incrementing - * fobj->shared_count - */ - smp_wmb(); - fobj->shared_count++; + fobj->shared[fobj->shared_count++] = fence; + + write_seqcount_end(&obj->seq); + preempt_enable(); }
static void @@ -112,6 +116,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct fence *fence) { unsigned i; + struct fence *old_fence = NULL;
fence_get(fence);
@@ -130,19 +135,27 @@ reservation_object_add_shared_replace(struct reservation_object *obj, fobj->shared_count = old->shared_count;
for (i = 0; i < old->shared_count; ++i) { - if (fence && old->shared[i]->context == fence->context) { - fence_put(old->shared[i]); + if (!old_fence && old->shared[i]->context == fence->context) { + old_fence = old->shared[i]; fobj->shared[i] = fence; - fence = NULL; } else fobj->shared[i] = old->shared[i]; } - if (fence) + if (!old_fence) fobj->shared[fobj->shared_count++] = fence;
done: + preempt_disable(); + write_seqcount_begin(&obj->seq); obj->fence = fobj; - kfree(old); + write_seqcount_end(&obj->seq); + preempt_enable(); + + if (old) + kfree_rcu(old, rcu); + + if (old_fence) + fence_put(old_fence); }
/* @@ -168,20 +181,24 @@ EXPORT_SYMBOL(reservation_object_add_shared_fence); void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence) { - struct fence *old_fence = obj->fence_excl; + struct fence *old_fence = reservation_object_get_excl(obj); struct reservation_object_list *old; u32 i = 0;
old = reservation_object_get_list(obj); - if (old) { + if (old) i = old->shared_count; - old->shared_count = 0; - }
if (fence) fence_get(fence);
+ preempt_disable(); + write_seqcount_begin(&obj->seq); obj->fence_excl = fence; + if (old) + old->shared_count = 0; + write_seqcount_end(&obj->seq); + preempt_enable();
/* inplace update, no shared fences */ while (i--) @@ -191,3 +208,236 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, fence_put(old_fence); } EXPORT_SYMBOL(reservation_object_add_excl_fence); + +int reservation_object_get_fences_rcu(struct reservation_object *obj, + struct fence **pfence_excl, + unsigned *pshared_count, + struct fence ***pshared) +{ + unsigned shared_count = 0; + unsigned retry = 1; + struct fence **shared = NULL, *fence_excl = NULL; + int ret = 0; + + while (retry) { + struct reservation_object_list *fobj; + unsigned seq, retry; + + seq = read_seqcount_begin(&obj->seq); + + rcu_read_lock(); + + fobj = ACCESS_ONCE(obj->fence); + if (fobj) { + struct fence **nshared; + + shared_count = ACCESS_ONCE(fobj->shared_count); + nshared = krealloc(shared, sizeof(*shared) * shared_count, GFP_KERNEL); + if (!nshared) { + ret = -ENOMEM; + shared_count = retry = 0; + goto unlock; + } + shared = nshared; + memcpy(shared, fobj->shared, sizeof(*shared) * shared_count); + } else + shared_count = 0; + fence_excl = obj->fence_excl; + + retry = read_seqcount_retry(&obj->seq, seq); + if (retry) + goto unlock; + + if (!fence_excl || fence_get_rcu(fence_excl)) { + unsigned i; + + for (i = 0; i < shared_count; ++i) { + if (fence_get_rcu(shared[i])) + continue; + + /* uh oh, refcount failed, abort and retry */ + while (i--) + fence_put(shared[i]); + + if (fence_excl) { + fence_put(fence_excl); + fence_excl = NULL; + } + + retry = 1; + break; + } + } else + retry = 1; + +unlock: + rcu_read_unlock(); + } + *pshared_count = shared_count; + if (shared_count) + *pshared = shared; + else { + *pshared = NULL; + kfree(shared); + } + *pfence_excl = fence_excl; + + return ret; +} +EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu); + +long reservation_object_wait_timeout_rcu(struct reservation_object *obj, + bool wait_all, bool intr, + unsigned long timeout) +{ + struct fence *fence; + unsigned seq, shared_count, i = 0; + long ret = timeout; + +retry: + fence = NULL; + shared_count = 0; + seq = read_seqcount_begin(&obj->seq); + rcu_read_lock(); + + if (wait_all) { + struct reservation_object_list *fobj = ACCESS_ONCE(obj->fence); + + if (fobj) + shared_count = ACCESS_ONCE(fobj->shared_count); + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + for (i = 0; i < shared_count; ++i) { + struct fence *lfence = ACCESS_ONCE(fobj->shared[i]); + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) + continue; + + if (!fence_get_rcu(lfence)) + goto unlock_retry; + + if (fence_is_signaled(lfence)) { + fence_put(lfence); + continue; + } + + fence = lfence; + break; + } + } + + if (!shared_count) { + struct fence *fence_excl = ACCESS_ONCE(obj->fence_excl); + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + if (fence_excl && + !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) { + if (!fence_get_rcu(fence_excl)) + goto unlock_retry; + + if (fence_is_signaled(fence_excl)) + fence_put(fence_excl); + else + fence = fence_excl; + } + } + + rcu_read_unlock(); + if (fence) { + ret = fence_wait_timeout(fence, intr, ret); + fence_put(fence); + if (ret > 0 && wait_all && (i + 1 < shared_count)) + goto retry; + } + return ret; + +unlock_retry: + rcu_read_unlock(); + goto retry; +} +EXPORT_SYMBOL_GPL(reservation_object_wait_timeout_rcu); + + +static inline int +reservation_object_test_signaled_single(struct fence *passed_fence) +{ + struct fence *fence, *lfence = ACCESS_ONCE(passed_fence); + int ret = 1; + + if (!test_bit(FENCE_FLAG_SIGNALED_BIT, 
&lfence->flags)) { + int ret; + + fence = fence_get_rcu(lfence); + if (!fence) + return -1; + + ret = !!fence_is_signaled(fence); + fence_put(fence); + } + return ret; +} + +bool reservation_object_test_signaled_rcu(struct reservation_object *obj, + bool test_all) +{ + struct fence *fence_excl; + struct reservation_object_list *fobj; + unsigned seq, shared_count; + int ret = true; + +retry: + shared_count = 0; + seq = read_seqcount_begin(&obj->seq); + rcu_read_lock(); + + if (test_all) { + unsigned i; + + fobj = ACCESS_ONCE(obj->fence); + + if (fobj) + shared_count = ACCESS_ONCE(fobj->shared_count); + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + for (i = 0; i < shared_count; ++i) { + ret = reservation_object_test_signaled_single(fobj->shared[i]); + if (ret < 0) + goto unlock_retry; + else if (!ret) + break; + } + + /* + * There could be a read_seqcount_retry here, but nothing cares + * about whether it's the old or newer fence pointers that are + * signale. That race could still have happened after checking + * read_seqcount_retry. If you care, use ww_mutex_lock. + */ + } + + if (!shared_count) { + fence_excl = ACCESS_ONCE(obj->fence_excl); + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + if (fence_excl) { + ret = reservation_object_test_signaled_single(fence_excl); + if (ret < 0) + goto unlock_retry; + } + } + + rcu_read_unlock(); + return ret; + +unlock_retry: + rcu_read_unlock(); + goto retry; +} +EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu); diff --git a/include/linux/fence.h b/include/linux/fence.h index d13b5ab61726..8499ace045da 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -31,7 +31,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/printk.h> -#include <linux/slab.h> +#include <linux/rcupdate.h>
struct fence; struct fence_ops; @@ -41,6 +41,7 @@ struct fence_cb; * struct fence - software synchronization primitive * @refcount: refcount for this fence * @ops: fence_ops associated with this fence + * @rcu: used for releasing fence with kfree_rcu * @cb_list: list of all callbacks to call * @lock: spin_lock_irqsave used for locking * @context: execution context this fence belongs to, returned by @@ -74,6 +75,7 @@ struct fence_cb; struct fence { struct kref refcount; const struct fence_ops *ops; + struct rcu_head rcu; struct list_head cb_list; spinlock_t *lock; unsigned context, seqno; @@ -190,11 +192,27 @@ static inline void fence_get(struct fence *fence) kref_get(&fence->refcount); }
+/** + * fence_get_rcu - get a fence from a reservation_object_list with rcu read lock + * @fence: [in] fence to increase refcount of + * + * Function returns NULL if no refcount could be obtained, or the fence. + */ +static inline struct fence *fence_get_rcu(struct fence *fence) +{ + struct fence *f = ACCESS_ONCE(fence); + + if (kref_get_unless_zero(&f->refcount)) + return f; + else + return NULL; +} + extern void release_fence(struct kref *kref);
static inline void free_fence(struct fence *fence) { - kfree(fence); + kfree_rcu(fence, rcu); }
/** diff --git a/include/linux/reservation.h b/include/linux/reservation.h index b602365c87f9..d6e1f620d1d0 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -42,16 +42,21 @@ #include <linux/ww_mutex.h> #include <linux/fence.h> #include <linux/slab.h> +#include <linux/seqlock.h> +#include <linux/rcupdate.h>
extern struct ww_class reservation_ww_class; +extern struct lock_class_key reservation_seqcount_class;
struct reservation_object_list { + struct rcu_head rcu; u32 shared_count, shared_max; struct fence *shared[]; };
struct reservation_object { struct ww_mutex lock; + seqcount_t seq;
struct fence *fence_excl; struct reservation_object_list *fence; @@ -66,8 +71,9 @@ reservation_object_init(struct reservation_object *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class);
- obj->fence_excl = NULL; + __seqcount_init(&obj->seq, "reservation_seqcount", &reservation_seqcount_class); obj->fence = NULL; + obj->fence_excl = NULL; obj->staged = NULL; }
@@ -76,13 +82,15 @@ reservation_object_fini(struct reservation_object *obj) { int i; struct reservation_object_list *fobj; + struct fence *excl;
/* * This object should be dead and all references must have - * been released to it. + * been released to it, so no need to be protected with rcu. */ - if (obj->fence_excl) - fence_put(obj->fence_excl); + excl = obj->fence_excl; + if (excl) + fence_put(excl);
fobj = obj->fence; if (fobj) { @@ -104,6 +112,14 @@ reservation_object_get_list(struct reservation_object *obj) return obj->fence; }
+static inline struct fence * +reservation_object_get_excl(struct reservation_object *obj) +{ + reservation_object_assert_held(obj); + + return obj->fence_excl; +} + int reservation_object_reserve_shared(struct reservation_object *obj); void reservation_object_add_shared_fence(struct reservation_object *obj, struct fence *fence); @@ -111,4 +127,16 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence);
+int reservation_object_get_fences_rcu(struct reservation_object *obj, + struct fence **pfence_excl, + unsigned *pshared_count, + struct fence ***pshared); + +long reservation_object_wait_timeout_rcu(struct reservation_object *obj, + bool wait_all, bool intr, + unsigned long timeout); + +bool reservation_object_test_signaled_rcu(struct reservation_object *obj, + bool test_all); + #endif /* _LINUX_RESERVATION_H */
Hi, Maarten.
Here I believe we encounter a lot of locking inconsistencies.
First, it seems you're using a number of pointers as RCU pointers without annotating them as such or using the correct rcu macros when assigning those pointers.
Some pointers (like the pointers in the shared fence list) are used both as RCU pointers (in dma_buf_poll(), for example) and as pointers considered protected by the seqlock (in reservation_object_get_fences_rcu()), which I believe is OK, but then the pointers must be assigned using the correct rcu macros. In the memcpy in reservation_object_get_fences_rcu() we might get away with an ugly typecast, but with a verbose comment that the pointers are considered protected by the seqlock at that location.
So I've updated (attached) the headers with proper __rcu annotation and locking comments according to how they are being used in the various reading functions. I believe if we want to get rid of this we need to validate those pointers using the seqlock as well. This will generate a lot of sparse warnings in those places needing rcu_dereference(), rcu_assign_pointer() or rcu_dereference_protected().
With this I think we can get rid of all ACCESS_ONCE macros: they're not needed when the rcu_x() macros are used, and never needed for the members protected by the seqlock (provided that the seq is tested). The only place where I think that's *not* the case is the krealloc in reservation_object_get_fences_rcu().
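For reference, the kind of annotation I mean looks like this (illustration only; the attached headers are what I'd actually propose, and this assumes a lockdep helper like reservation_object_held() from the earlier non-seqcount patch):

	struct reservation_object {
		struct ww_mutex lock;
		seqcount_t seq;

		struct fence __rcu *fence_excl;
		struct reservation_object_list __rcu *fence;
		struct reservation_object_list *staged;	/* writer-only, not RCU */
	};

	/* ww_mutex-held paths stay sparse-clean without ACCESS_ONCE */
	static inline struct reservation_object_list *
	reservation_object_get_list(struct reservation_object *obj)
	{
		return rcu_dereference_protected(obj->fence,
						 reservation_object_held(obj));
	}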
Also I have some more comments in the reservation_object_get_fences_rcu() function below:
On 04/10/2014 05:00 PM, Maarten Lankhorst wrote:
On 10-04-14 13:08, Thomas Hellstrom wrote:
On 04/10/2014 12:07 PM, Maarten Lankhorst wrote:
Hey,
On 10-04-14 10:46, Thomas Hellstrom wrote:
Hi!
Ugh. This became more complicated than I thought, but I'm OK with moving TTM over to fence while we sort out how / if we're going to use this.
While reviewing, it struck me that this is kind of error-prone, and hard to follow since we're operating on a structure that may be continually updated under us, needing a lot of RCU-specific macros and barriers.
Yeah, but with the exception of dma_buf_poll I don't think there is anything else outside drivers/base/reservation.c has to deal with rcu.
Also the rcu wait appears to not complete until there are no busy fences left (new ones can be added while we wait) rather than waiting on a snapshot of busy fences.
This has been by design, because 'wait for bo idle' type of functions only care if the bo is completely idle or not.
No, not when using RCU, because the bo may be busy again before the function returns :) Complete idleness can only be guaranteed if holding the reservation, or otherwise making sure that no new rendering is submitted to the buffer, so it's an overkill to wait for complete idleness here.
You're probably right, but it makes waiting a lot easier if I don't have to deal with memory allocations. :P
It would be easy to make a snapshot even without seqlocks, just copy reservation_object_test_signaled_rcu to return a shared list if test_all is set, or return pointer to exclusive otherwise.
I wonder if these issues can be addressed by having a function that provides a snapshot of all busy fences: This can be accomplished either by including the exclusive fence in the fence_list structure and allocate a new such structure each time it is updated. The RCU reader could then just make a copy of the current fence_list structure pointed to by &obj->fence, but I'm not sure we want to reallocate *each* time we update the fence pointer.
No, the most common operation is updating fence pointers, which is why the current design makes that cheap. It's also why doing rcu reads is more expensive.
The other approach uses a seqlock to obtain a consistent snapshot, and I've attached an incomplete outline, and I'm not 100% whether it's OK to combine RCU and seqlocks in this way...
Both these approaches have the benefit of hiding the RCU snapshotting in a single function, that can then be used by any waiting or polling function.
I think the middle way with using seqlocks to protect the fence_excl pointer and shared list combination, and using RCU to protect the refcounts for fences and the availability of the list could work for our usecase and might remove a bunch of memory barriers. But yeah that depends on layering rcu and seqlocks. No idea if that is allowed. But I suppose it is.
Also, you're being overly paranoid with seqlock reading, we would only need something like this:
rcu_read_lock()
preempt_disable()
seq = read_seqcount_begin()
read fence_excl, shared_count = ACCESS_ONCE(fence->shared_count)
copy shared to a struct
if (read_seqcount_retry()) { unlock and retry }
preempt_enable()
use fence_get_rcu() to bump refcount on everything,
    if that fails unlock, put, and retry
rcu_read_unlock()
But the shared list would still need to be RCU'd, to make sure we're not reading freed garbage.
Ah, OK. But I think we should use rcu inside seqcount, because read_seqcount_begin() may spin for a long time if there are many writers. Also I don't think the preempt_disable() is needed for read_seq critical sections, other than that it might decrease the risk of retries.
Reading the seqlock code makes me suspect that's the case too. The lockdep code calls local_irq_disable, so it's probably safe without preemption disabled.
~Maarten
I like the ability of not allocating memory, so I kept reservation_object_wait_timeout_rcu mostly the way it was. This code appears to fail on nouveau when using the shared members, but I'm not completely sure whether the error is in nouveau or this code yet.
--8<-------- [RFC v2] reservation: add support for read-only access using rcu
This adds 4 more functions to deal with rcu.
reservation_object_get_fences_rcu() will obtain the list of shared and exclusive fences without obtaining the ww_mutex.
reservation_object_wait_timeout_rcu() will wait on all fences of the reservation_object, without obtaining the ww_mutex.
reservation_object_test_signaled_rcu() will test if all fences of the reservation_object are signaled without using the ww_mutex.
reservation_object_get_excl() is added because touching the fence_excl member directly will trigger a sparse warning.
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index d89a98d2c37b..ca6ef0c4b358 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c
+int reservation_object_get_fences_rcu(struct reservation_object *obj,
struct fence **pfence_excl,
unsigned *pshared_count,
struct fence ***pshared)
+{
- unsigned shared_count = 0;
- unsigned retry = 1;
- struct fence **shared = NULL, *fence_excl = NULL;
- int ret = 0;
- while (retry) {
struct reservation_object_list *fobj;
unsigned seq, retry;
You're shadowing retry?
seq = read_seqcount_begin(&obj->seq);
rcu_read_lock();
fobj = ACCESS_ONCE(obj->fence);
if (fobj) {
struct fence **nshared;
shared_count = ACCESS_ONCE(fobj->shared_count);
nshared = krealloc(shared, sizeof(*shared) *
shared_count, GFP_KERNEL);
krealloc inside rcu_read_lock(). Better to put this first in the loop.
if (!nshared) {
ret = -ENOMEM;
shared_count = retry = 0;
goto unlock;
}
shared = nshared;
memcpy(shared, fobj->shared, sizeof(*shared) *
shared_count);
} else
shared_count = 0;
fence_excl = obj->fence_excl;
retry = read_seqcount_retry(&obj->seq, seq);
if (retry)
goto unlock;
if (!fence_excl || fence_get_rcu(fence_excl)) {
unsigned i;
for (i = 0; i < shared_count; ++i) {
if (fence_get_rcu(shared[i]))
continue;
/* uh oh, refcount failed, abort and retry */
while (i--)
fence_put(shared[i]);
if (fence_excl) {
fence_put(fence_excl);
fence_excl = NULL;
}
retry = 1;
break;
}
} else
retry = 1;
+unlock:
rcu_read_unlock();
- }
- *pshared_count = shared_count;
- if (shared_count)
*pshared = shared;
- else {
*pshared = NULL;
kfree(shared);
- }
- *pfence_excl = fence_excl;
- return ret;
+} +EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu);
Thanks, Thomas
op 11-04-14 10:38, Thomas Hellstrom schreef:
Hi, Maarten.
Here I believe we encounter a lot of locking inconsistencies.
First, it seems you're using a number of pointers as RCU pointers without annotating them as such and without using the correct RCU macros when assigning those pointers.
Some pointers (like the pointers in the shared fence list) are used both as RCU pointers (in dma_buf_poll(), for example) and as pointers protected by the seqlock (in reservation_object_get_fences_rcu()), which I believe is OK, but then the pointers must be assigned using the correct RCU macros. In the memcpy in reservation_object_get_fences_rcu() we might get away with an ugly typecast, but with a verbose comment that the pointers are considered protected by the seqlock at that location.
So I've updated (attached) the headers with proper __rcu annotations and locking comments according to how they are being used in the various reading functions. I believe if we want to get rid of this we need to validate those pointers using the seqlock as well. The annotations will generate a lot of sparse warnings in those places needing rcu_dereference(), rcu_assign_pointer() or rcu_dereference_protected().
With this I think we can get rid of all ACCESS_ONCE macros: it's not needed when the rcu_x() macros are used, and it's never needed for the members protected by the seqlock (provided that the seq is tested). The only place where I think that's *not* the case is at the krealloc in reservation_object_get_fences_rcu().
Also I have some more comments in the reservation_object_get_fences_rcu() function below:
I felt that the barriers needed for rcu were already provided by checking the seqcount lock. But looking at rcu_dereference makes it seem harmless to add it in more places, it handles the ACCESS_ONCE and barrier() for us.
We could probably get away with using RCU_INIT_POINTER on the writer side, because the smp_wmb is already done by arranging seqcount updates correctly.
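Concretely, the writer side being discussed would be roughly the sketch below (this matches where the series ends up; it assumes the object's ww_mutex is held around the update):

	preempt_disable();
	write_seqcount_begin(&obj->seq);
	/* the seqcount write section already orders this store for readers */
	RCU_INIT_POINTER(obj->fence_excl, fence);
	write_seqcount_end(&obj->seq);
	preempt_enable();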
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index d89a98d2c37b..ca6ef0c4b358 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c
+int reservation_object_get_fences_rcu(struct reservation_object *obj,
struct fence **pfence_excl,
unsigned *pshared_count,
struct fence ***pshared)
+{
- unsigned shared_count = 0;
- unsigned retry = 1;
- struct fence **shared = NULL, *fence_excl = NULL;
- int ret = 0;
- while (retry) {
struct reservation_object_list *fobj;
unsigned seq, retry;
You're shadowing retry?
Oops.
seq = read_seqcount_begin(&obj->seq);
rcu_read_lock();
fobj = ACCESS_ONCE(obj->fence);
if (fobj) {
struct fence **nshared;
shared_count = ACCESS_ONCE(fobj->shared_count);
nshared = krealloc(shared, sizeof(*shared) *
shared_count, GFP_KERNEL);
krealloc inside rcu_read_lock(). Better to put this first in the loop.
Except that shared_count isn't known until the rcu_read_lock is taken.
Thanks, Thomas
~Maarten
On 04/11/2014 11:24 AM, Maarten Lankhorst wrote:
I felt that the barriers needed for rcu were already provided by checking the seqcount lock. But looking at rcu_dereference makes it seem harmless to add it in more places, it handles the ACCESS_ONCE and barrier() for us.
And it makes the code more maintainable, and helps sparse doing a lot of checking for us. I guess we can tolerate a couple of extra barriers for that.
We could probably get away with using RCU_INIT_POINTER on the writer side, because the smp_wmb is already done by arranging seqcount updates correctly.
Hmm. yes, probably. At least in the replace function. I think if we do it in other places, we should add comments as to where the smp_wmb() is located, for future reference.
Also I saw in a couple of places where you're checking the shared pointers, you're not checking for NULL pointers, which I guess may happen if shared_count and pointers are not in full sync?
Thanks, /Thomas
op 11-04-14 12:11, Thomas Hellstrom schreef:
Also I saw in a couple of places where you're checking the shared pointers, you're not checking for NULL pointers, which I guess may happen if shared_count and pointers are not in full sync?
No, because shared_count is protected with seqcount. I only allow appending to the array, so when shared_count is validated by seqcount it means that the [0...shared_count) indexes are valid and non-null. What could happen though is that the fence at a specific index is updated with another one from the same context, but that's harmless.
~Maarten
Hi!
On 04/11/2014 08:09 PM, Maarten Lankhorst wrote:
No, because shared_count is protected with seqcount. I only allow appending to the array, so when shared_count is validated by seqcount it means that the [0...shared_count) indexes are valid and non-null. What could happen though is that the fence at a specific index is updated with another one from the same context, but that's harmless.
Hmm. Shouldn't we have a way to clean signaled fences from reservation objects? Perhaps when we attach a new fence, or after a wait with ww_mutex held? Otherwise we'd have a lot of completely unused fence objects hanging around for no reason. I don't think we need to be as picky as TTM, but I think we should do something?
/Thomas
op 11-04-14 21:30, Thomas Hellstrom schreef:
Hmm. Shouldn't we have a way to clean signaled fences from reservation objects? Perhaps when we attach a new fence, or after a wait with ww_mutex held? Otherwise we'd have a lot of completely unused fence objects hanging around for no reason. I don't think we need to be as picky as TTM, but I think we should do something?
Calling reservation_object_add_excl_fence() with a NULL fence works; I do this in ttm_bo_wait(). It requires holding the ww_mutex.
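For illustration, such a cleanup would look roughly like the sketch below; 'bo' with a resv pointer is an assumed TTM-style object here, not code from this series:

	ww_mutex_lock(&bo->resv->lock, NULL);
	/* puts the old exclusive fence and all shared fences */
	reservation_object_add_excl_fence(bo->resv, NULL);
	ww_mutex_unlock(&bo->resv->lock);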
~Maarten
On 04/11/2014 08:09 PM, Maarten Lankhorst wrote:
No, because shared_count is protected with seqcount. I only allow appending to the array, so when shared_count is validated by seqcount it means that the [0...shared_count) indexes are valid and non-null. What could happen though is that the fence at a specific index is updated with another one from the same context, but that's harmless.
Hmm, doesn't attaching an exclusive fence clear all shared fence pointers from under a reader?
/Thomas
op 11-04-14 21:35, Thomas Hellstrom schreef:
Hmm, doesn't attaching an exclusive fence clear all shared fence pointers from under a reader?
No, for that reason. It only resets shared_count to 0. This is harmless because the shared fence pointers stay valid long enough thanks to RCU-delayed deletion, and fence_get_rcu will fail once the refcount has dropped to zero. That check is enough to prevent errors, so there's no need to explicitly clear the fence pointers.
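In other words, a reader racing with such an update ends up doing something like the sketch below (it assumes the RCU read lock is held, that fobj and i come from a seqcount-validated snapshot, and that a retry label exists as in the real functions):

	struct fence *fence = rcu_dereference(fobj->shared[i]);

	if (!fence_get_rcu(fence)) {
		/*
		 * The refcount already dropped to zero, so the entry is
		 * stale, but the memory is still valid thanks to kfree_rcu:
		 * unlock and retry instead of touching freed data.
		 */
		rcu_read_unlock();
		goto retry;
	}
	/* fence is now safely referenced; drop it later with fence_put() */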
~Maarten
On 04/14/2014 09:42 AM, Maarten Lankhorst wrote:
No, for that reason. It only resets shared_count to 0.
Ah. OK. I guess I didn't read the code carefully enough.
Thanks, Thomas
This adds 4 more functions to deal with rcu.
reservation_object_get_fences_rcu() will obtain the list of shared and exclusive fences without obtaining the ww_mutex.
reservation_object_wait_timeout_rcu() will wait on all fences of the reservation_object, without obtaining the ww_mutex.
reservation_object_test_signaled_rcu() will test if all fences of the reservation_object are signaled without using the ww_mutex.
reservation_object_get_excl() is added because touching the fence_excl member directly will trigger a sparse warning.
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com
---
Using seqcount and fixing some lockdep bugs. Changes since v2:
- Fix some crashes, remove some unneeded barriers when provided by seqcount writes
- Fix code to work correctly with sparse's RCU annotations.
- Create a global string for the seqcount lock to make lockdep happy.
Can I get this version reviewed? If it looks correct I'll mail the full series because it's intertwined with the TTM conversion to use this code.
See http://cgit.freedesktop.org/~mlankhorst/linux/log/?h=vmwgfx_wip --- diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index d89a98d2c37b..0df673f812eb 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -137,7 +137,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) struct reservation_object_list *fobj; struct fence *fence_excl; unsigned long events; - unsigned shared_count; + unsigned shared_count, seq;
dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -151,14 +151,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
- ww_mutex_lock(&resv->lock, NULL); +retry: + seq = read_seqcount_begin(&resv->seq); + rcu_read_lock();
- fobj = resv->fence; - if (!fobj) - goto out; - - shared_count = fobj->shared_count; - fence_excl = resv->fence_excl; + fobj = rcu_dereference(resv->fence); + if (fobj) + shared_count = fobj->shared_count; + else + shared_count = 0; + fence_excl = rcu_dereference(resv->fence_excl); + if (read_seqcount_retry(&resv->seq, seq)) { + rcu_read_unlock(); + goto retry; + }
if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; @@ -176,14 +182,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock);
if (events & pevents) { - if (!fence_add_callback(fence_excl, &dcb->cb, + if (!fence_get_rcu(fence_excl)) { + /* force a recheck */ + events &= ~pevents; + dma_buf_poll_cb(NULL, &dcb->cb); + } else if (!fence_add_callback(fence_excl, &dcb->cb, dma_buf_poll_cb)) { events &= ~pevents; + fence_put(fence_excl); } else { /* * No callback queued, wake up any additional * waiters. */ + fence_put(fence_excl); dma_buf_poll_cb(NULL, &dcb->cb); } } @@ -205,13 +217,26 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) goto out;
for (i = 0; i < shared_count; ++i) { - struct fence *fence = fobj->shared[i]; + struct fence *fence = rcu_dereference(fobj->shared[i]);
+ if (!fence_get_rcu(fence)) { + /* + * fence refcount dropped to zero, this means + * that fobj has been freed + * + * call dma_buf_poll_cb and force a recheck! + */ + events &= ~POLLOUT; + dma_buf_poll_cb(NULL, &dcb->cb); + break; + } if (!fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb)) { + fence_put(fence); events &= ~POLLOUT; break; } + fence_put(fence); }
/* No callback queued, wake up any additional waiters. */ @@ -220,7 +245,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) }
out: - ww_mutex_unlock(&resv->lock); + rcu_read_unlock(); return events; }
diff --git a/drivers/base/reservation.c b/drivers/base/reservation.c index b82a5b630a8e..901bbf19c868 100644 --- a/drivers/base/reservation.c +++ b/drivers/base/reservation.c @@ -38,6 +38,11 @@ DEFINE_WW_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class);
+struct lock_class_key reservation_seqcount_class; +EXPORT_SYMBOL(reservation_seqcount_class); + +const char reservation_seqcount_string[] = "reservation_seqcount"; +EXPORT_SYMBOL(reservation_seqcount_string); /* * Reserve space to add a shared fence to a reservation_object, * must be called with obj->lock held. @@ -55,8 +60,8 @@ int reservation_object_reserve_shared(struct reservation_object *obj) kfree(obj->staged); obj->staged = NULL; return 0; - } - max = old->shared_max * 2; + } else + max = old->shared_max * 2; } else max = 4;
@@ -82,27 +87,37 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, { u32 i;
+ fence_get(fence); + + preempt_disable(); + write_seqcount_begin(&obj->seq); + for (i = 0; i < fobj->shared_count; ++i) { - if (fobj->shared[i]->context == fence->context) { - struct fence *old_fence = fobj->shared[i]; + struct fence *old_fence;
- fence_get(fence); + old_fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj));
- fobj->shared[i] = fence; + if (old_fence->context == fence->context) { + /* memory barrier is added by write_seqcount_begin */ + RCU_INIT_POINTER(fobj->shared[i], fence); + write_seqcount_end(&obj->seq); + preempt_enable();
fence_put(old_fence); return; } }
- fence_get(fence); - fobj->shared[fobj->shared_count] = fence; /* - * make the new fence visible before incrementing - * fobj->shared_count + * memory barrier is added by write_seqcount_begin, + * fobj->shared_count is protected by this lock too */ - smp_wmb(); + RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); fobj->shared_count++; + + write_seqcount_end(&obj->seq); + preempt_enable(); }
static void @@ -112,11 +127,12 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct fence *fence) { unsigned i; + struct fence *old_fence = NULL;
fence_get(fence);
if (!old) { - fobj->shared[0] = fence; + RCU_INIT_POINTER(fobj->shared[0], fence); fobj->shared_count = 1; goto done; } @@ -130,19 +146,38 @@ reservation_object_add_shared_replace(struct reservation_object *obj, fobj->shared_count = old->shared_count;
for (i = 0; i < old->shared_count; ++i) { - if (fence && old->shared[i]->context == fence->context) { - fence_put(old->shared[i]); - fobj->shared[i] = fence; - fence = NULL; + struct fence *check; + + check = rcu_dereference_protected(old->shared[i], + reservation_object_held(obj)); + + if (!old_fence && check->context == fence->context) { + old_fence = check; + RCU_INIT_POINTER(fobj->shared[i], fence); } else - fobj->shared[i] = old->shared[i]; + RCU_INIT_POINTER(fobj->shared[i], check); + } + if (!old_fence) { + RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); + fobj->shared_count++; } - if (fence) - fobj->shared[fobj->shared_count++] = fence;
done: - obj->fence = fobj; - kfree(old); + preempt_disable(); + write_seqcount_begin(&obj->seq); + /* + * RCU_INIT_POINTER can be used here, + * seqcount provides the necessary barriers + */ + RCU_INIT_POINTER(obj->fence, fobj); + write_seqcount_end(&obj->seq); + preempt_enable(); + + if (old) + kfree_rcu(old, rcu); + + if (old_fence) + fence_put(old_fence); }
/* @@ -158,7 +193,7 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, obj->staged = NULL;
if (!fobj) { - BUG_ON(old->shared_count == old->shared_max); + BUG_ON(old->shared_count >= old->shared_max); reservation_object_add_shared_inplace(obj, old, fence); } else reservation_object_add_shared_replace(obj, old, fobj, fence); @@ -168,26 +203,266 @@ EXPORT_SYMBOL(reservation_object_add_shared_fence); void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence) { - struct fence *old_fence = obj->fence_excl; + struct fence *old_fence = reservation_object_get_excl(obj); struct reservation_object_list *old; u32 i = 0;
old = reservation_object_get_list(obj); - if (old) { + if (old) i = old->shared_count; - old->shared_count = 0; - }
if (fence) fence_get(fence);
- obj->fence_excl = fence; + preempt_disable(); + write_seqcount_begin(&obj->seq); + /* write_seqcount_begin provides the necessary memory barrier */ + RCU_INIT_POINTER(obj->fence_excl, fence); + if (old) + old->shared_count = 0; + write_seqcount_end(&obj->seq); + preempt_enable();
/* inplace update, no shared fences */ while (i--) - fence_put(old->shared[i]); + fence_put(rcu_dereference_protected(old->shared[i], + reservation_object_held(obj)));
if (old_fence) fence_put(old_fence); } EXPORT_SYMBOL(reservation_object_add_excl_fence); + +int reservation_object_get_fences_rcu(struct reservation_object *obj, + struct fence **pfence_excl, + unsigned *pshared_count, + struct fence ***pshared) +{ + unsigned shared_count = 0; + unsigned retry = 1; + struct fence **shared = NULL, *fence_excl = NULL; + int ret = 0; + + while (retry) { + struct reservation_object_list *fobj; + unsigned seq; + + seq = read_seqcount_begin(&obj->seq); + + rcu_read_lock(); + + fobj = rcu_dereference(obj->fence); + if (fobj) { + struct fence **nshared; + + shared_count = ACCESS_ONCE(fobj->shared_count); + nshared = krealloc(shared, sizeof(*shared) * shared_count, GFP_KERNEL); + if (!nshared) { + ret = -ENOMEM; + shared_count = retry = 0; + goto unlock; + } + shared = nshared; + memcpy(shared, fobj->shared, sizeof(*shared) * shared_count); + } else + shared_count = 0; + fence_excl = rcu_dereference(obj->fence_excl); + + retry = read_seqcount_retry(&obj->seq, seq); + if (retry) + goto unlock; + + if (!fence_excl || fence_get_rcu(fence_excl)) { + unsigned i; + + for (i = 0; i < shared_count; ++i) { + if (fence_get_rcu(shared[i])) + continue; + + /* uh oh, refcount failed, abort and retry */ + while (i--) + fence_put(shared[i]); + + if (fence_excl) { + fence_put(fence_excl); + fence_excl = NULL; + } + + retry = 1; + break; + } + } else + retry = 1; + +unlock: + rcu_read_unlock(); + } + *pshared_count = shared_count; + if (shared_count) + *pshared = shared; + else { + *pshared = NULL; + kfree(shared); + } + *pfence_excl = fence_excl; + + return ret; +} +EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu); + +long reservation_object_wait_timeout_rcu(struct reservation_object *obj, + bool wait_all, bool intr, + unsigned long timeout) +{ + struct fence *fence; + unsigned seq, shared_count, i = 0; + long ret = timeout; + +retry: + fence = NULL; + shared_count = 0; + seq = read_seqcount_begin(&obj->seq); + rcu_read_lock(); + + if (wait_all) { + struct reservation_object_list *fobj = rcu_dereference(obj->fence); + + if (fobj) + shared_count = fobj->shared_count; + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + for (i = 0; i < shared_count; ++i) { + struct fence *lfence = rcu_dereference(fobj->shared[i]); + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) + continue; + + if (!fence_get_rcu(lfence)) + goto unlock_retry; + + if (fence_is_signaled(lfence)) { + fence_put(lfence); + continue; + } + + fence = lfence; + break; + } + } + + if (!shared_count) { + struct fence *fence_excl = rcu_dereference(obj->fence_excl); + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + if (fence_excl && + !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) { + if (!fence_get_rcu(fence_excl)) + goto unlock_retry; + + if (fence_is_signaled(fence_excl)) + fence_put(fence_excl); + else + fence = fence_excl; + } + } + + rcu_read_unlock(); + if (fence) { + ret = fence_wait_timeout(fence, intr, ret); + fence_put(fence); + if (ret > 0 && wait_all && (i + 1 < shared_count)) + goto retry; + } + return ret; + +unlock_retry: + rcu_read_unlock(); + goto retry; +} +EXPORT_SYMBOL_GPL(reservation_object_wait_timeout_rcu); + + +static inline int +reservation_object_test_signaled_single(struct fence *passed_fence) +{ + struct fence *fence, *lfence = passed_fence; + int ret = 1; + + if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { + int ret; + + fence = fence_get_rcu(lfence); + if (!fence) + return -1; + + ret = !!fence_is_signaled(fence); + 
fence_put(fence); + } + return ret; +} + +bool reservation_object_test_signaled_rcu(struct reservation_object *obj, + bool test_all) +{ + unsigned seq, shared_count; + int ret = true; + +retry: + shared_count = 0; + seq = read_seqcount_begin(&obj->seq); + rcu_read_lock(); + + if (test_all) { + unsigned i; + + struct reservation_object_list *fobj = rcu_dereference(obj->fence); + + if (fobj) + shared_count = fobj->shared_count; + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + for (i = 0; i < shared_count; ++i) { + struct fence *fence = rcu_dereference(fobj->shared[i]); + + ret = reservation_object_test_signaled_single(fence); + if (ret < 0) + goto unlock_retry; + else if (!ret) + break; + } + + /* + * There could be a read_seqcount_retry here, but nothing cares + * about whether it's the old or newer fence pointers that are + * signale. That race could still have happened after checking + * read_seqcount_retry. If you care, use ww_mutex_lock. + */ + } + + if (!shared_count) { + struct fence *fence_excl = rcu_dereference(obj->fence_excl); + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + if (fence_excl) { + ret = reservation_object_test_signaled_single(fence_excl); + if (ret < 0) + goto unlock_retry; + } + } + + rcu_read_unlock(); + return ret; + +unlock_retry: + rcu_read_unlock(); + goto retry; +} +EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu); diff --git a/include/linux/fence.h b/include/linux/fence.h index d13b5ab61726..4b7002457af0 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -31,7 +31,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/printk.h> -#include <linux/slab.h> +#include <linux/rcupdate.h>
struct fence; struct fence_ops; @@ -41,6 +41,7 @@ struct fence_cb; * struct fence - software synchronization primitive * @refcount: refcount for this fence * @ops: fence_ops associated with this fence + * @rcu: used for releasing fence with kfree_rcu * @cb_list: list of all callbacks to call * @lock: spin_lock_irqsave used for locking * @context: execution context this fence belongs to, returned by @@ -74,6 +75,7 @@ struct fence_cb; struct fence { struct kref refcount; const struct fence_ops *ops; + struct rcu_head rcu; struct list_head cb_list; spinlock_t *lock; unsigned context, seqno; @@ -190,11 +192,25 @@ static inline void fence_get(struct fence *fence) kref_get(&fence->refcount); }
+/** + * fence_get_rcu - get a fence from a reservation_object_list with rcu read lock + * @fence: [in] fence to increase refcount of + * + * Function returns NULL if no refcount could be obtained, or the fence. + */ +static inline struct fence *fence_get_rcu(struct fence *fence) +{ + if (kref_get_unless_zero(&fence->refcount)) + return fence; + else + return NULL; +} + extern void release_fence(struct kref *kref);
static inline void free_fence(struct fence *fence) { - kfree(fence); + kfree_rcu(fence, rcu); }
/** diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 2affe67dea6e..b73d3df5a8e8 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -42,22 +42,29 @@ #include <linux/ww_mutex.h> #include <linux/fence.h> #include <linux/slab.h> +#include <linux/seqlock.h> +#include <linux/rcupdate.h>
extern struct ww_class reservation_ww_class; +extern struct lock_class_key reservation_seqcount_class; +extern const char reservation_seqcount_string[];
struct reservation_object_list { + struct rcu_head rcu; u32 shared_count, shared_max; - struct fence *shared[]; + struct fence __rcu *shared[]; };
struct reservation_object { struct ww_mutex lock; + seqcount_t seq;
- struct fence *fence_excl; - struct reservation_object_list *fence; + struct fence __rcu *fence_excl; + struct reservation_object_list __rcu *fence; struct reservation_object_list *staged; };
+#define reservation_object_held(obj) lockdep_is_held(&(obj)->lock.base) #define reservation_object_assert_held(obj) \ lockdep_assert_held(&(obj)->lock.base)
@@ -66,8 +73,9 @@ reservation_object_init(struct reservation_object *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class);
- obj->fence_excl = NULL; - obj->fence = NULL; + __seqcount_init(&obj->seq, reservation_seqcount_string, &reservation_seqcount_class); + RCU_INIT_POINTER(obj->fence, NULL); + RCU_INIT_POINTER(obj->fence_excl, NULL); obj->staged = NULL; }
@@ -76,18 +84,20 @@ reservation_object_fini(struct reservation_object *obj) { int i; struct reservation_object_list *fobj; + struct fence *excl;
/* * This object should be dead and all references must have - * been released to it. + * been released to it, so no need to be protected with rcu. */ - if (obj->fence_excl) - fence_put(obj->fence_excl); + excl = rcu_dereference_protected(obj->fence_excl, 1); + if (excl) + fence_put(excl);
- fobj = obj->fence; + fobj = rcu_dereference_protected(obj->fence, 1); if (fobj) { for (i = 0; i < fobj->shared_count; ++i) - fence_put(fobj->shared[i]); + fence_put(rcu_dereference_protected(fobj->shared[i], 1));
kfree(fobj); } @@ -99,17 +109,15 @@ reservation_object_fini(struct reservation_object *obj) static inline struct reservation_object_list * reservation_object_get_list(struct reservation_object *obj) { - reservation_object_assert_held(obj); - - return obj->fence; + return rcu_dereference_check(obj->fence, + reservation_object_held(obj)); }
static inline struct fence * reservation_object_get_excl(struct reservation_object *obj) { - reservation_object_assert_held(obj); - - return obj->fence_excl; + return rcu_dereference_check(obj->fence_excl, + reservation_object_held(obj)); }
int reservation_object_reserve_shared(struct reservation_object *obj); @@ -119,4 +127,16 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence);
+int reservation_object_get_fences_rcu(struct reservation_object *obj, + struct fence **pfence_excl, + unsigned *pshared_count, + struct fence ***pshared); + +long reservation_object_wait_timeout_rcu(struct reservation_object *obj, + bool wait_all, bool intr, + unsigned long timeout); + +bool reservation_object_test_signaled_rcu(struct reservation_object *obj, + bool test_all); + #endif /* _LINUX_RESERVATION_H */
op 23-04-14 13:15, Maarten Lankhorst schreef:
Ping, can anyone review this?
On 04/29/2014 04:32 PM, Maarten Lankhorst wrote:
Ping, can anyone review this?
Hi, Maarten. It's on my todo-list. Been away from office for a while.
/Thomas
Hi, Maarten!
Some nitpicks, and that krealloc within rcu lock still worries me. Otherwise looks good.
/Thomas
On 04/23/2014 12:15 PM, Maarten Lankhorst wrote:
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index d89a98d2c37b..0df673f812eb 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -137,7 +137,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) struct reservation_object_list *fobj; struct fence *fence_excl; unsigned long events;
- unsigned shared_count;
- unsigned shared_count, seq;
dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -151,14 +151,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
- ww_mutex_lock(&resv->lock, NULL);
+retry:
- seq = read_seqcount_begin(&resv->seq);
- rcu_read_lock();
- fobj = resv->fence;
- if (!fobj)
goto out;
- shared_count = fobj->shared_count;
- fence_excl = resv->fence_excl;
- fobj = rcu_dereference(resv->fence);
- if (fobj)
shared_count = fobj->shared_count;
- else
shared_count = 0;
- fence_excl = rcu_dereference(resv->fence_excl);
- if (read_seqcount_retry(&resv->seq, seq)) {
rcu_read_unlock();
goto retry;
- }
if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; @@ -176,14 +182,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock); if (events & pevents) {
if (!fence_add_callback(fence_excl, &dcb->cb,
if (!fence_get_rcu(fence_excl)) {
/* force a recheck */
events &= ~pevents;
dma_buf_poll_cb(NULL, &dcb->cb);
} else if (!fence_add_callback(fence_excl, &dcb->cb, dma_buf_poll_cb)) { events &= ~pevents;
fence_put(fence_excl); } else { /* * No callback queued, wake up any additional * waiters. */
fence_put(fence_excl); dma_buf_poll_cb(NULL, &dcb->cb); } }
@@ -205,13 +217,26 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) goto out; for (i = 0; i < shared_count; ++i) {
struct fence *fence = fobj->shared[i];
struct fence *fence = rcu_dereference(fobj->shared[i]);
if (!fence_get_rcu(fence)) {
/*
* fence refcount dropped to zero, this means
* that fobj has been freed
*
* call dma_buf_poll_cb and force a recheck!
*/
events &= ~POLLOUT;
dma_buf_poll_cb(NULL, &dcb->cb);
break;
} if (!fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb)) {
fence_put(fence); events &= ~POLLOUT; break; }
fence_put(fence); }
/* No callback queued, wake up any additional waiters. */ @@ -220,7 +245,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) } out:
- ww_mutex_unlock(&resv->lock);
- rcu_read_unlock(); return events;
} diff --git a/drivers/base/reservation.c b/drivers/base/reservation.c index b82a5b630a8e..901bbf19c868 100644 --- a/drivers/base/reservation.c +++ b/drivers/base/reservation.c @@ -38,6 +38,11 @@ DEFINE_WW_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); +struct lock_class_key reservation_seqcount_class; +EXPORT_SYMBOL(reservation_seqcount_class);
+const char reservation_seqcount_string[] = "reservation_seqcount"; +EXPORT_SYMBOL(reservation_seqcount_string); /*
- Reserve space to add a shared fence to a reservation_object,
- must be called with obj->lock held.
@@ -55,8 +60,8 @@ int reservation_object_reserve_shared(struct reservation_object *obj) kfree(obj->staged); obj->staged = NULL; return 0;
}
max = old->shared_max * 2;
} else
max = old->shared_max * 2;
Perhaps as a separate reformatting patch?
} else max = 4;
@@ -82,27 +87,37 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, { u32 i;
- fence_get(fence);
- preempt_disable();
- write_seqcount_begin(&obj->seq);
- for (i = 0; i < fobj->shared_count; ++i) {
if (fobj->shared[i]->context == fence->context) {
struct fence *old_fence = fobj->shared[i];
struct fence *old_fence;
fence_get(fence);
old_fence = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(obj));
fobj->shared[i] = fence;
if (old_fence->context == fence->context) {
/* memory barrier is added by write_seqcount_begin */
RCU_INIT_POINTER(fobj->shared[i], fence);
write_seqcount_end(&obj->seq);
preempt_enable();
fence_put(old_fence); return; } }
- fence_get(fence);
- fobj->shared[fobj->shared_count] = fence; /*
* make the new fence visible before incrementing
* fobj->shared_count
* memory barrier is added by write_seqcount_begin,
* fobj->shared_count is protected by this lock too */
- smp_wmb();
- RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); fobj->shared_count++;
- write_seqcount_end(&obj->seq);
- preempt_enable();
} static void @@ -112,11 +127,12 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct fence *fence) { unsigned i;
- struct fence *old_fence = NULL;
fence_get(fence); if (!old) {
fobj->shared[0] = fence;
RCU_INIT_POINTER(fobj->shared[0], fence); fobj->shared_count = 1; goto done; }
@@ -130,19 +146,38 @@ reservation_object_add_shared_replace(struct reservation_object *obj, fobj->shared_count = old->shared_count; for (i = 0; i < old->shared_count; ++i) {
if (fence && old->shared[i]->context == fence->context) {
fence_put(old->shared[i]);
fobj->shared[i] = fence;
fence = NULL;
struct fence *check;
check = rcu_dereference_protected(old->shared[i],
reservation_object_held(obj));
if (!old_fence && check->context == fence->context) {
old_fence = check;
RCU_INIT_POINTER(fobj->shared[i], fence); } else
fobj->shared[i] = old->shared[i];
RCU_INIT_POINTER(fobj->shared[i], check);
- }
- if (!old_fence) {
RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
fobj->shared_count++; }
- if (fence)
fobj->shared[fobj->shared_count++] = fence;
done:
- obj->fence = fobj;
- kfree(old);
- preempt_disable();
- write_seqcount_begin(&obj->seq);
- /*
* RCU_INIT_POINTER can be used here,
* seqcount provides the necessary barriers
*/
- RCU_INIT_POINTER(obj->fence, fobj);
- write_seqcount_end(&obj->seq);
- preempt_enable();
- if (old)
kfree_rcu(old, rcu);
- if (old_fence)
fence_put(old_fence);
} /* @@ -158,7 +193,7 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, obj->staged = NULL; if (!fobj) {
BUG_ON(old->shared_count == old->shared_max);
BUG_ON(old->shared_count >= old->shared_max); reservation_object_add_shared_inplace(obj, old, fence); } else reservation_object_add_shared_replace(obj, old, fobj, fence);
@@ -168,26 +203,266 @@ EXPORT_SYMBOL(reservation_object_add_shared_fence);
 void reservation_object_add_excl_fence(struct reservation_object *obj,
 				       struct fence *fence)
 {
-	struct fence *old_fence = obj->fence_excl;
+	struct fence *old_fence = reservation_object_get_excl(obj);
 	struct reservation_object_list *old;
 	u32 i = 0;
 
 	old = reservation_object_get_list(obj);
-	if (old) {
+	if (old)
 		i = old->shared_count;
-		old->shared_count = 0;
-	}
 
 	if (fence)
 		fence_get(fence);
 
-	obj->fence_excl = fence;
+	preempt_disable();
+	write_seqcount_begin(&obj->seq);
+	/* write_seqcount_begin provides the necessary memory barrier */
+	RCU_INIT_POINTER(obj->fence_excl, fence);
+	if (old)
+		old->shared_count = 0;
+	write_seqcount_end(&obj->seq);
+	preempt_enable();
 
 	/* inplace update, no shared fences */
 	while (i--)
-		fence_put(old->shared[i]);
+		fence_put(rcu_dereference_protected(old->shared[i],
+						reservation_object_held(obj)));
 
 	if (old_fence)
 		fence_put(old_fence);
 }
 EXPORT_SYMBOL(reservation_object_add_excl_fence);
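As an aside, a minimal sketch of how a hypothetical caller would publish a fence through the locked interfaces above; example_attach_fence() and its parameters are made up, only the reservation_object_*() and ww_mutex_*() calls come from this series:

static int example_attach_fence(struct reservation_object *resv,
				struct fence *fence, bool exclusive)
{
	int ret = 0;

	ww_mutex_lock(&resv->lock, NULL);

	if (exclusive) {
		/* replaces the exclusive slot and drops all shared fences */
		reservation_object_add_excl_fence(resv, fence);
	} else {
		/* make room for one more shared slot before publishing */
		ret = reservation_object_reserve_shared(resv);
		if (!ret)
			reservation_object_add_shared_fence(resv, fence);
	}

	ww_mutex_unlock(&resv->lock);
	return ret;	/* the reservation object took its own reference */
}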
+int reservation_object_get_fences_rcu(struct reservation_object *obj,
+				      struct fence **pfence_excl,
+				      unsigned *pshared_count,
+				      struct fence ***pshared)
+{
+	unsigned shared_count = 0;
+	unsigned retry = 1;
+	struct fence **shared = NULL, *fence_excl = NULL;
+	int ret = 0;
+
+	while (retry) {
+		struct reservation_object_list *fobj;
+		unsigned seq;
+
+		seq = read_seqcount_begin(&obj->seq);
+
+		rcu_read_lock();
+
+		fobj = rcu_dereference(obj->fence);
+		if (fobj) {
+			struct fence **nshared;
+
+			shared_count = ACCESS_ONCE(fobj->shared_count);

ACCESS_ONCE() shouldn't be needed inside the seqlock?

+			nshared = krealloc(shared, sizeof(*shared) *
+					   shared_count, GFP_KERNEL);

Again, krealloc should be a sleeping function, and not suitable within a RCU read lock? I still think this krealloc should be moved to the start of the retry loop, and we should start with a suitable guess of shared_count (perhaps 0?) It's not like we're going to waste a lot of memory....

+			if (!nshared) {
+				ret = -ENOMEM;
+				shared_count = retry = 0;
+				goto unlock;
+			}
+			shared = nshared;
+			memcpy(shared, fobj->shared, sizeof(*shared) *
+			       shared_count);
+		} else
+			shared_count = 0;
+
+		fence_excl = rcu_dereference(obj->fence_excl);
+
+		retry = read_seqcount_retry(&obj->seq, seq);
+		if (retry)
+			goto unlock;
+
+		if (!fence_excl || fence_get_rcu(fence_excl)) {
+			unsigned i;
+
+			for (i = 0; i < shared_count; ++i) {
+				if (fence_get_rcu(shared[i]))
+					continue;
+
+				/* uh oh, refcount failed, abort and retry */
+				while (i--)
+					fence_put(shared[i]);
+
+				if (fence_excl) {
+					fence_put(fence_excl);
+					fence_excl = NULL;
+				}
+
+				retry = 1;
+				break;
+			}
+		} else
+			retry = 1;
+
+unlock:
+		rcu_read_unlock();
+	}
+	*pshared_count = shared_count;
+	if (shared_count)
+		*pshared = shared;
+	else {
+		*pshared = NULL;
+		kfree(shared);
+	}
+	*pfence_excl = fence_excl;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu);
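For illustration, a sketch of a hypothetical lockless caller of the new helper; example_busy() is made up, the reservation/fence calls are the ones added above:

static bool example_busy(struct reservation_object *resv)
{
	struct fence *excl, **shared;
	unsigned count, i;
	bool busy = false;
	int ret;

	ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
	if (ret)
		return true;	/* -ENOMEM: report busy rather than fail */

	for (i = 0; i < count; ++i) {
		busy |= !fence_is_signaled(shared[i]);
		fence_put(shared[i]);
	}
	kfree(shared);	/* NULL when count == 0, which kfree accepts */

	if (excl) {
		busy |= !fence_is_signaled(excl);
		fence_put(excl);
	}

	return busy;
}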
+long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
+					 bool wait_all, bool intr,
+					 unsigned long timeout)
+{
+	struct fence *fence;
+	unsigned seq, shared_count, i = 0;
+	long ret = timeout;
+
+retry:
+	fence = NULL;
+	shared_count = 0;
+	seq = read_seqcount_begin(&obj->seq);
+	rcu_read_lock();
+
+	if (wait_all) {
+		struct reservation_object_list *fobj =
+						rcu_dereference(obj->fence);
+
+		if (fobj)
+			shared_count = fobj->shared_count;
+
+		if (read_seqcount_retry(&obj->seq, seq))
+			goto unlock_retry;
+
+		for (i = 0; i < shared_count; ++i) {
+			struct fence *lfence = rcu_dereference(fobj->shared[i]);
+
+			if (test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags))
+				continue;
+
+			if (!fence_get_rcu(lfence))
+				goto unlock_retry;
+
+			if (fence_is_signaled(lfence)) {
+				fence_put(lfence);
+				continue;
+			}
+
+			fence = lfence;
+			break;
+		}
+	}
+
+	if (!shared_count) {
+		struct fence *fence_excl = rcu_dereference(obj->fence_excl);
+
+		if (read_seqcount_retry(&obj->seq, seq))
+			goto unlock_retry;
+
+		if (fence_excl &&
+		    !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) {
+			if (!fence_get_rcu(fence_excl))
+				goto unlock_retry;
+
+			if (fence_is_signaled(fence_excl))
+				fence_put(fence_excl);
+			else
+				fence = fence_excl;
+		}
+	}
+
+	rcu_read_unlock();
+	if (fence) {
+		ret = fence_wait_timeout(fence, intr, ret);
+		fence_put(fence);
+		if (ret > 0 && wait_all && (i + 1 < shared_count))
+			goto retry;
+	}
+	return ret;
+
+unlock_retry:
+	rcu_read_unlock();
+	goto retry;
+}
+EXPORT_SYMBOL_GPL(reservation_object_wait_timeout_rcu);
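Likewise, a small made-up example of how a driver might use the RCU wait; the return-value handling follows the fence_wait_timeout() convention (negative on error, 0 on timeout, remaining jiffies otherwise):

static int example_wait_idle(struct reservation_object *resv)
{
	long lret;

	/* wait for all fences, interruptibly, with a 10 second timeout */
	lret = reservation_object_wait_timeout_rcu(resv, true, true,
						   msecs_to_jiffies(10000));
	if (lret < 0)
		return lret;	/* error, e.g. interrupted by a signal */
	if (lret == 0)
		return -EBUSY;	/* timed out */
	return 0;		/* everything signaled */
}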
+static inline int
+reservation_object_test_signaled_single(struct fence *passed_fence)
+{
+	struct fence *fence, *lfence = passed_fence;
+	int ret = 1;
+
+	if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) {
+		fence = fence_get_rcu(lfence);
+		if (!fence)
+			return -1;
+
+		ret = !!fence_is_signaled(fence);
+		fence_put(fence);
+	}
+
+	return ret;
+}
+bool reservation_object_test_signaled_rcu(struct reservation_object *obj,
+					  bool test_all)
+{
+	unsigned seq, shared_count;
+	int ret = true;
+
+retry:
+	shared_count = 0;
+	seq = read_seqcount_begin(&obj->seq);
+	rcu_read_lock();
+
+	if (test_all) {
+		unsigned i;
+		struct reservation_object_list *fobj =
+						rcu_dereference(obj->fence);
+
+		if (fobj)
+			shared_count = fobj->shared_count;
+
+		if (read_seqcount_retry(&obj->seq, seq))
+			goto unlock_retry;
+
+		for (i = 0; i < shared_count; ++i) {
+			struct fence *fence = rcu_dereference(fobj->shared[i]);
+
+			ret = reservation_object_test_signaled_single(fence);
+			if (ret < 0)
+				goto unlock_retry;
+			else if (!ret)
+				break;
+		}
+
+		/*
+		 * There could be a read_seqcount_retry here, but nothing cares
+		 * about whether it's the old or newer fence pointers that are
+		 * signale. That race could still have happened after checking

Typo.

+		 * read_seqcount_retry. If you care, use ww_mutex_lock.
+		 */
+	}
+
+	if (!shared_count) {
+		struct fence *fence_excl = rcu_dereference(obj->fence_excl);
+
+		if (read_seqcount_retry(&obj->seq, seq))
+			goto unlock_retry;
+
+		if (fence_excl) {
+			ret = reservation_object_test_signaled_single(fence_excl);
+			if (ret < 0)
+				goto unlock_retry;
+		}
+	}
+
+	rcu_read_unlock();
+	return ret;
+
+unlock_retry:
+	rcu_read_unlock();
+	goto retry;
+}
+EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu);

diff --git a/include/linux/fence.h b/include/linux/fence.h
index d13b5ab61726..4b7002457af0 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -31,7 +31,7 @@
 #include <linux/kref.h>
 #include <linux/sched.h>
 #include <linux/printk.h>
-#include <linux/slab.h>
+#include <linux/rcupdate.h>
 
 struct fence;
 struct fence_ops;
@@ -41,6 +41,7 @@ struct fence_cb;
 /**
  * struct fence - software synchronization primitive
  * @refcount: refcount for this fence
  * @ops: fence_ops associated with this fence
+ * @rcu: used for releasing fence with kfree_rcu
  * @cb_list: list of all callbacks to call
  * @lock: spin_lock_irqsave used for locking
  * @context: execution context this fence belongs to, returned by
@@ -74,6 +75,7 @@ struct fence_cb;
 struct fence {
 	struct kref refcount;
 	const struct fence_ops *ops;
+	struct rcu_head rcu;
 	struct list_head cb_list;
 	spinlock_t *lock;
 	unsigned context, seqno;
@@ -190,11 +192,25 @@ static inline void fence_get(struct fence *fence)
 	kref_get(&fence->refcount);
 }
 
+/**
+ * fence_get_rcu - get a fence from a reservation_object_list with
+ *                 rcu read lock
+ * @fence:	[in]	fence to increase refcount of
+ *
+ * Function returns NULL if no refcount could be obtained, or the fence.
+ */
+static inline struct fence *fence_get_rcu(struct fence *fence)
+{
+	if (kref_get_unless_zero(&fence->refcount))
+		return fence;
+	else
+		return NULL;
+}
+
 extern void release_fence(struct kref *kref);
 
 static inline void free_fence(struct fence *fence)
 {
-	kfree(fence);
+	kfree_rcu(fence, rcu);
 }
 
 /**
diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index 2affe67dea6e..b73d3df5a8e8 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -42,22 +42,29 @@
 #include <linux/ww_mutex.h>
 #include <linux/fence.h>
 #include <linux/slab.h>
+#include <linux/seqlock.h>
+#include <linux/rcupdate.h>
 
 extern struct ww_class reservation_ww_class;
+extern struct lock_class_key reservation_seqcount_class;
+extern const char reservation_seqcount_string[];
 
 struct reservation_object_list {
+	struct rcu_head rcu;
 	u32 shared_count, shared_max;
-	struct fence *shared[];
+	struct fence __rcu *shared[];
 };
 
 struct reservation_object {
 	struct ww_mutex lock;
+	seqcount_t seq;
 
-	struct fence *fence_excl;
-	struct reservation_object_list *fence;
+	struct fence __rcu *fence_excl;
+	struct reservation_object_list __rcu *fence;
 	struct reservation_object_list *staged;
 };
 
+#define reservation_object_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define reservation_object_assert_held(obj) \
 	lockdep_assert_held(&(obj)->lock.base)
 
@@ -66,8 +73,9 @@ reservation_object_init(struct reservation_object *obj)
 {
 	ww_mutex_init(&obj->lock, &reservation_ww_class);
 
-	obj->fence_excl = NULL;
-	obj->fence = NULL;
+	__seqcount_init(&obj->seq, reservation_seqcount_string,
+			&reservation_seqcount_class);
+	RCU_INIT_POINTER(obj->fence, NULL);
+	RCU_INIT_POINTER(obj->fence_excl, NULL);
 	obj->staged = NULL;
 }
 
@@ -76,18 +84,20 @@ reservation_object_fini(struct reservation_object *obj)
 {
 	int i;
 	struct reservation_object_list *fobj;
+	struct fence *excl;
 
 	/*
 	 * This object should be dead and all references must have
-	 * been released to it.
+	 * been released to it, so no need to be protected with rcu.
 	 */
-	if (obj->fence_excl)
-		fence_put(obj->fence_excl);
+	excl = rcu_dereference_protected(obj->fence_excl, 1);
+	if (excl)
+		fence_put(excl);
 
-	fobj = obj->fence;
+	fobj = rcu_dereference_protected(obj->fence, 1);
 	if (fobj) {
 		for (i = 0; i < fobj->shared_count; ++i)
-			fence_put(fobj->shared[i]);
+			fence_put(rcu_dereference_protected(fobj->shared[i], 1));
 
 		kfree(fobj);
 	}
@@ -99,17 +109,15 @@ reservation_object_fini(struct reservation_object *obj)
 static inline struct reservation_object_list *
 reservation_object_get_list(struct reservation_object *obj)
 {
-	reservation_object_assert_held(obj);
-
-	return obj->fence;
+	return rcu_dereference_check(obj->fence,
+				     reservation_object_held(obj));
 }
 
 static inline struct fence *
 reservation_object_get_excl(struct reservation_object *obj)
 {
-	reservation_object_assert_held(obj);
-
-	return obj->fence_excl;
+	return rcu_dereference_check(obj->fence_excl,
+				     reservation_object_held(obj));
 }
 
 int reservation_object_reserve_shared(struct reservation_object *obj);
@@ -119,4 +127,16 @@ void reservation_object_add_shared_fence(struct reservation_object *obj,
 void reservation_object_add_excl_fence(struct reservation_object *obj,
 				       struct fence *fence);
 
+int reservation_object_get_fences_rcu(struct reservation_object *obj,
+				      struct fence **pfence_excl,
+				      unsigned *pshared_count,
+				      struct fence ***pshared);
+
+long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
+					 bool wait_all, bool intr,
+					 unsigned long timeout);
+
+bool reservation_object_test_signaled_rcu(struct reservation_object *obj,
+					  bool test_all);
+
 #endif /* _LINUX_RESERVATION_H */
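To show what the new __rcu annotations and the seqcount are for, here is the read-side pattern reduced to a skeleton; example_peek_excl() is made up and not part of the patch, only the fence_*() and seqcount/RCU primitives are the ones used above:

static struct fence *example_peek_excl(struct reservation_object *obj)
{
	struct fence *excl;
	unsigned seq;

	for (;;) {
		seq = read_seqcount_begin(&obj->seq);
		rcu_read_lock();

		excl = rcu_dereference(obj->fence_excl);
		if (excl && !fence_get_rcu(excl))
			excl = NULL;	/* fence is being freed, treat as none */

		rcu_read_unlock();

		if (!read_seqcount_retry(&obj->seq, seq))
			return excl;	/* caller must fence_put() a non-NULL result */

		if (excl)
			fence_put(excl);	/* raced with a writer, retry */
	}
}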
On 19-05-14 15:42, Thomas Hellstrom wrote:
Hi, Maarten!
Some nitpicks, and that krealloc within rcu lock still worries me. Otherwise looks good.
/Thomas
On 04/23/2014 12:15 PM, Maarten Lankhorst wrote:
@@ -55,8 +60,8 @@ int reservation_object_reserve_shared(struct reservation_object *obj)
 		kfree(obj->staged);
 		obj->staged = NULL;
 		return 0;
-	}
-	max = old->shared_max * 2;
+	} else
+		max = old->shared_max * 2;
Perhaps as a separate reformatting patch?
I'll fold it into the patch that added reservation_object_reserve_shared.
+		fobj = rcu_dereference(obj->fence);
+		if (fobj) {
+			struct fence **nshared;
+
+			shared_count = ACCESS_ONCE(fobj->shared_count);
ACCESS_ONCE() shouldn't be needed inside the seqlock?
Yes, it is: shared_count may be increased, leading to potentially different sizes for the krealloc and the memcpy if the ACCESS_ONCE is removed. I could use shared_max here instead, which stays the same, but it would waste more memory.
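To illustrate the point (made-up example, not from the patch): taking one snapshot into a local is what keeps the allocation size and the copy size in sync; re-reading shared_count between the two could overrun the buffer if a writer appends in between:

static struct fence **example_copy_once(struct reservation_object_list *list)
{
	struct fence **copy;
	unsigned count;

	count = ACCESS_ONCE(list->shared_count);	/* read exactly once */
	copy = kmalloc_array(count, sizeof(*copy), GFP_KERNEL);
	if (!copy)
		return NULL;

	/*
	 * If list->shared_count were re-read here and a writer had appended a
	 * fence in the meantime, the copy below could overrun the allocation.
	 */
	memcpy(copy, list->shared, count * sizeof(*copy));
	return copy;
}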
+			nshared = krealloc(shared, sizeof(*shared) *
+					   shared_count, GFP_KERNEL);
Again, krealloc should be a sleeping function, and not suitable within a RCU read lock? I still think this krealloc should be moved to the start of the retry loop, and we should start with a suitable guess of shared_count (perhaps 0?) It's not like we're going to waste a lot of memory....
But shared_count is only known when holding the rcu lock.
What about this change?
@@ -254,16 +254,27 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 		fobj = rcu_dereference(obj->fence);
 		if (fobj) {
 			struct fence **nshared;
+			size_t sz;
 
 			shared_count = ACCESS_ONCE(fobj->shared_count);
-			nshared = krealloc(shared, sizeof(*shared) *
-					   shared_count, GFP_KERNEL);
+			sz = sizeof(*shared) * shared_count;
+
+			nshared = krealloc(shared, sz,
+					   GFP_NOWAIT | __GFP_NOWARN);
 			if (!nshared) {
+				rcu_read_unlock();
+				nshared = krealloc(shared, sz, GFP_KERNEL);
+				if (nshared) {
+					shared = nshared;
+					continue;
+				}
+
 				ret = -ENOMEM;
-				shared_count = retry = 0;
-				goto unlock;
+				shared_count = 0;
+				break;
 			}
 			shared = nshared;
-			memcpy(shared, fobj->shared, sizeof(*shared) *
-			       shared_count);
+			memcpy(shared, fobj->shared, sz);
 		} else
 			shared_count = 0;
 
 		fence_excl = rcu_dereference(obj->fence_excl);
+		/*
+		 * There could be a read_seqcount_retry here, but nothing cares
+		 * about whether it's the old or newer fence pointers that are
+		 * signale. That race could still have happened after checking
Typo.
Oops.
On 05/19/2014 03:13 PM, Maarten Lankhorst wrote:
ACCESS_ONCE() shouldn't be needed inside the seqlock?
Yes, it is: shared_count may be increased, leading to potentially different sizes for the krealloc and the memcpy if the ACCESS_ONCE is removed. I could use shared_max here instead, which stays the same, but it would waste more memory.
OK.
+			nshared = krealloc(shared, sizeof(*shared) *
+					   shared_count, GFP_KERNEL);
Again, krealloc should be a sleeping function, and not suitable within a RCU read lock? I still think this krealloc should be moved to the start of the retry loop, and we should start with a suitable guess of shared_count (perhaps 0?) It's not like we're going to waste a lot of memory....
But shared_count is only known when holding the rcu lock.
What about this change?
Sure. That should work.
/Thomas
On 05/19/2014 03:13 PM, Maarten Lankhorst wrote:
+			shared_count = ACCESS_ONCE(fobj->shared_count);
ACCESS_ONCE() shouldn't be needed inside the seqlock?
Yes, it is: shared_count may be increased, leading to potentially different sizes for the krealloc and the memcpy if the ACCESS_ONCE is removed. I could use shared_max here instead, which stays the same, but it would waste more memory.
Maarten, another perhaps ignorant question WRT this: does ACCESS_ONCE() guarantee that the value accessed is read atomically?
/Thomas
On 20-05-14 17:13, Thomas Hellstrom wrote:
Maarten, another perhaps ignorant question WRT this: does ACCESS_ONCE() guarantee that the value accessed is read atomically?
Well, I've reworked the code to use shared_max, so this point is moot. :-)
On any arch I'm aware of it would work: either the old or the new value would be visible, as long as natural alignment is used. RCU uses the same trick in the rcu_dereference() macro, so if this didn't work, RCU wouldn't work either.
~Maarten
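A made-up illustration of the argument: for a naturally aligned 32-bit field, ACCESS_ONCE() forces a single plain load, so on the architectures discussed here a concurrent reader observes either the old or the new value, never a torn mix. All names below are hypothetical:

struct example_counter {
	u32 count;	/* naturally aligned, updated by a single writer */
};

static u32 example_read(struct example_counter *c)
{
	/* one untorn load; the compiler may not re-read or split it */
	return ACCESS_ONCE(c->count);
}

static void example_increment(struct example_counter *c)
{
	/* a concurrent example_read() sees either the old or the new value */
	ACCESS_ONCE(c->count) = c->count + 1;
}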