 
            Quoting Chris Wilson (2019-08-21 16:24:22)
Quoting Christian König (2019-08-21 13:31:45)
@@ -117,17 +120,10 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
busy_check_writer(rcu_dereference(obj->base.resv->fence_excl));

/* Translate shared fences to READ set of engines */
list = rcu_dereference(obj->base.resv->fence);
if (list) {
unsigned int shared_count = list->shared_count, i;
for (i = 0; i < shared_count; ++i) {
struct dma_fence *fence =
rcu_dereference(list->shared[i]);
args->busy |= busy_check_reader(fence);
}
}
readers = dma_resv_fences_get_rcu(&obj->base.resv->readers);
dma_fence_array_for_each(fence, cursor, readers)
args->busy |= busy_check_reader(fence);
dma_fence_put(readers);

That's underwhelming; the full-mb shows up in scaling tests (I'll test the impact of this series later). Something like,
To put some numbers to it, adding the full-mb adds 5ns to a single thread on Kaby Lake and 20ns under contention.
-Chris