There is a real deadlock as well as sleeping in atomic() bug in here, if the bo put happens to be the last ref, since bo destruction wants to grab the same spinlock and sleeping locks. Fix that by dropping the ref using xe_bo_put_deferred(), and moving the final commit outside of the lock. Dropping the lock around the put is tricky since the bo can go out of scope and delete itself from the list, making it difficult to navigate to the next list entry.
Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2727 Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Himal Prasad Ghimiray himal.prasad.ghimiray@intel.com Cc: Tejas Upadhyay tejas.upadhyay@intel.com Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Matthew Brost matthew.brost@intel.com Reviewed-by: Tejas Upadhyay tejas.upadhyay@intel.com --- drivers/gpu/drm/xe/xe_drm_client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index e64f4b645e2e..badfa045ead8 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -196,6 +196,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) struct xe_drm_client *client; struct drm_gem_object *obj; struct xe_bo *bo; + LLIST_HEAD(deferred); unsigned int id; u32 mem_type;
@@ -215,11 +216,14 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) list_for_each_entry(bo, &client->bos_list, client_link) { if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; + bo_meminfo(bo, stats); - xe_bo_put(bo); + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock);
+ xe_bo_put_commit(&deferred); + for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { if (!xe_mem_type_to_name[mem_type]) continue;
bo_meminfo() wants to inspect bo state like tt and the ttm resource, however this state can change at any point leading to stuff like NPD and UAF, if the bo lock is not held. Grab the bo lock when calling bo_meminfo(), ensuring we drop any spinlocks first. In the case of object_idr we now also need to hold a ref.
v2 (MattB) - Also add xe_bo_assert_held()
Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing") Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Himal Prasad Ghimiray himal.prasad.ghimiray@intel.com Cc: Tejas Upadhyay tejas.upadhyay@intel.com Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Matthew Brost matthew.brost@intel.com Reviewed-by: Tejas Upadhyay tejas.upadhyay@intel.com --- drivers/gpu/drm/xe/xe_drm_client.c | 39 +++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index badfa045ead8..95a05c5bc897 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/types.h>
+#include "xe_assert.h" #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" @@ -151,10 +152,13 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, */ void xe_drm_client_remove_bo(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct xe_drm_client *client = bo->client;
+ xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); + spin_lock(&client->bos_lock); - list_del(&bo->client_link); + list_del_init(&bo->client_link); spin_unlock(&client->bos_lock);
xe_drm_client_put(client); @@ -166,6 +170,8 @@ static void bo_meminfo(struct xe_bo *bo, u64 sz = bo->size; u32 mem_type;
+ xe_bo_assert_held(bo); + if (bo->placement.placement) mem_type = bo->placement.placement->mem_type; else @@ -207,7 +213,20 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) idr_for_each_entry(&file->object_idr, obj, id) { struct xe_bo *bo = gem_to_xe_bo(obj);
- bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + xe_bo_get(bo); + spin_unlock(&file->table_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + xe_bo_put(bo); + spin_lock(&file->table_lock); + } } spin_unlock(&file->table_lock);
@@ -217,7 +236,21 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue;
- bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + spin_unlock(&client->bos_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + spin_lock(&client->bos_lock); + /* The bo ref will prevent this bo from being removed from the list */ + xe_assert(xef->xe, !list_empty(&bo->client_link)); + } + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock);
linux-stable-mirror@lists.linaro.org