From: Rob Clark robdclark@chromium.org
If a signal callback releases the sw_sync fence, that will trigger a deadlock as the timeline_fence_release recurses onto the fence->lock (used both for signaling and the the timeline tree).
To avoid that, temporarily hold an extra reference to the signalled fences until after we drop the lock.
(This is an alternative implementation of https://patchwork.kernel.org/patch/11664717/ which avoids some potential UAF issues with the original patch.)
Reported-by: Bas Nieuwenhuizen bas@basnieuwenhuizen.nl Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free") Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/dma-buf/sw_sync.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 63f0aeb66db6..ceb6a0408624 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = { */ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) { + LIST_HEAD(signalled); struct sync_pt *pt, *next;
trace_sync_timeline(obj); @@ -203,9 +204,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) if (!timeline_fence_signaled(&pt->base)) break;
+ dma_fence_get(&pt->base); + list_del_init(&pt->link); rb_erase(&pt->node, &obj->pt_tree);
+ list_add_tail(&pt->link, &signalled); + /* * A signal callback may release the last reference to this * fence, causing it to be freed. That operation has to be @@ -218,6 +223,11 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) }
spin_unlock_irq(&obj->lock); + + list_for_each_entry_safe(pt, next, &signalled, link) { + list_del(&pt->link); + dma_fence_put(&pt->base); + } }
/**
Am 17.08.23 um 23:37 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
If a signal callback releases the sw_sync fence, that will trigger a deadlock as the timeline_fence_release recurses onto the fence->lock (used both for signaling and the the timeline tree).
To avoid that, temporarily hold an extra reference to the signalled fences until after we drop the lock.
(This is an alternative implementation of https://patchwork.kernel.org/patch/11664717/ which avoids some potential UAF issues with the original patch.)
Reported-by: Bas Nieuwenhuizen bas@basnieuwenhuizen.nl Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free") Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/sw_sync.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 63f0aeb66db6..ceb6a0408624 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = { */ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) {
- LIST_HEAD(signalled); struct sync_pt *pt, *next;
trace_sync_timeline(obj); @@ -203,9 +204,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) if (!timeline_fence_signaled(&pt->base)) break;
dma_fence_get(&pt->base);
Question is why don't have the fences a reference on the list in the first place?
- list_del_init(&pt->link); rb_erase(&pt->node, &obj->pt_tree);
list_add_tail(&pt->link, &signalled);
Instead of list_del()/list_add_tail() you could also use list_move_tail() here.
- /*
- A signal callback may release the last reference to this
- fence, causing it to be freed. That operation has to be
@@ -218,6 +223,11 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) } spin_unlock_irq(&obj->lock);
- list_for_each_entry_safe(pt, next, &signalled, link) {
list_del(&pt->link);
You must use list_del_init() here or otherwise the pt->link will keep pointing to the prev/next entries and the list_empty() check in timeline_fence_release() will fail and potentially corrupt things.
Regards, Christian.
dma_fence_put(&pt->base);
- } }
/**
On Fri, Aug 18, 2023 at 2:09 AM Christian König christian.koenig@amd.com wrote:
Am 17.08.23 um 23:37 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
If a signal callback releases the sw_sync fence, that will trigger a deadlock as the timeline_fence_release recurses onto the fence->lock (used both for signaling and the the timeline tree).
To avoid that, temporarily hold an extra reference to the signalled fences until after we drop the lock.
(This is an alternative implementation of https://patchwork.kernel.org/patch/11664717/ which avoids some potential UAF issues with the original patch.)
Reported-by: Bas Nieuwenhuizen bas@basnieuwenhuizen.nl Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free") Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/sw_sync.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 63f0aeb66db6..ceb6a0408624 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = { */ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) {
LIST_HEAD(signalled); struct sync_pt *pt, *next; trace_sync_timeline(obj);
@@ -203,9 +204,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) if (!timeline_fence_signaled(&pt->base)) break;
dma_fence_get(&pt->base);
Question is why don't have the fences a reference on the list in the first place?
As best I can tell, it is because the fences hold a reference to the timeline, so a reference in the other direction would cause a loop.
BR, -R
list_del_init(&pt->link); rb_erase(&pt->node, &obj->pt_tree);
list_add_tail(&pt->link, &signalled);
Instead of list_del()/list_add_tail() you could also use list_move_tail() here.
/* * A signal callback may release the last reference to this * fence, causing it to be freed. That operation has to be
@@ -218,6 +223,11 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) }
spin_unlock_irq(&obj->lock);
list_for_each_entry_safe(pt, next, &signalled, link) {
list_del(&pt->link);
You must use list_del_init() here or otherwise the pt->link will keep pointing to the prev/next entries and the list_empty() check in timeline_fence_release() will fail and potentially corrupt things.
Regards, Christian.
dma_fence_put(&pt->base);
}
}
/**
linaro-mm-sig@lists.linaro.org