Hi guys,
turned out that userspace can also merge dma_fence_chain contains which can result in really huge arrays.
Fix those merges to sort the arrays and remove the duplicates. Additional to that start to use kvzalloc() for dma_fence_array containers so that can handle much larger arrays if necessary.
Please review and comment, Christian.
Reports indicates that some userspace applications try to merge more than 80k of fences into a single dma_fence_array leading to a warning from kzalloc() that the requested size becomes to big.
While that is clearly an userspace bug we should probably handle that case gracefully in the kernel.
So we can either reject requests to merge more than a reasonable amount of fences (64k maybe?) or we can start to use kvzalloc() instead of kzalloc(). This patch here does the later.
Signed-off-by: Christian König christian.koenig@amd.com CC: stable@vger.kernel.org --- drivers/dma-buf/dma-fence-array.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 8a08ffde31e7..46ac42bcfac0 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -119,8 +119,8 @@ static void dma_fence_array_release(struct dma_fence *fence) for (i = 0; i < array->num_fences; ++i) dma_fence_put(array->fences[i]);
- kfree(array->fences); - dma_fence_free(fence); + kvfree(array->fences); + kvfree_rcu(fence, rcu); }
static void dma_fence_array_set_deadline(struct dma_fence *fence, @@ -153,7 +153,7 @@ struct dma_fence_array *dma_fence_array_alloc(int num_fences) { struct dma_fence_array *array;
- return kzalloc(struct_size(array, callbacks, num_fences), GFP_KERNEL); + return kvzalloc(struct_size(array, callbacks, num_fences), GFP_KERNEL); } EXPORT_SYMBOL(dma_fence_array_alloc);
The merge function initially handled only individual fences and arrays which in turn were created by the merge function. This allowed to create the new array by a simple merge sort based on the fence context number.
The problem is now that since the addition of timeline sync objects userspace can create chain containers in basically any fence context order.
If those are merged together it can happen that we create really large arrays since the merge sort algorithm doesn't work any more.
So put an insert sort behind the merge sort which kicks in when the input fences are not in the expected order. This isn't as efficient as a heap sort, but has better properties for the most common use case.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-unwrap.c | 39 ++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 628af51c81af..d9aa280d9ff6 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -106,7 +106,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);
count = 0; - do { + while (true) { unsigned int sel;
restart: @@ -144,11 +144,40 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, } }
- if (tmp) { - array[count++] = dma_fence_get(tmp); - fences[sel] = dma_fence_unwrap_next(&iter[sel]); + if (!tmp) + break; + + /* + * We could use a binary search here, but since the assumption + * is that the main input are already sorted dma_fence_arrays + * just looking from end has a higher chance of finding the + * right location on the first try + */ + + for (i = count; i--;) { + if (likely(array[i]->context < tmp->context)) + break; + + if (array[i]->context == tmp->context) { + if (dma_fence_is_later(tmp, array[i])) { + dma_fence_put(array[i]); + array[i] = dma_fence_get(tmp); + } + fences[sel] = dma_fence_unwrap_next(&iter[sel]); + goto restart; + } } - } while (tmp); + + ++i; + /* + * Make room for the fence, this should be a nop most of the + * time. + */ + memcpy(&array[i + 1], &array[i], (count - i) * sizeof(*array)); + array[i] = dma_fence_get(tmp); + fences[sel] = dma_fence_unwrap_next(&iter[sel]); + count++; + };
if (count == 0) { tmp = dma_fence_allocate_private_stub(ktime_get());
Add a test which double checks that fences are in the expected order after a merge.
While at it also switch to using a mock array for the complex test instead of a merge.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/st-dma-fence-unwrap.c | 69 ++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c index f0cee984b6c7..876eabddb08f 100644 --- a/drivers/dma-buf/st-dma-fence-unwrap.c +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -304,6 +304,72 @@ static int unwrap_merge(void *arg) return err; }
+static int unwrap_merge_order(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *a1, *a2, *c1, *c2; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + dma_fence_enable_sw_signaling(f1); + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + dma_fence_enable_sw_signaling(f2); + + a1 = mock_array(2, f1, f2); + if (!a1) + return -ENOMEM; + + c1 = mock_chain(NULL, dma_fence_get(f1)); + if (!c1) + goto error_put_a1; + + c2 = mock_chain(c1, dma_fence_get(f2)); + if (!c2) + goto error_put_a1; + + /* + * The fences in the chain are the same as in a1 but in oposite order, + * the dma_fence_merge() function should be able to handle that. + */ + a2 = dma_fence_unwrap_merge(a1, c2); + + dma_fence_unwrap_for_each(fence, &iter, a2) { + if (fence == f1) { + f1 = NULL; + if (!f2) + pr_err("Unexpected order!\n"); + } else if (fence == f2) { + f2 = NULL; + if (f1) + pr_err("Unexpected order!\n"); + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_put(a2); + return err; + +error_put_a1: + dma_fence_put(a1); + return -ENOMEM; +} + static int unwrap_merge_complex(void *arg) { struct dma_fence *fence, *f1, *f2, *f3, *f4, *f5; @@ -327,7 +393,7 @@ static int unwrap_merge_complex(void *arg) goto error_put_f2;
/* The resulting array has the fences in reverse */ - f4 = dma_fence_unwrap_merge(f2, f1); + f4 = mock_array(2, dma_fence_get(f2), dma_fence_get(f1)); if (!f4) goto error_put_f3;
@@ -375,6 +441,7 @@ int dma_fence_unwrap(void) SUBTEST(unwrap_chain), SUBTEST(unwrap_chain_array), SUBTEST(unwrap_merge), + SUBTEST(unwrap_merge_order), SUBTEST(unwrap_merge_complex), };
linaro-mm-sig@lists.linaro.org