krealloc_array() ignores attempts to reduce the array size, so the attempt to save memory is completely pointless here.
Also move the test for the no-fences case into sync_file_set_fence(); this way we don't even touch the fence array when we don't have any fences.
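For illustration, a minimal sketch (not part of the patch, with a made-up helper name) of the behaviour the commit message relies on: shrinking an allocation with krealloc_array() normally hands back the same slab object, so a "shrink to the final count" step does not actually return memory.

#include <linux/slab.h>
#include <linux/dma-fence.h>

/* Hypothetical demo only: shrinking usually returns the very same object. */
static void krealloc_shrink_demo(void)
{
    struct dma_fence **fences, **tmp;

    fences = kcalloc(16, sizeof(*fences), GFP_KERNEL);
    if (!fences)
        return;

    /* Four entries still fit into the slab bucket that held sixteen. */
    tmp = krealloc_array(fences, 4, sizeof(*fences), GFP_KERNEL);
    if (tmp)
        fences = tmp;   /* in practice tmp == fences here */

    kfree(fences);
}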
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/sync_file.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 514d213261df..0fe564539166 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -157,9 +157,15 @@ static int sync_file_set_fence(struct sync_file *sync_file,
      * we already own a new reference to the fence. For num_fence > 1
      * we own the reference of the dma_fence_array creation.
      */
-    if (num_fences == 1) {
+
+    if (num_fences == 0) {
+        sync_file->fence = dma_fence_get_stub();
+        kfree(fences);
+
+    } else if (num_fences == 1) {
         sync_file->fence = fences[0];
         kfree(fences);
+
     } else {
         array = dma_fence_array_create(num_fences, fences,
                                        dma_fence_context_alloc(1),
@@ -261,19 +267,6 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
         }
     }
-    if (index == 0)
-        fences[index++] = dma_fence_get_stub();
-
-    if (num_fences > index) {
-        struct dma_fence **tmp;
-
-        /* Keep going even when reducing the size failed */
-        tmp = krealloc_array(fences, index, sizeof(*fences),
-                             GFP_KERNEL);
-        if (tmp)
-            fences = tmp;
-    }
-
     if (sync_file_set_fence(sync_file, fences, index) < 0)
         goto err_put_fences;
Move the code from the inline functions into exported functions.
While at it, also clean up the selftests, fix the error handling, remove unused functions, and stop leaking memory in failed tests.
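For context, a minimal usage sketch of the iterator this patch exports (the helper function below is hypothetical; only dma_fence_unwrap_for_each(), the cursor and dma_fence_is_signaled() come from the code shown in the patch):

#include <linux/dma-fence.h>
#include <linux/dma-fence-unwrap.h>

/* Sketch: walk every leaf fence behind a possibly wrapped fence. */
static bool all_leaf_fences_signaled(struct dma_fence *head)
{
    struct dma_fence_unwrap cursor;
    struct dma_fence *f;

    dma_fence_unwrap_for_each(f, &cursor, head) {
        if (!dma_fence_is_signaled(f))
            return false;
    }
    return true;
}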
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/Makefile              |  2 +-
 drivers/dma-buf/dma-fence-unwrap.c    | 59 +++++++++++++++++++++++++++
 drivers/dma-buf/st-dma-fence-unwrap.c | 40 ++++++++----------
 include/linux/dma-fence-unwrap.h      | 52 ++---------------------
 4 files changed, 80 insertions(+), 73 deletions(-)
 create mode 100644 drivers/dma-buf/dma-fence-unwrap.c
diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 4c9eb53ba3f8..70ec901edf2c 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
-        dma-resv.o
+        dma-fence-unwrap.o dma-resv.o
 obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o
 obj-$(CONFIG_DMABUF_HEAPS) += heaps/
 obj-$(CONFIG_SYNC_FILE) += sync_file.o
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
new file mode 100644
index 000000000000..711be125428c
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * dma-fence-util: misc functions for dma_fence objects
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ * Authors:
+ *    Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-array.h>
+#include <linux/dma-fence-chain.h>
+#include <linux/dma-fence-unwrap.h>
+
+/* Internal helper to start new array iteration, don't use directly */
+static struct dma_fence *
+__dma_fence_unwrap_array(struct dma_fence_unwrap *cursor)
+{
+    cursor->array = dma_fence_chain_contained(cursor->chain);
+    cursor->index = 0;
+    return dma_fence_array_first(cursor->array);
+}
+
+/**
+ * dma_fence_unwrap_first - return the first fence from fence containers
+ * @head: the entrypoint into the containers
+ * @cursor: current position inside the containers
+ *
+ * Unwraps potential dma_fence_chain/dma_fence_array containers and return the
+ * first fence.
+ */
+struct dma_fence *dma_fence_unwrap_first(struct dma_fence *head,
+                                         struct dma_fence_unwrap *cursor)
+{
+    cursor->chain = dma_fence_get(head);
+    return __dma_fence_unwrap_array(cursor);
+}
+EXPORT_SYMBOL_GPL(dma_fence_unwrap_first);
+
+/**
+ * dma_fence_unwrap_next - return the next fence from a fence containers
+ * @cursor: current position inside the containers
+ *
+ * Continue unwrapping the dma_fence_chain/dma_fence_array containers and return
+ * the next fence from them.
+ */
+struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor)
+{
+    struct dma_fence *tmp;
+
+    ++cursor->index;
+    tmp = dma_fence_array_next(cursor->array, cursor->index);
+    if (tmp)
+        return tmp;
+
+    cursor->chain = dma_fence_chain_walk(cursor->chain);
+    return __dma_fence_unwrap_array(cursor);
+}
+EXPORT_SYMBOL_GPL(dma_fence_unwrap_next);
diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c
index 039f016b57be..59628add93f5 100644
--- a/drivers/dma-buf/st-dma-fence-unwrap.c
+++ b/drivers/dma-buf/st-dma-fence-unwrap.c
@@ -4,27 +4,19 @@
  * Copyright (C) 2022 Advanced Micro Devices, Inc.
  */
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-array.h>
+#include <linux/dma-fence-chain.h>
 #include <linux/dma-fence-unwrap.h>
-#if 0
-#include <linux/kernel.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/sched/signal.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/random.h>
-#endif

 #include "selftest.h"

 #define CHAIN_SZ (4 << 10)

-static inline struct mock_fence {
+struct mock_fence {
     struct dma_fence base;
     spinlock_t lock;
-} *to_mock_fence(struct dma_fence *f) {
-    return container_of(f, struct mock_fence, base);
-}
+};

 static const char *mock_name(struct dma_fence *f)
 {
@@ -45,7 +37,8 @@ static struct dma_fence *mock_fence(void)
         return NULL;

     spin_lock_init(&f->lock);
-    dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
+    dma_fence_init(&f->base, &mock_ops, &f->lock,
+                   dma_fence_context_alloc(1), 1);

     return &f->base;
 }
@@ -113,7 +106,6 @@ static int sanitycheck(void *arg)
     if (!chain)
         return -ENOMEM;

-    dma_fence_signal(f);
     dma_fence_put(chain);
     return err;
 }
@@ -154,10 +146,10 @@ static int unwrap_array(void *arg)
         err = -EINVAL;
     }

-    dma_fence_signal(f1);
-    dma_fence_signal(f2);
+    dma_fence_put(f1);
+    dma_fence_put(f2);
     dma_fence_put(array);
-    return 0;
+    return err;
 }

 static int unwrap_chain(void *arg)
@@ -196,10 +188,10 @@ static int unwrap_chain(void *arg)
         err = -EINVAL;
     }

-    dma_fence_signal(f1);
-    dma_fence_signal(f2);
+    dma_fence_put(f1);
+    dma_fence_put(f2);
     dma_fence_put(chain);
-    return 0;
+    return err;
 }

 static int unwrap_chain_array(void *arg)
@@ -242,10 +234,10 @@ static int unwrap_chain_array(void *arg)
         err = -EINVAL;
     }

-    dma_fence_signal(f1);
-    dma_fence_signal(f2);
+    dma_fence_put(f1);
+    dma_fence_put(f2);
     dma_fence_put(chain);
-    return 0;
+    return err;
 }

 int dma_fence_unwrap(void)
diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
index 77e335a1bcac..e7c219da4ed7 100644
--- a/include/linux/dma-fence-unwrap.h
+++ b/include/linux/dma-fence-unwrap.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * fence-chain: chain fences together in a timeline
- *
  * Copyright (C) 2022 Advanced Micro Devices, Inc.
  * Authors:
  *    Christian König <christian.koenig@amd.com>
@@ -10,8 +8,7 @@
 #ifndef __LINUX_DMA_FENCE_UNWRAP_H
 #define __LINUX_DMA_FENCE_UNWRAP_H

-#include <linux/dma-fence-chain.h>
-#include <linux/dma-fence-array.h>
+struct dma_fence;

 /**
  * struct dma_fence_unwrap - cursor into the container structure
  *
@@ -33,50 +30,9 @@ struct dma_fence_unwrap {
     unsigned int index;
 };

-/* Internal helper to start new array iteration, don't use directly */
-static inline struct dma_fence *
-__dma_fence_unwrap_array(struct dma_fence_unwrap * cursor)
-{
-    cursor->array = dma_fence_chain_contained(cursor->chain);
-    cursor->index = 0;
-    return dma_fence_array_first(cursor->array);
-}
-
-/**
- * dma_fence_unwrap_first - return the first fence from fence containers
- * @head: the entrypoint into the containers
- * @cursor: current position inside the containers
- *
- * Unwraps potential dma_fence_chain/dma_fence_array containers and return the
- * first fence.
- */
-static inline struct dma_fence *
-dma_fence_unwrap_first(struct dma_fence *head, struct dma_fence_unwrap *cursor)
-{
-    cursor->chain = dma_fence_get(head);
-    return __dma_fence_unwrap_array(cursor);
-}
-
-/**
- * dma_fence_unwrap_next - return the next fence from a fence containers
- * @cursor: current position inside the containers
- *
- * Continue unwrapping the dma_fence_chain/dma_fence_array containers and return
- * the next fence from them.
- */
-static inline struct dma_fence *
-dma_fence_unwrap_next(struct dma_fence_unwrap *cursor)
-{
-    struct dma_fence *tmp;
-
-    ++cursor->index;
-    tmp = dma_fence_array_next(cursor->array, cursor->index);
-    if (tmp)
-        return tmp;
-
-    cursor->chain = dma_fence_chain_walk(cursor->chain);
-    return __dma_fence_unwrap_array(cursor);
-}
+struct dma_fence *dma_fence_unwrap_first(struct dma_fence *head,
+                                         struct dma_fence_unwrap *cursor);
+struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor);

 /**
  * dma_fence_unwrap_for_each - iterate over all fences in containers
On Tue, Apr 26, 2022 at 02:46:36PM +0200, Christian König wrote:
Move the code from the inline functions into exported functions.
While at it, also clean up the selftests, fix the error handling, remove unused functions, and stop leaking memory in failed tests.
Can you split this out? At least I'm not seeing why this has to be all smashed into one patch. -Daniel
Introduce a dma_fence_merge() macro which unwraps fences that can potentially be containers themselves and then merges them back together into a flat dma_fence_array.
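A minimal usage sketch of the new macro (the wrapper function is made up for illustration; dma_fence_merge(), its NULL return on allocation failure and the caller-owned reference are as documented in the patch below):

#include <linux/dma-fence.h>
#include <linux/dma-fence-unwrap.h>

/* Sketch: combine two possibly wrapped fences into one flat fence. */
static struct dma_fence *merge_example(struct dma_fence *a,
                                       struct dma_fence *b)
{
    /* The macro places the dma_fence_unwrap cursors on our stack. */
    struct dma_fence *merged = dma_fence_merge(a, b);

    if (!merged)
        return NULL;    /* memory allocation failure */

    /* Flat result; drop it with dma_fence_put() when done. */
    return merged;
}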
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-fence-unwrap.c    |  95 ++++++++++++++++++++
 drivers/dma-buf/st-dma-fence-unwrap.c |  47 ++++++++++
 drivers/dma-buf/sync_file.c           | 119 ++------------------------
 include/linux/dma-fence-unwrap.h      |  24 ++++++
 4 files changed, 172 insertions(+), 113 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index 711be125428c..c9becc74896d 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -11,6 +11,7 @@
 #include <linux/dma-fence-array.h>
 #include <linux/dma-fence-chain.h>
 #include <linux/dma-fence-unwrap.h>
+#include <linux/slab.h>

 /* Internal helper to start new array iteration, don't use directly */
 static struct dma_fence *
@@ -57,3 +58,97 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor)
     return __dma_fence_unwrap_array(cursor);
 }
 EXPORT_SYMBOL_GPL(dma_fence_unwrap_next);
+
+/* Implementation for the dma_fence_merge() macro, don't use directly */
+struct dma_fence *__dma_fence_merge(unsigned int num_fences,
+                                    struct dma_fence **fences,
+                                    struct dma_fence_unwrap *iter)
+{
+    struct dma_fence_array *result;
+    struct dma_fence *tmp, **array;
+    unsigned int i, count;
+
+    count = 0;
+    for (i = 0; i < num_fences; ++i) {
+        dma_fence_unwrap_for_each(tmp, &iter[i], fences[i])
+            if (!dma_fence_is_signaled(tmp))
+                ++count;
+    }
+
+    if (count == 0)
+        return dma_fence_get_stub();
+
+    if (count > INT_MAX)
+        return NULL;
+
+    array = kmalloc_array(count, sizeof(*array), GFP_KERNEL);
+    if (!array)
+        return NULL;
+
+    /*
+     * We can't guarantee that input fences are ordered by context, but
+     * it is still quite likely when this function is used multiple times.
+     * So attempt to order the fences by context as we pass over them and
+     * merge fences with the same context.
+     */
+    for (i = 0; i < num_fences; ++i)
+        fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);
+
+    count = 0;
+    do {
+        unsigned int sel;
+
+restart:
+        tmp = NULL;
+        for (i = 0; i < num_fences; ++i) {
+            struct dma_fence *next = fences[i];
+
+            if (!next || dma_fence_is_signaled(next))
+                continue;
+
+            if (!tmp || tmp->context > next->context) {
+                tmp = next;
+                sel = i;
+
+            } else if (tmp->context < next->context) {
+                continue;
+
+            } else if (dma_fence_is_later(tmp, next)) {
+                fences[i] = dma_fence_unwrap_next(&iter[i]);
+                goto restart;
+            } else {
+                fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+                goto restart;
+            }
+        }
+
+        if (tmp) {
+            array[count++] = dma_fence_get(tmp);
+            fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+        }
+    } while (tmp);
+
+    if (count == 0) {
+        tmp = dma_fence_get_stub();
+        goto return_tmp;
+    }
+
+    if (count == 1) {
+        tmp = array[0];
+        goto return_tmp;
+    }
+
+    result = dma_fence_array_create(count, array,
+                                    dma_fence_context_alloc(1),
+                                    1, false);
+    if (!result) {
+        tmp = NULL;
+        goto return_tmp;
+    }
+    return &result->base;
+
+return_tmp:
+    kfree(array);
+    return tmp;
+}
+EXPORT_SYMBOL_GPL(__dma_fence_merge);
diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c
index 59628add93f5..23ab134417ed 100644
--- a/drivers/dma-buf/st-dma-fence-unwrap.c
+++ b/drivers/dma-buf/st-dma-fence-unwrap.c
@@ -240,6 +240,52 @@ static int unwrap_chain_array(void *arg)
     return err;
 }
+static int unwrap_merge(void *arg)
+{
+    struct dma_fence *fence, *f1, *f2, *f3;
+    struct dma_fence_unwrap iter;
+    int err = 0;
+
+    f1 = mock_fence();
+    if (!f1)
+        return -ENOMEM;
+
+    f2 = mock_fence();
+    if (!f2) {
+        err = -ENOMEM;
+        goto error_put_f1;
+    }
+
+    f3 = dma_fence_merge(f1, f2);
+    if (!f3) {
+        err = -ENOMEM;
+        goto error_put_f2;
+    }
+
+    dma_fence_unwrap_for_each(fence, &iter, f3) {
+        if (fence == f1) {
+            f1 = NULL;
+        } else if (fence == f2) {
+            f2 = NULL;
+        } else {
+            pr_err("Unexpected fence!\n");
+            err = -EINVAL;
+        }
+    }
+
+    if (f1 || f2) {
+        pr_err("Not all fences seen!\n");
+        err = -EINVAL;
+    }
+
+    dma_fence_put(f3);
+error_put_f2:
+    dma_fence_put(f2);
+error_put_f1:
+    dma_fence_put(f1);
+    return err;
+}
+
 int dma_fence_unwrap(void)
 {
     static const struct subtest tests[] = {
@@ -247,6 +293,7 @@ int dma_fence_unwrap(void)
         SUBTEST(unwrap_array),
         SUBTEST(unwrap_chain),
         SUBTEST(unwrap_chain_array),
+        SUBTEST(unwrap_merge),
     };
     return subtests(tests, NULL);
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 0fe564539166..fe149d7e3ce2 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -146,50 +146,6 @@ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
     return buf;
 }

-static int sync_file_set_fence(struct sync_file *sync_file,
-                               struct dma_fence **fences, int num_fences)
-{
-    struct dma_fence_array *array;
-
-    /*
-     * The reference for the fences in the new sync_file and held
-     * in add_fence() during the merge procedure, so for num_fences == 1
-     * we already own a new reference to the fence. For num_fence > 1
-     * we own the reference of the dma_fence_array creation.
-     */
-
-    if (num_fences == 0) {
-        sync_file->fence = dma_fence_get_stub();
-        kfree(fences);
-
-    } else if (num_fences == 1) {
-        sync_file->fence = fences[0];
-        kfree(fences);
-
-    } else {
-        array = dma_fence_array_create(num_fences, fences,
-                                       dma_fence_context_alloc(1),
-                                       1, false);
-        if (!array)
-            return -ENOMEM;
-
-        sync_file->fence = &array->base;
-    }
-
-    return 0;
-}
-
-static void add_fence(struct dma_fence **fences,
-                      int *i, struct dma_fence *fence)
-{
-    fences[*i] = fence;
-
-    if (!dma_fence_is_signaled(fence)) {
-        dma_fence_get(fence);
-        (*i)++;
-    }
-}
-
 /**
  * sync_file_merge() - merge two sync_files
  * @name: name of new fence
@@ -203,84 +159,21 @@ static void add_fence(struct dma_fence **fences,
 static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
                                          struct sync_file *b)
 {
-    struct dma_fence *a_fence, *b_fence, **fences;
-    struct dma_fence_unwrap a_iter, b_iter;
-    unsigned int index, num_fences;
     struct sync_file *sync_file;
+    struct dma_fence *fence;
     sync_file = sync_file_alloc();
     if (!sync_file)
         return NULL;
-    num_fences = 0;
-    dma_fence_unwrap_for_each(a_fence, &a_iter, a->fence)
-        ++num_fences;
-    dma_fence_unwrap_for_each(b_fence, &b_iter, b->fence)
-        ++num_fences;
-
-    if (num_fences > INT_MAX)
-        goto err_free_sync_file;
-
-    fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
-    if (!fences)
-        goto err_free_sync_file;
-
-    /*
-     * We can't guarantee that fences in both a and b are ordered, but it is
-     * still quite likely.
-     *
-     * So attempt to order the fences as we pass over them and merge fences
-     * with the same context.
-     */
-
-    index = 0;
-    for (a_fence = dma_fence_unwrap_first(a->fence, &a_iter),
-         b_fence = dma_fence_unwrap_first(b->fence, &b_iter);
-         a_fence || b_fence; ) {
-
-        if (!b_fence) {
-            add_fence(fences, &index, a_fence);
-            a_fence = dma_fence_unwrap_next(&a_iter);
-
-        } else if (!a_fence) {
-            add_fence(fences, &index, b_fence);
-            b_fence = dma_fence_unwrap_next(&b_iter);
-
-        } else if (a_fence->context < b_fence->context) {
-            add_fence(fences, &index, a_fence);
-            a_fence = dma_fence_unwrap_next(&a_iter);
-
-        } else if (b_fence->context < a_fence->context) {
-            add_fence(fences, &index, b_fence);
-            b_fence = dma_fence_unwrap_next(&b_iter);
-
-        } else if (__dma_fence_is_later(a_fence->seqno, b_fence->seqno,
-                                        a_fence->ops)) {
-            add_fence(fences, &index, a_fence);
-            a_fence = dma_fence_unwrap_next(&a_iter);
-            b_fence = dma_fence_unwrap_next(&b_iter);
-
-        } else {
-            add_fence(fences, &index, b_fence);
-            a_fence = dma_fence_unwrap_next(&a_iter);
-            b_fence = dma_fence_unwrap_next(&b_iter);
-        }
+    fence = dma_fence_merge(a->fence, b->fence);
+    if (!fence) {
+        fput(sync_file->file);
+        return NULL;
     }
-
-    if (sync_file_set_fence(sync_file, fences, index) < 0)
-        goto err_put_fences;
-
+    sync_file->fence = fence;
     strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
     return sync_file;
-
-err_put_fences:
-    while (index)
-        dma_fence_put(fences[--index]);
-    kfree(fences);
-
-err_free_sync_file:
-    fput(sync_file->file);
-    return NULL;
 }
 static int sync_file_release(struct inode *inode, struct file *file)
diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
index e7c219da4ed7..7c0fab318301 100644
--- a/include/linux/dma-fence-unwrap.h
+++ b/include/linux/dma-fence-unwrap.h
@@ -48,4 +48,28 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor);
     for (fence = dma_fence_unwrap_first(head, cursor); fence;    \
          fence = dma_fence_unwrap_next(cursor))

+struct dma_fence *__dma_fence_merge(unsigned int num_fences,
+                                    struct dma_fence **fences,
+                                    struct dma_fence_unwrap *cursors);
+
+/**
+ * dma_fence_merge - unwrap and merge fences
+ *
+ * All fences given as parameters are unwrapped and merged back together as a
+ * flat dma_fence_array. Useful if multiple containers need to be merged
+ * together.
+ *
+ * Implemented as a macro to allocate the necessary arrays on the stack and
+ * account the stack frame size to the caller.
+ *
+ * Returns NULL on memory allocation failure, a dma_fence object representing
+ * all the given fences otherwise.
+ */
+#define dma_fence_merge(...)                                            \
+    ({                                                                  \
+        struct dma_fence *__f[] = { __VA_ARGS__ };                      \
+        struct dma_fence_unwrap __c[ARRAY_SIZE(__f)];                   \
+                                                                        \
+        __dma_fence_merge(ARRAY_SIZE(__f), __f, __c);                   \
+    })
+
 #endif
Greeting,
FYI, we noticed the following commit (built with gcc-11):
commit: 81503894b26294f97f732563263752ea44add7c7 ("[PATCH 3/3] dma-buf: generalize fence merging") url: https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/dma-buf-sync_... base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git d615b5416f8a1afeb82d13b238f8152c572d59c0 patch link: https://lore.kernel.org/dri-devel/20220426124637.329764-3-christian.koenig@a...
in testcase: igt
version: igt-x86_64-c2b13bab-1_20220427
with following parameters:

	group: group-04
	ucode: 0xec
on test machine: 12 threads 1 sockets Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz with 32G memory
caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
If you fix the issue, kindly add following tag
Reported-by: kernel test robot <oliver.sang@intel.com>
[ 55.768078][ T2464] ------------[ cut here ]------------ [ 55.780592][ T439] jbd2_revoke_record_s 0 0 64 64 1 : tunables 0 0 0 : slabdata 0 0 0 [ 55.781248][ T2464] Memory manager not clean during takedown. [ 55.781269][ T2464] WARNING: CPU: 11 PID: 2464 at drivers/gpu/drm/drm_mm.c:999 drm_mm_takedown+0x3d/0x80 [drm] [ 55.786549][ T439] [ 55.788115][ T439] ext4_fc_dentry_update 0 0 128 32 1 : tunables 0 0 0 : slabdata 0 0 0 [ 55.798170][ T2464] Modules linked in: btrfs [ 55.803906][ T439] [ 55.813867][ T2464] blake2b_generic xor raid6_pq zstd_compress libcrc32c [ 55.817555][ T439] ext4_inode_cache 0 0 1304 25 8 : tunables 0 0 0 : slabdata 0 0 0 [ 55.827752][ T2464] i915(-) intel_gtt [ 55.832010][ T439] [ 55.833493][ T439] ext4_free_data 0 0 88 46 1 : tunables 0 0 0 : slabdata 0 0 0 [ 55.834196][ T2464] ipmi_devintf drm_buddy [ 55.840960][ T439] [ 55.842533][ T439] ext4_allocation_context 0 0 208 39 2 : tunables 0 0 0 : slabdata 0 0 0 [ 55.852294][ T2464] ipmi_msghandler drm_dp_helper [ 55.856043][ T439] [ 55.858224][ T2464] sd_mod ttm t10_pi drm_kms_helper crc64_rocksoft_generic crc64_rocksoft crc64 syscopyarea sg [ 55.871096][ T439] ext4_prealloc_space 0 0 168 24 1 : tunables 0 0 0 : slabdata 0 0 0 [ 55.873753][ T2464] sysfillrect sysimgblt [ 55.875940][ T439] [ 55.887787][ T2464] intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass [ 55.894065][ T439] ext4_system_zone 0 0 56 73 1 : tunables 0 0 0 : slabdata 0 0 0 [ 55.894774][ T2464] crct10dif_pclmul [ 55.904905][ T439] [ 55.916416][ T2464] crc32_pclmul crc32c_intel intel_wmi_thunderbolt wmi_bmof fb_sys_fops ghash_clmulni_intel rapl intel_cstate [ 55.921993][ T439] ext4_io_end_vec 0 0 48 85 1 : tunables 0 0 0 : slabdata 0 0 0 [ 55.922703][ T2464] ahci mei_wdt libahci intel_uncore [ 55.933870][ T439] [ 55.945209][ T2464] mei_me i2c_designware_platform libata i2c_designware_core drm idma64 mei intel_pch_thermal [ 55.950355][ T439] ext4_io_end 0 0 96 42 1 : tunables 0 0 0 : slabdata 0 0 0 [ 55.951079][ T2464] wmi video [ 55.962498][ T439] [ 55.964087][ T439] ext4_bio_post_read_ctx 128 128 64 64 1 : tunables 0 0 0 : slabdata 2 2 0 [ 55.973846][ T2464] intel_pmc_core acpi_pad ip_tables [ 55.973849][ T2464] CPU: 11 PID: 2464 Comm: i915_module_loa Not tainted 5.18.0-rc4-00009-g81503894b262 #1 [ 55.978971][ T439] [ 55.981161][ T2464] Hardware name: Dell Inc. 
OptiPlex 7060/0C96W1, BIOS 1.4.2 06/11/2019 [ 55.981163][ T2464] RIP: 0010:drm_mm_takedown+0x3d/0x80 [drm] [ 55.992780][ T439] ext4_pending_reservation 0 0 48 85 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.002540][ T2464] Code: 38 48 89 ea 53 48 89 fb 48 c1 ea 03 80 3c 02 00 75 1d 48 8b 43 38 48 39 c5 75 03 5b 5d c3 48 c7 c7 40 47 52 c0 e8 40 f7 01 c3 <0f> 0b 5b 5d c3 48 89 ef e8 36 6b 46 c1 eb d9 66 66 2e 0f 1f 84 00 [ 56.005591][ T439] [ 56.007091][ T439] ext4_extent_status 0 0 56 73 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.007783][ T2464] RSP: 0018:ffffc9000661fb70 EFLAGS: 00010286 [ 56.019562][ T439] [ 56.024695][ T2464] [ 56.024696][ T2464] RAX: 0000000000000000 RBX: ffff8882890a5690 RCX: 0000000000000000 [ 56.024698][ T2464] RDX: 0000000000000027 RSI: 0000000000000004 RDI: fffff52000cc3f60 [ 56.035705][ T439] mbcache 0 0 88 46 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.036430][ T2464] RBP: ffff8882890a56c8 R08: 0000000000000001 R09: ffff8887357a774b [ 56.044496][ T439] [ 56.045973][ T439] io_kiocb 0 0 320 25 2 : tunables 0 0 0 : slabdata 0 0 0 [ 56.050234][ T2464] R10: ffffed10e6af4ee9 R11: 0000000000000001 R12: 0000000000000002 [ 56.050236][ T2464] R13: ffffffffc128b2c0 R14: dffffc0000000000 R15: ffff8882890a6458 [ 56.062182][ T439] [ 56.081547][ T2464] FS: 00007f43b5b46c00(0000) GS:ffff888735780000(0000) knlGS:0000000000000000 [ 56.081549][ T2464] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 56.085225][ T439] kioctx 0 0 704 23 4 : tunables 0 0 0 : slabdata 0 0 0 [ 56.095174][ T2464] CR2: 0000565235713758 CR3: 0000000209e8e006 CR4: 00000000003706e0 [ 56.101072][ T439] [ 56.103271][ T2464] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 56.103272][ T2464] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 56.103273][ T2464] Call Trace: [ 56.103275][ T2464] <TASK> [ 56.106939][ T439] aio_kiocb 0 0 256 32 2 : tunables 0 0 0 : slabdata 0 0 0 [ 56.113257][ T2464] release_stolen_smem+0x5a/0x80 [i915] [ 56.121055][ T439] [ 56.122589][ T439] userfaultfd_ctx_cache 0 0 192 21 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.132403][ T2464] intel_memory_regions_driver_release+0x9e/0x180 [i915] [ 56.140200][ T439] [ 56.141703][ T439] fanotify_perm_event 0 0 104 39 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.142400][ T2464] ? i915_gem_driver_release+0xca/0x200 [i915] [ 56.153732][ T439] [ 56.155246][ T439] fanotify_path_event 0 0 96 42 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.161524][ T2464] i915_driver_release+0x8f/0x1c0 [i915] [ 56.169331][ T439] [ 56.171523][ T2464] devm_drm_dev_init_release+0x82/0x100 [drm] [ 56.181764][ T439] fanotify_fid_event 0 0 104 39 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.186693][ T2464] release_nodes+0xb1/0x240 [ 56.198030][ T439] [ 56.199507][ T439] fsnotify_mark 0 0 104 39 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.205837][ T2464] devres_release_all+0x106/0x180 [ 56.208025][ T439] [ 56.209502][ T439] dnotify_mark 0 0 112 36 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.215824][ T2464] ? devres_remove_group+0x580/0x580 [ 56.223618][ T439] [ 56.225096][ T439] dnotify_struct 0 0 48 85 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.226759][ T2464] ? mutex_unlock+0x80/0x100 [ 56.229555][ T439] [ 56.231042][ T439] dio 0 0 768 21 4 : tunables 0 0 0 : slabdata 0 0 0 [ 56.240884][ T2464] ? 
__mutex_unlock_slowpath+0x2c0/0x2c0 [ 56.246266][ T439] [ 56.247756][ T439] fasync_cache 0 0 64 64 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.248450][ T2464] device_unbind_cleanup+0x16/0x1c0 [ 56.260144][ T439] [ 56.261618][ T439] audit_tree_mark 0 0 112 36 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.267009][ T2464] device_release_driver_internal+0x3cb/0x5c0 [ 56.269199][ T439] [ 56.270674][ T439] pid_namespace 0 0 200 20 1 : tunables 0 0 0 : slabdata 0 0 0 [ 56.280721][ T2464] driver_detach+0xc0/0x180 [ 56.286712][ T439] [ 56.288203][ T439] posix_timers_cache 0 0 320 25 2 : tunables 0 0 0 : slabdata 0 0 0 [ 56.288904][ T2464] ? i915_pci_register_driver+0x40/0x40 [i915] [ 56.300410][ T439] [ 56.305889][ T2464] bus_remove_driver+0xe4/0x300 [ 56.305892][ T2464] ? i915_pci_register_driver+0x40/0x40 [i915] [ 56.309558][ T439] rpc_inode_cache 39 39 832 39 8 : tunables 0 0 0 : slabdata 1 1 0 [ 56.313981][ T2464] pci_unregister_driver+0x26/0x280 [ 56.325402][ T439] [ 56.326876][ T439] rpc_buffers 15 15 2176 15 8 : tunables 0 0 0 : slabdata 1 1 0 [ 56.329754][ T2464] ? unregister_sysctl_table+0x7d/0x180 [ 56.331946][ T439] [ 56.333425][ T439] rpc_tasks 25 25 320 25 2 : tunables 0 0 0 : slabdata 1 1 0 [ 56.343288][ T2464] ? i915_pci_register_driver+0x40/0x40 [i915] [ 56.348153][ T439] [ 56.349631][ T439] UNIX-STREAM 601 728 1216 26 8 : tunables 0 0 0 : slabdata 28 28 0 [ 56.350344][ T2464] i915_exit+0x7e/0xc8 [i915] [ 56.361680][ T439] [ 56.363158][ T439] UNIX 312 312 1216 26 8 : tunables 0 0 0 : slabdata 12 12 0 [ 56.366810][ T2464] __do_sys_delete_module+0x2d6/0x500 [ 56.369004][ T439] [ 56.370479][ T439] ip4-frags 0 0 264 31 2 : tunables 0 0 0 : slabdata 0 0 0 [ 56.380352][ T2464] ? free_module+0x5c0/0x5c0 [ 56.384785][ T439] [ 56.386263][ T439] ip_mrt_cache 0 0 256 32 2 : tunables 0 0 0 : slabdata 0 0 0 [ 56.386982][ T2464] ? task_work_run+0xef/0x180 [ 56.398319][ T439] [ 56.404828][ T2464] ? exit_to_user_mode_loop+0xbc/0x140 [ 56.408487][ T439] UDP-Lite 0 0 1280 25 8 : tunables 0 0 0 : slabdata 0 0 0 [ 56.418351][ T2464] do_syscall_64+0x3b/0xc0 [ 56.423390][ T439] [ 56.424864][ T439] tcp_bind_bucket 96 96 128 32 1 : tunables 0 0 0 : slabdata 3 3 0 [ 56.425572][ T2464] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 56.436951][ T439] [ 56.442815][ T2464] RIP: 0033:0x7f43b94f3807 [ 56.442818][ T2464] Code: 73 01 c3 48 8b 0d 89 06 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 59 06 0c 00 f7 d8 64 89 01 48 [ 56.446526][ T439] inet_peer_cache 0 0 256 32 2 : tunables 0 0 0 : slabdata 0 0 0 [ 56.456370][ T2464] RSP: 002b:00007ffcb1ddb9c8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 56.460718][ T439] [ 56.462909][ T2464] RAX: ffffffffffffffda RBX: 000056523570dbc0 RCX: 00007f43b94f3807 [ 56.462910][ T2464] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000056523570dc28 [ 56.462912][ T2464] RBP: 00007f43b967ffc2 R08: 000056523570dc28 R09: 00007f43b9573e80 [ 56.462913][ T2464] R10: fffffffffffff80d R11: 0000000000000206 R12: 0000000000000000 [ 56.462914][ T2464] R13: 0000000000000000 R14: 0000000000000000 R15: 000056523570dbc0 [ 56.475823][ T439] xfrm_dst_cache 0 0 384 21 2 : tunables 0 0 0 : slabdata 0 0 0 [ 56.480333][ T2464] </TASK> [ 56.482522][ T439] [ 56.487204][ T2464] ---[ end trace 0000000000000000 ]---
...
[ 72.764392][ T2464] ============================================================================= [ 72.773226][ T2464] BUG i915_vma_resource (Tainted: G B W ): Objects remaining in i915_vma_resource on __kmem_cache_shutdown() [ 72.785604][ T2464] ----------------------------------------------------------------------------- [ 72.785604][ T2464] [ 72.796594][ T2464] Slab 0x000000005697c6d8 objects=21 used=2 fp=0x0000000014a9417a flags=0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) [ 72.810086][ T2464] CPU: 0 PID: 2464 Comm: i915_module_loa Tainted: G B W 5.18.0-rc4-00009-g81503894b262 #1 [ 72.820905][ T2464] Hardware name: Dell Inc. OptiPlex 7060/0C96W1, BIOS 1.4.2 06/11/2019 [ 72.828960][ T2464] Call Trace: [ 72.832099][ T2464] <TASK> [ 72.834887][ T2464] dump_stack_lvl+0x34/0x44 [ 72.839227][ T2464] slab_err+0x95/0x100 [ 72.843147][ T2464] ? _raw_write_lock_irq+0x100/0x100 [ 72.848273][ T2464] __kmem_cache_shutdown+0x176/0x380 [ 72.853393][ T2464] ? i915_vma_resource_bind_dep_await+0x2c0/0x2c0 [i915] [ 72.860329][ T2464] kmem_cache_destroy+0x53/0x140 [ 72.865103][ T2464] i915_exit+0x7e/0xc8 [i915] [ 72.869708][ T2464] __do_sys_delete_module+0x2d6/0x500 [ 72.875946][ T2464] ? free_module+0x5c0/0x5c0 [ 72.880379][ T2464] ? task_work_run+0xef/0x180 [ 72.884897][ T2464] ? exit_to_user_mode_loop+0xbc/0x140 [ 72.890187][ T2464] do_syscall_64+0x3b/0xc0 [ 72.894440][ T2464] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 72.900168][ T2464] RIP: 0033:0x7f43b94f3807 [ 72.904421][ T2464] Code: 73 01 c3 48 8b 0d 89 06 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 59 06 0c 00 f7 d8 64 89 01 48 [ 72.923778][ T2464] RSP: 002b:00007ffcb1ddb9c8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 72.932001][ T2464] RAX: ffffffffffffffda RBX: 000056523570dbc0 RCX: 00007f43b94f3807 [ 72.939790][ T2464] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000056523570dc28 [ 72.947592][ T2464] RBP: 00007f43b967ffc2 R08: 000056523570dc28 R09: 00007f43b9573e80 [ 72.955389][ T2464] R10: fffffffffffff80d R11: 0000000000000206 R12: 0000000000000000 [ 72.963187][ T2464] R13: 0000000000000000 R14: 0000000000000000 R15: 000056523570dbc0 [ 72.970980][ T2464] </TASK> [ 72.973851][ T2464] Object 0x000000008bd30ce1 @offset=0 [ 72.979061][ T2464] Object 0x00000000530ee48b @offset=768
To reproduce:
        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        sudo bin/lkp install job.yaml           # job file is attached in this email
        bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
        sudo bin/lkp run generated-yaml-file

        # if come across any failure that blocks the test,
        # please remove ~/.lkp and /lkp dir to run from a clean state.
On Tue, Apr 26, 2022 at 02:46:37PM +0200, Christian König wrote:
Introduce a dma_fence_merge() macro which allows to unwrap fences which potentially can be containers as well and then merge them back together into a flat dma_fence_array.
Signed-off-by: Christian König christian.koenig@amd.com
So this is really neat (the va args macro trick especially), but I'm not sure how much use it is with just one user. Is there like more planned? Or is the idea to make merging consistent so that the context sorting trick can be done consistently? -Daniel
On 04.05.22 10:43, Daniel Vetter wrote:
On Tue, Apr 26, 2022 at 02:46:37PM +0200, Christian König wrote:
Introduce a dma_fence_merge() macro which allows to unwrap fences which potentially can be containers as well and then merge them back together into a flat dma_fence_array.
Signed-off-by: Christian König christian.koenig@amd.com
So this is really neat (the va args macro trick especially), but I'm not sure how much use it is with just one user. Is there like more planned?
We have another potential user of this in drm_syncobj_flatten_chain(), and at least a couple more potential cases in amdgpu come to mind.
I just wanted to double check the general implementation before I start to use this more widely.
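To make that intended use concrete, a purely hypothetical sketch (this is not the actual drm_syncobj or amdgpu code): a caller that needs a flat fence can simply pass a single, possibly wrapped fence through the macro.

/* Hypothetical flattening helper built on dma_fence_merge(). */
static struct dma_fence *flatten_example(struct dma_fence *in)
{
    /* Even with one argument, chain/array containers are unwrapped. */
    return dma_fence_merge(in);
}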
Or is the idea to make merging consistent so that the context sorting trick can be done consistently?
The context sorting trick is just a nice-to-have optimization. My main intention here is to have a utility function for flattening things out that I can point people to, one which does the job and works reliably in all cases.
Christian.
-Daniel
drivers/dma-buf/dma-fence-unwrap.c | 95 ++++++++++++++++++++ drivers/dma-buf/st-dma-fence-unwrap.c | 47 ++++++++++ drivers/dma-buf/sync_file.c | 119 ++------------------------ include/linux/dma-fence-unwrap.h | 24 ++++++ 4 files changed, 172 insertions(+), 113 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 711be125428c..c9becc74896d 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -11,6 +11,7 @@ #include <linux/dma-fence-array.h> #include <linux/dma-fence-chain.h> #include <linux/dma-fence-unwrap.h> +#include <linux/slab.h> /* Internal helper to start new array iteration, don't use directly */ static struct dma_fence * @@ -57,3 +58,97 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor) return __dma_fence_unwrap_array(cursor); } EXPORT_SYMBOL_GPL(dma_fence_unwrap_next);
+/* Implementation for the dma_fence_merge() marco, don't use directly */ +struct dma_fence *__dma_fence_merge(unsigned int num_fences,
struct dma_fence **fences,
struct dma_fence_unwrap *iter)
+{
- struct dma_fence_array *result;
- struct dma_fence *tmp, **array;
- unsigned int i, count;
- count = 0;
- for (i = 0; i < num_fences; ++i) {
dma_fence_unwrap_for_each(tmp, &iter[i], fences[i])
if (!dma_fence_is_signaled(tmp))
++count;
- }
- if (count == 0)
return dma_fence_get_stub();
- if (count > INT_MAX)
return NULL;
- array = kmalloc_array(count, sizeof(*array), GFP_KERNEL);
- if (!array)
return NULL;
- /*
* We can't guarantee that inpute fences are ordered by context, but
* it is still quite likely when this function is used multiple times.
* So attempt to order the fences by context as we pass over them and
* merge fences with the same context.
*/
- for (i = 0; i < num_fences; ++i)
fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);
- count = 0;
- do {
unsigned int sel;
+restart:
tmp = NULL;
for (i = 0; i < num_fences; ++i) {
struct dma_fence *next = fences[i];
if (!next || dma_fence_is_signaled(next))
continue;
if (!tmp || tmp->context > next->context) {
tmp = next;
sel = i;
} else if (tmp->context < next->context) {
continue;
} else if (dma_fence_is_later(tmp, next)) {
fences[i] = dma_fence_unwrap_next(&iter[i]);
goto restart;
} else {
fences[sel] = dma_fence_unwrap_next(&iter[sel]);
goto restart;
}
}
if (tmp) {
array[count++] = dma_fence_get(tmp);
fences[sel] = dma_fence_unwrap_next(&iter[sel]);
}
- } while (tmp);
- if (count == 0) {
tmp = dma_fence_get_stub();
goto return_tmp;
- }
- if (count == 1) {
tmp = array[0];
goto return_tmp;
- }
- result = dma_fence_array_create(count, array,
dma_fence_context_alloc(1),
1, false);
- if (!result) {
tmp = NULL;
goto return_tmp;
- }
- return &result->base;
+return_tmp:
- kfree(array);
- return tmp;
+} +EXPORT_SYMBOL_GPL(__dma_fence_merge); diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c index 59628add93f5..23ab134417ed 100644 --- a/drivers/dma-buf/st-dma-fence-unwrap.c +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -240,6 +240,52 @@ static int unwrap_chain_array(void *arg) return err; } +static int unwrap_merge(void *arg) +{
- struct dma_fence *fence, *f1, *f2, *f3;
- struct dma_fence_unwrap iter;
- int err = 0;
- f1 = mock_fence();
- if (!f1)
return -ENOMEM;
- f2 = mock_fence();
- if (!f2) {
err = -ENOMEM;
goto error_put_f1;
- }
- f3 = dma_fence_merge(f1, f2);
- if (!f3) {
err = -ENOMEM;
goto error_put_f2;
- }
- dma_fence_unwrap_for_each(fence, &iter, f3) {
if (fence == f1) {
f1 = NULL;
} else if (fence == f2) {
f2 = NULL;
} else {
pr_err("Unexpected fence!\n");
err = -EINVAL;
}
- }
- if (f1 || f2) {
pr_err("Not all fences seen!\n");
err = -EINVAL;
- }
- dma_fence_put(f3);
+error_put_f2:
- dma_fence_put(f2);
+error_put_f1:
- dma_fence_put(f1);
- return err;
+}
- int dma_fence_unwrap(void) { static const struct subtest tests[] = {
@@ -247,6 +293,7 @@ int dma_fence_unwrap(void) SUBTEST(unwrap_array), SUBTEST(unwrap_chain), SUBTEST(unwrap_chain_array),
};SUBTEST(unwrap_merge),
return subtests(tests, NULL); diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 0fe564539166..fe149d7e3ce2 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -146,50 +146,6 @@ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len) return buf; } -static int sync_file_set_fence(struct sync_file *sync_file,
struct dma_fence **fences, int num_fences)
-{
- struct dma_fence_array *array;
- /*
* The reference for the fences in the new sync_file and held
* in add_fence() during the merge procedure, so for num_fences == 1
* we already own a new reference to the fence. For num_fence > 1
* we own the reference of the dma_fence_array creation.
*/
- if (num_fences == 0) {
sync_file->fence = dma_fence_get_stub();
kfree(fences);
- } else if (num_fences == 1) {
sync_file->fence = fences[0];
kfree(fences);
- } else {
array = dma_fence_array_create(num_fences, fences,
dma_fence_context_alloc(1),
1, false);
if (!array)
return -ENOMEM;
sync_file->fence = &array->base;
- }
- return 0;
-}
-static void add_fence(struct dma_fence **fences,
int *i, struct dma_fence *fence)
-{
- fences[*i] = fence;
- if (!dma_fence_is_signaled(fence)) {
dma_fence_get(fence);
(*i)++;
- }
-}
- /**
- sync_file_merge() - merge two sync_files
- @name: name of new fence
@@ -203,84 +159,21 @@ static void add_fence(struct dma_fence **fences, static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) {
- struct dma_fence *a_fence, *b_fence, **fences;
- struct dma_fence_unwrap a_iter, b_iter;
- unsigned int index, num_fences; struct sync_file *sync_file;
- struct dma_fence *fence;
sync_file = sync_file_alloc(); if (!sync_file) return NULL;
- num_fences = 0;
- dma_fence_unwrap_for_each(a_fence, &a_iter, a->fence)
++num_fences;
- dma_fence_unwrap_for_each(b_fence, &b_iter, b->fence)
++num_fences;
- if (num_fences > INT_MAX)
goto err_free_sync_file;
- fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
- if (!fences)
goto err_free_sync_file;
- /*
* We can't guarantee that fences in both a and b are ordered, but it is
* still quite likely.
*
* So attempt to order the fences as we pass over them and merge fences
* with the same context.
*/
- index = 0;
- for (a_fence = dma_fence_unwrap_first(a->fence, &a_iter),
b_fence = dma_fence_unwrap_first(b->fence, &b_iter);
a_fence || b_fence; ) {
if (!b_fence) {
add_fence(fences, &index, a_fence);
a_fence = dma_fence_unwrap_next(&a_iter);
} else if (!a_fence) {
add_fence(fences, &index, b_fence);
b_fence = dma_fence_unwrap_next(&b_iter);
} else if (a_fence->context < b_fence->context) {
add_fence(fences, &index, a_fence);
a_fence = dma_fence_unwrap_next(&a_iter);
} else if (b_fence->context < a_fence->context) {
add_fence(fences, &index, b_fence);
b_fence = dma_fence_unwrap_next(&b_iter);
} else if (__dma_fence_is_later(a_fence->seqno, b_fence->seqno,
a_fence->ops)) {
add_fence(fences, &index, a_fence);
a_fence = dma_fence_unwrap_next(&a_iter);
b_fence = dma_fence_unwrap_next(&b_iter);
} else {
add_fence(fences, &index, b_fence);
a_fence = dma_fence_unwrap_next(&a_iter);
b_fence = dma_fence_unwrap_next(&b_iter);
}
- fence = dma_fence_merge(a->fence, b->fence);
- if (!fence) {
fput(sync_file->file);
}return NULL;
- if (sync_file_set_fence(sync_file, fences, index) < 0)
goto err_put_fences;
- sync_file->fence = fence; strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file;
-err_put_fences:
- while (index)
dma_fence_put(fences[--index]);
- kfree(fences);
-err_free_sync_file:
- fput(sync_file->file);
- return NULL; }
 
 static int sync_file_release(struct inode *inode, struct file *file)
diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
index e7c219da4ed7..7c0fab318301 100644
--- a/include/linux/dma-fence-unwrap.h
+++ b/include/linux/dma-fence-unwrap.h
@@ -48,4 +48,28 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor);
 	for (fence = dma_fence_unwrap_first(head, cursor); fence;	\
 	     fence = dma_fence_unwrap_next(cursor))
 
+struct dma_fence *__dma_fence_merge(unsigned int num_fences,
+				    struct dma_fence **fences,
+				    struct dma_fence_unwrap *cursors);
+
+/**
+ * dma_fence_merge - unwrap and merge fences
+ *
+ * All fences given as parameters are unwrapped and merged back together as a
+ * flat dma_fence_array. Useful if multiple containers need to be merged
+ * together.
+ *
+ * Implemented as a macro to allocate the necessary arrays on the stack and
+ * account the stack frame size to the caller.
+ *
+ * Returns NULL on memory allocation failure, a dma_fence object representing
+ * all the given fences otherwise.
+ */
+#define dma_fence_merge(...)						\
+	({								\
+		struct dma_fence *__f[] = { __VA_ARGS__ };		\
+		struct dma_fence_unwrap __c[ARRAY_SIZE(__f)];		\
+									\
+		__dma_fence_merge(ARRAY_SIZE(__f), __f, __c);		\
+	})
+
 #endif
-- 2.25.1
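To make the intended usage concrete, here is a minimal sketch of a caller. The helper name and the driver context are made up for illustration; only dma_fence_merge() itself is from the series, and the reference handling noted in the comment mirrors how sync_file_merge() uses it:

#include <linux/dma-fence.h>
#include <linux/dma-fence-unwrap.h>

/*
 * Hypothetical caller: combine an explicit in-fence with the fence of the
 * previous job before scheduling new work.  Both inputs may themselves be
 * chains or arrays; the result is one flat fence (or the stub fence when
 * everything is already signaled), or NULL on allocation failure.  The
 * inputs keep their references, the caller owns the returned reference.
 */
static struct dma_fence *example_job_dependencies(struct dma_fence *in_fence,
						  struct dma_fence *prev_job)
{
	return dma_fence_merge(in_fence, prev_job);
}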
On Tue, Apr 26, 2022 at 02:46:37PM +0200, Christian König wrote:
Introduce a dma_fence_merge() macro which allows unwrapping fences that can themselves be containers and then merging them back together into a flat dma_fence_array.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-unwrap.c | 95 ++++++++++++++++++++ drivers/dma-buf/st-dma-fence-unwrap.c | 47 ++++++++++ drivers/dma-buf/sync_file.c | 119 ++------------------------ include/linux/dma-fence-unwrap.h | 24 ++++++ 4 files changed, 172 insertions(+), 113 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index 711be125428c..c9becc74896d 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -11,6 +11,7 @@
 #include <linux/dma-fence-array.h>
 #include <linux/dma-fence-chain.h>
 #include <linux/dma-fence-unwrap.h>
+#include <linux/slab.h>
 
 /* Internal helper to start new array iteration, don't use directly */
 static struct dma_fence *
@@ -57,3 +58,97 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor)
 	return __dma_fence_unwrap_array(cursor);
 }
 EXPORT_SYMBOL_GPL(dma_fence_unwrap_next);
+
+/* Implementation for the dma_fence_merge() macro, don't use directly */
+struct dma_fence *__dma_fence_merge(unsigned int num_fences,
+				    struct dma_fence **fences,
+				    struct dma_fence_unwrap *iter)
+{
+	struct dma_fence_array *result;
+	struct dma_fence *tmp, **array;
+	unsigned int i, count;
+
+	count = 0;
+	for (i = 0; i < num_fences; ++i) {
+		dma_fence_unwrap_for_each(tmp, &iter[i], fences[i])
+			if (!dma_fence_is_signaled(tmp))
So I realized that dma_fence_array doesn't filter out signalled fences, but dma_fence_chain does. I wonder whether we shouldn't be more consistent here and push these checks into dma_fence_unwrap for everyone, and then also add a huge warning that every time you iterate you might get fewer fences, since that could lead to surprises :-)
Anyway kinda orthogonal.
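A rough sketch of what pushing the check into the iterator could look like, purely to make the idea concrete; this is not part of the series, and dma_fence_unwrap_first() would need the same treatment:

/*
 * Hypothetical variant of dma_fence_unwrap_next() that skips signaled
 * fences, so every dma_fence_unwrap_for_each() user only ever sees
 * unsignaled fences.
 */
struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor)
{
	struct dma_fence *tmp;

	do {
		++cursor->index;
		tmp = dma_fence_array_next(cursor->array, cursor->index);
		if (!tmp) {
			/* Current array exhausted, walk to the next chain node */
			cursor->chain = dma_fence_chain_walk(cursor->chain);
			tmp = __dma_fence_unwrap_array(cursor);
		}
	} while (tmp && dma_fence_is_signaled(tmp));

	return tmp;
}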
+				++count;
+	}
+
+	if (count == 0)
+		return dma_fence_get_stub();
+
+	if (count > INT_MAX)
+		return NULL;
If you actually want to make this secure you need to bail out when the count goes above INT_MAX for the first time, since you might have wrapped still. It's a bit annoying to fix though since there's no dma_fence_unwrap_end to clean up the temp references.
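To make the concern concrete, the counting pass could be a small helper that refuses to wrap, roughly like below. This is only a sketch; as noted, bailing out mid-iteration still leaks the chain reference held by the cursor because there is no dma_fence_unwrap_end() yet:

/* Hypothetical helper: count unsignaled fences without letting the counter wrap */
static int dma_fence_unwrap_count(unsigned int num_fences,
				  struct dma_fence **fences,
				  struct dma_fence_unwrap *iter)
{
	struct dma_fence *tmp;
	unsigned int i, count = 0;

	for (i = 0; i < num_fences; ++i) {
		dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
			if (dma_fence_is_signaled(tmp))
				continue;
			/* Bail out before count can exceed INT_MAX */
			if (WARN_ON_ONCE(count == INT_MAX))
				return -EINVAL;	/* still leaks the cursor's chain ref */
			++count;
		}
	}
	return count;
}

__dma_fence_merge() could then treat a negative return as failure instead of checking the total afterwards.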
+
+	array = kmalloc_array(count, sizeof(*array), GFP_KERNEL);
+	if (!array)
+		return NULL;
+
+	/*
+	 * We can't guarantee that input fences are ordered by context, but
+	 * it is still quite likely when this function is used multiple times.
+	 * So attempt to order the fences by context as we pass over them and
+	 * merge fences with the same context.
+	 */
+	for (i = 0; i < num_fences; ++i)
+		fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);
Maybe add a comment here like "Since this function is only used through the dma_fence_merge macro we can thrash the argument array and use it as scratch space" or something like that, I was freaked out for a bit what's going on here :-)
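Something along these lines above that loop would probably do; the wording is only a suggestion:

	/*
	 * The fences[] array is the on-stack copy built by the
	 * dma_fence_merge() macro, so this call owns it and can reuse it as
	 * scratch space for the per-input iteration state.
	 */
	for (i = 0; i < num_fences; ++i)
		fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);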
+
+	count = 0;
+	do {
+		unsigned int sel;
+
+restart:
+		tmp = NULL;
+		for (i = 0; i < num_fences; ++i) {
+			struct dma_fence *next = fences[i];
+
+			if (!next || dma_fence_is_signaled(next))
+				continue;
+
+			if (!tmp || tmp->context > next->context) {
+				tmp = next;
+				sel = i;
+
+			} else if (tmp->context < next->context) {
+				continue;
+
+			} else if (dma_fence_is_later(tmp, next)) {
+				fences[i] = dma_fence_unwrap_next(&iter[i]);
+				goto restart;
+
+			} else {
+				fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+				goto restart;
+			}
+		}
+
+		if (tmp) {
+			array[count++] = dma_fence_get(tmp);
+			fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+		}
+	} while (tmp);
+
+	if (count == 0) {
+		tmp = dma_fence_get_stub();
+		goto return_tmp;
+	}
+
+	if (count == 1) {
+		tmp = array[0];
+		goto return_tmp;
+	}
+
+	result = dma_fence_array_create(count, array,
+					dma_fence_context_alloc(1),
+					1, false);
+	if (!result) {
+		tmp = NULL;
+		goto return_tmp;
+	}
+	return &result->base;
+
+return_tmp:
+	kfree(array);
+	return tmp;
+}
+EXPORT_SYMBOL_GPL(__dma_fence_merge);
diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c
index 59628add93f5..23ab134417ed 100644
--- a/drivers/dma-buf/st-dma-fence-unwrap.c
+++ b/drivers/dma-buf/st-dma-fence-unwrap.c
@@ -240,6 +240,52 @@ static int unwrap_chain_array(void *arg)
 	return err;
 }
 
+static int unwrap_merge(void *arg)
+{
+	struct dma_fence *fence, *f1, *f2, *f3;
+	struct dma_fence_unwrap iter;
+	int err = 0;
+
+	f1 = mock_fence();
+	if (!f1)
+		return -ENOMEM;
+
+	f2 = mock_fence();
+	if (!f2) {
+		err = -ENOMEM;
+		goto error_put_f1;
+	}
+
+	f3 = dma_fence_merge(f1, f2);
+	if (!f3) {
+		err = -ENOMEM;
+		goto error_put_f2;
+	}
+
+	dma_fence_unwrap_for_each(fence, &iter, f3) {
+		if (fence == f1) {
+			f1 = NULL;
+		} else if (fence == f2) {
+			f2 = NULL;
+		} else {
+			pr_err("Unexpected fence!\n");
+			err = -EINVAL;
+		}
+	}
+
+	if (f1 || f2) {
+		pr_err("Not all fences seen!\n");
+		err = -EINVAL;
+	}
+
+	dma_fence_put(f3);
+error_put_f2:
+	dma_fence_put(f2);
+error_put_f1:
+	dma_fence_put(f1);
+	return err;
+}
This doesn't really exercise any of the interesting cases in your merge loop, i.e. when there are multiple fences on the same timeline from different containers. Would be good to exercise these cases too, since it took me a while to understand what you're doing.
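For illustration, a subtest along these lines could cover the same-timeline case. It assumes the mock_fence()/mock_array() helpers already used by the other tests in this file (with mock_array() consuming the passed references) and is only a sketch, not a tested patch:

/*
 * Sketch: put the same fence into two different containers and check that
 * the merged result contains it exactly once.
 */
static int unwrap_merge_duplicate(void *arg)
{
	struct dma_fence *fence, *f1, *c1, *c2, *merged;
	struct dma_fence_unwrap iter;
	unsigned int seen = 0;
	int err = 0;

	f1 = mock_fence();
	if (!f1)
		return -ENOMEM;

	/* Wrap the same fence into two separate containers. */
	c1 = mock_array(1, dma_fence_get(f1));
	if (!c1) {
		err = -ENOMEM;
		goto error_put_f1;
	}

	c2 = mock_array(1, dma_fence_get(f1));
	if (!c2) {
		err = -ENOMEM;
		goto error_put_c1;
	}

	merged = dma_fence_merge(c1, c2);
	if (!merged) {
		err = -ENOMEM;
		goto error_put_c2;
	}

	dma_fence_unwrap_for_each(fence, &iter, merged) {
		if (fence == f1) {
			++seen;
		} else {
			pr_err("Unexpected fence in merge result!\n");
			err = -EINVAL;
		}
	}

	if (seen != 1) {
		pr_err("Duplicate not merged, seen %u times!\n", seen);
		err = -EINVAL;
	}

	dma_fence_put(merged);
error_put_c2:
	dma_fence_put(c2);
error_put_c1:
	dma_fence_put(c1);
error_put_f1:
	dma_fence_put(f1);
	return err;
}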
+
 int dma_fence_unwrap(void)
 {
 	static const struct subtest tests[] = {
@@ -247,6 +293,7 @@ int dma_fence_unwrap(void)
 		SUBTEST(unwrap_array),
 		SUBTEST(unwrap_chain),
 		SUBTEST(unwrap_chain_array),
+		SUBTEST(unwrap_merge),
 	};
 
 	return subtests(tests, NULL);
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 0fe564539166..fe149d7e3ce2 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -146,50 +146,6 @@ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
 	return buf;
 }
 
-static int sync_file_set_fence(struct sync_file *sync_file,
-			       struct dma_fence **fences, int num_fences)
-{
-	struct dma_fence_array *array;
-
-	/*
-	 * The reference for the fences in the new sync_file and held
-	 * in add_fence() during the merge procedure, so for num_fences == 1
-	 * we already own a new reference to the fence. For num_fence > 1
-	 * we own the reference of the dma_fence_array creation.
-	 */
-
-	if (num_fences == 0) {
-		sync_file->fence = dma_fence_get_stub();
-		kfree(fences);
-
-	} else if (num_fences == 1) {
-		sync_file->fence = fences[0];
-		kfree(fences);
-
-	} else {
-		array = dma_fence_array_create(num_fences, fences,
-					       dma_fence_context_alloc(1),
-					       1, false);
-		if (!array)
-			return -ENOMEM;
-
-		sync_file->fence = &array->base;
-	}
-
-	return 0;
-}
-
-static void add_fence(struct dma_fence **fences,
-		      int *i, struct dma_fence *fence)
-{
-	fences[*i] = fence;
-
-	if (!dma_fence_is_signaled(fence)) {
-		dma_fence_get(fence);
-		(*i)++;
-	}
-}
-
 /**
  * sync_file_merge() - merge two sync_files
  * @name:	name of new fence
@@ -203,84 +159,21 @@ static void add_fence(struct dma_fence **fences,
 static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 					 struct sync_file *b)
 {
-	struct dma_fence *a_fence, *b_fence, **fences;
-	struct dma_fence_unwrap a_iter, b_iter;
-	unsigned int index, num_fences;
 	struct sync_file *sync_file;
+	struct dma_fence *fence;
 
 	sync_file = sync_file_alloc();
 	if (!sync_file)
 		return NULL;
 
-	num_fences = 0;
-	dma_fence_unwrap_for_each(a_fence, &a_iter, a->fence)
-		++num_fences;
-	dma_fence_unwrap_for_each(b_fence, &b_iter, b->fence)
-		++num_fences;
-
-	if (num_fences > INT_MAX)
-		goto err_free_sync_file;
-
-	fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
-	if (!fences)
-		goto err_free_sync_file;
-
-	/*
-	 * We can't guarantee that fences in both a and b are ordered, but it is
-	 * still quite likely.
-	 *
-	 * So attempt to order the fences as we pass over them and merge fences
-	 * with the same context.
-	 */
-	index = 0;
-	for (a_fence = dma_fence_unwrap_first(a->fence, &a_iter),
-	     b_fence = dma_fence_unwrap_first(b->fence, &b_iter);
-	     a_fence || b_fence; ) {
-		if (!b_fence) {
-			add_fence(fences, &index, a_fence);
-			a_fence = dma_fence_unwrap_next(&a_iter);
-		} else if (!a_fence) {
-			add_fence(fences, &index, b_fence);
-			b_fence = dma_fence_unwrap_next(&b_iter);
-		} else if (a_fence->context < b_fence->context) {
-			add_fence(fences, &index, a_fence);
-			a_fence = dma_fence_unwrap_next(&a_iter);
-		} else if (b_fence->context < a_fence->context) {
-			add_fence(fences, &index, b_fence);
-			b_fence = dma_fence_unwrap_next(&b_iter);
-		} else if (__dma_fence_is_later(a_fence->seqno, b_fence->seqno,
-						a_fence->ops)) {
-			add_fence(fences, &index, a_fence);
-			a_fence = dma_fence_unwrap_next(&a_iter);
-			b_fence = dma_fence_unwrap_next(&b_iter);
-		} else {
-			add_fence(fences, &index, b_fence);
-			a_fence = dma_fence_unwrap_next(&a_iter);
-			b_fence = dma_fence_unwrap_next(&b_iter);
-		}
-	}
-
+	fence = dma_fence_merge(a->fence, b->fence);
+	if (!fence) {
+		fput(sync_file->file);
+		return NULL;
+	}
 
-	if (sync_file_set_fence(sync_file, fences, index) < 0)
-		goto err_put_fences;
-
+	sync_file->fence = fence;
 	strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
 	return sync_file;
-
-err_put_fences:
-	while (index)
-		dma_fence_put(fences[--index]);
-	kfree(fences);
-
-err_free_sync_file:
-	fput(sync_file->file);
-	return NULL;
 }
 
 static int sync_file_release(struct inode *inode, struct file *file)
diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
index e7c219da4ed7..7c0fab318301 100644
--- a/include/linux/dma-fence-unwrap.h
+++ b/include/linux/dma-fence-unwrap.h
@@ -48,4 +48,28 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor);
 	for (fence = dma_fence_unwrap_first(head, cursor); fence;	\
 	     fence = dma_fence_unwrap_next(cursor))
 
+struct dma_fence *__dma_fence_merge(unsigned int num_fences,
+				    struct dma_fence **fences,
+				    struct dma_fence_unwrap *cursors);
+
+/**
+ * dma_fence_merge - unwrap and merge fences
+ *
+ * All fences given as parameters are unwrapped and merged back together as a
+ * flat dma_fence_array. Useful if multiple containers need to be merged
+ * together.
+ *
+ * Implemented as a macro to allocate the necessary arrays on the stack and
+ * account the stack frame size to the caller.
+ *
+ * Returns NULL on memory allocation failure, a dma_fence object representing
+ * all the given fences otherwise.
+ */
+#define dma_fence_merge(...)						\
+	({								\
+		struct dma_fence *__f[] = { __VA_ARGS__ };		\
+		struct dma_fence_unwrap __c[ARRAY_SIZE(__f)];		\
+									\
+		__dma_fence_merge(ARRAY_SIZE(__f), __f, __c);		\
This is fancy :-)
Aside from the nits lgtm and should be useful. -Daniel
+	})
+
 #endif
2.25.1
On Tue, Apr 26, 2022 at 02:46:35PM +0200, Christian König wrote:
krealloc_array() ignores attempts to reduce the array size, so the attempt to save memory is completely pointless here.
Also move testing for the no fence case into sync_file_set_fence(), this way we don't even touch the fence array when we don't have any fences.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
drivers/dma-buf/sync_file.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 514d213261df..0fe564539166 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -157,9 +157,15 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 	 * we already own a new reference to the fence. For num_fence > 1
 	 * we own the reference of the dma_fence_array creation.
 	 */
-	if (num_fences == 1) {
+
+	if (num_fences == 0) {
+		sync_file->fence = dma_fence_get_stub();
+		kfree(fences);
+
+	} else if (num_fences == 1) {
 		sync_file->fence = fences[0];
 		kfree(fences);
+
 	} else {
 		array = dma_fence_array_create(num_fences, fences,
 					       dma_fence_context_alloc(1),
@@ -261,19 +267,6 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 		}
 	}
 
-	if (index == 0)
-		fences[index++] = dma_fence_get_stub();
-
-	if (num_fences > index) {
-		struct dma_fence **tmp;
-
-		/* Keep going even when reducing the size failed */
-		tmp = krealloc_array(fences, index, sizeof(*fences),
-				     GFP_KERNEL);
-		if (tmp)
-			fences = tmp;
-	}
-
 	if (sync_file_set_fence(sync_file, fences, index) < 0)
 		goto err_put_fences;
2.25.1