The final call to zlib_deflate(Z_FINISH) may require more output space to be allocated and so needs to re-invoked. Failure to do so in the current code leads to incomplete zlib streams (albeit intact due to the use of Z_SYNC_FLUSH) resulting in the occasional short object capture.
Testcase: igt/i915-error-capture.js Fixes: 0a97015d45ee ("drm/i915: Compress GPU objects in error state") Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: stable@vger.kernel.org # v4.10+ --- drivers/gpu/drm/i915/i915_gpu_error.c | 60 +++++++++++++++++++++------ 1 file changed, 47 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3d5554f14dfd..ed8c16cbfaa4 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -237,6 +237,7 @@ static int compress_page(struct compress *c, struct drm_i915_error_object *dst) { struct z_stream_s *zstream = &c->zstream; + int flush = Z_NO_FLUSH;
zstream->next_in = src; if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) @@ -257,8 +258,11 @@ static int compress_page(struct compress *c, zstream->avail_out = PAGE_SIZE; }
- if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK) + if (zlib_deflate(zstream, flush) != Z_OK) return -EIO; + + if (zstream->avail_out) + flush = Z_SYNC_FLUSH; } while (zstream->avail_in);
/* Fallback to uncompressed if we increase size? */ @@ -268,19 +272,43 @@ static int compress_page(struct compress *c, return 0; }
-static void compress_fini(struct compress *c, +static int compress_flush(struct compress *c, struct drm_i915_error_object *dst) { struct z_stream_s *zstream = &c->zstream; + unsigned long page;
- if (dst) { - zlib_deflate(zstream, Z_FINISH); - dst->unused = zstream->avail_out; - } + do { + switch (zlib_deflate(zstream, Z_FINISH)) { + case Z_OK: /* more space requested */ + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) + return -ENOMEM; + + dst->pages[dst->page_count++] = (void *)page; + zstream->next_out = (void *)page; + zstream->avail_out = PAGE_SIZE; + break; + case Z_STREAM_END: + goto end; + default: /* any error */ + return -EIO; + } + } while (1); + +end: + memset(zstream->next_out, 0, zstream->avail_out); + dst->unused = zstream->avail_out; + return 0; +} + +static void compress_fini(struct compress *c, + struct drm_i915_error_object *dst) +{ + struct z_stream_s *zstream = &c->zstream;
zlib_deflateEnd(zstream); kfree(zstream->workspace); - if (c->tmp) free_page((unsigned long)c->tmp); } @@ -319,6 +347,12 @@ static int compress_page(struct compress *c, return 0; }
+static int compress_flush(struct compress *c, + struct drm_i915_error_object *dst) +{ + return 0; +} + static void compress_fini(struct compress *c, struct drm_i915_error_object *dst) { @@ -951,15 +985,15 @@ i915_error_object_create(struct drm_i915_private *i915, if (ret) goto unwind; } - goto out;
+ if (compress_flush(&compress, dst)) { unwind: - while (dst->page_count--) - free_page((unsigned long)dst->pages[dst->page_count]); - kfree(dst); - dst = NULL; + while (dst->page_count--) + free_page((unsigned long)dst->pages[dst->page_count]); + kfree(dst); + dst = NULL; + }
-out: compress_fini(&compress, dst); ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); return dst;
The final call to zlib_deflate(Z_FINISH) may require more output space to be allocated and so needs to re-invoked. Failure to do so in the current code leads to incomplete zlib streams (albeit intact due to the use of Z_SYNC_FLUSH) resulting in the occasional short object capture.
v2: Check against overrunning our pre-allocated page array
Testcase: igt/i915-error-capture.js Fixes: 0a97015d45ee ("drm/i915: Compress GPU objects in error state") Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: stable@vger.kernel.org # v4.10+ Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 92 +++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gpu_error.h | 1 + 2 files changed, 68 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3d5554f14dfd..1035298e9a82 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -232,11 +232,26 @@ static bool compress_init(struct compress *c) return true; }
+static void *compress_next_page(struct drm_i915_error_object *dst) +{ + unsigned long page; + + if (dst->page_count >= dst->num_pages) + return ERR_PTR(-ENOSPC); + + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) + return ERR_PTR(-ENOMEM); + + return dst->pages[dst->page_count++] = (void *)page; +} + static int compress_page(struct compress *c, void *src, struct drm_i915_error_object *dst) { struct z_stream_s *zstream = &c->zstream; + int flush = Z_NO_FLUSH;
zstream->next_in = src; if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) @@ -245,20 +260,18 @@ static int compress_page(struct compress *c,
do { if (zstream->avail_out == 0) { - unsigned long page; - - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); - if (!page) - return -ENOMEM; + zstream->next_out = compress_next_page(dst); + if (IS_ERR(zstream->next_out)) + return PTR_ERR(zstream->next_out);
- dst->pages[dst->page_count++] = (void *)page; - - zstream->next_out = (void *)page; zstream->avail_out = PAGE_SIZE; }
- if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK) + if (zlib_deflate(zstream, flush) != Z_OK) return -EIO; + + if (zstream->avail_out) + flush = Z_SYNC_FLUSH; } while (zstream->avail_in);
/* Fallback to uncompressed if we increase size? */ @@ -268,19 +281,42 @@ static int compress_page(struct compress *c, return 0; }
-static void compress_fini(struct compress *c, +static int compress_flush(struct compress *c, struct drm_i915_error_object *dst) { struct z_stream_s *zstream = &c->zstream;
- if (dst) { - zlib_deflate(zstream, Z_FINISH); - dst->unused = zstream->avail_out; - } + do { + switch (zlib_deflate(zstream, Z_FINISH)) { + case Z_OK: /* more space requested */ + zstream->next_out = compress_next_page(dst); + if (IS_ERR(zstream->next_out)) + return PTR_ERR(zstream->next_out); + + zstream->avail_out = PAGE_SIZE; + break; + + case Z_STREAM_END: + goto end; + + default: /* any error */ + return -EIO; + } + } while (1); + +end: + memset(zstream->next_out, 0, zstream->avail_out); + dst->unused = zstream->avail_out; + return 0; +} + +static void compress_fini(struct compress *c, + struct drm_i915_error_object *dst) +{ + struct z_stream_s *zstream = &c->zstream;
zlib_deflateEnd(zstream); kfree(zstream->workspace); - if (c->tmp) free_page((unsigned long)c->tmp); } @@ -319,6 +355,12 @@ static int compress_page(struct compress *c, return 0; }
+static int compress_flush(struct compress *c, + struct drm_i915_error_object *dst) +{ + return 0; +} + static void compress_fini(struct compress *c, struct drm_i915_error_object *dst) { @@ -917,6 +959,7 @@ i915_error_object_create(struct drm_i915_private *i915, unsigned long num_pages; struct sgt_iter iter; dma_addr_t dma; + int ret;
if (!vma) return NULL; @@ -930,6 +973,7 @@ i915_error_object_create(struct drm_i915_private *i915,
dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; + dst->num_pages = num_pages; dst->page_count = 0; dst->unused = 0;
@@ -938,28 +982,26 @@ i915_error_object_create(struct drm_i915_private *i915, return NULL; }
+ ret = -EINVAL; for_each_sgt_dma(dma, iter, vma->pages) { void __iomem *s; - int ret;
ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);
s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s); - if (ret) - goto unwind; + break; } - goto out;
-unwind: - while (dst->page_count--) - free_page((unsigned long)dst->pages[dst->page_count]); - kfree(dst); - dst = NULL; + if (ret || compress_flush(&compress, dst)) { + while (dst->page_count--) + free_page((unsigned long)dst->pages[dst->page_count]); + kfree(dst); + dst = NULL; + }
-out: compress_fini(&compress, dst); ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); return dst; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index f893a4e8b783..8710fb18ed74 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -135,6 +135,7 @@ struct i915_gpu_state { struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; + int num_pages; int page_count; int unused; u32 *pages[0];
The final call to zlib_deflate(Z_FINISH) may require more output space to be allocated and so needs to re-invoked. Failure to do so in the current code leads to incomplete zlib streams (albeit intact due to the use of Z_SYNC_FLUSH) resulting in the occasional short object capture.
v2: Check against overrunning our pre-allocated page array v3: Drop Z_SYNC_FLUSH entirely
Testcase: igt/i915-error-capture.js Fixes: 0a97015d45ee ("drm/i915: Compress GPU objects in error state") Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: stable@vger.kernel.org # v4.10+ Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 88 +++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gpu_error.h | 1 + 2 files changed, 64 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3d5554f14dfd..705ff122100f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -232,6 +232,20 @@ static bool compress_init(struct compress *c) return true; }
+static void *compress_next_page(struct drm_i915_error_object *dst) +{ + unsigned long page; + + if (dst->page_count >= dst->num_pages) + return ERR_PTR(-ENOSPC); + + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) + return ERR_PTR(-ENOMEM); + + return dst->pages[dst->page_count++] = (void *)page; +} + static int compress_page(struct compress *c, void *src, struct drm_i915_error_object *dst) @@ -245,19 +259,14 @@ static int compress_page(struct compress *c,
do { if (zstream->avail_out == 0) { - unsigned long page; - - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); - if (!page) - return -ENOMEM; + zstream->next_out = compress_next_page(dst); + if (IS_ERR(zstream->next_out)) + return PTR_ERR(zstream->next_out);
- dst->pages[dst->page_count++] = (void *)page; - - zstream->next_out = (void *)page; zstream->avail_out = PAGE_SIZE; }
- if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK) + if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; } while (zstream->avail_in);
@@ -268,19 +277,42 @@ static int compress_page(struct compress *c, return 0; }
-static void compress_fini(struct compress *c, +static int compress_flush(struct compress *c, struct drm_i915_error_object *dst) { struct z_stream_s *zstream = &c->zstream;
- if (dst) { - zlib_deflate(zstream, Z_FINISH); - dst->unused = zstream->avail_out; - } + do { + switch (zlib_deflate(zstream, Z_FINISH)) { + case Z_OK: /* more space requested */ + zstream->next_out = compress_next_page(dst); + if (IS_ERR(zstream->next_out)) + return PTR_ERR(zstream->next_out); + + zstream->avail_out = PAGE_SIZE; + break; + + case Z_STREAM_END: + goto end; + + default: /* any error */ + return -EIO; + } + } while (1); + +end: + memset(zstream->next_out, 0, zstream->avail_out); + dst->unused = zstream->avail_out; + return 0; +} + +static void compress_fini(struct compress *c, + struct drm_i915_error_object *dst) +{ + struct z_stream_s *zstream = &c->zstream;
zlib_deflateEnd(zstream); kfree(zstream->workspace); - if (c->tmp) free_page((unsigned long)c->tmp); } @@ -319,6 +351,12 @@ static int compress_page(struct compress *c, return 0; }
+static int compress_flush(struct compress *c, + struct drm_i915_error_object *dst) +{ + return 0; +} + static void compress_fini(struct compress *c, struct drm_i915_error_object *dst) { @@ -917,6 +955,7 @@ i915_error_object_create(struct drm_i915_private *i915, unsigned long num_pages; struct sgt_iter iter; dma_addr_t dma; + int ret;
if (!vma) return NULL; @@ -930,6 +969,7 @@ i915_error_object_create(struct drm_i915_private *i915,
dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; + dst->num_pages = num_pages; dst->page_count = 0; dst->unused = 0;
@@ -938,28 +978,26 @@ i915_error_object_create(struct drm_i915_private *i915, return NULL; }
+ ret = -EINVAL; for_each_sgt_dma(dma, iter, vma->pages) { void __iomem *s; - int ret;
ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);
s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s); - if (ret) - goto unwind; + break; } - goto out;
-unwind: - while (dst->page_count--) - free_page((unsigned long)dst->pages[dst->page_count]); - kfree(dst); - dst = NULL; + if (ret || compress_flush(&compress, dst)) { + while (dst->page_count--) + free_page((unsigned long)dst->pages[dst->page_count]); + kfree(dst); + dst = NULL; + }
-out: compress_fini(&compress, dst); ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); return dst; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index f893a4e8b783..8710fb18ed74 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -135,6 +135,7 @@ struct i915_gpu_state { struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; + int num_pages; int page_count; int unused; u32 *pages[0];
On 03/10/2018 09:24, Chris Wilson wrote:
The final call to zlib_deflate(Z_FINISH) may require more output space to be allocated and so needs to re-invoked. Failure to do so in the current code leads to incomplete zlib streams (albeit intact due to the use of Z_SYNC_FLUSH) resulting in the occasional short object capture.
v2: Check against overrunning our pre-allocated page array v3: Drop Z_SYNC_FLUSH entirely
Testcase: igt/i915-error-capture.js Fixes: 0a97015d45ee ("drm/i915: Compress GPU objects in error state") Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: stable@vger.kernel.org # v4.10+ Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com
drivers/gpu/drm/i915/i915_gpu_error.c | 88 +++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gpu_error.h | 1 + 2 files changed, 64 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3d5554f14dfd..705ff122100f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -232,6 +232,20 @@ static bool compress_init(struct compress *c) return true; } +static void *compress_next_page(struct drm_i915_error_object *dst) +{
- unsigned long page;
- if (dst->page_count >= dst->num_pages)
return ERR_PTR(-ENOSPC);
- page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
- if (!page)
return ERR_PTR(-ENOMEM);
- return dst->pages[dst->page_count++] = (void *)page;
+}
- static int compress_page(struct compress *c, void *src, struct drm_i915_error_object *dst)
@@ -245,19 +259,14 @@ static int compress_page(struct compress *c, do { if (zstream->avail_out == 0) {
unsigned long page;
page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
if (!page)
return -ENOMEM;
zstream->next_out = compress_next_page(dst);
if (IS_ERR(zstream->next_out))
return PTR_ERR(zstream->next_out);
dst->pages[dst->page_count++] = (void *)page;
}zstream->next_out = (void *)page; zstream->avail_out = PAGE_SIZE;
if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
} while (zstream->avail_in);if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO;
@@ -268,19 +277,42 @@ static int compress_page(struct compress *c, return 0; } -static void compress_fini(struct compress *c, +static int compress_flush(struct compress *c, struct drm_i915_error_object *dst) { struct z_stream_s *zstream = &c->zstream;
- if (dst) {
zlib_deflate(zstream, Z_FINISH);
dst->unused = zstream->avail_out;
- }
- do {
switch (zlib_deflate(zstream, Z_FINISH)) {
case Z_OK: /* more space requested */
zstream->next_out = compress_next_page(dst);
if (IS_ERR(zstream->next_out))
return PTR_ERR(zstream->next_out);
zstream->avail_out = PAGE_SIZE;
break;
case Z_STREAM_END:
goto end;
default: /* any error */
return -EIO;
}
- } while (1);
+end:
- memset(zstream->next_out, 0, zstream->avail_out);
- dst->unused = zstream->avail_out;
- return 0;
+}
+static void compress_fini(struct compress *c,
struct drm_i915_error_object *dst)
+{
- struct z_stream_s *zstream = &c->zstream;
zlib_deflateEnd(zstream); kfree(zstream->workspace);
- if (c->tmp) free_page((unsigned long)c->tmp); }
@@ -319,6 +351,12 @@ static int compress_page(struct compress *c, return 0; } +static int compress_flush(struct compress *c,
struct drm_i915_error_object *dst)
+{
- return 0;
+}
- static void compress_fini(struct compress *c, struct drm_i915_error_object *dst) {
@@ -917,6 +955,7 @@ i915_error_object_create(struct drm_i915_private *i915, unsigned long num_pages; struct sgt_iter iter; dma_addr_t dma;
- int ret;
if (!vma) return NULL; @@ -930,6 +969,7 @@ i915_error_object_create(struct drm_i915_private *i915, dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size;
- dst->num_pages = num_pages; dst->page_count = 0; dst->unused = 0;
@@ -938,28 +978,26 @@ i915_error_object_create(struct drm_i915_private *i915, return NULL; }
- ret = -EINVAL; for_each_sgt_dma(dma, iter, vma->pages) > void __iomem *s;
int ret;
ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s);
- if (ret)
goto unwind;
}break;
- goto out;
-unwind:
- while (dst->page_count--)
free_page((unsigned long)dst->pages[dst->page_count]);
- kfree(dst);
- dst = NULL;
- if (ret || compress_flush(&compress, dst)) {
while (dst->page_count--)
free_page((unsigned long)dst->pages[dst->page_count]);
kfree(dst);
dst = NULL;
- }
-out: compress_fini(&compress, dst); ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); return dst; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index f893a4e8b783..8710fb18ed74 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -135,6 +135,7 @@ struct i915_gpu_state { struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size;
int num_pages; int page_count; int unused; u32 *pages[0];
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Regards,
Tvrtko
Quoting Tvrtko Ursulin (2018-10-03 10:04:04)
On 03/10/2018 09:24, Chris Wilson wrote:
The final call to zlib_deflate(Z_FINISH) may require more output space to be allocated and so needs to re-invoked. Failure to do so in the current code leads to incomplete zlib streams (albeit intact due to the use of Z_SYNC_FLUSH) resulting in the occasional short object capture.
v2: Check against overrunning our pre-allocated page array v3: Drop Z_SYNC_FLUSH entirely
Testcase: igt/i915-error-capture.js Fixes: 0a97015d45ee ("drm/i915: Compress GPU objects in error state") Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: stable@vger.kernel.org # v4.10+ Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Ta, thanks for the review. gem_exec_capture with several million objects/pages was happy so pushed.
Now, we just need the O(N^2) fix so that gem_exec_capture runs in a few minutes and not a few _decades_. -Chris
linux-stable-mirror@lists.linaro.org