When the xe buffer-object shrinker allows GPU waits and write-back, (typically from kswapd), perform multiple passes, skipping subsequent passes if the shrinker number of scanned objects target is reached.
1) Without GPU waits and write-back 2) Without write-back 3) With both GPU-waits and write-back
This is to avoid stalls and costly write- and readbacks unless they are really necessary.
v2: - Don't test for scan completion twice. (Stuart Summers) - Update tags.
Reported-by: melvyn melvyn2@dnsense.pub Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557 Cc: Summers Stuart stuart.summers@intel.com Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos") Cc: stable@vger.kernel.org # v6.15+ Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 1c3c04d52f55..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); }
-static s64 xe_shrinker_walk(struct xe_device *xe, - struct ttm_operation_ctx *ctx, - const struct xe_bo_shrink_flags flags, - unsigned long to_scan, unsigned long *scanned) +static s64 __xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) { unsigned int mem_type; s64 freed = 0, lret; @@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe, return freed; }
+/* + * Try shrinking idle objects without writeback first, then if not sufficient, + * try also non-idle objects and finally if that's not sufficient either, + * add writeback. This avoids stalls and explicit writebacks with light or + * moderate memory pressure. + */ +static s64 xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) +{ + bool no_wait_gpu = true; + struct xe_bo_shrink_flags save_flags = flags; + s64 lret, freed; + + swap(no_wait_gpu, ctx->no_wait_gpu); + save_flags.writeback = false; + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + swap(no_wait_gpu, ctx->no_wait_gpu); + if (lret < 0 || *scanned >= to_scan) + return lret; + + freed = lret; + if (!ctx->no_wait_gpu) { + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + if (*scanned >= to_scan) + return freed; + } + + if (flags.writeback) { + lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + } + + return freed; +} + static unsigned long xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { @@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup);
shrink_flags.purge = false; + lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0)
On 05/08/2025 08:48, Thomas Hellström wrote:
When the xe buffer-object shrinker allows GPU waits and write-back, (typically from kswapd), perform multiple passes, skipping subsequent passes if the shrinker number of scanned objects target is reached.
- Without GPU waits and write-back
- Without write-back
- With both GPU-waits and write-back
This is to avoid stalls and costly write- and readbacks unless they are really necessary.
v2:
- Don't test for scan completion twice. (Stuart Summers)
- Update tags.
Reported-by: melvyn melvyn2@dnsense.pub Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557 Cc: Summers Stuart stuart.summers@intel.com Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos") Cc: stable@vger.kernel.org # v6.15+ Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 1c3c04d52f55..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); } -static s64 xe_shrinker_walk(struct xe_device *xe,
struct ttm_operation_ctx *ctx,
const struct xe_bo_shrink_flags flags,
unsigned long to_scan, unsigned long *scanned)
+static s64 __xe_shrinker_walk(struct xe_device *xe,
struct ttm_operation_ctx *ctx,
const struct xe_bo_shrink_flags flags,
{ unsigned int mem_type; s64 freed = 0, lret;unsigned long to_scan, unsigned long *scanned)
@@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe, return freed; } +/*
- Try shrinking idle objects without writeback first, then if not sufficient,
- try also non-idle objects and finally if that's not sufficient either,
- add writeback. This avoids stalls and explicit writebacks with light or
- moderate memory pressure.
Just one question here, with writeback=false it doesn't really influence which objects are chosen for shrinking, unlike with no_wait_gpu, right? Will having another pass just with writeback=true yield anything different, assuming here that the previous two passes would have already hoovered ~everything up that was a possible candidate, so this pass won't really find anything in practice? If so, does that also mean we never really end up using the writeback=true behaviour any more?
- */
+static s64 xe_shrinker_walk(struct xe_device *xe,
struct ttm_operation_ctx *ctx,
const struct xe_bo_shrink_flags flags,
unsigned long to_scan, unsigned long *scanned)
+{
- bool no_wait_gpu = true;
- struct xe_bo_shrink_flags save_flags = flags;
- s64 lret, freed;
- swap(no_wait_gpu, ctx->no_wait_gpu);
- save_flags.writeback = false;
- lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
- swap(no_wait_gpu, ctx->no_wait_gpu);
- if (lret < 0 || *scanned >= to_scan)
return lret;
- freed = lret;
- if (!ctx->no_wait_gpu) {
lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
if (lret < 0)
return lret;
freed += lret;
if (*scanned >= to_scan)
return freed;
- }
- if (flags.writeback) {
lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned);
if (lret < 0)
return lret;
freed += lret;
- }
- return freed;
+}
- static unsigned long xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) {
@@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup); shrink_flags.purge = false;
- lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0)
On Tue, 2025-08-05 at 09:48 +0200, Thomas Hellström wrote:
When the xe buffer-object shrinker allows GPU waits and write-back, (typically from kswapd), perform multiple passes, skipping subsequent passes if the shrinker number of scanned objects target is reached.
- Without GPU waits and write-back
- Without write-back
- With both GPU-waits and write-back
This is to avoid stalls and costly write- and readbacks unless they are really necessary.
v2:
- Don't test for scan completion twice. (Stuart Summers)
- Update tags.
Reported-by: melvyn melvyn2@dnsense.pub Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557 Cc: Summers Stuart stuart.summers@intel.com Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos") Cc: stable@vger.kernel.org # v6.15+ Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
Changes look good to me, thanks Thomas. Before merging, please address the feedback from Matt as well. Reviewed-by: Stuart Summers stuart.summers@intel.com
Thanks, Stuart
drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 1c3c04d52f55..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); } -static s64 xe_shrinker_walk(struct xe_device *xe, - struct ttm_operation_ctx *ctx, - const struct xe_bo_shrink_flags flags, - unsigned long to_scan, unsigned long *scanned) +static s64 __xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) { unsigned int mem_type; s64 freed = 0, lret; @@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe, return freed; } +/*
- Try shrinking idle objects without writeback first, then if not
sufficient,
- try also non-idle objects and finally if that's not sufficient
either,
- add writeback. This avoids stalls and explicit writebacks with
light or
- moderate memory pressure.
- */
+static s64 xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) +{ + bool no_wait_gpu = true; + struct xe_bo_shrink_flags save_flags = flags; + s64 lret, freed;
+ swap(no_wait_gpu, ctx->no_wait_gpu); + save_flags.writeback = false; + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + swap(no_wait_gpu, ctx->no_wait_gpu); + if (lret < 0 || *scanned >= to_scan) + return lret;
+ freed = lret; + if (!ctx->no_wait_gpu) { + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + if (*scanned >= to_scan) + return freed; + }
+ if (flags.writeback) { + lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + }
+ return freed; +}
static unsigned long xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { @@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup); shrink_flags.purge = false;
lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0)
linux-stable-mirror@lists.linaro.org