It should wait all existing dio write IOs before block removal,
otherwise, previous direct write IO may overwrite data in the
block which may be reused by other inode.
Cc: stable(a)vger.kernel.org
Signed-off-by: Chao Yu <chao(a)kernel.org>
---
fs/f2fs/file.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 76a6043caf27..f2d0e0de775f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1056,6 +1056,13 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return err;
}
+ /*
+ * wait for inflight dio, blocks should be removed after
+ * IO completion.
+ */
+ if (attr->ia_size < old_size)
+ inode_dio_wait(inode);
+
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
@@ -1892,6 +1899,12 @@ static long f2fs_fallocate(struct file *file, int mode,
if (ret)
goto out;
+ /*
+ * wait for inflight dio, blocks should be removed after IO
+ * completion.
+ */
+ inode_dio_wait(inode);
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
if (offset >= inode->i_size)
goto out;
--
2.40.1
As we discussed before[1], soft recovery should be
forwarded to userspace, or we can get into a really
bad state where apps will keep submitting hanging
command buffers cascading us to a hard reset.
1: https://lore.kernel.org/all/bf23d5ed-9a6b-43e7-84ee-8cbfd0d60f18@froggi.es/
Signed-off-by: Joshua Ashton <joshua(a)froggi.es>
Cc: Friedrich Vock <friedrich.vock(a)gmx.de>
Cc: Bas Nieuwenhuizen <bas(a)basnieuwenhuizen.nl>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: André Almeida <andrealmeid(a)igalia.com>
Cc: stable(a)vger.kernel.org
---
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4b3000c21ef2..aebf59855e9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -262,9 +262,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
struct dma_fence *fence = NULL;
int r;
- /* Ignore soft recovered fences here */
r = drm_sched_entity_error(s_entity);
- if (r && r != -ENODATA)
+ if (r)
goto error;
if (!fence && job->gang_submit)
--
2.44.0
In _set_opp() we are normally bailing out when trying to set an OPP that is
the current one. This make perfect sense, but becomes a problem when
_set_required_opps() calls it recursively.
More precisely, when a required OPP is being shared by multiple PM domains,
we end up skipping to request the corresponding performance-state for all
of the PM domains, but the first one. Let's fix the problem, by calling
_set_opp_level() from _set_required_opps() instead.
Fixes: e37440e7e2c2 ("OPP: Call dev_pm_opp_set_opp() for required OPPs")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
---
drivers/opp/core.c | 47 +++++++++++++++++++++++-----------------------
1 file changed, 24 insertions(+), 23 deletions(-)
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index cb4611fe1b5b..45eca65f27f9 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1061,6 +1061,28 @@ static int _set_opp_bw(const struct opp_table *opp_table,
return 0;
}
+static int _set_opp_level(struct device *dev, struct opp_table *opp_table,
+ struct dev_pm_opp *opp)
+{
+ unsigned int level = 0;
+ int ret = 0;
+
+ if (opp) {
+ if (opp->level == OPP_LEVEL_UNSET)
+ return 0;
+
+ level = opp->level;
+ }
+
+ /* Request a new performance state through the device's PM domain. */
+ ret = dev_pm_domain_set_performance_state(dev, level);
+ if (ret)
+ dev_err(dev, "Failed to set performance state %u (%d)\n", level,
+ ret);
+
+ return ret;
+}
+
/* This is only called for PM domain for now */
static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
struct dev_pm_opp *opp, bool up)
@@ -1091,7 +1113,8 @@ static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
if (devs[index]) {
required_opp = opp ? opp->required_opps[index] : NULL;
- ret = dev_pm_opp_set_opp(devs[index], required_opp);
+ ret = _set_opp_level(devs[index], opp_table,
+ required_opp);
if (ret)
return ret;
}
@@ -1102,28 +1125,6 @@ static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
return 0;
}
-static int _set_opp_level(struct device *dev, struct opp_table *opp_table,
- struct dev_pm_opp *opp)
-{
- unsigned int level = 0;
- int ret = 0;
-
- if (opp) {
- if (opp->level == OPP_LEVEL_UNSET)
- return 0;
-
- level = opp->level;
- }
-
- /* Request a new performance state through the device's PM domain. */
- ret = dev_pm_domain_set_performance_state(dev, level);
- if (ret)
- dev_err(dev, "Failed to set performance state %u (%d)\n", level,
- ret);
-
- return ret;
-}
-
static void _find_current_opp(struct device *dev, struct opp_table *opp_table)
{
struct dev_pm_opp *opp = ERR_PTR(-ENODEV);
--
2.34.1
Add an explanation for the newly added variable.
Cc: stable(a)vger.kernel.org
Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
Signed-off-by: Daniel Bristot de Oliveira <bristot(a)kernel.org>
---
include/linux/sched.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..abce356932cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -637,6 +637,8 @@ struct sched_dl_entity {
*
* @dl_overrun tells if the task asked to be informed about runtime
* overruns.
+ *
+ * @dl_server tells if this is a server entity.
*/
unsigned int dl_throttled : 1;
unsigned int dl_yielded : 1;
--
2.45.1
From: "Joel Fernandes (Google)" <joel(a)joelfernandes.org>
Paths using put_prev_task_balance() need to do a pick shortly
after. Make sure they also clear the ->dl_server on prev as a
part of that.
Cc: stable(a)vger.kernel.org
Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot(a)kernel.org>
---
kernel/sched/core.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bcf2c4cc0522..08c409457152 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6006,6 +6006,14 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
#endif
put_prev_task(rq, prev);
+
+ /*
+ * We've updated @prev and no longer need the server link, clear it.
+ * Must be done before ->pick_next_task() because that can (re)set
+ * ->dl_server.
+ */
+ if (prev->dl_server)
+ prev->dl_server = NULL;
}
/*
@@ -6049,14 +6057,6 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
restart:
put_prev_task_balance(rq, prev, rf);
- /*
- * We've updated @prev and no longer need the server link, clear it.
- * Must be done before ->pick_next_task() because that can (re)set
- * ->dl_server.
- */
- if (prev->dl_server)
- prev->dl_server = NULL;
-
for_each_class(class) {
p = class->pick_next_task(rq);
if (p)
--
2.45.1
From: Youssef Esmat <youssefesmat(a)google.com>
In case the previous pick was a DL server pick, ->dl_server might be
set. Clear it in the fast path as well.
Cc: stable(a)vger.kernel.org
Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
Signed-off-by: Youssef Esmat <youssefesmat(a)google.com>
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot(a)kernel.org>
---
kernel/sched/core.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 08c409457152..6d01863f93ca 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6044,6 +6044,13 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
p = pick_next_task_idle(rq);
}
+ /*
+ * This is a normal CFS pick, but the previous could be a DL pick.
+ * Clear it as previous is no longer picked.
+ */
+ if (prev->dl_server)
+ prev->dl_server = NULL;
+
/*
* This is the fast path; it cannot be a DL server pick;
* therefore even if @p == @prev, ->dl_server must be NULL.
--
2.45.1
Hello stable folk,
This patch was merged as:
commit 3af7524b1419 ("sched/fair: Use all little CPUs for CPU-bound workloads")
into 6.7, improving the following:
commit 0b0695f2b34a ("sched/fair: Rework load_balance()")
Would it be possible to port it to the 6.1 stable branch ?
The patch should apply cleanly by cherry-picking onto v6.1.94,
Regards,
Pierre
On 12/6/23 10:00, Pierre Gondois wrote:
> Running n CPU-bound tasks on an n CPUs platform:
> - with asymmetric CPU capacity
> - not being a DynamIq system (i.e. having a PKG level sched domain
> without the SD_SHARE_PKG_RESOURCES flag set)
> might result in a task placement where two tasks run on a big CPU
> and none on a little CPU. This placement could be more optimal by
> using all CPUs.
>
> Testing platform:
> Juno-r2:
> - 2 big CPUs (1-2), maximum capacity of 1024
> - 4 little CPUs (0,3-5), maximum capacity of 383
>
> Testing workload ([1]):
> Spawn 6 CPU-bound tasks. During the first 100ms (step 1), each tasks
> is affine to a CPU, except for:
> - one little CPU which is left idle.
> - one big CPU which has 2 tasks affine.
> After the 100ms (step 2), remove the cpumask affinity.
>
> Before patch:
> During step 2, the load balancer running from the idle CPU tags sched
> domains as:
> - little CPUs: 'group_has_spare'. Cf. group_has_capacity() and
> group_is_overloaded(), 3 CPU-bound tasks run on a 4 CPUs
> sched-domain, and the idle CPU provides enough spare capacity
> regarding the imbalance_pct
> - big CPUs: 'group_overloaded'. Indeed, 3 tasks run on a 2 CPUs
> sched-domain, so the following path is used:
> group_is_overloaded()
> \-if (sgs->sum_nr_running <= sgs->group_weight) return true;
>
> The following path which would change the migration type to
> 'migrate_task' is not taken:
> calculate_imbalance()
> \-if (env->idle != CPU_NOT_IDLE && env->imbalance == 0)
> as the local group has some spare capacity, so the imbalance
> is not 0.
>
> The migration type requested is 'migrate_util' and the busiest
> runqueue is the big CPU's runqueue having 2 tasks (each having a
> utilization of 512). The idle little CPU cannot pull one of these
> task as its capacity is too small for the task. The following path
> is used:
> detach_tasks()
> \-case migrate_util:
> \-if (util > env->imbalance) goto next;
>
> After patch:
> As the number of failed balancing attempts grows (with
> 'nr_balance_failed'), progressively make it easier to migrate
> a big task to the idling little CPU. A similar mechanism is
> used for the 'migrate_load' migration type.
>
> Improvement:
> Running the testing workload [1] with the step 2 representing
> a ~10s load for a big CPU:
> Before patch: ~19.3s
> After patch: ~18s (-6.7%)
>
> Similar issue reported at:
> https://lore.kernel.org/lkml/20230716014125.139577-1-qyousef@layalina.io/
>
> v1:
> https://lore.kernel.org/all/20231110125902.2152380-1-pierre.gondois@arm.com/
> v2:
> https://lore.kernel.org/all/20231124153323.3202444-1-pierre.gondois@arm.com/
>
> Suggested-by: Vincent Guittot <vincent.guittot(a)linaro.org>
> Signed-off-by: Pierre Gondois <pierre.gondois(a)arm.com>
> Reviewed-by: Vincent Guittot <vincent.guittot(a)linaro.org>
> Reviewed-by: Dietmar Eggemann <dietmar.eggemann(a)arm.com>
> ---
>
> Notes:
> v2:
> - Used Vincent's approach.
> v3:
> - Updated commit message.
> - Added Reviewed-by tags
>
> kernel/sched/fair.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index d7a3c63a2171..9481b8cff31b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -9060,7 +9060,7 @@ static int detach_tasks(struct lb_env *env)
> case migrate_util:
> util = task_util_est(p);
>
> - if (util > env->imbalance)
> + if (shr_bound(util, env->sd->nr_balance_failed) > env->imbalance)
> goto next;
>
> env->imbalance -= util;
Lima DRM driver uses devfreq to perform DVFS, while using simple_ondemand
devfreq governor by default. This causes driver initialization to fail on
boot when simple_ondemand governor isn't built into the kernel statically,
as a result of the missing module dependency and, consequently, the required
governor module not being included in the initial ramdisk. Thus, let's mark
simple_ondemand governor as a softdep for Lima, to have its kernel module
included in the initial ramdisk.
This is a rather longstanding issue that has forced distributions to build
devfreq governors statically into their kernels, [1][2] or may have forced
some users to introduce unnecessary workarounds.
Having simple_ondemand marked as a softdep for Lima may not resolve this
issue for all Linux distributions. In particular, it will remain unresolved
for the distributions whose utilities for the initial ramdisk generation do
not handle the available softdep information [3] properly yet. However, some
Linux distributions already handle softdeps properly while generating their
initial ramdisks, [4] and this is a prerequisite step in the right direction
for the distributions that don't handle them properly yet.
[1] https://gitlab.manjaro.org/manjaro-arm/packages/core/linux-pinephone/-/blob…
[2] https://gitlab.com/postmarketOS/pmaports/-/blob/7f64e287e7732c9eaa029653e73…
[3] https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git/commit/?id=49d8e0…
[4] https://github.com/archlinux/mkinitcpio/commit/97ac4d37aae084a050be512f6d8f…
Cc: Philip Muller <philm(a)manjaro.org>
Cc: Oliver Smith <ollieparanoid(a)postmarketos.org>
Cc: Daniel Smith <danct12(a)disroot.org>
Cc: stable(a)vger.kernel.org
Fixes: 1996970773a3 ("drm/lima: Add optional devfreq and cooling device support")
Signed-off-by: Dragan Simic <dsimic(a)manjaro.org>
---
drivers/gpu/drm/lima/lima_drv.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
index 739c865b556f..10bce18b7c31 100644
--- a/drivers/gpu/drm/lima/lima_drv.c
+++ b/drivers/gpu/drm/lima/lima_drv.c
@@ -501,3 +501,4 @@ module_platform_driver(lima_platform_driver);
MODULE_AUTHOR("Lima Project Developers");
MODULE_DESCRIPTION("Lima DRM Driver");
MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: governor_simpleondemand");
Panfrost DRM driver uses devfreq to perform DVFS, while using simple_ondemand
devfreq governor by default. This causes driver initialization to fail on
boot when simple_ondemand governor isn't built into the kernel statically,
as a result of the missing module dependency and, consequently, the required
governor module not being included in the initial ramdisk. Thus, let's mark
simple_ondemand governor as a softdep for Panfrost, to have its kernel module
included in the initial ramdisk.
This is a rather longstanding issue that has forced distributions to build
devfreq governors statically into their kernels, [1][2] or has forced users
to introduce some unnecessary workarounds. [3]
For future reference, not having support for the simple_ondemand governor in
the initial ramdisk produces errors in the kernel log similar to these below,
which were taken from a Pine64 RockPro64:
panfrost ff9a0000.gpu: [drm:panfrost_devfreq_init [panfrost]] *ERROR* Couldn't initialize GPU devfreq
panfrost ff9a0000.gpu: Fatal error during GPU init
panfrost: probe of ff9a0000.gpu failed with error -22
Having simple_ondemand marked as a softdep for Panfrost may not resolve this
issue for all Linux distributions. In particular, it will remain unresolved
for the distributions whose utilities for the initial ramdisk generation do
not handle the available softdep information [4] properly yet. However, some
Linux distributions already handle softdeps properly while generating their
initial ramdisks, [5] and this is a prerequisite step in the right direction
for the distributions that don't handle them properly yet.
[1] https://gitlab.manjaro.org/manjaro-arm/packages/core/linux/-/blob/linux61/c…
[2] https://salsa.debian.org/kernel-team/linux/-/merge_requests/1066
[3] https://forum.pine64.org/showthread.php?tid=15458
[4] https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git/commit/?id=49d8e0…
[5] https://github.com/archlinux/mkinitcpio/commit/97ac4d37aae084a050be512f6d8f…
Cc: Diederik de Haas <didi.debian(a)cknow.org>
Cc: Furkan Kardame <f.kardame(a)manjaro.org>
Cc: stable(a)vger.kernel.org
Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
Signed-off-by: Dragan Simic <dsimic(a)manjaro.org>
---
drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index ef9f6c0716d5..149737d7a07e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -828,3 +828,4 @@ module_platform_driver(panfrost_driver);
MODULE_AUTHOR("Panfrost Project Developers");
MODULE_DESCRIPTION("Panfrost DRM Driver");
MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: governor_simpleondemand");