4.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: Prateek Sood <prsood@codeaurora.org>
commit aa24163b2ee5c92120e32e99b5a93143a0f4258e upstream.
Remove a circular-dependency deadlock that occurs when a CPU is being hotplugged while cgroup and cpuset updates are triggered from userspace.
Process A => kthreadd => Process B => Process C => Process A
Process A
  cpu_subsys_offline();
  cpu_down();
  _cpu_down();
  percpu_down_write(&cpu_hotplug_lock); //held
  cpuhp_invoke_callback();
  workqueue_offline_cpu();
  queue_work_on(); // unbind_work on system_highpri_wq
  __queue_work();
  insert_work();
  wake_up_worker();
  flush_work();
  wait_for_completion();

  worker_thread();
  manage_workers();
  create_worker();
  kthread_create_on_node();
  wake_up_process(kthreadd_task);

kthreadd
  kthreadd();
  kernel_thread();
  do_fork();
  copy_process();
  percpu_down_read(&cgroup_threadgroup_rwsem);
  __rwsem_down_read_failed_common(); //waiting

Process B
  kernfs_fop_write();
  cgroup_file_write();
  cgroup_procs_write();
  percpu_down_write(&cgroup_threadgroup_rwsem); //held
  cgroup_attach_task();
  cgroup_migrate();
  cgroup_migrate_execute();
  cpuset_can_attach();
  mutex_lock(&cpuset_mutex); //waiting

Process C
  kernfs_fop_write();
  cgroup_file_write();
  cpuset_write_resmask();
  mutex_lock(&cpuset_mutex); //held
  update_cpumask();
  update_cpumasks_hier();
  rebuild_sched_domains_locked();
  get_online_cpus();
  percpu_down_read(&cpu_hotplug_lock); //waiting
Eliminate the deadlock by reversing the locking order of cpuset_mutex and cpu_hotplug_lock: cpu_hotplug_lock is now taken (for reading) before cpuset_mutex.
Signed-off-by: Prateek Sood <prsood@codeaurora.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
--- kernel/cgroup/cpuset.c | 53 +++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 23 deletions(-)
--- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -817,6 +817,18 @@ done: return ndoms; }
+static void cpuset_sched_change_begin(void) +{ + cpus_read_lock(); + mutex_lock(&cpuset_mutex); +} + +static void cpuset_sched_change_end(void) +{ + mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); +} + /* * Rebuild scheduler domains. * @@ -826,16 +838,14 @@ done: * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_mutex held. Takes get_online_cpus(). */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_cpuslocked(void) { struct sched_domain_attr *attr; cpumask_var_t *doms; int ndoms;
lockdep_assert_held(&cpuset_mutex); - get_online_cpus();
/* * We have raced with CPU hotplug. Don't do anything to avoid @@ -843,27 +853,25 @@ static void rebuild_sched_domains_locked * Anyways, hotplug work item will rebuild sched domains. */ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) - goto out; + return;
/* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr);
/* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); -out: - put_online_cpus(); } #else /* !CONFIG_SMP */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_cpuslocked(void) { } #endif /* CONFIG_SMP */
void rebuild_sched_domains(void) { - mutex_lock(&cpuset_mutex); - rebuild_sched_domains_locked(); - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_begin(); + rebuild_sched_domains_cpuslocked(); + cpuset_sched_change_end(); }
/** @@ -949,7 +957,7 @@ static void update_cpumasks_hier(struct rcu_read_unlock();
if (need_rebuild_sched_domains) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); }
/** @@ -1281,7 +1289,7 @@ static int update_relax_domain_level(str cs->relax_domain_level = val; if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); }
return 0; @@ -1314,7 +1322,6 @@ static void update_tasks_flags(struct cp * * Call with cpuset_mutex held. */ - static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on) { @@ -1347,7 +1354,7 @@ static int update_flag(cpuset_flagbits_t spin_unlock_irq(&callback_lock);
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked();
if (spread_flag_changed) update_tasks_flags(cs); @@ -1615,7 +1622,7 @@ static int cpuset_write_u64(struct cgrou cpuset_filetype_t type = cft->private; int retval = 0;
- mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; @@ -1651,7 +1658,7 @@ static int cpuset_write_u64(struct cgrou break; } out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); return retval; }
@@ -1662,7 +1669,7 @@ static int cpuset_write_s64(struct cgrou cpuset_filetype_t type = cft->private; int retval = -ENODEV;
- mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) goto out_unlock;
@@ -1675,7 +1682,7 @@ static int cpuset_write_s64(struct cgrou break; } out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); return retval; }
@@ -1714,7 +1721,7 @@ static ssize_t cpuset_write_resmask(stru kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work);
- mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) goto out_unlock;
@@ -1738,7 +1745,7 @@ static ssize_t cpuset_write_resmask(stru
free_trial_cpuset(trialcs); out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); @@ -2039,14 +2046,14 @@ out_unlock: /* * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains_locked(). + * will call rebuild_sched_domains_cpuslocked(). */
static void cpuset_css_offline(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css);
- mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin();
if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); @@ -2054,7 +2061,7 @@ static void cpuset_css_offline(struct cg cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags);
- mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); }
static void cpuset_css_free(struct cgroup_subsys_state *css)