sched: adjust when cpu_active and cpuset configurations are updated during cpu on/offlining
Currently, when a cpu goes down, cpu_active is cleared before CPU_DOWN_PREPARE starts and the cpuset configuration is updated from a default-priority cpu notifier. When a cpu is coming up, cpu_active is set before CPU_ONLINE, but the cpuset configuration is again updated from that same notifier.

For cpu notifiers, this presents an inconsistent state. Threads which a CPU_DOWN_PREPARE notifier expects to be bound to the CPU can be migrated to other cpus because the cpu is no longer inactive.

Fix it by updating cpu_active in the highest-priority cpu notifier and the cpuset configuration in the second highest when a cpu is coming up. The down path is updated similarly. This guarantees that all other cpu notifiers see consistent cpu_active and cpuset configuration.

The cpuset_track_online_cpus() notifier is converted to cpuset_update_active_cpus(), which just updates the configuration and is now called from the cpuset_cpu_[in]active() notifiers registered from sched_init_smp(). If cpuset is disabled, cpuset_update_active_cpus() degenerates into partition_sched_domains(), making a separate notifier for !CONFIG_CPUSETS unnecessary.

This problem is triggered by cmwq. During CPU_DOWN_PREPARE, the hotplug callback creates a kthread and kthread_bind()s it to the target cpu, and the thread is expected to run on that cpu.

* Ingo's test discovered that the __cpuinit/exit markups were incorrect. Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Menage <menage@google.com>
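The diff below only shows the kernel/cpuset.c side of the conversion. The cpuset_cpu_[in]active() notifiers the message refers to live in kernel/sched.c and are not part of this excerpt; the following is a hedged sketch of their likely shape based solely on the description above. The priority constant names CPU_PRI_CPUSET_ACTIVE / CPU_PRI_CPUSET_INACTIVE stand in for "second highest / second lowest notifier priority" and are illustrative, not taken from the excerpt.

static int cpuset_cpu_active(struct notifier_block *nfb,
			     unsigned long action, void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_DOWN_FAILED:
		/*
		 * cpu_active has already been updated by the
		 * highest-priority scheduler notifier; refresh the
		 * cpuset configuration to match.
		 */
		cpuset_update_active_cpus();
		return NOTIFY_OK;
	default:
		return NOTIFY_DONE;
	}
}

static int cpuset_cpu_inactive(struct notifier_block *nfb,
			       unsigned long action, void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_DOWN_PREPARE:
		cpuset_update_active_cpus();
		return NOTIFY_OK;
	default:
		return NOTIFY_DONE;
	}
}

/*
 * Registered from sched_init_smp(), ordered right after the notifiers
 * that update cpu_active itself (priority names are illustrative):
 *
 *	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
 *	hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
 */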
@@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
  * but making no active use of cpusets.
  *
  * This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_online_map on each CPU hotplug (cpuhp) event.
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
  * Called within get_online_cpus(). Needs to call cgroup_lock()
  * before calling generate_sched_domains().
  */
-static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
-				    unsigned long phase, void *unused_cpu)
+void __cpuexit cpuset_update_active_cpus(void)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
 	int ndoms;
 
-	switch (phase) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
 	cgroup_lock();
 	mutex_lock(&callback_mutex);
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
@@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
-
-	return NOTIFY_OK;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
-	hotcpu_notifier(cpuset_track_online_cpus, 0);
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
 
 	cpuset_wq = create_singlethread_workqueue("cpuset");
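As the commit message notes, with CONFIG_CPUSETS disabled the new helper degenerates into a plain sched-domain rebuild, so no separate notifier is needed for that configuration. The header side of that fallback is not part of this excerpt; a minimal sketch of what the !CONFIG_CPUSETS stub in include/linux/cpuset.h could look like is:

/*
 * Sketch only: with cpusets disabled there is just the single default
 * sched domain, so "updating the cpuset configuration" reduces to
 * rebuilding that domain.
 */
#ifndef CONFIG_CPUSETS
static inline void cpuset_update_active_cpus(void)
{
	partition_sched_domains(1, NULL, NULL);
}
#endif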