sched: _cpu_down(): Don't play with current->cpus_allowed
_cpu_down() changes the current task's affinity and then restores it at the end. The problems are well known: we can't restore old_allowed if the task was bound to the now-dead CPU, and we can race with userspace, which can change the CPU affinity during unplug. _cpu_down() should not play with current->cpus_allowed at all. Instead, take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable() removes the dying CPU from cpu_online_mask. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Rafael J. Wysocki <rjw@sisk.pl> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <20100315091023.GA9148@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
committed by
Ingo Molnar
parent
30da688ef6
commit
6a1bdc1b57
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event(void);
|
|||||||
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
|
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
|
||||||
|
|
||||||
#ifdef CONFIG_HOTPLUG_CPU
|
#ifdef CONFIG_HOTPLUG_CPU
|
||||||
|
extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
|
||||||
extern void idle_task_exit(void);
|
extern void idle_task_exit(void);
|
||||||
#else
|
#else
|
||||||
static inline void idle_task_exit(void) {}
|
static inline void idle_task_exit(void) {}
|
||||||
|
18
kernel/cpu.c
18
kernel/cpu.c
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int cpu)
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct take_cpu_down_param {
|
struct take_cpu_down_param {
|
||||||
|
struct task_struct *caller;
|
||||||
unsigned long mod;
|
unsigned long mod;
|
||||||
void *hcpu;
|
void *hcpu;
|
||||||
};
|
};
|
||||||
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
|
|||||||
static int __ref take_cpu_down(void *_param)
|
static int __ref take_cpu_down(void *_param)
|
||||||
{
|
{
|
||||||
struct take_cpu_down_param *param = _param;
|
struct take_cpu_down_param *param = _param;
|
||||||
|
unsigned int cpu = (unsigned long)param->hcpu;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
/* Ensure this CPU doesn't handle any more interrupts. */
|
/* Ensure this CPU doesn't handle any more interrupts. */
|
||||||
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_param)
|
|||||||
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
|
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
|
||||||
param->hcpu);
|
param->hcpu);
|
||||||
|
|
||||||
|
if (task_cpu(param->caller) == cpu)
|
||||||
|
move_task_off_dead_cpu(cpu, param->caller);
|
||||||
/* Force idle task to run as soon as we yield: it should
|
/* Force idle task to run as soon as we yield: it should
|
||||||
immediately notice cpu is offline and die quickly. */
|
immediately notice cpu is offline and die quickly. */
|
||||||
sched_idle_next();
|
sched_idle_next();
|
||||||
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_param)
|
|||||||
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||||
{
|
{
|
||||||
int err, nr_calls = 0;
|
int err, nr_calls = 0;
|
||||||
cpumask_var_t old_allowed;
|
|
||||||
void *hcpu = (void *)(long)cpu;
|
void *hcpu = (void *)(long)cpu;
|
||||||
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
|
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
|
||||||
struct take_cpu_down_param tcd_param = {
|
struct take_cpu_down_param tcd_param = {
|
||||||
|
.caller = current,
|
||||||
.mod = mod,
|
.mod = mod,
|
||||||
.hcpu = hcpu,
|
.hcpu = hcpu,
|
||||||
};
|
};
|
||||||
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
|||||||
if (!cpu_online(cpu))
|
if (!cpu_online(cpu))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
cpu_hotplug_begin();
|
cpu_hotplug_begin();
|
||||||
set_cpu_active(cpu, false);
|
set_cpu_active(cpu, false);
|
||||||
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
|
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
|
||||||
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
|||||||
goto out_release;
|
goto out_release;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ensure that we are not runnable on dying cpu */
|
|
||||||
cpumask_copy(old_allowed, &current->cpus_allowed);
|
|
||||||
set_cpus_allowed_ptr(current, cpu_active_mask);
|
|
||||||
|
|
||||||
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
|
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
|
||||||
if (err) {
|
if (err) {
|
||||||
set_cpu_active(cpu, true);
|
set_cpu_active(cpu, true);
|
||||||
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
|||||||
hcpu) == NOTIFY_BAD)
|
hcpu) == NOTIFY_BAD)
|
||||||
BUG();
|
BUG();
|
||||||
|
|
||||||
goto out_allowed;
|
goto out_release;
|
||||||
}
|
}
|
||||||
BUG_ON(cpu_online(cpu));
|
BUG_ON(cpu_online(cpu));
|
||||||
|
|
||||||
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
|||||||
|
|
||||||
check_for_tasks(cpu);
|
check_for_tasks(cpu);
|
||||||
|
|
||||||
out_allowed:
|
|
||||||
set_cpus_allowed_ptr(current, old_allowed);
|
|
||||||
out_release:
|
out_release:
|
||||||
cpu_hotplug_done();
|
cpu_hotplug_done();
|
||||||
if (!err) {
|
if (!err) {
|
||||||
@@ -263,7 +258,6 @@ out_release:
|
|||||||
hcpu) == NOTIFY_BAD)
|
hcpu) == NOTIFY_BAD)
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
free_cpumask_var(old_allowed);
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -5442,7 +5442,7 @@ static int migration_thread(void *data)
|
|||||||
/*
|
/*
|
||||||
* Figure out where task on dead CPU should go, use force if necessary.
|
* Figure out where task on dead CPU should go, use force if necessary.
|
||||||
*/
|
*/
|
||||||
static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
|
void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
|
||||||
{
|
{
|
||||||
struct rq *rq = cpu_rq(dead_cpu);
|
struct rq *rq = cpu_rq(dead_cpu);
|
||||||
int needs_cpu, uninitialized_var(dest_cpu);
|
int needs_cpu, uninitialized_var(dest_cpu);
|
||||||
|
Reference in New Issue
Block a user