Merge branches 'sched/urgent' and 'sched/rt' into sched/devel

This commit is contained in:
Ingo Molnar
2008-09-23 16:23:05 +02:00
178 changed files with 1999 additions and 1320 deletions

View File

@@ -843,37 +843,25 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
/**
* update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
* @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
*
* Called with cgroup_mutex held
*
* The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
* calling callback functions for each.
*
* Return 0 if successful, -errno if not.
* No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
* if @heap != NULL.
*/
static int update_tasks_cpumask(struct cpuset *cs)
static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
{
struct cgroup_scanner scan;
struct ptr_heap heap;
int retval;
/*
* cgroup_scan_tasks() will initialize heap->gt for us.
* heap_init() is still needed here for we should not change
* cs->cpus_allowed when heap_init() fails.
*/
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
if (retval)
return retval;
scan.cg = cs->css.cgroup;
scan.test_task = cpuset_test_cpumask;
scan.process_task = cpuset_change_cpumask;
scan.heap = &heap;
retval = cgroup_scan_tasks(&scan);
heap_free(&heap);
return retval;
scan.heap = heap;
cgroup_scan_tasks(&scan);
}
/**
@@ -883,6 +871,7 @@ static int update_tasks_cpumask(struct cpuset *cs)
*/
static int update_cpumask(struct cpuset *cs, const char *buf)
{
struct ptr_heap heap;
struct cpuset trialcs;
int retval;
int is_load_balanced;
@@ -917,6 +906,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
return 0;
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
if (retval)
return retval;
is_load_balanced = is_sched_load_balance(&trialcs);
mutex_lock(&callback_mutex);
@@ -927,9 +920,9 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
* Scan tasks in the cpuset, and update the cpumasks of any
* that need an update.
*/
retval = update_tasks_cpumask(cs);
if (retval < 0)
return retval;
update_tasks_cpumask(cs, &heap);
heap_free(&heap);
if (is_load_balanced)
async_rebuild_sched_domains();
@@ -1965,7 +1958,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
nodes_empty(cp->mems_allowed))
remove_tasks_in_empty_cpuset(cp);
else {
update_tasks_cpumask(cp);
update_tasks_cpumask(cp, NULL);
update_tasks_nodemask(cp, &oldmems);
}
}

View File

@@ -204,11 +204,16 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
}
static inline int rt_bandwidth_enabled(void)
{
return sysctl_sched_rt_runtime >= 0;
}
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
ktime_t now;
if (rt_b->rt_runtime == RUNTIME_INF)
if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
return;
if (hrtimer_active(&rt_b->rt_period_timer))
@@ -298,9 +303,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
#endif /* CONFIG_RT_GROUP_SCHED */
#else /* !CONFIG_FAIR_GROUP_SCHED */
#else /* !CONFIG_USER_SCHED */
#define root_task_group init_task_group
#endif /* CONFIG_FAIR_GROUP_SCHED */
#endif /* CONFIG_USER_SCHED */
/* task_group_lock serializes add/remove of task groups and also changes to
* a task group's cpu shares.
@@ -1087,7 +1092,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_DONE;
}
static void init_hrtick(void)
static __init void init_hrtick(void)
{
hotcpu_notifier(hotplug_hrtick, 0);
}
@@ -1380,6 +1385,51 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
update_load_sub(&rq->load, load);
}
#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
typedef int (*tg_visitor)(struct task_group *, void *);
/*
* Iterate the full tree, calling @down when first entering a node and @up when
* leaving it for the final time.
*/
static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
{
struct task_group *parent, *child;
int ret;
rcu_read_lock();
parent = &root_task_group;
down:
ret = (*down)(parent, data);
if (ret)
goto out_unlock;
list_for_each_entry_rcu(child, &parent->children, siblings) {
parent = child;
goto down;
up:
continue;
}
ret = (*up)(parent, data);
if (ret)
goto out_unlock;
child = parent;
parent = parent->parent;
if (parent)
goto up;
out_unlock:
rcu_read_unlock();
return ret;
}
static int tg_nop(struct task_group *tg, void *data)
{
return 0;
}
#endif
#ifdef CONFIG_SMP
static unsigned long source_load(int cpu, int type);
static unsigned long target_load(int cpu, int type);
@@ -1397,37 +1447,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
#ifdef CONFIG_FAIR_GROUP_SCHED
typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
/*
* Iterate the full tree, calling @down when first entering a node and @up when
* leaving it for the final time.
*/
static void
walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
{
struct task_group *parent, *child;
rcu_read_lock();
parent = &root_task_group;
down:
(*down)(parent, cpu, sd);
list_for_each_entry_rcu(child, &parent->children, siblings) {
parent = child;
goto down;
up:
continue;
}
(*up)(parent, cpu, sd);
child = parent;
parent = parent->parent;
if (parent)
goto up;
rcu_read_unlock();
}
static void __set_se_shares(struct sched_entity *se, unsigned long shares);
/*
@@ -1486,11 +1505,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
* This needs to be done in a bottom-up fashion because the rq weight of a
* parent group depends on the shares of its child groups.
*/
static void
tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
static int tg_shares_up(struct task_group *tg, void *data)
{
unsigned long rq_weight = 0;
unsigned long shares = 0;
struct sched_domain *sd = data;
int i;
for_each_cpu_mask(i, sd->span) {
@@ -1515,6 +1534,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
__update_group_shares_cpu(tg, i, shares, rq_weight);
spin_unlock_irqrestore(&rq->lock, flags);
}
return 0;
}
/*
@@ -1522,10 +1543,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
* This needs to be done in a top-down fashion because the load of a child
* group is a fraction of its parents load.
*/
static void
tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
static int tg_load_down(struct task_group *tg, void *data)
{
unsigned long load;
long cpu = (long)data;
if (!tg->parent) {
load = cpu_rq(cpu)->load.weight;
@@ -1536,11 +1557,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
}
tg->cfs_rq[cpu]->h_load = load;
}
static void
tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
{
return 0;
}
static void update_shares(struct sched_domain *sd)
@@ -1550,7 +1568,7 @@ static void update_shares(struct sched_domain *sd)
if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
sd->last_update = now;
walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
walk_tg_tree(tg_nop, tg_shares_up, sd);
}
}
@@ -1561,9 +1579,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
spin_lock(&rq->lock);
}
static void update_h_load(int cpu)
static void update_h_load(long cpu)
{
walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
}
#else
@@ -5171,7 +5189,8 @@ recheck:
* Do not allow realtime tasks into groups that have no runtime
* assigned.
*/
if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
if (rt_bandwidth_enabled() && rt_policy(policy) &&
task_group(p)->rt_bandwidth.rt_runtime == 0)
return -EPERM;
#endif
@@ -8808,75 +8827,79 @@ static DEFINE_MUTEX(rt_constraints_mutex);
static unsigned long to_ratio(u64 period, u64 runtime)
{
if (runtime == RUNTIME_INF)
return 1ULL << 16;
return 1ULL << 20;
return div64_u64(runtime << 16, period);
return div64_u64(runtime << 20, period);
}
#ifdef CONFIG_CGROUP_SCHED
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
struct task_group *tgi, *parent = tg->parent;
unsigned long total = 0;
if (!parent) {
if (global_rt_period() < period)
return 0;
return to_ratio(period, runtime) <
to_ratio(global_rt_period(), global_rt_runtime());
}
if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
return 0;
rcu_read_lock();
list_for_each_entry_rcu(tgi, &parent->children, siblings) {
if (tgi == tg)
continue;
total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
tgi->rt_bandwidth.rt_runtime);
}
rcu_read_unlock();
return total + to_ratio(period, runtime) <=
to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
parent->rt_bandwidth.rt_runtime);
}
#elif defined CONFIG_USER_SCHED
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
struct task_group *tgi;
unsigned long total = 0;
unsigned long global_ratio =
to_ratio(global_rt_period(), global_rt_runtime());
rcu_read_lock();
list_for_each_entry_rcu(tgi, &task_groups, list) {
if (tgi == tg)
continue;
total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
tgi->rt_bandwidth.rt_runtime);
}
rcu_read_unlock();
return total + to_ratio(period, runtime) < global_ratio;
}
#endif
/* Must be called with tasklist_lock held */
static inline int tg_has_rt_tasks(struct task_group *tg)
{
struct task_struct *g, *p;
do_each_thread(g, p) {
if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
return 1;
} while_each_thread(g, p);
return 0;
}
struct rt_schedulable_data {
struct task_group *tg;
u64 rt_period;
u64 rt_runtime;
};
static int tg_schedulable(struct task_group *tg, void *data)
{
struct rt_schedulable_data *d = data;
struct task_group *child;
unsigned long total, sum = 0;
u64 period, runtime;
period = ktime_to_ns(tg->rt_bandwidth.rt_period);
runtime = tg->rt_bandwidth.rt_runtime;
if (tg == d->tg) {
period = d->rt_period;
runtime = d->rt_runtime;
}
if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
return -EBUSY;
total = to_ratio(period, runtime);
list_for_each_entry_rcu(child, &tg->children, siblings) {
period = ktime_to_ns(child->rt_bandwidth.rt_period);
runtime = child->rt_bandwidth.rt_runtime;
if (child == d->tg) {
period = d->rt_period;
runtime = d->rt_runtime;
}
sum += to_ratio(period, runtime);
}
if (sum > total)
return -EINVAL;
return 0;
}
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
struct rt_schedulable_data data = {
.tg = tg,
.rt_period = period,
.rt_runtime = runtime,
};
return walk_tg_tree(tg_schedulable, tg_nop, &data);
}
static int tg_set_bandwidth(struct task_group *tg,
u64 rt_period, u64 rt_runtime)
{
@@ -8884,14 +8907,9 @@ static int tg_set_bandwidth(struct task_group *tg,
mutex_lock(&rt_constraints_mutex);
read_lock(&tasklist_lock);
if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
err = -EBUSY;
err = __rt_schedulable(tg, rt_period, rt_runtime);
if (err)
goto unlock;
}
if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
err = -EINVAL;
goto unlock;
}
spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8964,12 +8982,16 @@ static int sched_rt_global_constraints(void)
u64 rt_runtime, rt_period;
int ret = 0;
if (sysctl_sched_rt_period <= 0)
return -EINVAL;
rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
rt_runtime = tg->rt_bandwidth.rt_runtime;
mutex_lock(&rt_constraints_mutex);
if (!__rt_schedulable(tg, rt_period, rt_runtime))
ret = -EINVAL;
read_lock(&tasklist_lock);
ret = __rt_schedulable(tg, rt_period, rt_runtime);
read_unlock(&tasklist_lock);
mutex_unlock(&rt_constraints_mutex);
return ret;
@@ -8980,6 +9002,9 @@ static int sched_rt_global_constraints(void)
unsigned long flags;
int i;
if (sysctl_sched_rt_period <= 0)
return -EINVAL;
spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
for_each_possible_cpu(i) {
struct rt_rq *rt_rq = &cpu_rq(i)->rt;

View File

@@ -350,6 +350,7 @@ static void __enable_runtime(struct rq *rq)
spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_runtime = rt_b->rt_runtime;
rt_rq->rt_time = 0;
rt_rq->rt_throttled = 0;
spin_unlock(&rt_rq->rt_runtime_lock);
spin_unlock(&rt_b->rt_runtime_lock);
}
@@ -388,7 +389,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
int i, idle = 1;
cpumask_t span;
if (rt_b->rt_runtime == RUNTIME_INF)
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
return 1;
span = sched_rt_period_mask();
@@ -486,6 +487,9 @@ static void update_curr_rt(struct rq *rq)
curr->se.exec_start = rq->clock;
cpuacct_charge(curr, delta_exec);
if (!rt_bandwidth_enabled())
return;
for_each_sched_rt_entity(rt_se) {
rt_rq = rt_rq_of_se(rt_se);

View File

@@ -71,6 +71,16 @@ void clockevents_set_mode(struct clock_event_device *dev,
}
}
/**
* clockevents_shutdown - shutdown the device and clear next_event
* @dev: device to shutdown
*/
void clockevents_shutdown(struct clock_event_device *dev)
{
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
dev->next_event.tv64 = KTIME_MAX;
}
/**
* clockevents_program_event - Reprogram the clock event device.
* @expires: absolute expiry time (monotonic clock)
@@ -206,7 +216,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
if (new) {
BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
clockevents_set_mode(new, CLOCK_EVT_MODE_SHUTDOWN);
clockevents_shutdown(new);
}
local_irq_restore(flags);
}

View File

@@ -236,8 +236,7 @@ static void tick_do_broadcast_on_off(void *why)
if (!cpu_isset(cpu, tick_broadcast_mask)) {
cpu_set(cpu, tick_broadcast_mask);
if (td->mode == TICKDEV_MODE_PERIODIC)
clockevents_set_mode(dev,
CLOCK_EVT_MODE_SHUTDOWN);
clockevents_shutdown(dev);
}
if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
tick_broadcast_force = 1;
@@ -254,7 +253,7 @@ static void tick_do_broadcast_on_off(void *why)
if (cpus_empty(tick_broadcast_mask)) {
if (!bc_stopped)
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
clockevents_shutdown(bc);
} else if (bc_stopped) {
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
@@ -306,7 +305,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
if (bc && cpus_empty(tick_broadcast_mask))
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
clockevents_shutdown(bc);
}
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
@@ -321,7 +320,7 @@ void tick_suspend_broadcast(void)
bc = tick_broadcast_device.evtdev;
if (bc)
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
clockevents_shutdown(bc);
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

View File

@@ -249,7 +249,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
* not give it back to the clockevents layer !
*/
if (tick_is_broadcast_device(curdev)) {
clockevents_set_mode(curdev, CLOCK_EVT_MODE_SHUTDOWN);
clockevents_shutdown(curdev);
curdev = NULL;
}
clockevents_exchange_device(curdev, newdev);
@@ -311,7 +311,7 @@ static void tick_suspend(void)
unsigned long flags;
spin_lock_irqsave(&tick_device_lock, flags);
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
clockevents_shutdown(td->evtdev);
spin_unlock_irqrestore(&tick_device_lock, flags);
}

View File

@@ -10,6 +10,8 @@ extern int tick_do_timer_cpu __read_mostly;
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
extern void clockevents_shutdown(struct clock_event_device *dev);
/*
* NO_HZ / high resolution timer shared code
*/

View File

@@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
{
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
}
static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
@@ -180,7 +180,7 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
unsigned long rt_runtime;
int rc;
sscanf(buf, "%lu", &rt_runtime);
sscanf(buf, "%ld", &rt_runtime);
rc = sched_group_set_rt_runtime(up->tg, rt_runtime);