Merge branch 'devel' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into tracing/urgent
@@ -2497,7 +2497,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
list_del(&cgrp->sibling);
spin_lock(&cgrp->dentry->d_lock);
d = dget(cgrp->dentry);
cgrp->dentry = NULL;
spin_unlock(&d->d_lock);

cgroup_d_remove_dir(d);

@@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);

const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
@@ -397,7 +397,7 @@ struct cfs_rq {
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
struct sched_entity *curr, *next;
struct sched_entity *curr, *next, *last;

unsigned long nr_spread_over;

@@ -1805,7 +1805,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
/*
* Buddy candidates are cache hot:
*/
if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
if (sched_feat(CACHE_HOT_BUDDY) &&
(&p->se == cfs_rq_of(&p->se)->next ||
&p->se == cfs_rq_of(&p->se)->last))
return 1;

if (p->sched_class != &fair_sched_class)
@@ -6875,15 +6877,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
struct sched_domain *tmp;

/* Remove the sched domains which do not contribute to scheduling. */
for (tmp = sd; tmp; tmp = tmp->parent) {
for (tmp = sd; tmp; ) {
struct sched_domain *parent = tmp->parent;
if (!parent)
break;

if (sd_parent_degenerate(tmp, parent)) {
tmp->parent = parent->parent;
if (parent->parent)
parent->parent->child = tmp;
}
} else
tmp = tmp->parent;
}

if (sd && sd_degenerate(sd)) {
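The reworked loop above only advances to the parent when nothing was collapsed, so several degenerate ancestors in a row are all removed before moving on. A minimal userspace sketch of that pattern; struct dom, span and parent_degenerate() are stand-ins invented for illustration, not scheduler code:

#include <stdio.h>

/* Stand-in for a sched_domain: only the parent link and a "span" size. */
struct dom {
	int span;
	struct dom *parent;
};

/* A parent adds nothing when it spans no more CPUs than its child. */
static int parent_degenerate(struct dom *d, struct dom *parent)
{
	return parent->span <= d->span;
}

static void collapse(struct dom *sd)
{
	struct dom *tmp;

	for (tmp = sd; tmp; ) {
		struct dom *parent = tmp->parent;

		if (!parent)
			break;

		if (parent_degenerate(tmp, parent)) {
			/* Unlink the parent and re-test against the new one. */
			tmp->parent = parent->parent;
		} else {
			/* Advance only when nothing was removed. */
			tmp = tmp->parent;
		}
	}
}

int main(void)
{
	struct dom d3 = { 8, NULL };
	struct dom d2 = { 4, &d3 };
	struct dom d1 = { 4, &d2 };
	struct dom d0 = { 4, &d1 };
	struct dom *d;

	collapse(&d0);
	for (d = &d0; d; d = d->parent)
		printf("span=%d\n", d->span);	/* prints 4 then 8: d1 and d2 collapsed */
	return 0;
}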
@@ -7672,6 +7676,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
error:
free_sched_groups(cpu_map, tmpmask);
SCHED_CPUMASK_FREE((void *)allmasks);
kfree(rd);
return -ENOMEM;
#endif
}

@@ -341,23 +341,20 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
cfs_rq->rb_leftmost = next_node;
}

if (cfs_rq->next == se)
cfs_rq->next = NULL;

rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
}

static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
{
return cfs_rq->rb_leftmost;
}

static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
{
return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
struct rb_node *left = cfs_rq->rb_leftmost;

if (!left)
return NULL;

return rb_entry(left, struct sched_entity, run_node);
}

static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);

@@ -741,6 +738,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
#endif
}

if (cfs_rq->last == se)
cfs_rq->last = NULL;

if (cfs_rq->next == se)
cfs_rq->next = NULL;

if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
account_entity_dequeue(cfs_rq, se);
@@ -794,24 +797,15 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);

static struct sched_entity *
pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1)
return se;

return cfs_rq->next;
}

static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
struct sched_entity *se = NULL;
struct sched_entity *se = __pick_next_entity(cfs_rq);

if (first_fair(cfs_rq)) {
se = __pick_next_entity(cfs_rq);
se = pick_next(cfs_rq, se);
set_next_entity(cfs_rq, se);
}
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
return cfs_rq->next;

if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
return cfs_rq->last;

return se;
}
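The rewritten pick_next_entity() above prefers the wakeup buddy (cfs_rq->next), then the preempted task (cfs_rq->last), and falls back to the leftmost entity when picking a buddy would be too unfair (wakeup_preempt_entity() returning 1). A small userspace model of that priority order; the entity struct, GRAN and the too_unfair() check are illustrative stand-ins for vruntime and the wakeup granularity test:

#include <stdio.h>
#include <stddef.h>

/* Illustrative stand-in for a sched_entity: only a name and a vruntime. */
struct entity {
	const char *name;
	long long vruntime;
};

#define GRAN 1000	/* stand-in for the wakeup preemption granularity */

/* Mirrors wakeup_preempt_entity() returning 1: buddy is too far ahead. */
static int too_unfair(const struct entity *buddy, const struct entity *leftmost)
{
	return buddy->vruntime - leftmost->vruntime > GRAN;
}

static const struct entity *
pick(const struct entity *leftmost, const struct entity *next,
     const struct entity *last)
{
	if (next && !too_unfair(next, leftmost))
		return next;		/* wakeup buddy first */
	if (last && !too_unfair(last, leftmost))
		return last;		/* then the task that was preempted */
	return leftmost;		/* otherwise strict fairness */
}

int main(void)
{
	struct entity left = { "leftmost", 100 };
	struct entity next = { "next-buddy", 600 };
	struct entity last = { "last-buddy", 5000 };

	printf("%s\n", pick(&left, &next, &last)->name);	/* next-buddy */
	printf("%s\n", pick(&left, NULL, &last)->name);		/* leftmost */
	return 0;
}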
@@ -1325,26 +1319,53 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
return 0;
}

static void set_last_buddy(struct sched_entity *se)
{
for_each_sched_entity(se)
cfs_rq_of(se)->last = se;
}

static void set_next_buddy(struct sched_entity *se)
{
for_each_sched_entity(se)
cfs_rq_of(se)->next = se;
}

/*
* Preempt the current task with a newly woken task if needed:
*/
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
{
struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
struct sched_entity *se = &curr->se, *pse = &p->se;

if (unlikely(rt_prio(p->prio))) {
struct cfs_rq *cfs_rq = task_cfs_rq(curr);

update_rq_clock(rq);
update_curr(cfs_rq);
resched_task(curr);
return;
}

if (unlikely(p->sched_class != &fair_sched_class))
return;

if (unlikely(se == pse))
return;

cfs_rq_of(pse)->next = pse;
/*
* Only set the backward buddy when the current task is still on the
* rq. This can happen when a wakeup gets interleaved with schedule on
* the ->pre_schedule() or idle_balance() point, either of which can
* drop the rq lock.
*
* Also, during early boot the idle thread is in the fair class, for
* obvious reasons its a bad idea to schedule back to the idle thread.
*/
if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
set_last_buddy(se);
set_next_buddy(pse);

/*
* We can come here with TIF_NEED_RESCHED already set from new task

@@ -1396,6 +1417,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)

do {
se = pick_next_entity(cfs_rq);
set_next_entity(cfs_rq, se);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);

@@ -12,3 +12,4 @@ SCHED_FEAT(LB_BIAS, 1)
SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
SCHED_FEAT(ASYM_EFF_LOAD, 1)
SCHED_FEAT(WAKEUP_OVERLAP, 0)
SCHED_FEAT(LAST_BUDDY, 1)
kernel/smp.c

@@ -51,10 +51,6 @@ static void csd_flag_wait(struct call_single_data *data)
{
/* Wait for response */
do {
/*
* We need to see the flags store in the IPI handler
*/
smp_mb();
if (!(data->flags & CSD_FLAG_WAIT))
break;
cpu_relax();

@@ -76,6 +72,11 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
list_add_tail(&data->list, &dst->list);
spin_unlock_irqrestore(&dst->lock, flags);

/*
* Make the list addition visible before sending the ipi.
*/
smp_mb();

if (ipi)
arch_send_call_function_single_ipi(cpu);

@@ -157,7 +158,7 @@ void generic_smp_call_function_single_interrupt(void)
* Need to see other stores to list head for checking whether
* list is empty without holding q->lock
*/
smp_mb();
smp_read_barrier_depends();
while (!list_empty(&q->list)) {
unsigned int data_flags;

@@ -191,7 +192,7 @@ void generic_smp_call_function_single_interrupt(void)
/*
* See comment on outer loop
*/
smp_mb();
smp_read_barrier_depends();
}
}

@@ -370,6 +371,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
list_add_tail_rcu(&data->csd.list, &call_function_queue);
spin_unlock_irqrestore(&call_function_lock, flags);

/*
* Make the list addition visible before sending the ipi.
*/
smp_mb();

/* Send a message to all CPUs in the map */
arch_send_call_function_ipi(mask);
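Both smp.c changes follow the same publish/consume pairing: the sender makes the list addition visible with smp_mb() before raising the IPI, and the interrupt handler, which loads the list head and then dereferences it, only needs smp_read_barrier_depends(). A userspace C11 analogue of that ordering; the names are invented for illustration, and release/consume stands in for the kernel barriers:

#include <stdatomic.h>
#include <stdio.h>
#include <stddef.h>

struct msg {
	int payload;
};

/* A single published pointer stands in for the call_function queue head. */
static _Atomic(struct msg *) head;

/* Sender: fill in the message, publish it, then "send the IPI". */
static void publish(struct msg *m, int value)
{
	m->payload = value;
	/* Release ordering plays the role of smp_mb() before the IPI. */
	atomic_store_explicit(&head, m, memory_order_release);
	/* arch_send_call_function_single_ipi(cpu) would follow here. */
}

/* Handler: the dependent load is enough to see the published payload. */
static void handle(void)
{
	/* Consume ordering mirrors smp_read_barrier_depends(). */
	struct msg *m = atomic_load_explicit(&head, memory_order_consume);

	if (m)
		printf("got %d\n", m->payload);
}

int main(void)
{
	static struct msg m;

	publish(&m, 42);
	handle();	/* single-threaded here; shows only the ordering shape */
	return 0;
}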
kernel/timer.c

@@ -112,6 +112,44 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
tbase_get_deferrable(timer->base));
}

static unsigned long round_jiffies_common(unsigned long j, int cpu,
bool force_up)
{
int rem;
unsigned long original = j;

/*
* We don't want all cpus firing their timers at once hitting the
* same lock or cachelines, so we skew each extra cpu with an extra
* 3 jiffies. This 3 jiffies came originally from the mm/ code which
* already did this.
* The skew is done by adding 3*cpunr, then round, then subtract this
* extra offset again.
*/
j += cpu * 3;

rem = j % HZ;

/*
* If the target jiffie is just after a whole second (which can happen
* due to delays of the timer irq, long irq off times etc etc) then
* we should round down to the whole second, not up. Use 1/4th second
* as cutoff for this rounding as an extreme upper bound for this.
* But never round down if @force_up is set.
*/
if (rem < HZ/4 && !force_up) /* round down */
j = j - rem;
else /* round up */
j = j - rem + HZ;

/* now that we have rounded, subtract the extra skew again */
j -= cpu * 3;

if (j <= jiffies) /* rounding ate our timeout entirely; */
return original;
return j;
}

/**
* __round_jiffies - function to round jiffies to a full second
* @j: the time in (absolute) jiffies that should be rounded
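A small userspace model of round_jiffies_common() makes the skew/round/unskew arithmetic concrete; HZ=1000, the cpu number and the sample values below are assumptions for illustration only:

#include <stdio.h>

#define HZ 1000				/* assumed tick rate, for the example only */

static unsigned long now = 5000;	/* stand-in for the jiffies counter */

static unsigned long round_common(unsigned long j, int cpu, int force_up)
{
	unsigned long original = j;
	unsigned long rem;

	j += cpu * 3;			/* per-cpu skew */
	rem = j % HZ;

	if (rem < HZ / 4 && !force_up)	/* just past a second: round down */
		j = j - rem;
	else				/* otherwise round up */
		j = j - rem + HZ;

	j -= cpu * 3;			/* remove the skew again */

	if (j <= now)			/* rounding ate the timeout entirely */
		return original;
	return j;
}

int main(void)
{
	/* cpu 2: 5100+6=5106, rem 106 < HZ/4, round down to 5000, unskew to
	 * 4994, which is <= now, so the original 5100 is returned. */
	printf("%lu\n", round_common(5100, 2, 0));

	/* force_up: 5106 rounds up to 6000, unskew to 5994. */
	printf("%lu\n", round_common(5100, 2, 1));
	return 0;
}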
@@ -134,38 +172,7 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
*/
unsigned long __round_jiffies(unsigned long j, int cpu)
{
int rem;
unsigned long original = j;

/*
* We don't want all cpus firing their timers at once hitting the
* same lock or cachelines, so we skew each extra cpu with an extra
* 3 jiffies. This 3 jiffies came originally from the mm/ code which
* already did this.
* The skew is done by adding 3*cpunr, then round, then subtract this
* extra offset again.
*/
j += cpu * 3;

rem = j % HZ;

/*
* If the target jiffie is just after a whole second (which can happen
* due to delays of the timer irq, long irq off times etc etc) then
* we should round down to the whole second, not up. Use 1/4th second
* as cutoff for this rounding as an extreme upper bound for this.
*/
if (rem < HZ/4) /* round down */
j = j - rem;
else /* round up */
j = j - rem + HZ;

/* now that we have rounded, subtract the extra skew again */
j -= cpu * 3;

if (j <= jiffies) /* rounding ate our timeout entirely; */
return original;
return j;
return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);
@@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
*/
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
/*
* In theory the following code can skip a jiffy in case jiffies
* increments right between the addition and the later subtraction.
* However since the entire point of this function is to use approximate
* timeouts, it's entirely ok to not handle that.
*/
return __round_jiffies(j + jiffies, cpu) - jiffies;
unsigned long j0 = jiffies;

/* Use j0 because jiffies might change while we run */
return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

@@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
*/
unsigned long round_jiffies(unsigned long j)
{
return __round_jiffies(j, raw_smp_processor_id());
return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);
@@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j)
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);

/**
* __round_jiffies_up - function to round jiffies up to a full second
* @j: the time in (absolute) jiffies that should be rounded
* @cpu: the processor number on which the timeout will happen
*
* This is the same as __round_jiffies() except that it will never
* round down. This is useful for timeouts for which the exact time
* of firing does not matter too much, as long as they don't fire too
* early.
*/
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**
* __round_jiffies_up_relative - function to round jiffies up to a full second
* @j: the time in (relative) jiffies that should be rounded
* @cpu: the processor number on which the timeout will happen
*
* This is the same as __round_jiffies_relative() except that it will never
* round down. This is useful for timeouts for which the exact time
* of firing does not matter too much, as long as they don't fire too
* early.
*/
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
unsigned long j0 = jiffies;

/* Use j0 because jiffies might change while we run */
return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

/**
* round_jiffies_up - function to round jiffies up to a full second
* @j: the time in (absolute) jiffies that should be rounded
*
* This is the same as round_jiffies() except that it will never
* round down. This is useful for timeouts for which the exact time
* of firing does not matter too much, as long as they don't fire too
* early.
*/
unsigned long round_jiffies_up(unsigned long j)
{
return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
* round_jiffies_up_relative - function to round jiffies up to a full second
* @j: the time in (relative) jiffies that should be rounded
*
* This is the same as round_jiffies_relative() except that it will never
* round down. This is useful for timeouts for which the exact time
* of firing does not matter too much, as long as they don't fire too
* early.
*/
unsigned long round_jiffies_up_relative(unsigned long j)
{
return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);

static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)
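The new *_up variants are meant for timeouts that may fire late but must never fire early. A kernel-style usage sketch; the timer and interval names are hypothetical:

/* Hypothetical timer and interval, for illustration only. */
static struct timer_list my_watchdog_timer;
#define MY_INTERVAL (10 * HZ)

static void my_watchdog_rearm(void)
{
	/*
	 * Batch the expiry onto a full second as round_jiffies() does,
	 * but never let it fire before jiffies + MY_INTERVAL.
	 */
	mod_timer(&my_watchdog_timer,
		  round_jiffies_up(jiffies + MY_INTERVAL));
}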
@@ -1060,7 +1060,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,

/* Did the write stamp get updated already? */
if (unlikely(ts < cpu_buffer->write_stamp))
goto again;
delta = 0;

if (test_time_stamp(delta)) {

@@ -2676,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
{
unsigned long val;
char buf[64];
int ret;
int ret, cpu;
struct trace_array *tr = filp->private_data;

if (cnt >= sizeof(buf))

@@ -2704,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
goto out;
}

/* disable all cpu buffers */
for_each_tracing_cpu(cpu) {
if (global_trace.data[cpu])
atomic_inc(&global_trace.data[cpu]->disabled);
if (max_tr.data[cpu])
atomic_inc(&max_tr.data[cpu]->disabled);
}

if (val != global_trace.entries) {
ret = ring_buffer_resize(global_trace.buffer, val);
if (ret < 0) {

@@ -2735,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
if (tracing_disabled)
cnt = -ENOMEM;
out:
for_each_tracing_cpu(cpu) {
if (global_trace.data[cpu])
atomic_dec(&global_trace.data[cpu]->disabled);
if (max_tr.data[cpu])
atomic_dec(&max_tr.data[cpu]->disabled);
}

max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
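The trace.c hunks bracket the ring-buffer resize with a per-cpu disabled count so no CPU records into a buffer while it is being reallocated, and the counts are dropped again on the way out, even on the error path. A minimal userspace model of that inc/resize/dec pattern; the types and names are stand-ins, not the tracer's:

#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

struct cpu_buf {
	atomic_int disabled;	/* writers skip the buffer while > 0 */
	long entries;
};

static struct cpu_buf bufs[NCPUS];

static int writer_may_record(int cpu)
{
	return atomic_load(&bufs[cpu].disabled) == 0;
}

static void resize_all(long val)
{
	int cpu;

	/* quiesce every cpu buffer first ... */
	for (cpu = 0; cpu < NCPUS; cpu++)
		atomic_fetch_add(&bufs[cpu].disabled, 1);

	/* ... do the actual resize ... */
	for (cpu = 0; cpu < NCPUS; cpu++)
		bufs[cpu].entries = val;

	/* ... then let writers back in, even on an error path. */
	for (cpu = 0; cpu < NCPUS; cpu++)
		atomic_fetch_sub(&bufs[cpu].disabled, 1);
}

int main(void)
{
	resize_all(1024);
	printf("cpu0: %ld entries, may record: %d\n",
	       bufs[0].entries, writer_may_record(0));
	return 0;
}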
@@ -970,6 +970,51 @@ undo:
return ret;
}

#ifdef CONFIG_SMP
struct work_for_cpu {
struct work_struct work;
long (*fn)(void *);
void *arg;
long ret;
};

static void do_work_for_cpu(struct work_struct *w)
{
struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);

wfc->ret = wfc->fn(wfc->arg);
}

/**
* work_on_cpu - run a function in user context on a particular cpu
* @cpu: the cpu to run on
* @fn: the function to run
* @arg: the function arg
*
* This will return -EINVAL in the cpu is not online, or the return value
* of @fn otherwise.
*/
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
struct work_for_cpu wfc;

INIT_WORK(&wfc.work, do_work_for_cpu);
wfc.fn = fn;
wfc.arg = arg;
get_online_cpus();
if (unlikely(!cpu_online(cpu)))
wfc.ret = -EINVAL;
else {
schedule_work_on(cpu, &wfc.work);
flush_work(&wfc.work);
}
put_online_cpus();

return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
#endif /* CONFIG_SMP */

void __init init_workqueues(void)
{
cpu_populated_map = cpu_online_map;
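A kernel-style usage sketch for the new work_on_cpu() helper; the callback and caller below are invented for illustration:

/* Hypothetical caller: run a probe routine in process context on CPU 'cpu'. */
static long my_probe_on_cpu(void *arg)
{
	int *node = arg;

	/* Runs in keventd's context on the requested CPU. */
	*node = numa_node_id();
	return 0;
}

static int example(unsigned int cpu)
{
	int node = -1;
	long ret;

	ret = work_on_cpu(cpu, my_probe_on_cpu, &node);
	if (ret < 0)
		return ret;	/* -EINVAL if the cpu was not online */

	pr_info("cpu %u is on node %d\n", cpu, node);
	return 0;
}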