Merge branch 'core/rcu' into core/rcu-for-linus

This commit is contained in:
Ingo Molnar
2008-07-15 21:10:12 +02:00
33 changed files with 1347 additions and 530 deletions

View File

@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <linux/bootmem.h>
#include <linux/hash.h>
#include <linux/pid_namespace.h>

View File

@@ -387,6 +387,10 @@ static void __rcu_offline_cpu(struct rcu_data *this_rdp,
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
local_irq_disable();
this_rdp->qlen += rdp->qlen;
local_irq_enable();
}
static void rcu_offline_cpu(int cpu)
@@ -516,10 +520,38 @@ void rcu_check_callbacks(int cpu, int user)
if (user ||
(idle_cpu(cpu) && !in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
/*
* Get here if this CPU took its interrupt from user
* mode or from the idle loop, and if this is not a
* nested interrupt. In this case, the CPU is in
* a quiescent state, so count it.
*
* Also do a memory barrier. This is needed to handle
* the case where writes from a preempt-disable section
* of code get reordered into schedule() by this CPU's
* write buffer. The memory barrier makes sure that
* the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are seen
* by other CPUs to happen after any such write.
*/
smp_mb(); /* See above block comment. */
rcu_qsctr_inc(cpu);
rcu_bh_qsctr_inc(cpu);
} else if (!in_softirq())
} else if (!in_softirq()) {
/*
* Get here if this CPU did not take its interrupt from
* softirq, in other words, if it is not interrupting
* a rcu_bh read-side critical section. This is therefore an
* rcu_bh quiescent state, so count it. The memory barrier
* is needed for the same reason as is the above one.
*/
smp_mb(); /* See above block comment. */
rcu_bh_qsctr_inc(cpu);
}
raise_rcu_softirq();
}

View File

@@ -39,16 +39,16 @@
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
struct rcu_synchronize {
struct rcu_head head;
struct completion completion;
enum rcu_barrier {
RCU_BARRIER_STD,
RCU_BARRIER_BH,
RCU_BARRIER_SCHED,
};
static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -60,7 +60,7 @@ static struct completion rcu_barrier_completion;
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
*/
static void wakeme_after_rcu(struct rcu_head *head)
void wakeme_after_rcu(struct rcu_head *head)
{
struct rcu_synchronize *rcu;
@@ -77,17 +77,7 @@ static void wakeme_after_rcu(struct rcu_head *head)
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
void synchronize_rcu(void)
{
struct rcu_synchronize rcu;
init_completion(&rcu.completion);
/* Will wake me after RCU finished */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it */
wait_for_completion(&rcu.completion);
}
synchronize_rcu_xxx(synchronize_rcu, call_rcu)
EXPORT_SYMBOL_GPL(synchronize_rcu);
static void rcu_barrier_callback(struct rcu_head *notused)
@@ -99,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused)
/*
* Called with preemption disabled, and from cross-cpu IRQ context.
*/
static void rcu_barrier_func(void *notused)
static void rcu_barrier_func(void *type)
{
int cpu = smp_processor_id();
struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
atomic_inc(&rcu_barrier_cpu_count);
call_rcu(head, rcu_barrier_callback);
switch ((enum rcu_barrier)type) {
case RCU_BARRIER_STD:
call_rcu(head, rcu_barrier_callback);
break;
case RCU_BARRIER_BH:
call_rcu_bh(head, rcu_barrier_callback);
break;
case RCU_BARRIER_SCHED:
call_rcu_sched(head, rcu_barrier_callback);
break;
}
}
/**
* rcu_barrier - Wait until all the in-flight RCUs are complete.
/*
* Orchestrate the specified type of RCU barrier, waiting for all
* RCU callbacks of the specified type to complete.
*/
void rcu_barrier(void)
static void _rcu_barrier(enum rcu_barrier type)
{
BUG_ON(in_interrupt());
/* Take cpucontrol mutex to protect against CPU hotplug */
@@ -127,13 +128,39 @@ void rcu_barrier(void)
* until all the callbacks are queued.
*/
rcu_read_lock();
on_each_cpu(rcu_barrier_func, NULL, 0, 1);
on_each_cpu(rcu_barrier_func, (void *)type, 0, 1);
rcu_read_unlock();
wait_for_completion(&rcu_barrier_completion);
mutex_unlock(&rcu_barrier_mutex);
}
/**
* rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
*
* Thin wrapper that runs _rcu_barrier() with RCU_BARRIER_STD.
* _rcu_barrier() does BUG_ON(in_interrupt()) and waits on a completion,
* so this must not be called from interrupt context.
*/
void rcu_barrier(void)
{
_rcu_barrier(RCU_BARRIER_STD);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/**
* rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
*
* Thin wrapper that runs _rcu_barrier() with RCU_BARRIER_BH.
* _rcu_barrier() does BUG_ON(in_interrupt()) and waits on a completion,
* so this must not be called from interrupt context.
*/
void rcu_barrier_bh(void)
{
_rcu_barrier(RCU_BARRIER_BH);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);
/**
* rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
*
* Thin wrapper that runs _rcu_barrier() with RCU_BARRIER_SCHED.
* _rcu_barrier() does BUG_ON(in_interrupt()) and waits on a completion,
* so this must not be called from interrupt context.
*/
void rcu_barrier_sched(void)
{
_rcu_barrier(RCU_BARRIER_SCHED);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
void __init rcu_init(void)
{
__rcu_init();

View File

@@ -46,11 +46,11 @@
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/cpu.h>
#include <linux/random.h>
#include <linux/delay.h>
@@ -82,14 +82,18 @@ struct rcu_data {
spinlock_t lock; /* Protect rcu_data fields. */
long completed; /* Number of last completed batch. */
int waitlistcount;
struct tasklet_struct rcu_tasklet;
struct rcu_head *nextlist;
struct rcu_head **nexttail;
struct rcu_head *waitlist[GP_STAGES];
struct rcu_head **waittail[GP_STAGES];
struct rcu_head *donelist;
struct rcu_head *donelist; /* from waitlist & waitschedlist */
struct rcu_head **donetail;
long rcu_flipctr[2];
struct rcu_head *nextschedlist;
struct rcu_head **nextschedtail;
struct rcu_head *waitschedlist;
struct rcu_head **waitschedtail;
int rcu_sched_sleeping;
#ifdef CONFIG_RCU_TRACE
struct rcupreempt_trace trace;
#endif /* #ifdef CONFIG_RCU_TRACE */
@@ -131,11 +135,24 @@ enum rcu_try_flip_states {
rcu_try_flip_waitmb_state,
};
/*
* States for rcu_ctrlblk.sched_sleep, the sleep/wakeup handshake between
* the rcu_sched grace-period kthread and call_rcu_sched().
*/
enum rcu_sched_sleep_states {
rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
};
/*
* Control state for the RCU grace-period state machine, plus the
* sleep/wakeup handshake state used by rcu_sched grace-period processing.
*/
struct rcu_ctrlblk {
spinlock_t fliplock; /* Protect state-machine transitions. */
long completed; /* Number of last completed batch. */
enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
the rcu state machine */
spinlock_t schedlock; /* Protect rcu_sched sleep state. */
enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
};
static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
.completed = 0,
.rcu_try_flip_state = rcu_try_flip_idle_state,
.schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
.sched_sleep = rcu_sched_not_sleeping,
.sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
};
static struct task_struct *rcu_sched_grace_period_task;
#ifdef CONFIG_RCU_TRACE
static char *rcu_try_flip_state_names[] =
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
*/
#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
#define RCU_SCHED_BATCH_TIME (HZ / 50)
/*
* Return the number of RCU batches processed thus far. Useful
* for debug and statistics.
@@ -411,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
}
}
#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
.dynticks = 1,
};
DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
#ifdef CONFIG_NO_HZ
static DEFINE_PER_CPU(int, rcu_update_flag);
/**
* rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
*
* If the CPU was idle with dynamic ticks active, this updates the
* dynticks_progress_counter to let the RCU handling know that the
* rcu_dyntick_sched.dynticks to let the RCU handling know that the
* CPU is active.
*/
void rcu_irq_enter(void)
{
int cpu = smp_processor_id();
struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
if (per_cpu(rcu_update_flag, cpu))
per_cpu(rcu_update_flag, cpu)++;
/*
* Only update if we are coming from a stopped ticks mode
* (dynticks_progress_counter is even).
* (rcu_dyntick_sched.dynticks is even).
*/
if (!in_interrupt() &&
(per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
(rdssp->dynticks & 0x1) == 0) {
/*
* The following might seem like we could have a race
* with NMI/SMIs. But this really isn't a problem.
@@ -459,12 +484,12 @@ void rcu_irq_enter(void)
* RCU read-side critical sections on this CPU would
* have already completed.
*/
per_cpu(dynticks_progress_counter, cpu)++;
rdssp->dynticks++;
/*
* The following memory barrier ensures that any
* rcu_read_lock() primitives in the irq handler
* are seen by other CPUs to follow the above
* increment to dynticks_progress_counter. This is
* increment to rcu_dyntick_sched.dynticks. This is
* required in order for other CPUs to correctly
* determine when it is safe to advance the RCU
* grace-period state machine.
@@ -472,7 +497,7 @@ void rcu_irq_enter(void)
smp_mb(); /* see above block comment. */
/*
* Since we can't determine the dynamic tick mode from
* the dynticks_progress_counter after this routine,
* the rcu_dyntick_sched.dynticks after this routine,
* we use a second flag to acknowledge that we came
* from an idle state with ticks stopped.
*/
@@ -480,7 +505,7 @@ void rcu_irq_enter(void)
/*
* If we take an NMI/SMI now, they will also increment
* the rcu_update_flag, and will not update the
* dynticks_progress_counter on exit. That is for
* rcu_dyntick_sched.dynticks on exit. That is for
* this IRQ to do.
*/
}
@@ -490,12 +515,13 @@ void rcu_irq_enter(void)
* rcu_irq_exit - Called from exiting Hard irq context.
*
* If the CPU was idle with dynamic ticks active, update the
* dynticks_progress_counter to put let the RCU handling be
* rcu_dyntick_sched.dynticks to let the RCU handling be
* aware that the CPU is going back to idle with no ticks.
*/
void rcu_irq_exit(void)
{
int cpu = smp_processor_id();
struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
/*
* rcu_update_flag is set if we interrupted the CPU
@@ -503,7 +529,7 @@ void rcu_irq_exit(void)
* Once this occurs, we keep track of interrupt nesting
* because a NMI/SMI could also come in, and we still
* only want the IRQ that started the increment of the
* dynticks_progress_counter to be the one that modifies
* rcu_dyntick_sched.dynticks to be the one that modifies
* it on exit.
*/
if (per_cpu(rcu_update_flag, cpu)) {
@@ -515,28 +541,29 @@ void rcu_irq_exit(void)
/*
* If an NMI/SMI happens now we are still
* protected by the dynticks_progress_counter being odd.
* protected by the rcu_dyntick_sched.dynticks being odd.
*/
/*
* The following memory barrier ensures that any
* rcu_read_unlock() primitives in the irq handler
* are seen by other CPUs to precede the following
* increment to dynticks_progress_counter. This
* increment to rcu_dyntick_sched.dynticks. This
* is required in order for other CPUs to determine
* when it is safe to advance the RCU grace-period
* state machine.
*/
smp_mb(); /* see above block comment. */
per_cpu(dynticks_progress_counter, cpu)++;
WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
rdssp->dynticks++;
WARN_ON(rdssp->dynticks & 0x1);
}
}
static void dyntick_save_progress_counter(int cpu)
{
per_cpu(rcu_dyntick_snapshot, cpu) =
per_cpu(dynticks_progress_counter, cpu);
struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
rdssp->dynticks_snap = rdssp->dynticks;
}
static inline int
@@ -544,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu)
{
long curr;
long snap;
struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
curr = per_cpu(dynticks_progress_counter, cpu);
snap = per_cpu(rcu_dyntick_snapshot, cpu);
curr = rdssp->dynticks;
snap = rdssp->dynticks_snap;
smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
/*
@@ -567,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu)
* that this CPU already acknowledged the counter.
*/
if ((curr - snap) > 2 || (snap & 0x1) == 0)
if ((curr - snap) > 2 || (curr & 0x1) == 0)
return 0;
/* We need this CPU to explicitly acknowledge the counter flip. */
@@ -580,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu)
{
long curr;
long snap;
struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
curr = per_cpu(dynticks_progress_counter, cpu);
snap = per_cpu(rcu_dyntick_snapshot, cpu);
curr = rdssp->dynticks;
snap = rdssp->dynticks_snap;
smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
/*
@@ -609,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu)
return 1;
}
/*
 * Record @cpu's current dynticks counter in sched_dynticks_snap, the
 * snapshot that rcu_qsctr_inc_needed_dyntick() later compares against.
 */
static void dyntick_save_progress_counter_sched(int cpu)
{
	struct rcu_dyntick_sched *dts;

	dts = &per_cpu(rcu_dyntick_sched, cpu);
	dts->sched_dynticks_snap = dts->dynticks;
}
/*
 * Judging only by @cpu's dynticks counter, report whether rcu_sched
 * grace-period processing still has to wait for that CPU.
 *
 * Returns 0 if the counter shows the CPU has been through a quiescent
 * state since the snapshot, 1 if it still needs to pass through one.
 */
static int rcu_qsctr_inc_needed_dyntick(int cpu)
{
	struct rcu_dyntick_sched *dts = &per_cpu(rcu_dyntick_sched, cpu);
	long now;
	long then;

	now = dts->dynticks;
	then = dts->sched_dynticks_snap;
	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */

	/*
	 * Counter unchanged and even: the CPU sat in dynticks mode the
	 * whole time without taking any interrupts, NMIs, SMIs, or
	 * whatever, so it cannot be in the middle of an rcu_read_lock()
	 * and its next rcu_read_lock() must use the new counter value.
	 * It has been quiescent throughout; no need to wait for it.
	 */
	if (now == then && !(now & 0x1))
		return 0;

	/*
	 * The CPU passed through or entered a dynticks idle phase with
	 * no active irq handlers, so, as above, it has already passed
	 * through a quiescent state.
	 */
	if (now - then > 2 || !(then & 0x1))
		return 0;

	/* This CPU still has to go through a quiescent state. */
	return 1;
}
#else /* !CONFIG_NO_HZ */
# define dyntick_save_progress_counter(cpu) do { } while (0)
# define rcu_try_flip_waitack_needed(cpu) (1)
# define rcu_try_flip_waitmb_needed(cpu) (1)
# define dyntick_save_progress_counter(cpu) do { } while (0)
# define rcu_try_flip_waitack_needed(cpu) (1)
# define rcu_try_flip_waitmb_needed(cpu) (1)
# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
#endif /* CONFIG_NO_HZ */
/*
 * Record @cpu's current sched_qs counter in sched_qs_snap, the snapshot
 * that rcu_qsctr_inc_needed() later compares against.
 */
static void save_qsctr_sched(int cpu)
{
	struct rcu_dyntick_sched *dts;

	dts = &per_cpu(rcu_dyntick_sched, cpu);
	dts->sched_qs_snap = dts->sched_qs;
}
/*
 * Report whether @cpu's sched_qs counter is unchanged since the last
 * save_qsctr_sched() snapshot.
 *
 * Returns 1 if the CPU still needs to go through a quiescent state,
 * 0 if the counter has moved, i.e. there has been a quiescent state.
 */
static inline int rcu_qsctr_inc_needed(int cpu)
{
	struct rcu_dyntick_sched *dts = &per_cpu(rcu_dyntick_sched, cpu);

	/* Counter has not moved: keep waiting on this CPU. */
	if (dts->sched_qs == dts->sched_qs_snap)
		return 1;

	/* There has been a quiescent state; no more need to wait. */
	smp_mb(); /* force ordering with cpu entering schedule(). */
	return 0;
}
/*
* Get here when RCU is idle. Decide whether we need to
* move out of idle state, and return non-zero if so.
@@ -819,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user)
unsigned long flags;
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
/*
* If this CPU took its interrupt from user mode or from the
* idle loop, and this is not a nested interrupt, then
* this CPU has to have exited all prior preempt-disable
* sections of code. So increment the counter to note this.
*
* The memory barrier is needed to handle the case where
* writes from a preempt-disable section of code get reordered
* into schedule() by this CPU's write buffer. So the memory
* barrier makes sure that the rcu_qsctr_inc() is seen by other
* CPUs to happen after any such write.
*/
if (user ||
(idle_cpu(cpu) && !in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
smp_mb(); /* Guard against aggressive schedule(). */
rcu_qsctr_inc(cpu);
}
rcu_check_mb(cpu);
if (rcu_ctrlblk.completed == rdp->completed)
rcu_try_flip();
@@ -869,6 +990,8 @@ void rcu_offline_cpu(int cpu)
struct rcu_head *list = NULL;
unsigned long flags;
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
struct rcu_head *schedlist = NULL;
struct rcu_head **schedtail = &schedlist;
struct rcu_head **tail = &list;
/*
@@ -882,6 +1005,11 @@ void rcu_offline_cpu(int cpu)
rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
list, tail);
rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
schedlist, schedtail);
rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
schedlist, schedtail);
rdp->rcu_sched_sleeping = 0;
spin_unlock_irqrestore(&rdp->lock, flags);
rdp->waitlistcount = 0;
@@ -916,12 +1044,15 @@ void rcu_offline_cpu(int cpu)
* fix.
*/
local_irq_save(flags);
local_irq_save(flags); /* disable preempt till we know what lock. */
rdp = RCU_DATA_ME();
spin_lock(&rdp->lock);
*rdp->nexttail = list;
if (list)
rdp->nexttail = tail;
*rdp->nextschedtail = schedlist;
if (schedlist)
rdp->nextschedtail = schedtail;
spin_unlock_irqrestore(&rdp->lock, flags);
}
@@ -936,10 +1067,25 @@ void rcu_offline_cpu(int cpu)
void __cpuinit rcu_online_cpu(int cpu)
{
unsigned long flags;
struct rcu_data *rdp;
/* Add @cpu to rcu_cpu_online_map while holding rcu_ctrlblk.fliplock. */
spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
cpu_set(cpu, rcu_cpu_online_map);
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
/*
* The rcu_sched grace-period processing might have bypassed
* this CPU, given that it was not in the rcu_cpu_online_map
* when the grace-period scan started. This means that the
* grace-period task might sleep. So make sure that if this
* should happen, the first callback posted to this CPU will
* wake up the grace-period task if need be.
*/
rdp = RCU_DATA_CPU(cpu);
spin_lock_irqsave(&rdp->lock, flags);
/* call_rcu_sched() tests this flag to decide whether to attempt a wakeup. */
rdp->rcu_sched_sleeping = 1;
spin_unlock_irqrestore(&rdp->lock, flags);
}
static void rcu_process_callbacks(struct softirq_action *unused)
@@ -982,32 +1128,197 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
*rdp->nexttail = head;
rdp->nexttail = &head->next;
RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
spin_unlock(&rdp->lock);
local_irq_restore(flags);
spin_unlock_irqrestore(&rdp->lock, flags);
}
EXPORT_SYMBOL_GPL(call_rcu);
/*
* Queue @head, with callback @func, on the tail of the current CPU's
* nextschedlist. rcu_sched_grace_period() later advances that list to
* waitschedlist and then to the donelist. If this CPU's rcu_data was
* flagged rcu_sched_sleeping, also wake the grace-period kthread,
* provided it really is asleep.
*/
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
unsigned long flags;
struct rcu_data *rdp;
int wake_gp = 0;
head->func = func;
head->next = NULL;
/* Disable irqs before picking this CPU's rcu_data and taking its lock. */
local_irq_save(flags);
rdp = RCU_DATA_ME();
spin_lock(&rdp->lock);
*rdp->nextschedtail = head;
rdp->nextschedtail = &head->next;
if (rdp->rcu_sched_sleeping) {
/* Grace-period processing might be sleeping... */
rdp->rcu_sched_sleeping = 0;
wake_gp = 1;
}
spin_unlock_irqrestore(&rdp->lock, flags);
if (wake_gp) {
/* Wake up grace-period processing, unless someone beat us. */
spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
/* Only issue the wakeup if the kthread actually reached the sleeping state. */
if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
wake_gp = 0;
/*
* Reset the state unconditionally: this also overwrites
* rcu_sched_sleep_prep, which rcu_sched_grace_period()
* rechecks before it blocks.
*/
rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
if (wake_gp)
wake_up_interruptible(&rcu_ctrlblk.sched_wq);
}
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
/*
* Wait until all currently running preempt_disable() code segments
* (including hardware-irq-disable segments) complete. Note that
* in -rt this does -not- necessarily result in all currently executing
* interrupt -handlers- having completed.
*
* Implemented by binding the calling task to each online CPU in turn
* and calling schedule() there, then restoring the original affinity.
*/
void __synchronize_sched(void)
{
cpumask_t oldmask;
int cpu;
/* If the current affinity cannot be read, restore to all possible CPUs. */
if (sched_getaffinity(0, &oldmask) < 0)
oldmask = cpu_possible_map;
for_each_online_cpu(cpu) {
/* Return values are not checked; a failed migration goes unnoticed. */
sched_setaffinity(0, &cpumask_of_cpu(cpu));
schedule();
}
sched_setaffinity(0, &oldmask);
}
synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
EXPORT_SYMBOL_GPL(__synchronize_sched);
/*
* kthread function that manages call_rcu_sched grace periods.
*
* Each pass of the loop snapshots every online CPU's dynticks and
* sched_qs counters, sleeps for RCU_SCHED_BATCH_TIME, waits until each
* online CPU no longer needs a quiescent state, and then advances the
* per-CPU lists: waitschedlist is appended to donelist and nextschedlist
* becomes the new waitschedlist.
*
* After two consecutive passes that find no newly posted callbacks, the
* thread blocks on rcu_ctrlblk.sched_wq until call_rcu_sched() changes
* rcu_ctrlblk.sched_sleep and wakes it.
*
* @arg is unused. Returns 0 once kthread_should_stop() is true.
*/
static int rcu_sched_grace_period(void *arg)
{
int couldsleep; /* might sleep after current pass. */
int couldsleepnext = 0; /* might sleep after next pass. */
int cpu;
unsigned long flags;
struct rcu_data *rdp;
int ret;
/*
* Each pass through the following loop handles one
* rcu_sched grace period cycle.
*/
do {
/* Save each CPU's current state. */
for_each_online_cpu(cpu) {
dyntick_save_progress_counter_sched(cpu);
save_qsctr_sched(cpu);
}
/*
* Sleep for about an RCU grace-period's worth to
* allow better batching and to consume less CPU.
*/
schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
/*
* If there was nothing to do last time, prepare to
* sleep at the end of the current grace period cycle.
*/
couldsleep = couldsleepnext;
couldsleepnext = 1;
if (couldsleep) {
/*
* Announce the intent to sleep. call_rcu_sched() resets
* this state, which the recheck before blocking detects.
*/
spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
}
/*
* Wait on each CPU in turn to have either visited
* a quiescent state or been in dynticks-idle mode.
*/
for_each_online_cpu(cpu) {
while (rcu_qsctr_inc_needed(cpu) &&
rcu_qsctr_inc_needed_dyntick(cpu)) {
/* resched_cpu(cpu); @@@ */
schedule_timeout_interruptible(1);
}
}
/* Advance callbacks for each CPU. */
for_each_online_cpu(cpu) {
rdp = RCU_DATA_CPU(cpu);
spin_lock_irqsave(&rdp->lock, flags);
/*
* We are running on this CPU irq-disabled, so no
* CPU can go offline until we re-enable irqs.
* The current CPU might have already gone
* offline (between the for_each_online_cpu and
* the spin_lock_irqsave), but in that case all its
* callback lists will be empty, so no harm done.
*
* Advance the callbacks! We share normal RCU's
* donelist, since callbacks are invoked the
* same way in either case.
*/
if (rdp->waitschedlist != NULL) {
*rdp->donetail = rdp->waitschedlist;
rdp->donetail = rdp->waitschedtail;
/*
* Next rcu_check_callbacks() will
* do the required raise_softirq().
*/
}
if (rdp->nextschedlist != NULL) {
rdp->waitschedlist = rdp->nextschedlist;
rdp->waitschedtail = rdp->nextschedtail;
/* New callbacks were posted: no sleeping after this pass or the next. */
couldsleep = 0;
couldsleepnext = 0;
} else {
rdp->waitschedlist = NULL;
rdp->waitschedtail = &rdp->waitschedlist;
}
rdp->nextschedlist = NULL;
rdp->nextschedtail = &rdp->nextschedlist;
/* Mark sleep intention. */
rdp->rcu_sched_sleeping = couldsleep;
spin_unlock_irqrestore(&rdp->lock, flags);
}
/* If we saw callbacks on the last scan, go deal with them. */
if (!couldsleep)
continue;
/* Attempt to block... */
spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
/*
* Someone posted a callback after we scanned.
* Go take care of it.
*/
spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
couldsleepnext = 0;
continue;
}
/* Block until the next person posts a callback. */
rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
ret = 0;
__wait_event_interruptible(rcu_ctrlblk.sched_wq,
rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
ret);
/*
* Signals would prevent us from sleeping, and we cannot
* do much with them in any case. So flush them.
*/
if (ret)
flush_signals(current);
/* We were just awakened, so do not plan to sleep after the next pass. */
couldsleepnext = 0;
} while (!kthread_should_stop());
return (0);
}
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
@@ -1023,7 +1334,9 @@ int rcu_needs_cpu(int cpu)
return (rdp->donelist != NULL ||
!!rdp->waitlistcount ||
rdp->nextlist != NULL);
rdp->nextlist != NULL ||
rdp->nextschedlist != NULL ||
rdp->waitschedlist != NULL);
}
int rcu_pending(int cpu)
@@ -1034,7 +1347,9 @@ int rcu_pending(int cpu)
if (rdp->donelist != NULL ||
!!rdp->waitlistcount ||
rdp->nextlist != NULL)
rdp->nextlist != NULL ||
rdp->nextschedlist != NULL ||
rdp->waitschedlist != NULL)
return 1;
/* The RCU core needs an acknowledgement from this CPU. */
@@ -1101,6 +1416,11 @@ void __init __rcu_init(void)
rdp->donetail = &rdp->donelist;
rdp->rcu_flipctr[0] = 0;
rdp->rcu_flipctr[1] = 0;
rdp->nextschedlist = NULL;
rdp->nextschedtail = &rdp->nextschedlist;
rdp->waitschedlist = NULL;
rdp->waitschedtail = &rdp->waitschedlist;
rdp->rcu_sched_sleeping = 0;
}
register_cpu_notifier(&rcu_nb);
@@ -1123,11 +1443,15 @@ void __init __rcu_init(void)
}
/*
* Deprecated, use synchronize_rcu() or synchronize_sched() instead.
* Late-boot-time RCU initialization that must wait until after scheduler
* has been initialized.
*/
void synchronize_kernel(void)
void __init rcu_init_sched(void)
{
synchronize_rcu();
rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
NULL,
"rcu_sched_grace_period");
WARN_ON(IS_ERR(rcu_sched_grace_period_task));
}
#ifdef CONFIG_RCU_TRACE

View File

@@ -38,7 +38,6 @@
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/rcupreempt_trace.h>

View File

@@ -57,7 +57,9 @@ static int stat_interval; /* Interval between stats, in seconds. */
/* Defaults to "only at end of test". */
static int verbose; /* Print more debug info. */
static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
static int stutter = 5; /* Start/stop testing interval (in sec) */
static int irqreader = 1; /* RCU readers from irq (timers). */
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
module_param(nreaders, int, 0444);
@@ -72,6 +74,10 @@ module_param(test_no_idle_hz, bool, 0444);
MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
module_param(shuffle_interval, int, 0444);
MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
module_param(stutter, int, 0444);
MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
module_param(irqreader, int, 0444);
MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
@@ -91,6 +97,7 @@ static struct task_struct **fakewriter_tasks;
static struct task_struct **reader_tasks;
static struct task_struct *stats_task;
static struct task_struct *shuffler_task;
static struct task_struct *stutter_task;
#define RCU_TORTURE_PIPE_LEN 10
@@ -117,8 +124,18 @@ static atomic_t n_rcu_torture_alloc_fail;
static atomic_t n_rcu_torture_free;
static atomic_t n_rcu_torture_mberror;
static atomic_t n_rcu_torture_error;
static long n_rcu_torture_timers = 0;
static struct list_head rcu_torture_removed;
static int stutter_pause_test = 0;
#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
#define RCUTORTURE_RUNNABLE_INIT 1
#else
#define RCUTORTURE_RUNNABLE_INIT 0
#endif
int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
/*
* Allocate an element from the rcu_tortures pool.
*/
@@ -179,6 +196,16 @@ rcu_random(struct rcu_random_state *rrsp)
return swahw32(rrsp->rrs_state);
}
/*
 * Hold the calling torture thread while the test is paused by
 * stutter_pause_test or while rcutorture_runnable is clear.  Polls once
 * per jiffy during a stutter pause and about once per second (rounded)
 * while the test is not runnable.
 */
static void
rcu_stutter_wait(void)
{
	while (stutter_pause_test || !rcutorture_runnable) {
		if (!rcutorture_runnable)
			schedule_timeout_interruptible(round_jiffies_relative(HZ));
		else
			schedule_timeout_interruptible(1);
	}
}
/*
* Operations vector for selecting different types of tests.
*/
@@ -192,7 +219,9 @@ struct rcu_torture_ops {
int (*completed)(void);
void (*deferredfree)(struct rcu_torture *p);
void (*sync)(void);
void (*cb_barrier)(void);
int (*stats)(char *page);
int irqcapable;
char *name;
};
static struct rcu_torture_ops *cur_ops = NULL;
@@ -265,7 +294,9 @@ static struct rcu_torture_ops rcu_ops = {
.completed = rcu_torture_completed,
.deferredfree = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = rcu_barrier,
.stats = NULL,
.irqcapable = 1,
.name = "rcu"
};
@@ -304,7 +335,9 @@ static struct rcu_torture_ops rcu_sync_ops = {
.completed = rcu_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = NULL,
.stats = NULL,
.irqcapable = 1,
.name = "rcu_sync"
};
@@ -364,7 +397,9 @@ static struct rcu_torture_ops rcu_bh_ops = {
.completed = rcu_bh_torture_completed,
.deferredfree = rcu_bh_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = rcu_barrier_bh,
.stats = NULL,
.irqcapable = 1,
.name = "rcu_bh"
};
@@ -377,7 +412,9 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.completed = rcu_bh_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = NULL,
.stats = NULL,
.irqcapable = 1,
.name = "rcu_bh_sync"
};
@@ -458,6 +495,7 @@ static struct rcu_torture_ops srcu_ops = {
.completed = srcu_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu"
};
@@ -482,12 +520,32 @@ static int sched_torture_completed(void)
return 0;
}
/*
* Deferred-free hook for the "sched" torture type: hands @p's embedded
* rcu_head to call_rcu_sched() with rcu_torture_cb as the callback.
*/
static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
{
call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
}
/* Synchronous grace-period hook for the sched torture ops: calls synchronize_sched(). */
static void sched_torture_synchronize(void)
{
synchronize_sched();
}
/*
* Operations vector for the "sched" torture type. Deferred frees go
* through call_rcu_sched() (via rcu_sched_torture_deferred_free) and
* rcu_barrier_sched() is the callback barrier.
*/
static struct rcu_torture_ops sched_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.readdelay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.deferredfree = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
.stats = NULL,
.irqcapable = 1, /* rcu_torture_reader() arms the timer-based reader only when set. */
.name = "sched"
};
static struct rcu_torture_ops sched_ops_sync = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
@@ -496,8 +554,9 @@ static struct rcu_torture_ops sched_ops = {
.completed = sched_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
.stats = NULL,
.name = "sched"
.name = "sched_sync"
};
/*
@@ -537,6 +596,7 @@ rcu_torture_writer(void *arg)
}
rcu_torture_current_version++;
oldbatch = cur_ops->completed();
rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
while (!kthread_should_stop())
@@ -560,6 +620,7 @@ rcu_torture_fakewriter(void *arg)
schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
udelay(rcu_random(&rand) & 0x3ff);
cur_ops->sync();
rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
@@ -568,6 +629,52 @@ rcu_torture_fakewriter(void *arg)
return 0;
}
/*
 * Timer-handler flavor of the RCU torture reader.  Inside a read-side
 * critical section it dereferences rcu_torture_current and bumps the
 * per-CPU pipeline and batch histograms for the element it found.  An
 * element's pipeline count should never exceed 1; if it does, the RCU
 * implementation is broken.
 *
 * @unused: timer argument, ignored.
 */
static void rcu_torture_timer(unsigned long unused)
{
	static DEFINE_RCU_RANDOM(rand);
	static DEFINE_SPINLOCK(rand_lock);
	struct rcu_torture *p;
	int completed;
	int pipe_count;
	int idx;

	idx = cur_ops->readlock();
	completed = cur_ops->completed();
	p = rcu_dereference(rcu_torture_current);
	if (!p) {
		/* Leave because rcu_torture_writer is not yet underway */
		cur_ops->readunlock(idx);
		return;
	}
	if (!p->rtort_mbtest)
		atomic_inc(&n_rcu_torture_mberror);

	/* rand_lock covers both the shared random state and the counter. */
	spin_lock(&rand_lock);
	cur_ops->readdelay(&rand);
	n_rcu_torture_timers++;
	spin_unlock(&rand_lock);

	preempt_disable();

	/* Clamp to the last histogram slot.  Should not happen, but... */
	pipe_count = p->rtort_pipe_count;
	if (pipe_count > RCU_TORTURE_PIPE_LEN)
		pipe_count = RCU_TORTURE_PIPE_LEN;
	__get_cpu_var(rcu_torture_count)[pipe_count]++;

	/* Batches completed during this read-side section, clamped likewise. */
	completed = cur_ops->completed() - completed;
	if (completed > RCU_TORTURE_PIPE_LEN)
		completed = RCU_TORTURE_PIPE_LEN;
	__get_cpu_var(rcu_torture_batch)[completed]++;

	preempt_enable();
	cur_ops->readunlock(idx);
}
/*
* RCU torture reader kthread. Repeatedly dereferences rcu_torture_current,
* incrementing the corresponding element of the pipeline array. The
@@ -582,11 +689,18 @@ rcu_torture_reader(void *arg)
DEFINE_RCU_RANDOM(rand);
struct rcu_torture *p;
int pipe_count;
struct timer_list t;
VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
set_user_nice(current, 19);
if (irqreader && cur_ops->irqcapable)
setup_timer_on_stack(&t, rcu_torture_timer, 0);
do {
if (irqreader && cur_ops->irqcapable) {
if (!timer_pending(&t))
mod_timer(&t, 1);
}
idx = cur_ops->readlock();
completed = cur_ops->completed();
p = rcu_dereference(rcu_torture_current);
@@ -615,8 +729,11 @@ rcu_torture_reader(void *arg)
preempt_enable();
cur_ops->readunlock(idx);
schedule();
rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
if (irqreader && cur_ops->irqcapable)
del_timer_sync(&t);
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
@@ -647,20 +764,22 @@ rcu_torture_printk(char *page)
cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
cnt += sprintf(&page[cnt],
"rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
"rtmbe: %d",
"rtmbe: %d nt: %ld",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free),
atomic_read(&n_rcu_torture_mberror));
atomic_read(&n_rcu_torture_mberror),
n_rcu_torture_timers);
if (atomic_read(&n_rcu_torture_mberror) != 0)
cnt += sprintf(&page[cnt], " !!!");
cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
if (i > 1) {
cnt += sprintf(&page[cnt], "!!! ");
atomic_inc(&n_rcu_torture_error);
WARN_ON_ONCE(1);
}
cnt += sprintf(&page[cnt], "Reader Pipe: ");
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -785,15 +904,34 @@ rcu_torture_shuffle(void *arg)
return 0;
}
/*
 * Kthread body that makes the rcutorture test "stutter", i.e. periodically
 * start and stop all of its threads.  It does so by toggling
 * stutter_pause_test: sleep stutter * HZ, set the flag, sleep again (unless
 * a stop has already been requested), clear the flag, and repeat until
 * kthread_should_stop() reports true.  The flag is always left at 0 on
 * exit.  arg is unused; the return value is always 0.
 *
 * NOTE(review): the flag is presumably consumed by rcu_stutter_wait() in
 * the other torture threads -- confirm against its definition.
 */
static int
rcu_torture_stutter(void *arg)
{
	VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
	for (;;) {
		/* Sleep one interval, then raise the pause flag. */
		schedule_timeout_interruptible(stutter * HZ);
		stutter_pause_test = 1;

		/* Hold the flag for one interval unless a stop is pending. */
		if (!kthread_should_stop())
			schedule_timeout_interruptible(stutter * HZ);
		stutter_pause_test = 0;

		/* Exit is only checked after the flag has been cleared. */
		if (kthread_should_stop())
			break;
	}
	VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
	return 0;
}
/*
 * Log the module parameters currently in effect with a single KERN_ALERT
 * printk, prefixed by torture_type and TORTURE_FLAG and labelled with the
 * caller-supplied tag string.
 *
 * NOTE(review): two versions of the format-string tail and of the argument
 * list appear below (the shorter one, ending at shuffle_interval, and the
 * longer one that also prints stutter and irqreader).  This looks like
 * old/new diff residue; only one pair can be kept for the call to be
 * well-formed -- confirm which before building.
 */
static inline void
rcu_torture_print_module_parms(char *tag)
{
printk(KERN_ALERT "%s" TORTURE_FLAG
"--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
"shuffle_interval = %d\n",
"shuffle_interval=%d stutter=%d irqreader=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval);
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader);
}
static void
@@ -802,6 +940,11 @@ rcu_torture_cleanup(void)
int i;
fullstop = 1;
if (stutter_task) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
kthread_stop(stutter_task);
}
stutter_task = NULL;
if (shuffler_task) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
kthread_stop(shuffler_task);
@@ -848,7 +991,9 @@ rcu_torture_cleanup(void)
stats_task = NULL;
/* Wait for all RCU callbacks to fire. */
rcu_barrier();
if (cur_ops->cb_barrier != NULL)
cur_ops->cb_barrier();
rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
@@ -868,7 +1013,7 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
&srcu_ops, &sched_ops, };
&srcu_ops, &sched_ops, &sched_ops_sync, };
/* Process args and tell the world that the torturer is on the job. */
for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -988,6 +1133,19 @@ rcu_torture_init(void)
goto unwind;
}
}
if (stutter < 0)
stutter = 0;
if (stutter) {
/* Create the stutter thread */
stutter_task = kthread_run(rcu_torture_stutter, NULL,
"rcu_torture_stutter");
if (IS_ERR(stutter_task)) {
firsterr = PTR_ERR(stutter_task);
VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
stutter_task = NULL;
goto unwind;
}
}
return 0;
unwind:

View File

@@ -83,6 +83,9 @@ extern int maps_protect;
extern int sysctl_stat_interval;
extern int latencytop_enabled;
extern int sysctl_nr_open_min, sysctl_nr_open_max;
#ifdef CONFIG_RCU_TORTURE_TEST
extern int rcutorture_runnable;
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
/* Constants used for minimum and maximum */
#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
@@ -820,6 +823,16 @@ static struct ctl_table kern_table[] = {
.child = key_sysctls,
},
#endif
#ifdef CONFIG_RCU_TORTURE_TEST
{
.ctl_name = CTL_UNNUMBERED,
.procname = "rcutorture_runnable",
.data = &rcutorture_runnable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt