Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
  cpu: Export cpu_up()
  rcu: Apply ACCESS_ONCE() to rcu_boost() return value
  Revert "rcu: Permit rt_mutex_unlock() with irqs disabled"
  docs: Additional LWN links to RCU API
  rcu: Augment rcu_batch_end tracing for idle and callback state
  rcu: Add rcutorture tests for srcu_read_lock_raw()
  rcu: Make rcutorture test for hotpluggability before offlining CPUs
  driver-core/cpu: Expose hotpluggability to the rest of the kernel
  rcu: Remove redundant rcu_cpu_stall_suppress declaration
  rcu: Adaptive dyntick-idle preparation
  rcu: Keep invoking callbacks if CPU otherwise idle
  rcu: Irq nesting is always 0 on rcu_enter_idle_common
  rcu: Don't check irq nesting from rcu idle entry/exit
  rcu: Permit dyntick-idle with callbacks pending
  rcu: Document same-context read-side constraints
  rcu: Identify dyntick-idle CPUs on first force_quiescent_state() pass
  rcu: Remove dynticks false positives and RCU failures
  rcu: Reduce latency of rcu_prepare_for_idle()
  rcu: Eliminate RCU_FAST_NO_HZ grace-period hang
  rcu: Avoid needlessly IPIing CPUs at GP end
  ...
This commit is contained in:
Linus Torvalds
2012-01-06 08:02:40 -08:00
58 changed files with 1520 additions and 415 deletions

View File

@ -380,6 +380,7 @@ out:
cpu_maps_update_done();
return err;
}
EXPORT_SYMBOL_GPL(cpu_up);
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

View File

@ -636,7 +636,7 @@ char kdb_task_state_char (const struct task_struct *p)
(p->exit_state & EXIT_ZOMBIE) ? 'Z' :
(p->exit_state & EXIT_DEAD) ? 'E' :
(p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
if (p->pid == 0) {
if (is_idle_task(p)) {
/* Idle task. Is it really idle, apart from the kdb
* interrupt? */
if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) {

View File

@ -5366,7 +5366,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
regs = get_irq_regs();
if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && current->pid == 0))
if (!(event->attr.exclude_idle && is_idle_task(current)))
if (perf_event_overflow(event, &data, regs))
ret = HRTIMER_NORESTART;
}

View File

@ -4181,6 +4181,28 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
printk("%s:%d %s!\n", file, line, s);
printk("\nother info that might help us debug this:\n\n");
printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
/*
* If a CPU is in the RCU-free window in idle (ie: in the section
* between rcu_idle_enter() and rcu_idle_exit(), then RCU
* considers that CPU to be in an "extended quiescent state",
* which means that RCU will be completely ignoring that CPU.
* Therefore, rcu_read_lock() and friends have absolutely no
* effect on a CPU running in that state. In other words, even if
* such an RCU-idle CPU has called rcu_read_lock(), RCU might well
* delete data structures out from under it. RCU really has no
* choice here: we need to keep an RCU-free window in idle where
* the CPU may possibly enter into low power mode. This way we can
* notice an extended quiescent state to other CPUs that started a grace
* period. Otherwise we would delay any grace period as long as we run
* in the idle task.
*
* So complain bitterly if someone does call rcu_read_lock(),
* rcu_read_lock_bh() and so on from extended quiescent states.
*/
if (rcu_is_cpu_idle())
printk("RCU used illegally from extended quiescent state!\n");
lockdep_print_held_locks(curr);
printk("\nstack backtrace:\n");
dump_stack();

View File

@ -29,6 +29,13 @@
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
/*
* Process-level increment to ->dynticks_nesting field. This allows for
* architectures that use half-interrupts and half-exceptions from
* process context.
*/
#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1)
/*
* debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
* by call_rcu() and rcu callback execution, and are therefore not part of the

View File

@ -93,6 +93,8 @@ int rcu_read_lock_bh_held(void)
{
if (!debug_lockdep_rcu_enabled())
return 1;
if (rcu_is_cpu_idle())
return 0;
return in_softirq() || irqs_disabled();
}
EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
@ -316,3 +318,13 @@ struct debug_obj_descr rcuhead_debug_descr = {
};
EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
{
trace_rcu_torture_read(rcutorturename, rhp);
}
EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
#else
#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
#endif

View File

@ -53,31 +53,137 @@ static void __call_rcu(struct rcu_head *head,
#include "rcutiny_plugin.h"
#ifdef CONFIG_NO_HZ
static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING;
static long rcu_dynticks_nesting = 1;
/*
* Enter dynticks-idle mode, which is an extended quiescent state
* if we have fully entered that mode (i.e., if the new value of
* dynticks_nesting is zero).
*/
void rcu_enter_nohz(void)
/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
static void rcu_idle_enter_common(long long oldval)
{
if (--rcu_dynticks_nesting == 0)
rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
if (rcu_dynticks_nesting) {
RCU_TRACE(trace_rcu_dyntick("--=",
oldval, rcu_dynticks_nesting));
return;
}
RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting));
if (!is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());
RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
oldval, rcu_dynticks_nesting));
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
}
/*
* Exit dynticks-idle mode, so that we are no longer in an extended
* quiescent state.
* Enter idle, which is an extended quiescent state if we have fully
* entered that mode (i.e., if the new value of dynticks_nesting is zero).
*/
void rcu_exit_nohz(void)
void rcu_idle_enter(void)
{
unsigned long flags;
long long oldval;
local_irq_save(flags);
oldval = rcu_dynticks_nesting;
rcu_dynticks_nesting = 0;
rcu_idle_enter_common(oldval);
local_irq_restore(flags);
}
/*
* Exit an interrupt handler towards idle.
*/
void rcu_irq_exit(void)
{
unsigned long flags;
long long oldval;
local_irq_save(flags);
oldval = rcu_dynticks_nesting;
rcu_dynticks_nesting--;
WARN_ON_ONCE(rcu_dynticks_nesting < 0);
rcu_idle_enter_common(oldval);
local_irq_restore(flags);
}
/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
static void rcu_idle_exit_common(long long oldval)
{
if (oldval) {
RCU_TRACE(trace_rcu_dyntick("++=",
oldval, rcu_dynticks_nesting));
return;
}
RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting));
if (!is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());
RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
oldval, rcu_dynticks_nesting));
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
}
/*
* Exit idle, so that we are no longer in an extended quiescent state.
*/
void rcu_idle_exit(void)
{
unsigned long flags;
long long oldval;
local_irq_save(flags);
oldval = rcu_dynticks_nesting;
WARN_ON_ONCE(oldval != 0);
rcu_dynticks_nesting = DYNTICK_TASK_NESTING;
rcu_idle_exit_common(oldval);
local_irq_restore(flags);
}
/*
* Enter an interrupt handler, moving away from idle.
*/
void rcu_irq_enter(void)
{
unsigned long flags;
long long oldval;
local_irq_save(flags);
oldval = rcu_dynticks_nesting;
rcu_dynticks_nesting++;
WARN_ON_ONCE(rcu_dynticks_nesting == 0);
rcu_idle_exit_common(oldval);
local_irq_restore(flags);
}
#endif /* #ifdef CONFIG_NO_HZ */
#ifdef CONFIG_PROVE_RCU
/*
* Test whether RCU thinks that the current CPU is idle.
*/
int rcu_is_cpu_idle(void)
{
return !rcu_dynticks_nesting;
}
EXPORT_SYMBOL(rcu_is_cpu_idle);
#endif /* #ifdef CONFIG_PROVE_RCU */
/*
* Test whether the current CPU was interrupted from idle. Nested
* interrupts don't count, we must be running at the first interrupt
* level.
*/
int rcu_is_cpu_rrupt_from_idle(void)
{
return rcu_dynticks_nesting <= 0;
}
/*
* Helper function for rcu_sched_qs() and rcu_bh_qs().
@ -126,14 +232,13 @@ void rcu_bh_qs(int cpu)
/*
* Check to see if the scheduling-clock interrupt came from an extended
* quiescent state, and, if so, tell RCU about it.
* quiescent state, and, if so, tell RCU about it. This function must
* be called from hardirq context. It is normally called from the
* scheduling-clock interrupt.
*/
void rcu_check_callbacks(int cpu, int user)
{
if (user ||
(idle_cpu(cpu) &&
!in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT)))
if (user || rcu_is_cpu_rrupt_from_idle())
rcu_sched_qs(cpu);
else if (!in_softirq())
rcu_bh_qs(cpu);
@ -154,7 +259,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
/* If no RCU callbacks ready to invoke, just return. */
if (&rcp->rcucblist == rcp->donetail) {
RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
RCU_TRACE(trace_rcu_batch_end(rcp->name, 0));
RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
ACCESS_ONCE(rcp->rcucblist),
need_resched(),
is_idle_task(current),
rcu_is_callbacks_kthread()));
return;
}
@ -183,7 +292,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
RCU_TRACE(cb_count++);
}
RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count));
RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
is_idle_task(current),
rcu_is_callbacks_kthread()));
}
static void rcu_process_callbacks(struct softirq_action *unused)

View File

@ -312,8 +312,8 @@ static int rcu_boost(void)
rt_mutex_lock(&mtx);
rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
return rcu_preempt_ctrlblk.boost_tasks != NULL ||
rcu_preempt_ctrlblk.exp_tasks != NULL;
return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL ||
ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL;
}
/*
@ -885,6 +885,19 @@ static void invoke_rcu_callbacks(void)
wake_up(&rcu_kthread_wq);
}
#ifdef CONFIG_RCU_TRACE
/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
*/
static bool rcu_is_callbacks_kthread(void)
{
return rcu_kthread_task == current;
}
#endif /* #ifdef CONFIG_RCU_TRACE */
/*
* This kthread invokes RCU callbacks whose grace periods have
* elapsed. It is awakened as needed, and takes the place of the
@ -938,6 +951,18 @@ void invoke_rcu_callbacks(void)
raise_softirq(RCU_SOFTIRQ);
}
#ifdef CONFIG_RCU_TRACE
/*
* There is no callback kthread, so this thread is never it.
*/
static bool rcu_is_callbacks_kthread(void)
{
return false;
}
#endif /* #ifdef CONFIG_RCU_TRACE */
void rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

View File

@ -61,9 +61,11 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
static int stutter = 5; /* Start/stop testing interval (in sec) */
static int irqreader = 1; /* RCU readers from irq (timers). */
static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */
static int fqs_holdoff = 0; /* Hold time within burst (us). */
static int fqs_duration; /* Duration of bursts (us), 0 to disable. */
static int fqs_holdoff; /* Hold time within burst (us). */
static int fqs_stutter = 3; /* Wait time between bursts (s). */
static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */
static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */
static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
@ -91,6 +93,10 @@ module_param(fqs_holdoff, int, 0444);
MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
module_param(fqs_stutter, int, 0444);
MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
module_param(onoff_interval, int, 0444);
MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
module_param(shutdown_secs, int, 0444);
MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), zero to disable.");
module_param(test_boost, int, 0444);
MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
module_param(test_boost_interval, int, 0444);
@ -119,6 +125,10 @@ static struct task_struct *shuffler_task;
static struct task_struct *stutter_task;
static struct task_struct *fqs_task;
static struct task_struct *boost_tasks[NR_CPUS];
static struct task_struct *shutdown_task;
#ifdef CONFIG_HOTPLUG_CPU
static struct task_struct *onoff_task;
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
#define RCU_TORTURE_PIPE_LEN 10
@ -149,6 +159,10 @@ static long n_rcu_torture_boost_rterror;
static long n_rcu_torture_boost_failure;
static long n_rcu_torture_boosts;
static long n_rcu_torture_timers;
static long n_offline_attempts;
static long n_offline_successes;
static long n_online_attempts;
static long n_online_successes;
static struct list_head rcu_torture_removed;
static cpumask_var_t shuffle_tmp_mask;
@ -160,6 +174,8 @@ static int stutter_pause_test;
#define RCUTORTURE_RUNNABLE_INIT 0
#endif
int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
module_param(rcutorture_runnable, int, 0444);
MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
#define rcu_can_boost() 1
@ -167,6 +183,7 @@ int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
#define rcu_can_boost() 0
#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
static unsigned long shutdown_time; /* jiffies to system shutdown. */
static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
@ -182,6 +199,9 @@ static int fullstop = FULLSTOP_RMMOD;
*/
static DEFINE_MUTEX(fullstop_mutex);
/* Forward reference. */
static void rcu_torture_cleanup(void);
/*
* Detect and respond to a system shutdown.
*/
@ -612,6 +632,30 @@ static struct rcu_torture_ops srcu_ops = {
.name = "srcu"
};
static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl)
{
return srcu_read_lock_raw(&srcu_ctl);
}
static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl)
{
srcu_read_unlock_raw(&srcu_ctl, idx);
}
static struct rcu_torture_ops srcu_raw_ops = {
.init = srcu_torture_init,
.cleanup = srcu_torture_cleanup,
.readlock = srcu_torture_read_lock_raw,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock_raw,
.completed = srcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu_raw"
};
static void srcu_torture_synchronize_expedited(void)
{
synchronize_srcu_expedited(&srcu_ctl);
@ -913,6 +957,18 @@ rcu_torture_fakewriter(void *arg)
return 0;
}
void rcutorture_trace_dump(void)
{
static atomic_t beenhere = ATOMIC_INIT(0);
if (atomic_read(&beenhere))
return;
if (atomic_xchg(&beenhere, 1) != 0)
return;
do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
ftrace_dump(DUMP_ALL);
}
/*
* RCU torture reader from timer handler. Dereferences rcu_torture_current,
* incrementing the corresponding element of the pipeline array. The
@ -934,6 +990,7 @@ static void rcu_torture_timer(unsigned long unused)
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(&srcu_ctl));
do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
if (p == NULL) {
/* Leave because rcu_torture_writer is not yet underway */
cur_ops->readunlock(idx);
@ -951,6 +1008,8 @@ static void rcu_torture_timer(unsigned long unused)
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
if (pipe_count > 1)
rcutorture_trace_dump();
__this_cpu_inc(rcu_torture_count[pipe_count]);
completed = cur_ops->completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
@ -994,6 +1053,7 @@ rcu_torture_reader(void *arg)
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(&srcu_ctl));
do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
if (p == NULL) {
/* Wait for rcu_torture_writer to get underway */
cur_ops->readunlock(idx);
@ -1009,6 +1069,8 @@ rcu_torture_reader(void *arg)
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
if (pipe_count > 1)
rcutorture_trace_dump();
__this_cpu_inc(rcu_torture_count[pipe_count]);
completed = cur_ops->completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
@ -1056,7 +1118,8 @@ rcu_torture_printk(char *page)
cnt += sprintf(&page[cnt],
"rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
"rtmbe: %d rtbke: %ld rtbre: %ld "
"rtbf: %ld rtb: %ld nt: %ld",
"rtbf: %ld rtb: %ld nt: %ld "
"onoff: %ld/%ld:%ld/%ld",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
@ -1068,7 +1131,11 @@ rcu_torture_printk(char *page)
n_rcu_torture_boost_rterror,
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
n_rcu_torture_timers);
n_rcu_torture_timers,
n_online_successes,
n_online_attempts,
n_offline_successes,
n_offline_attempts);
if (atomic_read(&n_rcu_torture_mberror) != 0 ||
n_rcu_torture_boost_ktrerror != 0 ||
n_rcu_torture_boost_rterror != 0 ||
@ -1232,12 +1299,14 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
"shuffle_interval=%d stutter=%d irqreader=%d "
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
"test_boost=%d/%d test_boost_interval=%d "
"test_boost_duration=%d\n",
"test_boost_duration=%d shutdown_secs=%d "
"onoff_interval=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
test_boost, cur_ops->can_boost,
test_boost_interval, test_boost_duration);
test_boost_interval, test_boost_duration, shutdown_secs,
onoff_interval);
}
static struct notifier_block rcutorture_shutdown_nb = {
@ -1287,6 +1356,131 @@ static int rcutorture_booster_init(int cpu)
return 0;
}
/*
* Cause the rcutorture test to shutdown the system after the test has
* run for the time specified by the shutdown_secs module parameter.
*/
static int
rcu_torture_shutdown(void *arg)
{
long delta;
unsigned long jiffies_snap;
VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started");
jiffies_snap = ACCESS_ONCE(jiffies);
while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
!kthread_should_stop()) {
delta = shutdown_time - jiffies_snap;
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
"rcu_torture_shutdown task: %lu "
"jiffies remaining\n",
torture_type, delta);
schedule_timeout_interruptible(delta);
jiffies_snap = ACCESS_ONCE(jiffies);
}
if (kthread_should_stop()) {
VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping");
return 0;
}
/* OK, shut down the system. */
VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system");
shutdown_task = NULL; /* Avoid self-kill deadlock. */
rcu_torture_cleanup(); /* Get the success/failure message. */
kernel_power_off(); /* Shut down the system. */
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* Execute random CPU-hotplug operations at the interval specified
* by the onoff_interval.
*/
static int
rcu_torture_onoff(void *arg)
{
int cpu;
int maxcpu = -1;
DEFINE_RCU_RANDOM(rand);
VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
for_each_online_cpu(cpu)
maxcpu = cpu;
WARN_ON(maxcpu < 0);
while (!kthread_should_stop()) {
cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
"rcu_torture_onoff task: offlining %d\n",
torture_type, cpu);
n_offline_attempts++;
if (cpu_down(cpu) == 0) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
"rcu_torture_onoff task: "
"offlined %d\n",
torture_type, cpu);
n_offline_successes++;
}
} else if (cpu_is_hotpluggable(cpu)) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
"rcu_torture_onoff task: onlining %d\n",
torture_type, cpu);
n_online_attempts++;
if (cpu_up(cpu) == 0) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
"rcu_torture_onoff task: "
"onlined %d\n",
torture_type, cpu);
n_online_successes++;
}
}
schedule_timeout_interruptible(onoff_interval * HZ);
}
VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping");
return 0;
}
static int
rcu_torture_onoff_init(void)
{
if (onoff_interval <= 0)
return 0;
onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff");
if (IS_ERR(onoff_task)) {
onoff_task = NULL;
return PTR_ERR(onoff_task);
}
return 0;
}
static void rcu_torture_onoff_cleanup(void)
{
if (onoff_task == NULL)
return;
VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
kthread_stop(onoff_task);
}
#else /* #ifdef CONFIG_HOTPLUG_CPU */
static void
rcu_torture_onoff_init(void)
{
}
static void rcu_torture_onoff_cleanup(void)
{
}
#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
static int rcutorture_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@ -1391,6 +1585,11 @@ rcu_torture_cleanup(void)
for_each_possible_cpu(i)
rcutorture_booster_cleanup(i);
}
if (shutdown_task != NULL) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
kthread_stop(shutdown_task);
}
rcu_torture_onoff_cleanup();
/* Wait for all RCU callbacks to fire. */
@ -1416,7 +1615,7 @@ rcu_torture_init(void)
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
&rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
&srcu_ops, &srcu_expedited_ops,
&srcu_ops, &srcu_raw_ops, &srcu_expedited_ops,
&sched_ops, &sched_sync_ops, &sched_expedited_ops, };
mutex_lock(&fullstop_mutex);
@ -1607,6 +1806,18 @@ rcu_torture_init(void)
}
}
}
if (shutdown_secs > 0) {
shutdown_time = jiffies + shutdown_secs * HZ;
shutdown_task = kthread_run(rcu_torture_shutdown, NULL,
"rcu_torture_shutdown");
if (IS_ERR(shutdown_task)) {
firsterr = PTR_ERR(shutdown_task);
VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown");
shutdown_task = NULL;
goto unwind;
}
}
rcu_torture_onoff_init();
register_reboot_notifier(&rcutorture_shutdown_nb);
rcutorture_record_test_transition();
mutex_unlock(&fullstop_mutex);

View File

@ -69,7 +69,7 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
NUM_RCU_LVL_3, \
NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
}, \
.signaled = RCU_GP_IDLE, \
.fqs_state = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = 1,
.dynticks_nesting = DYNTICK_TASK_NESTING,
.dynticks = ATOMIC_INIT(1),
};
#endif /* #ifdef CONFIG_NO_HZ */
static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
static int qhimark = 10000; /* If this many pending, ignore blimit. */
@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
return 1;
}
/* If preemptible RCU, no point in sending reschedule IPI. */
if (rdp->preemptible)
return 0;
/* The CPU is online, so send it a reschedule IPI. */
/*
* The CPU is online, so send it a reschedule IPI. This forces
* it through the scheduler, and (inefficiently) also handles cases
* where idle loops fail to inform RCU about the CPU being idle.
*/
if (rdp->cpu != smp_processor_id())
smp_send_reschedule(rdp->cpu);
else
@ -343,59 +341,181 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
#endif /* #ifdef CONFIG_SMP */
#ifdef CONFIG_NO_HZ
/**
* rcu_enter_nohz - inform RCU that current CPU is entering nohz
/*
* rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
*
* Enter nohz mode, in other words, -leave- the mode in which RCU
* read-side critical sections can occur. (Though RCU read-side
* critical sections can occur in irq handlers in nohz mode, a possibility
* handled by rcu_irq_enter() and rcu_irq_exit()).
* If the new value of the ->dynticks_nesting counter now is zero,
* we really have entered idle, and must do the appropriate accounting.
* The caller must have disabled interrupts.
*/
void rcu_enter_nohz(void)
static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
{
unsigned long flags;
struct rcu_dynticks *rdtp;
trace_rcu_dyntick("Start", oldval, 0);
if (!is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
if (--rdtp->dynticks_nesting) {
local_irq_restore(flags);
return;
trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
trace_rcu_dyntick("Start");
rcu_prepare_for_idle(smp_processor_id());
/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
smp_mb__before_atomic_inc(); /* See above. */
atomic_inc(&rdtp->dynticks);
smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
local_irq_restore(flags);
}
/*
* rcu_exit_nohz - inform RCU that current CPU is leaving nohz
/**
* rcu_idle_enter - inform RCU that current CPU is entering idle
*
* Exit nohz mode, in other words, -enter- the mode in which RCU
* read-side critical sections normally occur.
* Enter idle mode, in other words, -leave- the mode in which RCU
* read-side critical sections can occur. (Though RCU read-side
* critical sections can occur in irq handlers in idle, a possibility
* handled by irq_enter() and irq_exit().)
*
* We crowbar the ->dynticks_nesting field to zero to allow for
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
void rcu_exit_nohz(void)
void rcu_idle_enter(void)
{
unsigned long flags;
long long oldval;
struct rcu_dynticks *rdtp;
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
if (rdtp->dynticks_nesting++) {
local_irq_restore(flags);
return;
}
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting = 0;
rcu_idle_enter_common(rdtp, oldval);
local_irq_restore(flags);
}
/**
* rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
*
* Exit from an interrupt handler, which might possibly result in entering
* idle mode, in other words, leaving the mode in which read-side critical
* sections can occur.
*
* This code assumes that the idle loop never does anything that might
* result in unbalanced calls to irq_enter() and irq_exit(). If your
* architecture violates this assumption, RCU will give you what you
* deserve, good and hard. But very infrequently and irreproducibly.
*
* Use things like work queues to work around this limitation.
*
* You have been warned.
*/
void rcu_irq_exit(void)
{
unsigned long flags;
long long oldval;
struct rcu_dynticks *rdtp;
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting--;
WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
if (rdtp->dynticks_nesting)
trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
else
rcu_idle_enter_common(rdtp, oldval);
local_irq_restore(flags);
}
/*
* rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
*
* If the new value of the ->dynticks_nesting counter was previously zero,
* we really have exited idle, and must do the appropriate accounting.
* The caller must have disabled interrupts.
*/
static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
{
smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
atomic_inc(&rdtp->dynticks);
/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
smp_mb__after_atomic_inc(); /* See above. */
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
trace_rcu_dyntick("End");
rcu_cleanup_after_idle(smp_processor_id());
trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
if (!is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());
trace_rcu_dyntick("Error on exit: not idle task",
oldval, rdtp->dynticks_nesting);
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
}
/**
* rcu_idle_exit - inform RCU that current CPU is leaving idle
*
* Exit idle mode, in other words, -enter- the mode in which RCU
* read-side critical sections can occur.
*
* We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to
* allow for the possibility of usermode upcalls messing up our count
* of interrupt nesting level during the busy period that is just
* now starting.
*/
void rcu_idle_exit(void)
{
unsigned long flags;
struct rcu_dynticks *rdtp;
long long oldval;
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
oldval = rdtp->dynticks_nesting;
WARN_ON_ONCE(oldval != 0);
rdtp->dynticks_nesting = DYNTICK_TASK_NESTING;
rcu_idle_exit_common(rdtp, oldval);
local_irq_restore(flags);
}
/**
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
*
* Enter an interrupt handler, which might possibly result in exiting
* idle mode, in other words, entering the mode in which read-side critical
* sections can occur.
*
* Note that the Linux kernel is fully capable of entering an interrupt
* handler that it never exits, for example when doing upcalls to
* user mode! This code assumes that the idle loop never does upcalls to
* user mode. If your architecture does do upcalls from the idle loop (or
* does anything else that results in unbalanced calls to the irq_enter()
* and irq_exit() functions), RCU will give you what you deserve, good
* and hard. But very infrequently and irreproducibly.
*
* Use things like work queues to work around this limitation.
*
* You have been warned.
*/
void rcu_irq_enter(void)
{
unsigned long flags;
struct rcu_dynticks *rdtp;
long long oldval;
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
if (oldval)
trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
else
rcu_idle_exit_common(rdtp, oldval);
local_irq_restore(flags);
}
@ -442,27 +562,37 @@ void rcu_nmi_exit(void)
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
}
/**
* rcu_irq_enter - inform RCU of entry to hard irq context
*
* If the CPU was idle with dynamic ticks active, this updates the
* rdtp->dynticks to let the RCU handling know that the CPU is active.
*/
void rcu_irq_enter(void)
{
rcu_exit_nohz();
}
#ifdef CONFIG_PROVE_RCU
/**
* rcu_irq_exit - inform RCU of exit from hard irq context
* rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
*
* If the CPU was idle with dynamic ticks active, update the rdp->dynticks
* to put let the RCU handling be aware that the CPU is going back to idle
* with no ticks.
* If the current CPU is in its idle loop and is neither in an interrupt
* or NMI handler, return true.
*/
void rcu_irq_exit(void)
int rcu_is_cpu_idle(void)
{
rcu_enter_nohz();
int ret;
preempt_disable();
ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
preempt_enable();
return ret;
}
EXPORT_SYMBOL(rcu_is_cpu_idle);
#endif /* #ifdef CONFIG_PROVE_RCU */
/**
* rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
*
* If the current CPU is idle or running at a first-level (not nested)
* interrupt from idle, return true. The caller must have at least
* disabled preemption.
*/
int rcu_is_cpu_rrupt_from_idle(void)
{
return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
}
#ifdef CONFIG_SMP
@ -475,7 +605,7 @@ void rcu_irq_exit(void)
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
return 0;
return (rdp->dynticks_snap & 0x1) == 0;
}
/*
@ -512,26 +642,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
#endif /* #ifdef CONFIG_SMP */
#else /* #ifdef CONFIG_NO_HZ */
#ifdef CONFIG_SMP
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
return 0;
}
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
{
return rcu_implicit_offline_qs(rdp);
}
#endif /* #ifdef CONFIG_SMP */
#endif /* #else #ifdef CONFIG_NO_HZ */
int rcu_cpu_stall_suppress __read_mostly;
static void record_gp_stall_check_time(struct rcu_state *rsp)
{
rsp->gp_start = jiffies;
@ -866,8 +976,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
/* Advance to a new grace period and initialize state. */
rsp->gpnum++;
trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
record_gp_stall_check_time(rsp);
@ -877,7 +987,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
rcu_start_gp_per_cpu(rsp, rnp, rdp);
rcu_preempt_boost_start_gp(rnp);
trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
@ -927,7 +1037,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rnp = rcu_get_root(rsp);
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
@ -991,7 +1101,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
trace_rcu_grace_period(rsp->name, rsp->completed, "end");
rsp->signaled = RCU_GP_IDLE;
rsp->fqs_state = RCU_GP_IDLE;
rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
}
@ -1221,7 +1331,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
else
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
rcu_report_exp_rnp(rsp, rnp);
rcu_report_exp_rnp(rsp, rnp, true);
rcu_node_kthread_setaffinity(rnp, -1);
}
@ -1263,7 +1373,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
/* If no callbacks are ready, just return.*/
if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
trace_rcu_batch_start(rsp->name, 0, 0);
trace_rcu_batch_end(rsp->name, 0);
trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
need_resched(), is_idle_task(current),
rcu_is_callbacks_kthread());
return;
}
@ -1291,12 +1403,17 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
debug_rcu_head_unqueue(list);
__rcu_reclaim(rsp->name, list);
list = next;
if (++count >= bl)
/* Stop only if limit reached and CPU has something to do. */
if (++count >= bl &&
(need_resched() ||
(!is_idle_task(current) && !rcu_is_callbacks_kthread())))
break;
}
local_irq_save(flags);
trace_rcu_batch_end(rsp->name, count);
trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
is_idle_task(current),
rcu_is_callbacks_kthread());
/* Update count, and requeue any remaining callbacks. */
rdp->qlen -= count;
@ -1334,16 +1451,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
* (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
* Also schedule RCU core processing.
*
* This function must be called with hardirqs disabled. It is normally
* This function must be called from hardirq context. It is normally
* invoked from the scheduling-clock interrupt. If rcu_pending returns
* false, there is no point in invoking rcu_check_callbacks().
*/
void rcu_check_callbacks(int cpu, int user)
{
trace_rcu_utilization("Start scheduler-tick");
if (user ||
(idle_cpu(cpu) && rcu_scheduler_active &&
!in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
if (user || rcu_is_cpu_rrupt_from_idle()) {
/*
* Get here if this CPU took its interrupt from user
@ -1457,7 +1572,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
goto unlock_fqs_ret; /* no GP in progress, time updated. */
}
rsp->fqs_active = 1;
switch (rsp->signaled) {
switch (rsp->fqs_state) {
case RCU_GP_IDLE:
case RCU_GP_INIT:
@ -1473,7 +1588,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
force_qs_rnp(rsp, dyntick_save_progress_counter);
raw_spin_lock(&rnp->lock); /* irqs already disabled */
if (rcu_gp_in_progress(rsp))
rsp->signaled = RCU_FORCE_QS;
rsp->fqs_state = RCU_FORCE_QS;
break;
case RCU_FORCE_QS:
@ -1812,7 +1927,7 @@ static int rcu_pending(int cpu)
* by the current CPU, even if none need be done immediately, returning
* 1 if so.
*/
static int rcu_needs_cpu_quick_check(int cpu)
static int rcu_cpu_has_callbacks(int cpu)
{
/* RCU callbacks either ready or pending? */
return per_cpu(rcu_sched_data, cpu).nxtlist ||
@ -1913,9 +2028,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
rdp->qlen = 0;
#ifdef CONFIG_NO_HZ
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
#endif /* #ifdef CONFIG_NO_HZ */
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
rdp->cpu = cpu;
rdp->rsp = rsp;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@ -1942,6 +2057,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
atomic_set(&rdp->dynticks->dynticks,
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
rcu_prepare_for_idle_init(cpu);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
@ -2023,6 +2142,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
rcu_send_cbs_to_online(&rcu_bh_state);
rcu_send_cbs_to_online(&rcu_sched_state);
rcu_preempt_send_cbs_to_online();
rcu_cleanup_after_idle(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:

View File

@ -84,9 +84,10 @@
* Dynticks per-CPU state.
*/
struct rcu_dynticks {
int dynticks_nesting; /* Track irq/process nesting level. */
int dynticks_nmi_nesting; /* Track NMI nesting level. */
atomic_t dynticks; /* Even value for dynticks-idle, else odd. */
long long dynticks_nesting; /* Track irq/process nesting level. */
/* Process level is worth LLONG_MAX/2. */
int dynticks_nmi_nesting; /* Track NMI nesting level. */
atomic_t dynticks; /* Even value for idle, else odd. */
};
/* RCU's kthread states for tracing. */
@ -274,16 +275,12 @@ struct rcu_data {
/* did other CPU force QS recently? */
long blimit; /* Upper limit on a processed batch */
#ifdef CONFIG_NO_HZ
/* 3) dynticks interface. */
struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
int dynticks_snap; /* Per-GP tracking for dynticks. */
#endif /* #ifdef CONFIG_NO_HZ */
/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
#ifdef CONFIG_NO_HZ
unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
#endif /* #ifdef CONFIG_NO_HZ */
unsigned long offline_fqs; /* Kicked due to being offline. */
unsigned long resched_ipi; /* Sent a resched IPI. */
@ -302,16 +299,12 @@ struct rcu_data {
struct rcu_state *rsp;
};
/* Values for signaled field in struct rcu_state. */
/* Values for fqs_state field in struct rcu_state. */
#define RCU_GP_IDLE 0 /* No grace period in progress. */
#define RCU_GP_INIT 1 /* Grace period being initialized. */
#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
#ifdef CONFIG_NO_HZ
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
#else /* #ifdef CONFIG_NO_HZ */
#define RCU_SIGNAL_INIT RCU_FORCE_QS
#endif /* #else #ifdef CONFIG_NO_HZ */
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
@ -361,7 +354,7 @@ struct rcu_state {
/* The following fields are guarded by the root rcu_node's lock. */
u8 signaled ____cacheline_internodealigned_in_smp;
u8 fqs_state ____cacheline_internodealigned_in_smp;
/* Force QS state. */
u8 fqs_active; /* force_quiescent_state() */
/* is running. */
@ -451,7 +444,8 @@ static void rcu_preempt_check_callbacks(int cpu);
static void rcu_preempt_process_callbacks(void);
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake);
#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
static int rcu_preempt_pending(int cpu);
static int rcu_preempt_needs_cpu(int cpu);
@ -461,6 +455,7 @@ static void __init __rcu_init_preempt(void);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
#ifdef CONFIG_RCU_BOOST
static void rcu_preempt_do_callbacks(void);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
@ -473,5 +468,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg);
#endif /* #ifdef CONFIG_RCU_BOOST */
static void rcu_cpu_kthread_setrt(int cpu, int to_rt);
static void __cpuinit rcu_prepare_kthreads(int cpu);
static void rcu_prepare_for_idle_init(int cpu);
static void rcu_cleanup_after_idle(int cpu);
static void rcu_prepare_for_idle(int cpu);
#endif /* #ifndef RCU_TREE_NONCORE */

View File

@ -312,6 +312,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
{
int empty;
int empty_exp;
int empty_exp_now;
unsigned long flags;
struct list_head *np;
#ifdef CONFIG_RCU_BOOST
@ -382,8 +383,10 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
/*
* If this was the last task on the current list, and if
* we aren't waiting on any CPUs, report the quiescent state.
* Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
* Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
* so we must take a snapshot of the expedited state.
*/
empty_exp_now = !rcu_preempted_readers_exp(rnp);
if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
trace_rcu_quiescent_state_report("preempt_rcu",
rnp->gpnum,
@ -406,8 +409,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
* If this was the last task on the expedited lists,
* then we need to report up the rcu_node hierarchy.
*/
if (!empty_exp && !rcu_preempted_readers_exp(rnp))
rcu_report_exp_rnp(&rcu_preempt_state, rnp);
if (!empty_exp && empty_exp_now)
rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
} else {
local_irq_restore(flags);
}
@ -729,9 +732,13 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*
* Most callers will set the "wake" flag, but the task initiating the
* expedited grace period need not wake itself.
*
* Caller must hold sync_rcu_preempt_exp_mutex.
*/
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake)
{
unsigned long flags;
unsigned long mask;
@ -744,7 +751,8 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
}
if (rnp->parent == NULL) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
wake_up(&sync_rcu_preempt_exp_wq);
if (wake)
wake_up(&sync_rcu_preempt_exp_wq);
break;
}
mask = rnp->grpmask;
@ -777,7 +785,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
must_wait = 1;
}
if (!must_wait)
rcu_report_exp_rnp(rsp, rnp);
rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
}
/*
@ -1069,9 +1077,9 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
* report on tasks preempted in RCU read-side critical sections during
* expedited RCU grace periods.
*/
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake)
{
return;
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
@ -1157,8 +1165,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
static struct lock_class_key rcu_boost_class;
/*
* Carry out RCU priority boosting on the task indicated by ->exp_tasks
* or ->boost_tasks, advancing the pointer to the next task in the
@ -1221,15 +1227,13 @@ static int rcu_boost(struct rcu_node *rnp)
*/
t = container_of(tb, struct task_struct, rcu_node_entry);
rt_mutex_init_proxy_locked(&mtx, t);
/* Avoid lockdep false positives. This rt_mutex is its own thing. */
lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class,
"rcu_boost_mutex");
t->rcu_boost_mutex = &mtx;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
ACCESS_ONCE(rnp->boost_tasks) != NULL;
}
/*
@ -1328,6 +1332,15 @@ static void invoke_rcu_callbacks_kthread(void)
local_irq_restore(flags);
}
/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
*/
static bool rcu_is_callbacks_kthread(void)
{
return __get_cpu_var(rcu_cpu_kthread_task) == current;
}
/*
* Set the affinity of the boost kthread. The CPU-hotplug locks are
* held, so no one should be messing with the existence of the boost
@ -1772,6 +1785,11 @@ static void invoke_rcu_callbacks_kthread(void)
WARN_ON_ONCE(1);
}
static bool rcu_is_callbacks_kthread(void)
{
return false;
}
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}
@ -1907,7 +1925,7 @@ void synchronize_sched_expedited(void)
* grace period works for us.
*/
get_online_cpus();
snap = atomic_read(&sync_sched_expedited_started) - 1;
snap = atomic_read(&sync_sched_expedited_started);
smp_mb(); /* ensure read is before try_stop_cpus(). */
}
@ -1939,88 +1957,243 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
* 1 if so. This function is part of the RCU implementation; it is -not-
* an exported member of the RCU API.
*
* Because we have preemptible RCU, just check whether this CPU needs
* any flavor of RCU. Do not chew up lots of CPU cycles with preemption
* disabled in a most-likely vain attempt to cause RCU not to need this CPU.
* Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
* any flavor of RCU.
*/
int rcu_needs_cpu(int cpu)
{
return rcu_needs_cpu_quick_check(cpu);
return rcu_cpu_has_callbacks(cpu);
}
/*
* Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
*/
static void rcu_prepare_for_idle_init(int cpu)
{
}
/*
* Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
* after it.
*/
static void rcu_cleanup_after_idle(int cpu)
{
}
/*
* Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y,
* is nothing.
*/
static void rcu_prepare_for_idle(int cpu)
{
}
#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
#define RCU_NEEDS_CPU_FLUSHES 5
/*
* This code is invoked when a CPU goes idle, at which point we want
* to have the CPU do everything required for RCU so that it can enter
* the energy-efficient dyntick-idle mode. This is handled by a
* state machine implemented by rcu_prepare_for_idle() below.
*
* The following three proprocessor symbols control this state machine:
*
* RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
* to satisfy RCU. Beyond this point, it is better to incur a periodic
* scheduling-clock interrupt than to loop through the state machine
* at full power.
* RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
* optional if RCU does not need anything immediately from this
* CPU, even if this CPU still has RCU callbacks queued. The first
* times through the state machine are mandatory: we need to give
* the state machine a chance to communicate a quiescent state
* to the RCU core.
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
* to sleep in dyntick-idle mode with RCU callbacks pending. This
* is sized to be roughly one RCU grace period. Those energy-efficiency
* benchmarkers who might otherwise be tempted to set this to a large
* number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
* system. And if you are -that- concerned about energy efficiency,
* just power the system down and be done with it!
*
* The values below work well in practice. If future workloads require
* adjustment, they can be converted into kernel config parameters, though
* making the state machine smarter might be a better option.
*/
#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
static DEFINE_PER_CPU(int, rcu_dyntick_drain);
static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer);
static ktime_t rcu_idle_gp_wait;
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
* 1 if so. This function is part of the RCU implementation; it is -not-
* an exported member of the RCU API.
* Allow the CPU to enter dyntick-idle mode if either: (1) There are no
* callbacks on this CPU, (2) this CPU has not yet attempted to enter
* dyntick-idle mode, or (3) this CPU is in the process of attempting to
* enter dyntick-idle mode. Otherwise, if we have recently tried and failed
* to enter dyntick-idle mode, we refuse to try to enter it. After all,
* it is better to incur scheduling-clock interrupts than to spin
* continuously for the same time duration!
*/
int rcu_needs_cpu(int cpu)
{
/* If no callbacks, RCU doesn't need the CPU. */
if (!rcu_cpu_has_callbacks(cpu))
return 0;
/* Otherwise, RCU needs the CPU only if it recently tried and failed. */
return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies;
}
/*
* Timer handler used to force CPU to start pushing its remaining RCU
* callbacks in the case where it entered dyntick-idle mode with callbacks
* pending. The hander doesn't really need to do anything because the
* real work is done upon re-entry to idle, or by the next scheduling-clock
* interrupt should idle not be re-entered.
*/
static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp)
{
trace_rcu_prep_idle("Timer");
return HRTIMER_NORESTART;
}
/*
* Initialize the timer used to pull CPUs out of dyntick-idle mode.
*/
static void rcu_prepare_for_idle_init(int cpu)
{
static int firsttime = 1;
struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);
hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtp->function = rcu_idle_gp_timer_func;
if (firsttime) {
unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY);
rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000);
firsttime = 0;
}
}
/*
* Clean up for exit from idle. Because we are exiting from idle, there
* is no longer any point to rcu_idle_gp_timer, so cancel it. This will
* do nothing if this timer is not active, so just cancel it unconditionally.
*/
static void rcu_cleanup_after_idle(int cpu)
{
hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu));
}
/*
* Check to see if any RCU-related work can be done by the current CPU,
* and if so, schedule a softirq to get it done. This function is part
* of the RCU implementation; it is -not- an exported member of the RCU API.
*
* Because we are not supporting preemptible RCU, attempt to accelerate
* any current grace periods so that RCU no longer needs this CPU, but
* only if all other CPUs are already in dynticks-idle mode. This will
* allow the CPU cores to be powered down immediately, as opposed to after
* waiting many milliseconds for grace periods to elapse.
* The idea is for the current CPU to clear out all work required by the
* RCU core for the current grace period, so that this CPU can be permitted
* to enter dyntick-idle mode. In some cases, it will need to be awakened
* at the end of the grace period by whatever CPU ends the grace period.
* This allows CPUs to go dyntick-idle more quickly, and to reduce the
* number of wakeups by a modest integer factor.
*
* Because it is not legal to invoke rcu_process_callbacks() with irqs
* disabled, we do one pass of force_quiescent_state(), then do a
* invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
* later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
*
* The caller must have disabled interrupts.
*/
int rcu_needs_cpu(int cpu)
static void rcu_prepare_for_idle(int cpu)
{
int c = 0;
int snap;
int thatcpu;
unsigned long flags;
/* Check for being in the holdoff period. */
if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
return rcu_needs_cpu_quick_check(cpu);
local_irq_save(flags);
/* Don't bother unless we are the last non-dyntick-idle CPU. */
for_each_online_cpu(thatcpu) {
if (thatcpu == cpu)
continue;
snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
thatcpu).dynticks);
smp_mb(); /* Order sampling of snap with end of grace period. */
if ((snap & 0x1) != 0) {
per_cpu(rcu_dyntick_drain, cpu) = 0;
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
return rcu_needs_cpu_quick_check(cpu);
}
/*
* If there are no callbacks on this CPU, enter dyntick-idle mode.
* Also reset state to avoid prejudicing later attempts.
*/
if (!rcu_cpu_has_callbacks(cpu)) {
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
per_cpu(rcu_dyntick_drain, cpu) = 0;
local_irq_restore(flags);
trace_rcu_prep_idle("No callbacks");
return;
}
/*
* If in holdoff mode, just return. We will presumably have
* refrained from disabling the scheduling-clock tick.
*/
if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
local_irq_restore(flags);
trace_rcu_prep_idle("In holdoff");
return;
}
/* Check and update the rcu_dyntick_drain sequencing. */
if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
/* First time through, initialize the counter. */
per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
} else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
!rcu_pending(cpu)) {
/* Can we go dyntick-idle despite still having callbacks? */
trace_rcu_prep_idle("Dyntick with callbacks");
per_cpu(rcu_dyntick_drain, cpu) = 0;
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
rcu_idle_gp_wait, HRTIMER_MODE_REL);
return; /* Nothing more to do immediately. */
} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
/* We have hit the limit, so time to give up. */
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
return rcu_needs_cpu_quick_check(cpu);
local_irq_restore(flags);
trace_rcu_prep_idle("Begin holdoff");
invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
return;
}
/* Do one step pushing remaining RCU callbacks through. */
/*
* Do one step of pushing the remaining RCU callbacks through
* the RCU core state machine.
*/
#ifdef CONFIG_TREE_PREEMPT_RCU
if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
local_irq_restore(flags);
rcu_preempt_qs(cpu);
force_quiescent_state(&rcu_preempt_state, 0);
local_irq_save(flags);
}
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
if (per_cpu(rcu_sched_data, cpu).nxtlist) {
local_irq_restore(flags);
rcu_sched_qs(cpu);
force_quiescent_state(&rcu_sched_state, 0);
c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
local_irq_save(flags);
}
if (per_cpu(rcu_bh_data, cpu).nxtlist) {
local_irq_restore(flags);
rcu_bh_qs(cpu);
force_quiescent_state(&rcu_bh_state, 0);
c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
local_irq_save(flags);
}
/* If RCU callbacks are still pending, RCU still needs this CPU. */
if (c)
/*
* If RCU callbacks are still pending, RCU still needs this CPU.
* So try forcing the callbacks through the grace period.
*/
if (rcu_cpu_has_callbacks(cpu)) {
local_irq_restore(flags);
trace_rcu_prep_idle("More callbacks");
invoke_rcu_core();
return c;
} else {
local_irq_restore(flags);
trace_rcu_prep_idle("Callbacks drained");
}
}
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */

View File

@ -67,13 +67,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
rdp->completed, rdp->gpnum,
rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
rdp->qs_pending);
#ifdef CONFIG_NO_HZ
seq_printf(m, " dt=%d/%d/%d df=%lu",
seq_printf(m, " dt=%d/%llx/%d df=%lu",
atomic_read(&rdp->dynticks->dynticks),
rdp->dynticks->dynticks_nesting,
rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
#endif /* #ifdef CONFIG_NO_HZ */
seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
seq_printf(m, " ql=%ld qs=%c%c%c%c",
rdp->qlen,
@ -141,13 +139,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
rdp->completed, rdp->gpnum,
rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
rdp->qs_pending);
#ifdef CONFIG_NO_HZ
seq_printf(m, ",%d,%d,%d,%lu",
seq_printf(m, ",%d,%llx,%d,%lu",
atomic_read(&rdp->dynticks->dynticks),
rdp->dynticks->dynticks_nesting,
rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
#endif /* #ifdef CONFIG_NO_HZ */
seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen,
".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
@ -171,9 +167,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
static int show_rcudata_csv(struct seq_file *m, void *unused)
{
seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
#ifdef CONFIG_NO_HZ
seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
#endif /* #ifdef CONFIG_NO_HZ */
seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\"");
#ifdef CONFIG_RCU_BOOST
seq_puts(m, "\"kt\",\"ktl\"");
@ -278,7 +272,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
gpnum = rsp->gpnum;
seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
"nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
rsp->completed, gpnum, rsp->signaled,
rsp->completed, gpnum, rsp->fqs_state,
(long)(rsp->jiffies_force_qs - jiffies),
(int)(jiffies & 0xffff),
rsp->n_force_qs, rsp->n_force_qs_ngp,

View File

@ -579,7 +579,6 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct rt_mutex_waiter *waiter)
{
int ret = 0;
int was_disabled;
for (;;) {
/* Try to acquire the lock: */
@ -602,17 +601,10 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
raw_spin_unlock(&lock->wait_lock);
was_disabled = irqs_disabled();
if (was_disabled)
local_irq_enable();
debug_rt_mutex_print_deadlock(waiter);
schedule_rt_mutex(lock);
if (was_disabled)
local_irq_disable();
raw_spin_lock(&lock->wait_lock);
set_current_state(state);
}

View File

@ -347,12 +347,12 @@ void irq_exit(void)
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
rcu_irq_exit();
#ifdef CONFIG_NO_HZ
/* Make sure that timer wheel updates are propagated */
if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
tick_nohz_stop_sched_tick(0);
tick_nohz_irq_exit();
#endif
rcu_irq_exit();
preempt_enable_no_resched();
}

View File

@ -275,42 +275,17 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
/**
* tick_nohz_stop_sched_tick - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
* Called either from the idle loop or from irq_exit() when an idle period was
* just interrupted by an interrupt which did not cause a reschedule.
*/
void tick_nohz_stop_sched_tick(int inidle)
static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
struct tick_sched *ts;
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
u64 time_delta;
int cpu;
local_irq_save(flags);
cpu = smp_processor_id();
ts = &per_cpu(tick_cpu_sched, cpu);
/*
* Call to tick_nohz_start_idle stops the last_update_time from being
* updated. Thus, it must not be called in the event we are called from
* irq_exit() with the prior state different than idle.
*/
if (!inidle && !ts->inidle)
goto end;
/*
* Set ts->inidle unconditionally. Even if the system did not
* switch to NOHZ mode the cpu frequency governers rely on the
* update of the idle time accounting in tick_nohz_start_idle().
*/
ts->inidle = 1;
now = tick_nohz_start_idle(cpu, ts);
/*
@ -326,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle)
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
goto end;
return;
if (need_resched())
goto end;
return;
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
static int ratelimit;
@ -339,7 +314,7 @@ void tick_nohz_stop_sched_tick(int inidle)
(unsigned int) local_softirq_pending());
ratelimit++;
}
goto end;
return;
}
ts->idle_calls++;
@ -434,7 +409,6 @@ void tick_nohz_stop_sched_tick(int inidle)
ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
ts->idle_jiffies = last_jiffies;
rcu_enter_nohz();
}
ts->idle_sleeps++;
@ -472,8 +446,56 @@ out:
ts->next_jiffies = next_jiffies;
ts->last_jiffies = last_jiffies;
ts->sleep_length = ktime_sub(dev->next_event, now);
end:
local_irq_restore(flags);
}
/**
* tick_nohz_idle_enter - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
* Called when we start the idle loop.
*
* The arch is responsible of calling:
*
* - rcu_idle_enter() after its last use of RCU before the CPU is put
* to sleep.
* - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
*/
void tick_nohz_idle_enter(void)
{
struct tick_sched *ts;
WARN_ON_ONCE(irqs_disabled());
local_irq_disable();
ts = &__get_cpu_var(tick_cpu_sched);
/*
* set ts->inidle unconditionally. even if the system did not
* switch to nohz mode the cpu frequency governers rely on the
* update of the idle time accounting in tick_nohz_start_idle().
*/
ts->inidle = 1;
tick_nohz_stop_sched_tick(ts);
local_irq_enable();
}
/**
* tick_nohz_irq_exit - update next tick event from interrupt exit
*
* When an interrupt fires while we are idle and it doesn't cause
* a reschedule, it may still add, modify or delete a timer, enqueue
* an RCU callback, etc...
* So we need to re-calculate and reprogram the next tick event.
*/
void tick_nohz_irq_exit(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
if (!ts->inidle)
return;
tick_nohz_stop_sched_tick(ts);
}
/**
@ -515,11 +537,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
}
/**
* tick_nohz_restart_sched_tick - restart the idle tick from the idle task
* tick_nohz_idle_exit - restart the idle tick from the idle task
*
* Restart the idle tick when the CPU is woken up from idle
* This also exit the RCU extended quiescent state. The CPU
* can use RCU again after this function is called.
*/
void tick_nohz_restart_sched_tick(void)
void tick_nohz_idle_exit(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
@ -529,6 +553,7 @@ void tick_nohz_restart_sched_tick(void)
ktime_t now;
local_irq_disable();
if (ts->idle_active || (ts->inidle && ts->tick_stopped))
now = ktime_get();
@ -543,8 +568,6 @@ void tick_nohz_restart_sched_tick(void)
ts->inidle = 0;
rcu_exit_nohz();
/* Update jiffies first */
select_nohz_load_balancer(0);
tick_do_update_jiffies64(now);

View File

@ -4775,6 +4775,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
__ftrace_dump(true, oops_dump_mode);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
__init static int tracer_alloc_buffers(void)
{