rcu: priority boosting for TINY_PREEMPT_RCU

Add priority boosting, but only for TINY_PREEMPT_RCU.  This is enabled
by the default-off RCU_BOOST kernel configuration parameter.  The
priority to which to boost preempted RCU readers is controlled by the
RCU_BOOST_PRIO kernel configuration parameter (defaulting to real-time
priority 1), and the time to wait before boosting the readers blocking
a given grace period is controlled by the RCU_BOOST_DELAY kernel
configuration parameter (defaulting to 500 milliseconds).

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
Paul E. McKenney
2010-09-27 17:25:23 -07:00
committed by Paul E. McKenney
parent b2c0710c46
commit 24278d1483
5 changed files with 280 additions and 53 deletions

View File

@@ -24,6 +24,29 @@
#include <linux/kthread.h>
/*
 * Global control variables for rcupdate callback mechanism.
 *
 * One instance tracks one list of RCU callbacks together with two
 * tail pointers into that list.  The static instances in this file
 * initialize both tail pointers to the address of ->rcucblist, which
 * is how an empty list is represented.
 */
struct rcu_ctrlblk {
struct rcu_head *rcucblist; /* Head of the list of pending callbacks (CBs). */
struct rcu_head **donetail; /* ->next pointer of last "done" CB, or &rcucblist if none. */
struct rcu_head **curtail; /* ->next pointer of last CB, or &rcucblist if list is empty. */
};
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
.donetail = &rcu_sched_ctrlblk.rcucblist,
.curtail = &rcu_sched_ctrlblk.rcucblist,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.donetail = &rcu_bh_ctrlblk.rcucblist,
.curtail = &rcu_bh_ctrlblk.rcucblist,
};
/*
 * rcu_scheduler_active is defined, and exported to GPL-licensed modules,
 * only when CONFIG_DEBUG_LOCK_ALLOC is set.
 * NOTE(review): presumably set once the scheduler is up (see
 * rcu_scheduler_starting()) -- confirm against the rest of the file.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_TINY_PREEMPT_RCU
#include <linux/delay.h>
@@ -48,17 +71,27 @@ struct rcu_preempt_ctrlblk {
struct list_head *gp_tasks;
/* Pointer to the first task blocking the */
/* current grace period, or NULL if there */
/* is not such task. */
/* is no such task. */
struct list_head *exp_tasks;
/* Pointer to first task blocking the */
/* current expedited grace period, or NULL */
/* if there is no such task. If there */
/* is no current expedited grace period, */
/* then there cannot be any such task. */
#ifdef CONFIG_RCU_BOOST
struct list_head *boost_tasks;
/* Pointer to first task that needs to be */
/* priority-boosted, or NULL if no priority */
/* boosting is needed. If there is no */
/* current or expedited grace period, there */
/* can be no such task. */
#endif /* #ifdef CONFIG_RCU_BOOST */
u8 gpnum; /* Current grace period. */
u8 gpcpu; /* Last grace period blocked by the CPU. */
u8 completed; /* Last grace period completed. */
/* If all three are equal, RCU is idle. */
s8 boosted_this_gp; /* Has boosting already happened? */
unsigned long boost_time; /* When to start boosting (jiffies) */
};
static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -123,6 +156,130 @@ static int rcu_preempt_gp_in_progress(void)
return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
}
/*
 * Return the ->blkd_tasks list entry that follows task t's own entry.
 * If t's entry is the last one on the list, so that its ->next pointer
 * leads back to the list header, return NULL instead.
 */
static struct list_head *rcu_next_node_entry(struct task_struct *t)
{
	struct list_head *next = t->rcu_node_entry.next;

	return next == &rcu_preempt_ctrlblk.blkd_tasks ? NULL : next;
}
#ifdef CONFIG_RCU_BOOST
#include "rtmutex_common.h"
/*
 * Carry out RCU priority boosting on the task indicated by ->boost_tasks.
 *
 * An on-stack rt_mutex is set up as if the target task already held it
 * (proxy-locked), and this function then blocks acquiring it, so that
 * the rt_mutex priority-inheritance machinery boosts the target.  The
 * target releases the mutex from rcu_read_unlock_special(), which is
 * also where ->boost_tasks is advanced past the target as it removes
 * itself from the ->blkd_tasks list; this function does not advance
 * ->boost_tasks itself.
 *
 * Returns nonzero if ->boost_tasks is still non-NULL afterwards, that
 * is, if there are more tasks to boost, and zero otherwise.
 */
static int rcu_boost(void)
{
	unsigned long flags;
	struct rt_mutex mtx;
	struct task_struct *t;

	if (rcu_preempt_ctrlblk.boost_tasks == NULL)
		return 0;  /* Nothing to boost. */

	raw_local_irq_save(flags);

	/*
	 * Recheck with irqs disabled: the task(s) in need of boosting
	 * might have left their RCU read-side critical sections, thereby
	 * setting ->boost_tasks to NULL, between the check above and the
	 * point at which irqs were disabled.  Proceeding in that case
	 * would hand container_of() a NULL list pointer.
	 */
	if (rcu_preempt_ctrlblk.boost_tasks == NULL) {
		raw_local_irq_restore(flags);
		return 0;
	}

	rcu_preempt_ctrlblk.boosted_this_gp++;
	t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
			 rcu_node_entry);

	/* Make it look as though t already holds mtx, and tell t about it. */
	rt_mutex_init_proxy_locked(&mtx, t);
	t->rcu_boost_mutex = &mtx;
	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
	raw_local_irq_restore(flags);

	/* Block until t releases mtx in rcu_read_unlock_special(). */
	rt_mutex_lock(&mtx);
	/* The lock was only a boosting vehicle, so drop it right away. */
	rt_mutex_unlock(&mtx);

	return rcu_preempt_ctrlblk.boost_tasks != NULL;
}
/*
 * Decide whether the readers blocking the current grace period should
 * now be boosted and, if so, hand them to rcu_kthread.
 *
 * Boosting is started only when all of the following hold: some task
 * is blocking the current grace period, no boost is already under way,
 * no boosting has yet been recorded for this grace period (an expedited
 * boost leaves ->boosted_this_gp nonzero, so we hold off while one is
 * in progress), and the ->boost_time deadline has been reached.
 */
static void rcu_initiate_boost(void)
{
	if (rcu_preempt_ctrlblk.gp_tasks == NULL)
		return;		/* Nobody is blocking the grace period. */
	if (rcu_preempt_ctrlblk.boost_tasks != NULL)
		return;		/* A boost is already in progress. */
	if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
		return;		/* Already boosted, or expedited boost. */
	if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
		return;		/* Too early to start boosting. */

	rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
	invoke_rcu_kthread();
}
/*
 * Start priority boosting on behalf of an expedited grace period.
 *
 * With irqs disabled, and only if ->blkd_tasks is non-empty, aim
 * ->boost_tasks at the first blocked task, mark ->boosted_this_gp
 * with -1, and kick rcu_kthread via invoke_rcu_kthread().
 */
static void rcu_initiate_expedited_boost(void)
{
	unsigned long flags;

	raw_local_irq_save(flags);
	if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
		goto out;	/* No blocked readers, nothing to boost. */

	rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.blkd_tasks.next;
	rcu_preempt_ctrlblk.boosted_this_gp = -1;
	invoke_rcu_kthread();
out:
	raw_local_irq_restore(flags);
}
/*
 * Boost delay converted from milliseconds (CONFIG_RCU_BOOST_DELAY) to
 * jiffies, rounded up.  No trailing semicolon, so that the macro can be
 * used inside any expression.
 */
#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
/*
* Do priority-boost accounting for the start of a new grace period.
*/
static void rcu_preempt_boost_start_gp(void)
{
rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
rcu_preempt_ctrlblk.boosted_this_gp = 0;
}
#else /* #ifdef CONFIG_RCU_BOOST */
/*
 * Stub used when CONFIG_RCU_BOOST is not set: there is no RCU priority
 * boosting, so we don't boost.  Always returns 0, the same value the
 * real rcu_boost() returns when it finds nothing to boost.
 */
static int rcu_boost(void)
{
return 0;
}
/*
 * Stub used when CONFIG_RCU_BOOST is not set: there is no RCU priority
 * boosting, so there is nothing to initiate.  Intentionally empty so
 * that callers need no #ifdef of their own.
 */
static void rcu_initiate_boost(void)
{
}
/*
 * Stub used when CONFIG_RCU_BOOST is not set: there is no RCU priority
 * boosting, so expedited grace periods never initiate boosting either.
 * Intentionally empty so that callers need no #ifdef of their own.
 */
static void rcu_initiate_expedited_boost(void)
{
}
/*
 * Stub used when CONFIG_RCU_BOOST is not set: there is no RCU priority
 * boosting, so no boost accounting is needed at grace-period start.
 * Intentionally empty so that callers need no #ifdef of their own.
 */
static void rcu_preempt_boost_start_gp(void)
{
}
#endif /* else #ifdef CONFIG_RCU_BOOST */
/*
* Record a preemptible-RCU quiescent state for the specified CPU. Note
* that this just means that the task currently running on the CPU is
@@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void)
rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
/*
* If there is no GP, or if blocked readers are still blocking GP,
* then there is nothing more to do.
*/
/* If there is no GP then there is nothing more to do. */
if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
return;
/* If there are blocked readers, go check up on boosting. */
if (rcu_preempt_blocked_readers_cgp()) {
rcu_initiate_boost();
return;
}
/* Advance callbacks. */
rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
@@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void)
/* If there are done callbacks, cause them to be invoked. */
if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
invoke_rcu_cbs();
invoke_rcu_kthread();
}
/*
@@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void)
rcu_preempt_ctrlblk.gp_tasks =
rcu_preempt_ctrlblk.blkd_tasks.next;
/* Set up for RCU priority boosting. */
rcu_preempt_boost_start_gp();
/* If there is no running reader, CPU is done with GP. */
if (!rcu_preempt_running_reader())
rcu_preempt_cpu_qs();
@@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
*/
empty = !rcu_preempt_blocked_readers_cgp();
empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
np = t->rcu_node_entry.next;
if (np == &rcu_preempt_ctrlblk.blkd_tasks)
np = NULL;
np = rcu_next_node_entry(t);
list_del(&t->rcu_node_entry);
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
rcu_preempt_ctrlblk.gp_tasks = np;
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
rcu_preempt_ctrlblk.exp_tasks = np;
#ifdef CONFIG_RCU_BOOST
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
rcu_preempt_ctrlblk.boost_tasks = np;
#endif /* #ifdef CONFIG_RCU_BOOST */
INIT_LIST_HEAD(&t->rcu_node_entry);
/*
@@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t)
if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
rcu_report_exp_done();
}
#ifdef CONFIG_RCU_BOOST
/* Unboost self if was boosted. */
if (special & RCU_READ_UNLOCK_BOOSTED) {
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
rt_mutex_unlock(t->rcu_boost_mutex);
t->rcu_boost_mutex = NULL;
}
#endif /* #ifdef CONFIG_RCU_BOOST */
local_irq_restore(flags);
}
@@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void)
rcu_preempt_cpu_qs();
if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
rcu_preempt_ctrlblk.rcb.donetail)
invoke_rcu_cbs();
invoke_rcu_kthread();
if (rcu_preempt_gp_in_progress() &&
rcu_cpu_blocking_cur_gp() &&
rcu_preempt_running_reader())
@@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void)
/* Wait for tail of ->blkd_tasks list to drain. */
if (rcu_preempted_readers_exp())
rcu_initiate_expedited_boost();
wait_event(sync_rcu_preempt_exp_wq,
!rcu_preempted_readers_exp());
@@ -574,6 +747,15 @@ void exit_rcu(void)
#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
/*
 * Stub used when CONFIG_TINY_PREEMPT_RCU is not set.  Because
 * preemptible RCU does not exist in that configuration, it is never
 * necessary to boost preempted RCU readers, so this always returns 0
 * (nothing to boost).
 */
static int rcu_boost(void)
{
return 0;
}
/*
* Because preemptible RCU does not exist, it never has any callbacks
* to check.
@@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void)
}
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
 * RCU_BOOST_PRIO is taken from CONFIG_RCU_BOOST_PRIO when
 * CONFIG_RCU_BOOST is set, and is fixed at 1 otherwise.
 * NOTE(review): the consumer of this value is not visible in this
 * hunk -- confirm where the priority is applied.
 */
#ifdef CONFIG_RCU_BOOST
#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
#else /* #ifdef CONFIG_RCU_BOOST */
#define RCU_BOOST_PRIO 1
#endif /* #else #ifdef CONFIG_RCU_BOOST */