Merge commit 'v2.6.29' into timers/core
@@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_FREEZER) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
@@ -54,6 +54,7 @@ asynchronous and synchronous parts of the kernel.
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <asm/atomic.h>

static async_cookie_t next_cookie = 1;
@@ -132,8 +133,7 @@ static void run_one_entry(void)
entry = list_first_entry(&async_pending, struct async_entry, list);

/* 2) move it to the running queue */
list_del(&entry->list);
list_add_tail(&entry->list, &async_running);
list_move_tail(&entry->list, entry->running);
spin_unlock_irqrestore(&async_lock, flags);

/* 3) run it (and print duration)*/
@@ -208,18 +208,44 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct l
return newcookie;
}

/**
 * async_schedule - schedule a function for asynchronous execution
 * @ptr: function to execute asynchronously
 * @data: data pointer to pass to the function
 *
 * Returns an async_cookie_t that may be used for checkpointing later.
 * Note: This function may be called from atomic or non-atomic contexts.
 */
async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
{
return __async_schedule(ptr, data, &async_pending);
return __async_schedule(ptr, data, &async_running);
}
EXPORT_SYMBOL_GPL(async_schedule);

async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
/**
 * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
 * @ptr: function to execute asynchronously
 * @data: data pointer to pass to the function
 * @running: running list for the domain
 *
 * Returns an async_cookie_t that may be used for checkpointing later.
 * @running may be used in the async_synchronize_*_domain() functions
 * to wait within a certain synchronization domain rather than globally.
 * A synchronization domain is specified via the running queue @running to use.
 * Note: This function may be called from atomic or non-atomic contexts.
 */
async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
struct list_head *running)
{
return __async_schedule(ptr, data, running);
}
EXPORT_SYMBOL_GPL(async_schedule_special);
EXPORT_SYMBOL_GPL(async_schedule_domain);

/**
 * async_synchronize_full - synchronize all asynchronous function calls
 *
 * This function waits until all asynchronous function calls have been done.
 */
void async_synchronize_full(void)
{
do {
@@ -228,13 +254,30 @@ void async_synchronize_full(void)
}
EXPORT_SYMBOL_GPL(async_synchronize_full);

void async_synchronize_full_special(struct list_head *list)
/**
 * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
 * @list: running list to synchronize on
 *
 * This function waits until all asynchronous function calls for the
 * synchronization domain specified by the running list @list have been done.
 */
void async_synchronize_full_domain(struct list_head *list)
{
async_synchronize_cookie_special(next_cookie, list);
async_synchronize_cookie_domain(next_cookie, list);
}
EXPORT_SYMBOL_GPL(async_synchronize_full_special);
EXPORT_SYMBOL_GPL(async_synchronize_full_domain);

void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
/**
 * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
 * @cookie: async_cookie_t to use as checkpoint
 * @running: running list to synchronize on
 *
 * This function waits until all asynchronous function calls for the
 * synchronization domain specified by the running list @list submitted
 * prior to @cookie have been done.
 */
void async_synchronize_cookie_domain(async_cookie_t cookie,
struct list_head *running)
{
ktime_t starttime, delta, endtime;

@@ -254,11 +297,18 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r
(long long)ktime_to_ns(delta) >> 10);
}
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);

/**
 * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing
 * @cookie: async_cookie_t to use as checkpoint
 *
 * This function waits until all asynchronous function calls prior to @cookie
 * have been done.
 */
void async_synchronize_cookie(async_cookie_t cookie)
{
async_synchronize_cookie_special(cookie, &async_running);
async_synchronize_cookie_domain(cookie, &async_running);
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie);

@@ -319,7 +369,11 @@ static int async_manager_thread(void *unused)
ec = atomic_read(&entry_count);

while (tc < ec && tc < MAX_THREADS) {
kthread_run(async_thread, NULL, "async/%i", tc);
if (IS_ERR(kthread_run(async_thread, NULL, "async/%i",
tc))) {
msleep(100);
continue;
}
atomic_inc(&thread_count);
tc++;
}
@@ -334,7 +388,9 @@ static int async_manager_thread(void *unused)
static int __init async_init(void)
{
if (async_enabled)
kthread_run(async_manager_thread, NULL, "async/mgr");
if (IS_ERR(kthread_run(async_manager_thread, NULL,
"async/mgr")))
async_enabled = 0;
return 0;
}
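The kernel-doc above covers the rename of the "special" entry points to the domain API: a caller passes its own running list to async_schedule_domain() and can later wait on just that list instead of on all outstanding async work. A minimal usage sketch, assuming a hypothetical driver-private list and probe function (the names are illustrative, not from this commit):

#include <linux/async.h>
#include <linux/list.h>

static LIST_HEAD(my_async_domain);	/* per-driver synchronization domain */

static void my_probe_one(void *data, async_cookie_t cookie)
{
	/* slow, independent initialization runs asynchronously here */
}

static void my_driver_scan(void)
{
	async_schedule_domain(my_probe_one, NULL, &my_async_domain);
	/* ...schedule more entries into the same domain... */

	/* wait only for work queued in this domain, not globally */
	async_synchronize_full_domain(&my_async_domain);
}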
@@ -1122,8 +1122,8 @@ static void cgroup_kill_sb(struct super_block *sb) {

mutex_unlock(&cgroup_mutex);

kfree(root);
kill_litter_super(sb);
kfree(root);
}

static struct file_system_type cgroup_fs_type = {
@@ -2351,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->root == root)
mutex_lock_nested(&ss->hierarchy_mutex, i);
mutex_lock(&ss->hierarchy_mutex);
}
}

@@ -2637,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
BUG_ON(!list_empty(&init_task.tasks));

mutex_init(&ss->hierarchy_mutex);
lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
ss->active = 1;
}
@@ -1095,7 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
if (unlikely(ptrace_reparented(current)))
if (unlikely(current->ptrace))
ptrace_fork(p, clone_flags);

/* Perform scheduler related setup. Assign this task to a CPU. */
@@ -1179,10 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#endif
clear_all_latency_tracing(p);

/* Our parent execution domain becomes current domain
These must match for thread signalling to apply */
p->parent_exec_id = p->self_exec_id;

/* ok, now we should be set up.. */
p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
p->pdeath_signal = 0;
@@ -1220,10 +1216,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
set_task_cpu(p, smp_processor_id());

/* CLONE_PARENT re-uses the old parent */
if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
p->real_parent = current->real_parent;
else
p->parent_exec_id = current->parent_exec_id;
} else {
p->real_parent = current;
p->parent_exec_id = current->self_exec_id;
}

spin_lock(&current->sighand->siglock);
@@ -1165,6 +1165,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
{
struct task_struct *curr = current;
struct restart_block *restart;
DECLARE_WAITQUEUE(wait, curr);
struct futex_hash_bucket *hb;
struct futex_q q;
@@ -1216,11 +1217,13 @@ retry:

if (!ret)
goto retry;
return ret;
goto out;
}
ret = -EWOULDBLOCK;
if (uval != val)
goto out_unlock_put_key;
if (unlikely(uval != val)) {
queue_unlock(&q, hb);
goto out_put_key;
}

/* Only actually queue if *uaddr contained val. */
queue_me(&q, hb);
@@ -1284,38 +1287,38 @@ retry:
*/

/* If we were woken (and unqueued), we succeeded, whatever. */
ret = 0;
if (!unqueue_me(&q))
return 0;
goto out_put_key;
ret = -ETIMEDOUT;
if (rem)
return -ETIMEDOUT;
goto out_put_key;

/*
 * We expect signal_pending(current), but another thread may
 * have handled it for us already.
 */
ret = -ERESTARTSYS;
if (!abs_time)
return -ERESTARTSYS;
else {
struct restart_block *restart;
restart = &current_thread_info()->restart_block;
restart->fn = futex_wait_restart;
restart->futex.uaddr = (u32 *)uaddr;
restart->futex.val = val;
restart->futex.time = abs_time->tv64;
restart->futex.bitset = bitset;
restart->futex.flags = 0;
goto out_put_key;

if (fshared)
restart->futex.flags |= FLAGS_SHARED;
if (clockrt)
restart->futex.flags |= FLAGS_CLOCKRT;
return -ERESTART_RESTARTBLOCK;
}
restart = &current_thread_info()->restart_block;
restart->fn = futex_wait_restart;
restart->futex.uaddr = (u32 *)uaddr;
restart->futex.val = val;
restart->futex.time = abs_time->tv64;
restart->futex.bitset = bitset;
restart->futex.flags = 0;

out_unlock_put_key:
queue_unlock(&q, hb);
if (fshared)
restart->futex.flags |= FLAGS_SHARED;
if (clockrt)
restart->futex.flags |= FLAGS_CLOCKRT;

ret = -ERESTART_RESTARTBLOCK;

out_put_key:
put_futex_key(fshared, &q.key);

out:
return ret;
}
@@ -1465,6 +1465,11 @@ int kernel_kexec(void)
error = device_power_down(PMSG_FREEZE);
if (error)
goto Enable_irqs;

/* Suspend system devices */
error = sysdev_suspend(PMSG_FREEZE);
if (error)
goto Power_up_devices;
} else
#endif
{
@@ -1477,6 +1482,8 @@ int kernel_kexec(void)

#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context) {
sysdev_resume();
Power_up_devices:
device_power_up(PMSG_RESTORE);
Enable_irqs:
local_irq_enable();
@@ -2015,14 +2015,6 @@ static noinline struct module *load_module(void __user *umod,
if (err < 0)
goto free_mod;

#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
mod->name);
if (!mod->refptr) {
err = -ENOMEM;
goto free_mod;
}
#endif
if (pcpuindex) {
/* We have a special allocation for this section. */
percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
@@ -2030,7 +2022,7 @@ static noinline struct module *load_module(void __user *umod,
mod->name);
if (!percpu) {
err = -ENOMEM;
goto free_percpu;
goto free_mod;
}
sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
mod->percpu = percpu;
@@ -2082,6 +2074,14 @@ static noinline struct module *load_module(void __user *umod,
/* Module has been moved. */
mod = (void *)sechdrs[modindex].sh_addr;

#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
mod->name);
if (!mod->refptr) {
err = -ENOMEM;
goto free_init;
}
#endif
/* Now we've moved module, initialize linked lists, etc. */
module_unload_init(mod);

@@ -2288,15 +2288,17 @@ static noinline struct module *load_module(void __user *umod,
ftrace_release(mod->module_core, mod->core_size);
free_unload:
module_unload_free(mod);
module_free(mod, mod->module_init);
free_core:
module_free(mod, mod->module_core);
free_percpu:
if (percpu)
percpu_modfree(percpu);
free_init:
#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
percpu_modfree(mod->refptr);
#endif
module_free(mod, mod->module_init);
free_core:
module_free(mod, mod->module_core);
/* mod will be freed with core. Don't access it beyond this line! */
free_percpu:
if (percpu)
percpu_modfree(percpu);
free_mod:
kfree(args);
free_hdr:
@@ -261,6 +261,40 @@ out:
rcu_read_unlock();
}

static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
{
if (cputime_gt(b->utime, a->utime))
a->utime = b->utime;

if (cputime_gt(b->stime, a->stime))
a->stime = b->stime;

if (b->sum_exec_runtime > a->sum_exec_runtime)
a->sum_exec_runtime = b->sum_exec_runtime;
}

void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
struct task_cputime sum;
unsigned long flags;

spin_lock_irqsave(&cputimer->lock, flags);
if (!cputimer->running) {
cputimer->running = 1;
/*
 * The POSIX timer interface allows for absolute time expiry
 * values through the TIMER_ABSTIME flag, therefore we have
 * to synchronize the timer to the clock every time we start
 * it.
 */
thread_group_cputime(tsk, &sum);
update_gt_cputime(&cputimer->cputime, &sum);
}
*times = cputimer->cputime;
spin_unlock_irqrestore(&cputimer->lock, flags);
}

/*
 * Sample a process (thread group) clock for the given group_leader task.
 * Must be called with tasklist_lock held for reading.
@@ -488,7 +522,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
struct task_cputime cputime;

thread_group_cputime(tsk, &cputime);
thread_group_cputimer(tsk, &cputime);
cleanup_timers(tsk->signal->cpu_timers,
cputime.utime, cputime.stime, cputime.sum_exec_runtime);
}
@@ -506,29 +540,6 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
now);
}

/*
 * Enable the process wide cpu timer accounting.
 *
 * serialized using ->sighand->siglock
 */
static void start_process_timers(struct task_struct *tsk)
{
tsk->signal->cputimer.running = 1;
barrier();
}

/*
 * Release the process wide timer accounting -- timer stops ticking when
 * nobody cares about it.
 *
 * serialized using ->sighand->siglock
 */
static void stop_process_timers(struct task_struct *tsk)
{
tsk->signal->cputimer.running = 0;
barrier();
}

/*
 * Insert the timer on the appropriate list before any timers that
 * expire later. This must be called with the tasklist_lock held
@@ -549,9 +560,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
BUG_ON(!irqs_disabled());
spin_lock(&p->sighand->siglock);

if (!CPUCLOCK_PERTHREAD(timer->it_clock))
start_process_timers(p);

listpos = head;
if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
list_for_each_entry(next, head, entry) {
@@ -672,6 +680,33 @@ static void cpu_timer_fire(struct k_itimer *timer)
}
}

/*
 * Sample a process (thread group) timer for the given group_leader task.
 * Must be called with tasklist_lock held for reading.
 */
static int cpu_timer_sample_group(const clockid_t which_clock,
struct task_struct *p,
union cpu_time_count *cpu)
{
struct task_cputime cputime;

thread_group_cputimer(p, &cputime);
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
cpu->cpu = cputime_add(cputime.utime, cputime.stime);
break;
case CPUCLOCK_VIRT:
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
break;
}
return 0;
}

/*
 * Guts of sys_timer_settime for CPU timers.
 * This is called with the timer locked and interrupts disabled.
@@ -733,7 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
cpu_clock_sample(timer->it_clock, p, &val);
} else {
cpu_clock_sample_group(timer->it_clock, p, &val);
cpu_timer_sample_group(timer->it_clock, p, &val);
}

if (old) {
@@ -881,7 +916,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
read_unlock(&tasklist_lock);
goto dead;
} else {
cpu_clock_sample_group(timer->it_clock, p, &now);
cpu_timer_sample_group(timer->it_clock, p, &now);
clear_dead = (unlikely(p->exit_state) &&
thread_group_empty(p));
}
@@ -1021,6 +1056,19 @@ static void check_thread_timers(struct task_struct *tsk,
}
}

static void stop_process_timers(struct task_struct *tsk)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
unsigned long flags;

if (!cputimer->running)
return;

spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0;
spin_unlock_irqrestore(&cputimer->lock, flags);
}

/*
 * Check for any per-thread CPU timers that have fired and move them
 * off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -1223,7 +1271,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
clear_dead_task(timer, now);
goto out_unlock;
}
cpu_clock_sample_group(timer->it_clock, p, &now);
cpu_timer_sample_group(timer->it_clock, p, &now);
bump_cpu_timer(timer, now);
/* Leave the tasklist_lock locked for the call below. */
}
@@ -1387,33 +1435,6 @@ void run_posix_cpu_timers(struct task_struct *tsk)
}
}

/*
 * Sample a process (thread group) timer for the given group_leader task.
 * Must be called with tasklist_lock held for reading.
 */
static int cpu_timer_sample_group(const clockid_t which_clock,
struct task_struct *p,
union cpu_time_count *cpu)
{
struct task_cputime cputime;

thread_group_cputimer(p, &cputime);
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
cpu->cpu = cputime_add(cputime.utime, cputime.stime);
break;
case CPUCLOCK_VIRT:
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
break;
}
return 0;
}

/*
 * Set one of the process-wide special case CPU timers.
 * The tsk->sighand->siglock must be held by the caller.
@@ -1427,7 +1448,6 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
struct list_head *head;

BUG_ON(clock_idx == CPUCLOCK_SCHED);
start_process_timers(tsk);
cpu_timer_sample_group(clock_idx, tsk, &now);

if (oldval) {
@@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif

obj-y := main.o
obj-$(CONFIG_PM) += main.o
obj-$(CONFIG_PM_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o
@@ -78,6 +78,12 @@ void pm_restore_console(void)
}
set_console(orig_fgconsole);
release_console_sem();

if (vt_waitactive(orig_fgconsole)) {
pr_debug("Resume: Can't switch VCs.");
return;
}

kmsg_redirect = orig_kmsg;
}
#endif
@@ -227,6 +227,12 @@ static int create_image(int platform_mode)
"aborting hibernation\n");
goto Enable_irqs;
}
sysdev_suspend(PMSG_FREEZE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting hibernation\n");
goto Power_up_devices;
}

if (hibernation_test(TEST_CORE))
goto Power_up;
@@ -242,9 +248,11 @@ static int create_image(int platform_mode)
if (!in_suspend)
platform_leave(platform_mode);
Power_up:
sysdev_resume();
/* NOTE: device_power_up() is just a resume() for devices
 * that suspended with irqs off ... no overall powerup.
 */
Power_up_devices:
device_power_up(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
Enable_irqs:
@@ -335,6 +343,7 @@ static int resume_target_kernel(void)
"aborting resume\n");
goto Enable_irqs;
}
sysdev_suspend(PMSG_QUIESCE);
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
error = restore_highmem();
@@ -357,6 +366,7 @@ static int resume_target_kernel(void)
swsusp_free();
restore_processor_state();
touch_softlockup_watchdog();
sysdev_resume();
device_power_up(PMSG_RECOVER);
Enable_irqs:
local_irq_enable();
@@ -440,6 +450,7 @@ int hibernation_platform_enter(void)
local_irq_disable();
error = device_power_down(PMSG_HIBERNATE);
if (!error) {
sysdev_suspend(PMSG_HIBERNATE);
hibernation_ops->enter();
/* We should never get here */
while (1);
@@ -594,6 +605,12 @@ static int software_resume(void)
int error;
unsigned int flags;

/*
 * If the user said "noresume".. bail out early.
 */
if (noresume)
return 0;

/*
 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
 * is configured into the kernel. Since the regular hibernate
@@ -610,6 +627,11 @@ static int software_resume(void)
mutex_unlock(&pm_mutex);
return -ENOENT;
}
/*
 * Some device discovery might still be in progress; we need
 * to wait for this to finish.
 */
wait_for_device_probe();
swsusp_resume_device = name_to_dev_t(resume_file);
pr_debug("PM: Resume from partition %s\n", resume_file);
} else {
@@ -57,16 +57,6 @@ int pm_notifier_call_chain(unsigned long val)
#ifdef CONFIG_PM_DEBUG
int pm_test_level = TEST_NONE;

static int suspend_test(int level)
{
if (pm_test_level == level) {
printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
mdelay(5000);
return 1;
}
return 0;
}

static const char * const pm_tests[__TEST_AFTER_LAST] = {
[TEST_NONE] = "none",
[TEST_CORE] = "core",
@@ -125,14 +115,24 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
}

power_attr(pm_test);
#else /* !CONFIG_PM_DEBUG */
static inline int suspend_test(int level) { return 0; }
#endif /* !CONFIG_PM_DEBUG */
#endif /* CONFIG_PM_DEBUG */

#endif /* CONFIG_PM_SLEEP */

#ifdef CONFIG_SUSPEND

static int suspend_test(int level)
{
#ifdef CONFIG_PM_DEBUG
if (pm_test_level == level) {
printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
mdelay(5000);
return 1;
}
#endif /* !CONFIG_PM_DEBUG */
return 0;
}

#ifdef CONFIG_PM_TEST_SUSPEND

/*
@@ -298,8 +298,12 @@ static int suspend_enter(suspend_state_t state)
goto Done;
}

if (!suspend_test(TEST_CORE))
error = suspend_ops->enter(state);
error = sysdev_suspend(PMSG_SUSPEND);
if (!error) {
if (!suspend_test(TEST_CORE))
error = suspend_ops->enter(state);
sysdev_resume();
}

device_power_up(PMSG_RESUME);
Done:
@@ -60,6 +60,7 @@ static struct block_device *resume_bdev;
static int submit(int rw, pgoff_t page_off, struct page *page,
struct bio **bio_chain)
{
const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
struct bio *bio;

bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
@@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
bio_get(bio);

if (bio_chain == NULL) {
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
submit_bio(bio_rw, bio);
wait_on_page_locked(page);
if (rw == READ)
bio_set_pages_dirty(bio);
@@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
get_page(page); /* These pages are freed later */
bio->bi_private = *bio_chain;
*bio_chain = bio;
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
submit_bio(bio_rw, bio);
}
return 0;
}
@@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp)
data->swap = swsusp_resume_device ?
swap_type_of(swsusp_resume_device, 0, NULL) : -1;
data->mode = O_RDONLY;
error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
if (error)
pm_notifier_call_chain(PM_POST_RESTORE);
} else {
data->swap = -1;
data->mode = O_WRONLY;
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
if (error)
pm_notifier_call_chain(PM_POST_HIBERNATION);
} else {
data->swap = -1;
data->mode = O_WRONLY;
error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
if (error)
pm_notifier_call_chain(PM_POST_RESTORE);
}
if (error)
atomic_inc(&snapshot_device_available);
@@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress);
 * driver system.
 */
static DECLARE_MUTEX(console_sem);
static DECLARE_MUTEX(secondary_console_sem);
struct console *console_drivers;
EXPORT_SYMBOL_GPL(console_drivers);

@@ -891,12 +890,14 @@ void suspend_console(void)
printk("Suspending console(s) (use no_console_suspend to debug)\n");
acquire_console_sem();
console_suspended = 1;
up(&console_sem);
}

void resume_console(void)
{
if (!console_suspend_enabled)
return;
down(&console_sem);
console_suspended = 0;
release_console_sem();
}
@@ -912,11 +913,9 @@ void resume_console(void)
void acquire_console_sem(void)
{
BUG_ON(in_interrupt());
if (console_suspended) {
down(&secondary_console_sem);
return;
}
down(&console_sem);
if (console_suspended)
return;
console_locked = 1;
console_may_schedule = 1;
}
@@ -926,6 +925,10 @@ int try_acquire_console_sem(void)
{
if (down_trylock(&console_sem))
return -1;
if (console_suspended) {
up(&console_sem);
return -1;
}
console_locked = 1;
console_may_schedule = 0;
return 0;
@@ -979,7 +982,7 @@ void release_console_sem(void)
unsigned wake_klogd = 0;

if (console_suspended) {
up(&secondary_console_sem);
up(&console_sem);
return;
}
@@ -114,12 +114,15 @@ int __ref profile_init(void)
if (!slab_is_available()) {
prof_buffer = alloc_bootmem(buffer_bytes);
alloc_bootmem_cpumask_var(&prof_cpu_mask);
cpumask_copy(prof_cpu_mask, cpu_possible_mask);
return 0;
}

if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
return -ENOMEM;

cpumask_copy(prof_cpu_mask, cpu_possible_mask);

prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
if (prof_buffer)
return 0;
@@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu)
void rcu_check_callbacks(int cpu, int user)
{
if (user ||
(idle_cpu(cpu) && !in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
(idle_cpu(cpu) && rcu_scheduler_active &&
!in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {

/*
 * Get here if this CPU took its interrupt from user
@@ -44,6 +44,7 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kernel_stat.h>

enum rcu_barrier {
RCU_BARRIER_STD,
@@ -55,6 +56,7 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
static atomic_t rcu_barrier_cpu_count;
static DEFINE_MUTEX(rcu_barrier_mutex);
static struct completion rcu_barrier_completion;
int rcu_scheduler_active __read_mostly;

/*
 * Awaken the corresponding synchronize_rcu() instance now that a
@@ -80,6 +82,10 @@ void wakeme_after_rcu(struct rcu_head *head)
void synchronize_rcu(void)
{
struct rcu_synchronize rcu;

if (rcu_blocking_is_gp())
return;

init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
@@ -175,3 +181,9 @@ void __init rcu_init(void)
__rcu_init();
}

void rcu_scheduler_starting(void)
{
WARN_ON(num_online_cpus() != 1);
WARN_ON(nr_context_switches() > 0);
rcu_scheduler_active = 1;
}
@@ -1181,6 +1181,9 @@ void __synchronize_sched(void)
{
struct rcu_synchronize rcu;

if (num_online_cpus() == 1)
return; /* blocking is gp if only one CPU! */

init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_sched(&rcu.head, wakeme_after_rcu);
@@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp)
void rcu_check_callbacks(int cpu, int user)
{
if (user ||
(idle_cpu(cpu) && !in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
(idle_cpu(cpu) && rcu_scheduler_active &&
!in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {

/*
 * Get here if this CPU took its interrupt from user
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
ktime_t now;

if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
return;

if (hrtimer_active(&rt_b->rt_period_timer))
@@ -2266,16 +2266,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
if (!sched_feat(SYNC_WAKEUPS))
sync = 0;

if (!sync) {
if (current->se.avg_overlap < sysctl_sched_migration_cost &&
p->se.avg_overlap < sysctl_sched_migration_cost)
sync = 1;
} else {
if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
p->se.avg_overlap >= sysctl_sched_migration_cost)
sync = 0;
}

#ifdef CONFIG_SMP
if (sched_feat(LB_WAKEUP_UPDATE)) {
struct sched_domain *sd;
@@ -6954,20 +6944,26 @@ static void free_rootdomain(struct root_domain *rd)

static void rq_attach_root(struct rq *rq, struct root_domain *rd)
{
struct root_domain *old_rd = NULL;
unsigned long flags;

spin_lock_irqsave(&rq->lock, flags);

if (rq->rd) {
struct root_domain *old_rd = rq->rd;
old_rd = rq->rd;

if (cpumask_test_cpu(rq->cpu, old_rd->online))
set_rq_offline(rq);

cpumask_clear_cpu(rq->cpu, old_rd->span);

if (atomic_dec_and_test(&old_rd->refcount))
free_rootdomain(old_rd);
/*
 * If we dont want to free the old_rt yet then
 * set old_rd to NULL to skip the freeing later
 * in this function:
 */
if (!atomic_dec_and_test(&old_rd->refcount))
old_rd = NULL;
}

atomic_inc(&rd->refcount);
@@ -6978,6 +6974,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
set_rq_online(rq);

spin_unlock_irqrestore(&rq->lock, flags);

if (old_rd)
free_rootdomain(old_rd);
}

static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
@@ -9225,6 +9224,16 @@ static int sched_rt_global_constraints(void)

return ret;
}

int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
/* Don't accept realtime tasks when there is no way for them to run */
if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
return 0;

return 1;
}

#else /* !CONFIG_RT_GROUP_SCHED */
static int sched_rt_global_constraints(void)
{
@@ -9318,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
struct task_struct *tsk)
{
#ifdef CONFIG_RT_GROUP_SCHED
/* Don't accept realtime tasks when there is no way for them to run */
if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
return -EINVAL;
#else
/* We don't support RT-tasks being in separate groups */
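The rq_attach_root() hunk above keeps the same reference counting but moves the actual free of the old root domain out from under rq->lock: the last reference is detected while the lock is held, and the object is freed only after the unlock. A generic sketch of that defer-the-free pattern, using hypothetical types rather than the scheduler's own:

#include <linux/spinlock.h>
#include <linux/slab.h>
#include <asm/atomic.h>

struct domain {
	atomic_t refcount;
	/* ... payload ... */
};

struct holder {
	spinlock_t lock;
	struct domain *cur;
};

static void attach_domain(struct holder *h, struct domain *new)
{
	struct domain *old = NULL;
	unsigned long flags;

	spin_lock_irqsave(&h->lock, flags);
	if (h->cur && atomic_dec_and_test(&h->cur->refcount))
		old = h->cur;		/* last reference: free it later */
	atomic_inc(&new->refcount);
	h->cur = new;
	spin_unlock_irqrestore(&h->lock, flags);

	if (old)
		kfree(old);		/* safe: no locks held here */
}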
@@ -1191,15 +1191,20 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
int idx, unsigned long load, unsigned long this_load,
unsigned int imbalance)
{
struct task_struct *curr = this_rq->curr;
struct task_group *tg;
unsigned long tl = this_load;
unsigned long tl_per_task;
struct task_group *tg;
unsigned long weight;
int balanced;

if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
return 0;

if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
p->se.avg_overlap > sysctl_sched_migration_cost))
sync = 0;

/*
 * If sync wakeup then subtract the (maximum possible)
 * effect of the currently running task from the load
@@ -1426,7 +1431,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
if (!sched_feat(WAKEUP_PREEMPT))
return;

if (sched_feat(WAKEUP_OVERLAP) && sync) {
if (sched_feat(WAKEUP_OVERLAP) && (sync ||
(se->avg_overlap < sysctl_sched_migration_cost &&
pse->avg_overlap < sysctl_sched_migration_cost))) {
resched_task(curr);
return;
}
@@ -8,6 +8,7 @@

#include <linux/seccomp.h>
#include <linux/sched.h>
#include <linux/compat.h>

/* #define SECCOMP_DEBUG 1 */
#define NR_SECCOMP_MODES 1
@@ -22,7 +23,7 @@ static int mode1_syscalls[] = {
0, /* null terminated */
};

#ifdef TIF_32BIT
#ifdef CONFIG_COMPAT
static int mode1_syscalls_32[] = {
__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
0, /* null terminated */
@@ -37,8 +38,8 @@ void __secure_computing(int this_syscall)
switch (mode) {
case 1:
syscall = mode1_syscalls;
#ifdef TIF_32BIT
if (test_thread_flag(TIF_32BIT))
#ifdef CONFIG_COMPAT
if (is_compat_task())
syscall = mode1_syscalls_32;
#endif
do {
@@ -1575,7 +1575,15 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
read_lock(&tasklist_lock);
if (may_ptrace_stop()) {
do_notify_parent_cldstop(current, CLD_TRAPPED);
/*
 * Don't want to allow preemption here, because
 * sys_ptrace() needs this task to be inactive.
 *
 * XXX: implement read_unlock_no_resched().
 */
preempt_disable();
read_unlock(&tasklist_lock);
preempt_enable_no_resched();
schedule();
} else {
/*
@@ -626,6 +626,7 @@ static int ksoftirqd(void * __bind_cpu)
preempt_enable_no_resched();
cond_resched();
preempt_disable();
rcu_qsctr_inc((long)__bind_cpu);
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);
kernel/sys.c
@@ -559,7 +559,7 @@ error:
abort_creds(new);
return retval;
}

/*
 * change the user struct in a credentials set to match the new UID
 */
@@ -571,6 +571,11 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;

if (!task_can_switch_user(new_user, current)) {
free_uid(new_user);
return -EINVAL;
}

if (atomic_read(&new_user->processes) >=
current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
new_user != INIT_USER) {
@@ -631,10 +636,11 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
goto error;
}

retval = -EAGAIN;
if (new->uid != old->uid && set_user(new) < 0)
goto error;

if (new->uid != old->uid) {
retval = set_user(new);
if (retval < 0)
goto error;
}
if (ruid != (uid_t) -1 ||
(euid != (uid_t) -1 && euid != old->uid))
new->suid = new->euid;
@@ -680,9 +686,10 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
retval = -EPERM;
if (capable(CAP_SETUID)) {
new->suid = new->uid = uid;
if (uid != old->uid && set_user(new) < 0) {
retval = -EAGAIN;
goto error;
if (uid != old->uid) {
retval = set_user(new);
if (retval < 0)
goto error;
}
} else if (uid != old->uid && uid != new->suid) {
goto error;
@@ -734,11 +741,13 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
goto error;
}

retval = -EAGAIN;
if (ruid != (uid_t) -1) {
new->uid = ruid;
if (ruid != old->uid && set_user(new) < 0)
goto error;
if (ruid != old->uid) {
retval = set_user(new);
if (retval < 0)
goto error;
}
}
if (euid != (uid_t) -1)
new->euid = euid;
@@ -101,6 +101,7 @@ static int two = 2;

static int zero;
static int one = 1;
static unsigned long one_ul = 1;
static int one_hundred = 100;

/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -974,7 +975,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &dirty_background_bytes_handler,
.strategy = &sysctl_intvec,
.extra1 = &one,
.extra1 = &one_ul,
},
{
.ctl_name = VM_DIRTY_RATIO,
@@ -995,7 +996,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &dirty_bytes_handler,
.strategy = &sysctl_intvec,
.extra1 = &one,
.extra1 = &one_ul,
},
{
.procname = "dirty_writeback_centisecs",
@@ -52,6 +52,7 @@ config FUNCTION_TRACER
depends on HAVE_FUNCTION_TRACER
depends on DEBUG_KERNEL
select FRAME_POINTER
select KALLSYMS
select TRACING
select CONTEXT_SWITCH_TRACER
help
@@ -238,6 +239,7 @@ config STACK_TRACER
depends on DEBUG_KERNEL
select FUNCTION_TRACER
select STACKTRACE
select KALLSYMS
help
This special tracer records the maximum stack footprint of the
kernel and displays it in debugfs/tracing/stack_trace.
@@ -302,4 +304,27 @@ config FTRACE_STARTUP_TEST
functioning properly. It will do tests on all the configured
tracers of ftrace.

config MMIOTRACE
bool "Memory mapped IO tracing"
depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI
select TRACING
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap
implementation and works via page faults. Tracing is disabled by
default and can be enabled at run-time.

See Documentation/tracers/mmiotrace.txt.
If you are not helping to develop drivers, say N.

config MMIOTRACE_TEST
tristate "Test module for mmiotrace"
depends on MMIOTRACE && m
help
This is a dumb module for testing mmiotrace. It is very dangerous
as it will write garbage to IO memory starting at a given address.
However, it should be safe to use on e.g. unused portion of VRAM.

Say N, unless you absolutely know what you are doing.

endmenu
@@ -2033,7 +2033,7 @@ free:
static int start_graph_tracing(void)
{
struct ftrace_ret_stack **ret_stack_list;
int ret;
int ret, cpu;

ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
sizeof(struct ftrace_ret_stack *),
@@ -2042,6 +2042,10 @@ static int start_graph_tracing(void)
if (!ret_stack_list)
return -ENOMEM;

/* The cpu_boot init_task->ret_stack will never be freed */
for_each_online_cpu(cpu)
ftrace_graph_init_task(idle_task(cpu));

do {
ret = alloc_retstack_tasklist(ret_stack_list);
} while (ret == -EAGAIN);
@@ -9,6 +9,7 @@

#include <linux/kernel.h>
#include <linux/mmiotrace.h>
#include <linux/pci.h>
#include <asm/atomic.h>

#include "trace.h"

@@ -19,6 +20,7 @@ struct header_iter {
static struct trace_array *mmio_trace_array;
static bool overrun_detected;
static unsigned long prev_overruns;
static atomic_t dropped_count;

static void mmio_reset_data(struct trace_array *tr)
{
@@ -121,11 +123,11 @@ static void mmio_close(struct trace_iterator *iter)

static unsigned long count_overruns(struct trace_iterator *iter)
{
unsigned long cnt = 0;
unsigned long cnt = atomic_xchg(&dropped_count, 0);
unsigned long over = ring_buffer_overruns(iter->tr->buffer);

if (over > prev_overruns)
cnt = over - prev_overruns;
cnt += over - prev_overruns;
prev_overruns = over;
return cnt;
}
@@ -310,8 +312,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,

event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
&irq_flags);
if (!event)
if (!event) {
atomic_inc(&dropped_count);
return;
}
entry = ring_buffer_event_data(event);
tracing_generic_entry_update(&entry->ent, 0, preempt_count());
entry->ent.type = TRACE_MMIO_RW;
@@ -338,8 +342,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr,

event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
&irq_flags);
if (!event)
if (!event) {
atomic_inc(&dropped_count);
return;
}
entry = ring_buffer_event_data(event);
tracing_generic_entry_update(&entry->ent, 0, preempt_count());
entry->ent.type = TRACE_MMIO_MAP;
@@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
{
struct ring_buffer_event *event;
struct trace_entry *entry;
unsigned int loops = 0;

while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
entry = ring_buffer_event_data(event);

/*
 * The ring buffer is a size of trace_buf_size, if
 * we loop more than the size, there's something wrong
 * with the ring buffer.
 */
if (loops++ > trace_buf_size) {
printk(KERN_CONT ".. bad ring buffer ");
goto failed;
}
if (!trace_valid_entry(entry)) {
printk(KERN_CONT ".. invalid entry %d ",
entry->type);
@@ -57,11 +67,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)

cnt = ring_buffer_entries(tr->buffer);

/*
 * The trace_test_buffer_cpu runs a while loop to consume all data.
 * If the calling tracer is broken, and is constantly filling
 * the buffer, this will run forever, and hard lock the box.
 * We disable the ring buffer while we do this test to prevent
 * a hard lock up.
 */
tracing_off();
for_each_possible_cpu(cpu) {
ret = trace_test_buffer_cpu(tr, cpu);
if (ret)
break;
}
tracing_on();
__raw_spin_unlock(&ftrace_max_lock);
local_irq_restore(flags);
@@ -122,8 +122,10 @@ void acct_update_integrals(struct task_struct *tsk)
if (likely(tsk->mm)) {
cputime_t time, dtime;
struct timeval value;
unsigned long flags;
u64 delta;

local_irq_save(flags);
time = tsk->stime + tsk->utime;
dtime = cputime_sub(time, tsk->acct_timexpd);
jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
@@ -131,10 +133,12 @@ void acct_update_integrals(struct task_struct *tsk)
delta = delta * USEC_PER_SEC + value.tv_usec;

if (delta == 0)
return;
goto out;
tsk->acct_timexpd = time;
tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
out:
local_irq_restore(flags);
}
}
@@ -72,6 +72,7 @@ static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
static void uid_hash_remove(struct user_struct *up)
{
hlist_del_init(&up->uidhash_node);
put_user_ns(up->user_ns);
}

static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
@@ -285,14 +286,12 @@ int __init uids_sysfs_init(void)
/* work function to remove sysfs directory for a user and free up
 * corresponding structures.
 */
static void remove_user_sysfs_dir(struct work_struct *w)
static void cleanup_user_struct(struct work_struct *w)
{
struct user_struct *up = container_of(w, struct user_struct, work);
unsigned long flags;
int remove_user = 0;

if (up->user_ns != &init_user_ns)
return;
/* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
 * atomic.
 */
@@ -311,9 +310,11 @@ static void remove_user_sysfs_dir(struct work_struct *w)
if (!remove_user)
goto done;

kobject_uevent(&up->kobj, KOBJ_REMOVE);
kobject_del(&up->kobj);
kobject_put(&up->kobj);
if (up->user_ns == &init_user_ns) {
kobject_uevent(&up->kobj, KOBJ_REMOVE);
kobject_del(&up->kobj);
kobject_put(&up->kobj);
}

sched_destroy_user(up);
key_put(up->uid_keyring);
@@ -334,8 +335,7 @@ static void free_user(struct user_struct *up, unsigned long flags)
atomic_inc(&up->__count);
spin_unlock_irqrestore(&uidhash_lock, flags);

put_user_ns(up->user_ns);
INIT_WORK(&up->work, remove_user_sysfs_dir);
INIT_WORK(&up->work, cleanup_user_struct);
schedule_work(&up->work);
}

@@ -357,12 +357,29 @@ static void free_user(struct user_struct *up, unsigned long flags)
sched_destroy_user(up);
key_put(up->uid_keyring);
key_put(up->session_keyring);
put_user_ns(up->user_ns);
kmem_cache_free(uid_cachep, up);
}

#endif

#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
/*
 * We need to check if a setuid can take place. This function should be called
 * before successfully completing the setuid.
 */
int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
{

return sched_rt_can_attach(up->tg, tsk);

}
#else
int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
{
return 1;
}
#endif

/*
 * Locate the user_struct for the passed UID. If found, take a ref on it. The
 * caller must undo that ref with free_uid().
@@ -60,12 +60,25 @@ int create_user_ns(struct cred *new)
return 0;
}

void free_user_ns(struct kref *kref)
/*
 * Deferred destructor for a user namespace. This is required because
 * free_user_ns() may be called with uidhash_lock held, but we need to call
 * back to free_uid() which will want to take the lock again.
 */
static void free_user_ns_work(struct work_struct *work)
{
struct user_namespace *ns;

ns = container_of(kref, struct user_namespace, kref);
struct user_namespace *ns =
container_of(work, struct user_namespace, destroyer);
free_uid(ns->creator);
kfree(ns);
}

void free_user_ns(struct kref *kref)
{
struct user_namespace *ns =
container_of(kref, struct user_namespace, kref);

INIT_WORK(&ns->destroyer, free_user_ns_work);
schedule_work(&ns->destroyer);
}
EXPORT_SYMBOL(free_user_ns);
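The comment above explains why free_user_ns() now defers its cleanup: the kref release may run with uidhash_lock held, while free_uid() needs to take that same lock. A generic sketch of this defer-to-workqueue pattern, with placeholder names rather than the user_namespace types:

#include <linux/kref.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct obj {
	struct kref kref;
	struct work_struct destroyer;
};

static void obj_destroy_work(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, destroyer);

	/* runs later in process context: may take locks, sleep, etc. */
	kfree(o);
}

static void obj_release(struct kref *kref)
{
	struct obj *o = container_of(kref, struct obj, kref);

	/* called via kref_put(); only queue the real teardown here */
	INIT_WORK(&o->destroyer, obj_destroy_work);
	schedule_work(&o->destroyer);
}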