Merge branch 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (574 commits)
  perf_counter: Turn off by default
  perf_counter: Add counter->id to the throttle event
  perf_counter: Better align code
  perf_counter: Rename L2 to LL cache
  perf_counter: Standardize event names
  perf_counter: Rename enums
  perf_counter tools: Clean up u64 usage
  perf_counter: Rename perf_counter_limit sysctl
  perf_counter: More paranoia settings
  perf_counter: powerpc: Implement generalized cache events for POWER processors
  perf_counters: powerpc: Add support for POWER7 processors
  perf_counter: Accurate period data
  perf_counter: Introduce struct for sample data
  perf_counter tools: Normalize data using per sample period data
  perf_counter: Annotate exit ctx recursion
  perf_counter tools: Propagate signals properly
  perf_counter tools: Small frequency related fixes
  perf_counter: More aggressive frequency adjustment
  perf_counter/x86: Fix the model number of Intel Core2 processors
  perf_counter, x86: Correct some event and umask values for Intel processors
  ...
@@ -96,6 +96,7 @@ obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o

ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
@@ -48,6 +48,7 @@
#include <linux/tracehook.h>
#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include <linux/perf_counter.h>
#include <trace/events/sched.h>

#include <asm/uaccess.h>
@@ -154,6 +155,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
{
	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);

#ifdef CONFIG_PERF_COUNTERS
	WARN_ON_ONCE(tsk->perf_counter_ctxp);
#endif
	trace_sched_process_free(tsk);
	put_task_struct(tsk);
}
@@ -170,6 +174,7 @@ repeat:
	atomic_dec(&__task_cred(p)->user->processes);

	proc_flush_task(p);

	write_lock_irq(&tasklist_lock);
	tracehook_finish_release_task(p);
	__exit_signal(p);
@@ -971,16 +976,19 @@ NORET_TYPE void do_exit(long code)
		module_put(tsk->binfmt->module);

	proc_exit_connector(tsk);

	/*
	 * Flush inherited counters to the parent - before the parent
	 * gets woken up by child-exit notifications.
	 */
	perf_counter_exit_task(tsk);

	exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
	mpol_put(tsk->mempolicy);
	tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
	/*
	 * This must happen late, after the PID is not
	 * hashed anymore:
	 */
	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
	if (unlikely(current->pi_state_cache))
@@ -62,6 +62,7 @@
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
#include <linux/perf_counter.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1096,6 +1097,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
	/* Perform scheduler related setup. Assign this task to a CPU. */
	sched_fork(p, clone_flags);

	retval = perf_counter_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;

	if ((retval = audit_alloc(p)))
		goto bad_fork_cleanup_policy;
	/* copy all the process information */
@@ -1290,6 +1295,7 @@ bad_fork_cleanup_semundo:
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_policy:
	perf_counter_free_task(p);
#ifdef CONFIG_NUMA
	mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
@@ -1403,6 +1409,12 @@ long do_fork(unsigned long clone_flags,
	if (clone_flags & CLONE_VFORK) {
		p->vfork_done = &vfork;
		init_completion(&vfork);
	} else if (!(clone_flags & CLONE_VM)) {
		/*
		 * vfork will do an exec which will call
		 * set_task_comm()
		 */
		perf_counter_fork(p);
	}

	audit_finish_fork(p);
@@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count);
 *
 * This function is similar to (but not equivalent to) down().
 */
void inline __sched mutex_lock(struct mutex *lock)
void __sched mutex_lock(struct mutex *lock)
{
	might_sleep();
	/*
kernel/perf_counter.c (new file, 4260 lines)
File diff suppressed because it is too large
@@ -39,6 +39,7 @@
#include <linux/completion.h>
#include <linux/kernel_stat.h>
#include <linux/debug_locks.h>
#include <linux/perf_counter.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/profile.h>
@@ -579,6 +580,7 @@ struct rq {
	struct load_weight load;
	unsigned long nr_load_updates;
	u64 nr_switches;
	u64 nr_migrations_in;

	struct cfs_rq cfs;
	struct rt_rq rt;
@@ -691,7 +693,7 @@ static inline int cpu_of(struct rq *rq)
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)

static inline void update_rq_clock(struct rq *rq)
inline void update_rq_clock(struct rq *rq)
{
	rq->clock = sched_clock_cpu(cpu_of(rq));
}
@@ -1968,12 +1970,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
		p->se.sleep_start -= clock_offset;
	if (p->se.block_start)
		p->se.block_start -= clock_offset;
#endif
	if (old_cpu != new_cpu) {
		schedstat_inc(p, se.nr_migrations);
		p->se.nr_migrations++;
		new_rq->nr_migrations_in++;
#ifdef CONFIG_SCHEDSTATS
		if (task_hot(p, old_rq->clock, NULL))
			schedstat_inc(p, se.nr_forced2_migrations);
	}
#endif
		perf_counter_task_migration(p, new_cpu);
	}
	p->se.vruntime -= old_cfsrq->min_vruntime -
			 new_cfsrq->min_vruntime;
@@ -2368,6 +2374,27 @@ static int sched_balance_self(int cpu, int flag)

#endif /* CONFIG_SMP */

/**
 * task_oncpu_function_call - call a function on the cpu on which a task runs
 * @p: the task to evaluate
 * @func: the function to be called
 * @info: the function call argument
 *
 * Calls the function @func when the task is currently running. This might
 * be on the current CPU, which just calls the function directly
 */
void task_oncpu_function_call(struct task_struct *p,
			      void (*func) (void *info), void *info)
{
	int cpu;

	preempt_disable();
	cpu = task_cpu(p);
	if (task_curr(p))
		smp_call_function_single(cpu, func, info, 1);
	preempt_enable();
}

/***
 * try_to_wake_up - wake up a thread
 * @p: the to-be-woken-up thread
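A note on the helper added above: it invokes @func via IPI on whatever CPU the task is currently executing on, and does nothing at all when the task is not running, so callers must cover the not-running case themselves. A hedged kernel-side usage sketch follows; my_remote_func and my_poke_task are hypothetical names, not part of this commit.

/* Hypothetical caller, kernel context only; illustrates the contract. */
static void my_remote_func(void *data)
{
	struct perf_counter_context *ctx = data;

	/* Runs on the CPU where the target task is executing (or directly
	 * on this CPU if the task runs here), so the task cannot be in the
	 * middle of rescheduling while we look at ctx. */
	(void)ctx;
}

static void my_poke_task(struct task_struct *p, struct perf_counter_context *ctx)
{
	task_oncpu_function_call(p, my_remote_func, ctx);
	/* If p was not running, my_remote_func was never called; the
	 * not-running case has to be handled separately, e.g. under the
	 * context lock. */
}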
@@ -2535,6 +2562,7 @@ static void __sched_fork(struct task_struct *p)
	p->se.exec_start = 0;
	p->se.sum_exec_runtime = 0;
	p->se.prev_sum_exec_runtime = 0;
	p->se.nr_migrations = 0;
	p->se.last_wakeup = 0;
	p->se.avg_overlap = 0;
	p->se.start_runtime = 0;
@@ -2765,6 +2793,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
	 */
	prev_state = prev->state;
	finish_arch_switch(prev);
	perf_counter_task_sched_in(current, cpu_of(rq));
	finish_lock_switch(rq, prev);
#ifdef CONFIG_SMP
	if (post_schedule)
@@ -2979,6 +3008,15 @@ static void calc_load_account_active(struct rq *this_rq)
	}
}

/*
 * Externally visible per-cpu scheduler statistics:
 * cpu_nr_migrations(cpu) - number of migrations into that cpu
 */
u64 cpu_nr_migrations(int cpu)
{
	return cpu_rq(cpu)->nr_migrations_in;
}

/*
 * Update rq->cpu_load[] statistics. This function is usually called every
 * scheduler tick (TICK_NSEC).
@@ -5077,6 +5115,8 @@ void scheduler_tick(void)
	curr->sched_class->task_tick(rq, curr, 0);
	spin_unlock(&rq->lock);

	perf_counter_task_tick(curr, cpu);

#ifdef CONFIG_SMP
	rq->idle_at_tick = idle_cpu(cpu);
	trigger_load_balance(rq, cpu);
@@ -5292,6 +5332,7 @@ need_resched_nonpreemptible:

	if (likely(prev != next)) {
		sched_info_switch(prev, next);
		perf_counter_task_sched_out(prev, next, cpu);

		rq->nr_switches++;
		rq->curr = next;
@@ -7535,8 +7576,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
	return NOTIFY_OK;
}

/* Register at highest priority so that task migration (migrate_all_tasks)
 * happens before everything else.
/*
 * Register at high priority so that task migration (migrate_all_tasks)
 * happens before everything else. This has to be lower priority than
 * the notifier in the perf_counter subsystem, though.
 */
static struct notifier_block __cpuinitdata migration_notifier = {
	.notifier_call = migration_call,
@@ -9214,7 +9257,7 @@ void __init sched_init(void)
	 * 1024) and two child groups A0 and A1 (of weight 1024 each),
	 * then A0's share of the cpu resource is:
	 *
	 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
	 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
	 *
	 * We achieve this by letting init_task_group's tasks sit
	 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
@@ -9319,6 +9362,8 @@ void __init sched_init(void)
	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
#endif /* SMP */

	perf_counter_init();

	scheduler_running = 1;
}
@@ -14,6 +14,7 @@
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/perf_counter.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
@@ -1793,6 +1794,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_COUNTERS_DISABLE:
		error = perf_counter_task_disable();
		break;
	case PR_TASK_PERF_COUNTERS_ENABLE:
		error = perf_counter_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		error = current->timer_slack_ns;
		break;
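The two new prctl cases above give a task a cheap way to pause and resume all of its own counters. A hedged userspace sketch, assuming the PR_TASK_PERF_COUNTERS_* constants are exposed by this kernel's <linux/prctl.h>:

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>	/* PR_TASK_PERF_COUNTERS_* on kernels carrying this series */

int main(void)
{
	/* Stop counting for this task (and counters it inherited). */
	if (prctl(PR_TASK_PERF_COUNTERS_DISABLE, 0, 0, 0, 0) != 0)
		perror("PR_TASK_PERF_COUNTERS_DISABLE");

	/* ... code that should not be measured ... */

	/* Resume counting. */
	if (prctl(PR_TASK_PERF_COUNTERS_ENABLE, 0, 0, 0, 0) != 0)
		perror("PR_TASK_PERF_COUNTERS_ENABLE");

	return 0;
}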
@@ -175,3 +175,6 @@ cond_syscall(compat_sys_timerfd_settime);
cond_syscall(compat_sys_timerfd_gettime);
cond_syscall(sys_eventfd);
cond_syscall(sys_eventfd2);

/* performance counters: */
cond_syscall(sys_perf_counter_open);
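The cond_syscall() entry above makes sys_perf_counter_open fall back to the generic sys_ni_syscall stub when CONFIG_PERF_COUNTERS is not set, so userspace sees ENOSYS rather than an unresolved syscall. A hedged probe sketch; the __NR_perf_counter_open number and the attr argument layout are arch- and version-specific assumptions, so attr is left NULL on purpose:

#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
#ifdef __NR_perf_counter_open
	/* attr = NULL is deliberately bogus; we only want to tell ENOSYS
	 * (syscall stubbed out) apart from any other failure. */
	long ret = syscall(__NR_perf_counter_open, NULL, 0, -1, -1, 0UL);

	if (ret < 0 && errno == ENOSYS)
		printf("perf counters are not built into this kernel\n");
	else
		printf("sys_perf_counter_open is present (ret=%ld)\n", ret);
#else
	printf("__NR_perf_counter_open is not defined in these headers\n");
#endif
	return 0;
}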
@@ -49,6 +49,7 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/slow-work.h>
#include <linux/perf_counter.h>

#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -932,6 +933,32 @@ static struct ctl_table kern_table[] = {
		.child = slow_work_sysctls,
	},
#endif
#ifdef CONFIG_PERF_COUNTERS
	{
		.ctl_name = CTL_UNNUMBERED,
		.procname = "perf_counter_paranoid",
		.data = &sysctl_perf_counter_paranoid,
		.maxlen = sizeof(sysctl_perf_counter_paranoid),
		.mode = 0644,
		.proc_handler = &proc_dointvec,
	},
	{
		.ctl_name = CTL_UNNUMBERED,
		.procname = "perf_counter_mlock_kb",
		.data = &sysctl_perf_counter_mlock,
		.maxlen = sizeof(sysctl_perf_counter_mlock),
		.mode = 0644,
		.proc_handler = &proc_dointvec,
	},
	{
		.ctl_name = CTL_UNNUMBERED,
		.procname = "perf_counter_max_sample_rate",
		.data = &sysctl_perf_counter_sample_rate,
		.maxlen = sizeof(sysctl_perf_counter_sample_rate),
		.mode = 0644,
		.proc_handler = &proc_dointvec,
	},
#endif
	/*
	 * NOTE: do not add new entries to this table unless you have read
	 * Documentation/sysctl/ctl_unnumbered.txt
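The three ctl_table entries above surface as integer files under /proc/sys/kernel/ (perf_counter_paranoid, perf_counter_mlock_kb, perf_counter_max_sample_rate), each parsed by proc_dointvec. A minimal read sketch from userspace:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/perf_counter_paranoid", "r");
	int level;

	if (!f) {
		perror("perf_counter_paranoid");
		return 1;
	}
	if (fscanf(f, "%d", &level) == 1)
		printf("perf_counter_paranoid = %d\n", level);
	fclose(f);
	return 0;
}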
@@ -37,6 +37,7 @@
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_counter.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -1129,6 +1130,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
	struct tvec_base *base = __get_cpu_var(tvec_bases);

	perf_counter_do_pending();

	hrtimer_run_pending();

	if (time_after_eq(jiffies, base->timer_jiffies))