Merge branch 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (574 commits)
  perf_counter: Turn off by default
  perf_counter: Add counter->id to the throttle event
  perf_counter: Better align code
  perf_counter: Rename L2 to LL cache
  perf_counter: Standardize event names
  perf_counter: Rename enums
  perf_counter tools: Clean up u64 usage
  perf_counter: Rename perf_counter_limit sysctl
  perf_counter: More paranoia settings
  perf_counter: powerpc: Implement generalized cache events for POWER processors
  perf_counters: powerpc: Add support for POWER7 processors
  perf_counter: Accurate period data
  perf_counter: Introduce struct for sample data
  perf_counter tools: Normalize data using per sample period data
  perf_counter: Annotate exit ctx recursion
  perf_counter tools: Propagate signals properly
  perf_counter tools: Small frequency related fixes
  perf_counter: More aggressive frequency adjustment
  perf_counter/x86: Fix the model number of Intel Core2 processors
  perf_counter, x86: Correct some event and umask values for Intel processors
  ...
Merged by: Linus Torvalds
Date: 2009-06-11 14:01:07 -07:00
138 changed files with 27407 additions and 86 deletions
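
What the series exposes to userspace is a single new syscall, sys_perf_counter_open() (made conditional via cond_syscall() in kernel/sys_ni.c below), which hands back a file descriptor for one counter. A hedged, illustrative sketch of that usage, not part of this commit: it assumes the perf_counter_attr layout and the PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES enums exported by <linux/perf_counter.h> in this series, and an arch-specific __NR_perf_counter_open from the kernel headers.

/*
 * Illustrative sketch only: count CPU cycles for the current task.
 * perf_counter_attr layout, enums and syscall number are assumed
 * to come from this series' <linux/perf_counter.h> and arch headers.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>

int main(void)
{
    struct perf_counter_attr attr;
    unsigned long long count;
    int fd;

    memset(&attr, 0, sizeof(attr));
    attr.size   = sizeof(attr);
    attr.type   = PERF_TYPE_HARDWARE;
    attr.config = PERF_COUNT_HW_CPU_CYCLES;

    /* attr, pid 0 = current task, cpu -1 = any, group_fd -1, flags 0 */
    fd = syscall(__NR_perf_counter_open, &attr, 0, -1, -1, 0);
    if (fd < 0) {
        perror("perf_counter_open");
        return 1;
    }

    /* ... run the workload to be measured ... */

    if (read(fd, &count, sizeof(count)) == sizeof(count))
        printf("cycles: %llu\n", count);
    close(fd);
    return 0;
}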

kernel/Makefile

@@ -96,6 +96,7 @@ obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

kernel/exit.c

@@ -48,6 +48,7 @@
#include <linux/tracehook.h>
#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include <linux/perf_counter.h>
#include <trace/events/sched.h>
#include <asm/uaccess.h>
@@ -154,6 +155,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
#ifdef CONFIG_PERF_COUNTERS
WARN_ON_ONCE(tsk->perf_counter_ctxp);
#endif
trace_sched_process_free(tsk);
put_task_struct(tsk);
}
@@ -170,6 +174,7 @@ repeat:
atomic_dec(&__task_cred(p)->user->processes);
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
tracehook_finish_release_task(p);
__exit_signal(p);
@@ -971,16 +976,19 @@ NORET_TYPE void do_exit(long code)
module_put(tsk->binfmt->module);
proc_exit_connector(tsk);
/*
* Flush inherited counters to the parent - before the parent
* gets woken up by child-exit notifications.
*/
perf_counter_exit_task(tsk);
exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
/*
* This must happen late, after the PID is not
* hashed anymore:
*/
if (unlikely(!list_empty(&tsk->pi_state_list)))
exit_pi_state_list(tsk);
if (unlikely(current->pi_state_cache))

kernel/fork.c

@@ -62,6 +62,7 @@
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
#include <linux/perf_counter.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1096,6 +1097,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
retval = perf_counter_init_task(p);
if (retval)
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
goto bad_fork_cleanup_policy;
/* copy all the process information */
@@ -1290,6 +1295,7 @@ bad_fork_cleanup_semundo:
bad_fork_cleanup_audit:
audit_free(p);
bad_fork_cleanup_policy:
perf_counter_free_task(p);
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
@@ -1403,6 +1409,12 @@ long do_fork(unsigned long clone_flags,
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
} else if (!(clone_flags & CLONE_VM)) {
/*
* vfork will do an exec which will call
* set_task_comm()
*/
perf_counter_fork(p);
}
audit_finish_fork(p);

kernel/mutex.c

@@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count);
*
* This function is similar to (but not equivalent to) down().
*/
-void inline __sched mutex_lock(struct mutex *lock)
+void __sched mutex_lock(struct mutex *lock)
{
might_sleep();
/*

kernel/perf_counter.c (new file, 4260 lines added; diff suppressed because it is too large)

kernel/sched.c

@@ -39,6 +39,7 @@
#include <linux/completion.h>
#include <linux/kernel_stat.h>
#include <linux/debug_locks.h>
#include <linux/perf_counter.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/profile.h>
@@ -579,6 +580,7 @@ struct rq {
struct load_weight load;
unsigned long nr_load_updates;
u64 nr_switches;
u64 nr_migrations_in;
struct cfs_rq cfs;
struct rt_rq rt;
@@ -691,7 +693,7 @@ static inline int cpu_of(struct rq *rq)
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
{
rq->clock = sched_clock_cpu(cpu_of(rq));
}
@@ -1968,12 +1970,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.sleep_start -= clock_offset;
if (p->se.block_start)
p->se.block_start -= clock_offset;
#endif
if (old_cpu != new_cpu) {
schedstat_inc(p, se.nr_migrations);
p->se.nr_migrations++;
new_rq->nr_migrations_in++;
#ifdef CONFIG_SCHEDSTATS
if (task_hot(p, old_rq->clock, NULL))
schedstat_inc(p, se.nr_forced2_migrations);
}
#endif
perf_counter_task_migration(p, new_cpu);
}
p->se.vruntime -= old_cfsrq->min_vruntime -
new_cfsrq->min_vruntime;
@@ -2368,6 +2374,27 @@ static int sched_balance_self(int cpu, int flag)
#endif /* CONFIG_SMP */
/**
* task_oncpu_function_call - call a function on the cpu on which a task runs
* @p: the task to evaluate
* @func: the function to be called
* @info: the function call argument
*
* Calls the function @func when the task is currently running. This might
* be on the current CPU, which just calls the function directly
*/
void task_oncpu_function_call(struct task_struct *p,
void (*func) (void *info), void *info)
{
int cpu;
preempt_disable();
cpu = task_cpu(p);
if (task_curr(p))
smp_call_function_single(cpu, func, info, 1);
preempt_enable();
}
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
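
task_oncpu_function_call() above is the helper the perf code uses to run a function on whatever CPU a task is currently executing on; if the task is not running anywhere, the call does nothing. A hypothetical caller, purely to illustrate the convention (every name below is invented for this sketch); the callback arrives via smp_call_function_single(), i.e. in IRQ context, and must not sleep:

#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/kernel.h>

/* Invented example callback: runs on the task's CPU, IRQ context. */
static void example_remote_func(void *info)
{
    struct task_struct *p = info;

    pr_debug("task %d seen on cpu %d\n", p->pid, smp_processor_id());
}

static void example_poke_task(struct task_struct *p)
{
    /* No-op unless @p is currently running on some CPU. */
    task_oncpu_function_call(p, example_remote_func, p);
}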
@@ -2535,6 +2562,7 @@ static void __sched_fork(struct task_struct *p)
p->se.exec_start = 0;
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.last_wakeup = 0;
p->se.avg_overlap = 0;
p->se.start_runtime = 0;
@@ -2765,6 +2793,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
perf_counter_task_sched_in(current, cpu_of(rq));
finish_lock_switch(rq, prev);
#ifdef CONFIG_SMP
if (post_schedule)
@@ -2979,6 +3008,15 @@ static void calc_load_account_active(struct rq *this_rq)
}
}
/*
* Externally visible per-cpu scheduler statistics:
* cpu_nr_migrations(cpu) - number of migrations into that cpu
*/
u64 cpu_nr_migrations(int cpu)
{
return cpu_rq(cpu)->nr_migrations_in;
}
/*
* Update rq->cpu_load[] statistics. This function is usually called every
* scheduler tick (TICK_NSEC).
@@ -5077,6 +5115,8 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
spin_unlock(&rq->lock);
perf_counter_task_tick(curr, cpu);
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
trigger_load_balance(rq, cpu);
@@ -5292,6 +5332,7 @@ need_resched_nonpreemptible:
if (likely(prev != next)) {
sched_info_switch(prev, next);
perf_counter_task_sched_out(prev, next, cpu);
rq->nr_switches++;
rq->curr = next;
@@ -7535,8 +7576,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
-/* Register at highest priority so that task migration (migrate_all_tasks)
- * happens before everything else.
+/*
+ * Register at high priority so that task migration (migrate_all_tasks)
+ * happens before everything else. This has to be lower priority than
+ * the notifier in the perf_counter subsystem, though.
 */
static struct notifier_block __cpuinitdata migration_notifier = {
.notifier_call = migration_call,
@@ -9214,7 +9257,7 @@ void __init sched_init(void)
* 1024) and two child groups A0 and A1 (of weight 1024 each),
* then A0's share of the cpu resource is:
*
- * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
+ * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
*
* We achieve this by letting init_task_group's tasks sit
* directly in rq->cfs (i.e init_task_group->se[] = NULL).
@@ -9319,6 +9362,8 @@ void __init sched_init(void)
alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
#endif /* SMP */
perf_counter_init();
scheduler_running = 1;
}

kernel/sys.c

@@ -14,6 +14,7 @@
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/perf_counter.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
@@ -1793,6 +1794,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_TSC:
error = SET_TSC_CTL(arg2);
break;
case PR_TASK_PERF_COUNTERS_DISABLE:
error = perf_counter_task_disable();
break;
case PR_TASK_PERF_COUNTERS_ENABLE:
error = perf_counter_task_enable();
break;
case PR_GET_TIMERSLACK:
error = current->timer_slack_ns;
break;
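
The two new prctl() options give a task a switch for all counters currently attached to it. A minimal userspace sketch, assuming PR_TASK_PERF_COUNTERS_DISABLE and PR_TASK_PERF_COUNTERS_ENABLE are exported through <linux/prctl.h> by this series:

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>  /* PR_TASK_PERF_COUNTERS_* (assumed from this series) */

int main(void)
{
    /* Pause every counter attached to this task... */
    if (prctl(PR_TASK_PERF_COUNTERS_DISABLE, 0, 0, 0, 0))
        perror("prctl(PR_TASK_PERF_COUNTERS_DISABLE)");

    /* ... code in here runs unmeasured ... */

    /* ...and resume them. */
    if (prctl(PR_TASK_PERF_COUNTERS_ENABLE, 0, 0, 0, 0))
        perror("prctl(PR_TASK_PERF_COUNTERS_ENABLE)");
    return 0;
}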

kernel/sys_ni.c

@@ -175,3 +175,6 @@ cond_syscall(compat_sys_timerfd_settime);
cond_syscall(compat_sys_timerfd_gettime);
cond_syscall(sys_eventfd);
cond_syscall(sys_eventfd2);
/* performance counters: */
cond_syscall(sys_perf_counter_open);

kernel/sysctl.c

@@ -49,6 +49,7 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/slow-work.h>
#include <linux/perf_counter.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -932,6 +933,32 @@ static struct ctl_table kern_table[] = {
.child = slow_work_sysctls,
},
#endif
#ifdef CONFIG_PERF_COUNTERS
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_paranoid",
.data = &sysctl_perf_counter_paranoid,
.maxlen = sizeof(sysctl_perf_counter_paranoid),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_mlock_kb",
.data = &sysctl_perf_counter_mlock,
.maxlen = sizeof(sysctl_perf_counter_mlock),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_max_sample_rate",
.data = &sysctl_perf_counter_sample_rate,
.maxlen = sizeof(sysctl_perf_counter_sample_rate),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
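
Each entry above shows up as /proc/sys/kernel/<procname>. A small userspace sketch reading the three new knobs; the paths follow directly from the table, while the meaning of each value is defined by the perf_counter code:

#include <stdio.h>

/* Read an integer knob from /proc/sys/kernel/<name>; -1 on failure. */
static int read_knob(const char *name)
{
    char path[128];
    FILE *f;
    int val = -1;

    snprintf(path, sizeof(path), "/proc/sys/kernel/%s", name);
    f = fopen(path, "r");
    if (f) {
        if (fscanf(f, "%d", &val) != 1)
            val = -1;
        fclose(f);
    }
    return val;
}

int main(void)
{
    printf("paranoid=%d mlock_kb=%d max_sample_rate=%d\n",
           read_knob("perf_counter_paranoid"),
           read_knob("perf_counter_mlock_kb"),
           read_knob("perf_counter_max_sample_rate"));
    return 0;
}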

kernel/timer.c

@@ -37,6 +37,7 @@
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_counter.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -1129,6 +1130,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = __get_cpu_var(tvec_bases);
perf_counter_do_pending();
hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))