Merge branch 'tracing/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'tracing/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (228 commits)
  ftrace: build fix for ftraced_suspend
  ftrace: separate out the function enabled variable
  ftrace: add ftrace_kill_atomic
  ftrace: use current CPU for function startup
  ftrace: start wakeup tracing after setting function tracer
  ftrace: check proper config for preempt type
  ftrace: trace schedule
  ftrace: define function trace nop
  ftrace: move sched_switch enable after markers
  ftrace: prevent ftrace modifications while being kprobe'd, v2
  fix "ftrace: store mcount address in rec->ip"
  mmiotrace broken in linux-next (8-bit writes only)
  ftrace: avoid modifying kprobe'd records
  ftrace: freeze kprobe'd records
  kprobes: enable clean usage of get_kprobe
  ftrace: store mcount address in rec->ip
  ftrace: build fix with gcc 4.3
  namespacecheck: fixes
  ftrace: fix "notrace" filtering priority
  ftrace: fix printout
  ...
This commit is contained in:
Linus Torvalds
2008-07-14 14:49:54 -07:00
100 changed files with 11490 additions and 115 deletions

View File

@@ -11,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o
CFLAGS_REMOVE_sched.o = -mno-spe
ifdef CONFIG_FTRACE
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
CFLAGS_REMOVE_lockdep_proc.o = -pg
CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
endif
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
@@ -69,6 +81,8 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_FTRACE) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)

View File

@@ -910,7 +910,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
rt_mutex_init_task(p);
#ifdef CONFIG_TRACE_IRQFLAGS
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif

View File

@@ -39,6 +39,7 @@
#include <linux/irqflags.h>
#include <linux/utsname.h>
#include <linux/hash.h>
#include <linux/ftrace.h>
#include <asm/sections.h>
@@ -81,6 +82,8 @@ static int graph_lock(void)
__raw_spin_unlock(&lockdep_lock);
return 0;
}
/* prevent any recursions within lockdep from causing deadlocks */
current->lockdep_recursion++;
return 1;
}
@@ -89,6 +92,7 @@ static inline int graph_unlock(void)
if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
return DEBUG_LOCKS_WARN_ON(1);
current->lockdep_recursion--;
__raw_spin_unlock(&lockdep_lock);
return 0;
}
@@ -982,7 +986,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
return 1;
}
#ifdef CONFIG_TRACE_IRQFLAGS
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* Forwards and backwards subgraph searching, for the purposes of
* proving that two subgraphs can be connected by a new dependency
@@ -1680,7 +1684,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
#ifdef CONFIG_TRACE_IRQFLAGS
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* print irq inversion bug:
@@ -2013,11 +2017,13 @@ void early_boot_irqs_on(void)
/*
* Hardirqs will be enabled:
*/
void trace_hardirqs_on(void)
void trace_hardirqs_on_caller(unsigned long a0)
{
struct task_struct *curr = current;
unsigned long ip;
time_hardirqs_on(CALLER_ADDR0, a0);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2055,16 +2061,23 @@ void trace_hardirqs_on(void)
curr->hardirq_enable_event = ++curr->irq_events;
debug_atomic_inc(&hardirqs_on_events);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
void trace_hardirqs_on(void)
{
trace_hardirqs_on_caller(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
/*
* Hardirqs were disabled:
*/
void trace_hardirqs_off(void)
void trace_hardirqs_off_caller(unsigned long a0)
{
struct task_struct *curr = current;
time_hardirqs_off(CALLER_ADDR0, a0);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2082,7 +2095,12 @@ void trace_hardirqs_off(void)
} else
debug_atomic_inc(&redundant_hardirqs_off);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
void trace_hardirqs_off(void)
{
trace_hardirqs_off_caller(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off);
/*
@@ -2246,7 +2264,7 @@ static inline int separate_irq_context(struct task_struct *curr,
* Mark a lock with a usage bit, and validate the state transition:
*/
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit)
enum lock_usage_bit new_bit)
{
unsigned int new_mask = 1 << new_bit, ret = 1;
@@ -2686,7 +2704,7 @@ static void check_flags(unsigned long flags)
* and also avoid lockdep recursion:
*/
void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int trylock, int read, int check, unsigned long ip)
int trylock, int read, int check, unsigned long ip)
{
unsigned long flags;
@@ -2708,7 +2726,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
EXPORT_SYMBOL_GPL(lock_acquire);
void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip)
{
unsigned long flags;

View File

@@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex);
struct marker_entry {
struct hlist_node hlist;
char *format;
void (*call)(const struct marker *mdata, /* Probe wrapper */
void *call_private, const char *fmt, ...);
/* Probe wrapper */
void (*call)(const struct marker *mdata, void *call_private, ...);
struct marker_probe_closure single;
struct marker_probe_closure *multi;
int refcount; /* Number of times armed. 0 if disarmed. */
@@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
* marker_probe_cb Callback that prepares the variable argument list for probes.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
* @fmt: format string
* @...: Variable argument list.
*
* Since we do not use "typical" pointer based RCU in the 1 argument case, we
* need to put a full smp_rmb() in this branch. This is why we do not use
* rcu_dereference() for the pointer read.
*/
void marker_probe_cb(const struct marker *mdata, void *call_private,
const char *fmt, ...)
void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
{
va_list args;
char ptype;
@@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
/* Must read the ptr before private data. They are not data
* dependant, so we put an explicit smp_rmb() here. */
smp_rmb();
va_start(args, fmt);
func(mdata->single.probe_private, call_private, fmt, &args);
va_start(args, call_private);
func(mdata->single.probe_private, call_private, mdata->format,
&args);
va_end(args);
} else {
struct marker_probe_closure *multi;
@@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
smp_read_barrier_depends();
multi = mdata->multi;
for (i = 0; multi[i].func; i++) {
va_start(args, fmt);
multi[i].func(multi[i].probe_private, call_private, fmt,
&args);
va_start(args, call_private);
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
va_end(args);
}
}
@@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
* marker_probe_cb Callback that does not prepare the variable argument list.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
* @fmt: format string
* @...: Variable argument list.
*
* Should be connected to markers "MARK_NOARGS".
*/
void marker_probe_cb_noarg(const struct marker *mdata,
void *call_private, const char *fmt, ...)
void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
{
va_list args; /* not initialized */
char ptype;
@@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
/* Must read the ptr before private data. They are not data
* dependant, so we put an explicit smp_rmb() here. */
smp_rmb();
func(mdata->single.probe_private, call_private, fmt, &args);
func(mdata->single.probe_private, call_private, mdata->format,
&args);
} else {
struct marker_probe_closure *multi;
int i;
@@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
smp_read_barrier_depends();
multi = mdata->multi;
for (i = 0; multi[i].func; i++)
multi[i].func(multi[i].probe_private, call_private, fmt,
&args);
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
}
preempt_enable();
}

View File

@@ -1046,7 +1046,9 @@ void release_console_sem(void)
_log_end = log_end;
con_start = log_end; /* Flush */
spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(_con_start, _log_end);
start_critical_timings();
local_irq_restore(flags);
}
console_locked = 0;

View File

@@ -70,6 +70,7 @@
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -645,6 +646,24 @@ static inline void update_rq_clock(struct rq *rq)
# define const_debug static const
#endif
/**
* runqueue_is_locked
*
* Returns true if the current cpu runqueue is locked.
* This interface allows printk to be called with the runqueue lock
* held and know whether or not it is OK to wake up the klogd.
*/
int runqueue_is_locked(void)
{
int cpu = get_cpu();
struct rq *rq = cpu_rq(cpu);
int ret;
ret = spin_is_locked(&rq->lock);
put_cpu();
return ret;
}
/*
* Debugging: various feature bits
*/
@@ -2318,6 +2337,9 @@ out_activate:
success = 1;
out_running:
trace_mark(kernel_sched_wakeup,
"pid %d state %ld ## rq %p task %p rq->curr %p",
p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
p->state = TASK_RUNNING;
@@ -2450,6 +2472,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
trace_mark(kernel_sched_wakeup_new,
"pid %d state %ld ## rq %p task %p rq->curr %p",
p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
@@ -2622,6 +2647,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
trace_mark(kernel_sched_schedule,
"prev_pid %d next_pid %d prev_state %ld "
"## rq %p prev %p next %p",
prev->pid, next->pid, prev->state,
rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -4221,26 +4251,44 @@ void scheduler_tick(void)
#endif
}
#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
defined(CONFIG_PREEMPT_TRACER))
static inline unsigned long get_parent_ip(unsigned long addr)
{
if (in_lock_functions(addr)) {
addr = CALLER_ADDR2;
if (in_lock_functions(addr))
addr = CALLER_ADDR3;
}
return addr;
}
void __kprobes add_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
return;
#endif
preempt_count() += val;
#ifdef CONFIG_DEBUG_PREEMPT
/*
* Spinlock count overflowing soon?
*/
DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
PREEMPT_MASK - 10);
#endif
if (preempt_count() == val)
trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
EXPORT_SYMBOL(add_preempt_count);
void __kprobes sub_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
@@ -4252,7 +4300,10 @@ void __kprobes sub_preempt_count(int val)
if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
!(preempt_count() & PREEMPT_MASK)))
return;
#endif
if (preempt_count() == val)
trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
preempt_count() -= val;
}
EXPORT_SYMBOL(sub_preempt_count);
@@ -5566,7 +5617,7 @@ out_unlock:
return retval;
}
static const char stat_nam[] = "RSDTtZX";
static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
void sched_show_task(struct task_struct *p)
{

View File

@@ -31,6 +31,7 @@
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/ftrace.h>
static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);

View File

@@ -436,7 +436,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
}
EXPORT_SYMBOL(_spin_trylock_bh);
int in_lock_functions(unsigned long addr)
notrace int in_lock_functions(unsigned long addr)
{
/* Linker adds these: start and end of __lockfunc functions */
extern char __lock_text_start[], __lock_text_end[];

View File

@@ -46,6 +46,7 @@
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -463,6 +464,16 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#ifdef CONFIG_FTRACE
{
.ctl_name = CTL_UNNUMBERED,
.procname = "ftrace_enabled",
.data = &ftrace_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &ftrace_enable_sysctl,
},
#endif
#ifdef CONFIG_KMOD
{
.ctl_name = KERN_MODPROBE,

135
kernel/trace/Kconfig Normal file
View File

@@ -0,0 +1,135 @@
#
# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
#
config HAVE_FTRACE
bool
config HAVE_DYNAMIC_FTRACE
bool
config TRACER_MAX_TRACE
bool
config TRACING
bool
select DEBUG_FS
select STACKTRACE
config FTRACE
bool "Kernel Function Tracer"
depends on HAVE_FTRACE
select FRAME_POINTER
select TRACING
select CONTEXT_SWITCH_TRACER
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
instruction to the beginning of every kernel function, which NOP
sequence is then dynamically patched into a tracer call when
tracing is enabled by the administrator. If it's runtime disabled
(the bootup default), then the overhead of the instructions is very
small and not measurable even in micro-benchmarks.
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
depends on TRACE_IRQFLAGS_SUPPORT
depends on GENERIC_TIME
depends on HAVE_FTRACE
select TRACE_IRQFLAGS
select TRACING
select TRACER_MAX_TRACE
help
This option measures the time spent in irqs-off critical
sections, with microsecond accuracy.
The default measurement method is a maximum search, which is
disabled by default and can be runtime (re-)started
via:
echo 0 > /debugfs/tracing/tracing_max_latency
(Note that kernel size and overhead increases with this option
enabled. This option and the preempt-off timing option can be
used together or separately.)
config PREEMPT_TRACER
bool "Preemption-off Latency Tracer"
default n
depends on GENERIC_TIME
depends on PREEMPT
depends on HAVE_FTRACE
select TRACING
select TRACER_MAX_TRACE
help
This option measures the time spent in preemption off critical
sections, with microsecond accuracy.
The default measurement method is a maximum search, which is
disabled by default and can be runtime (re-)started
via:
echo 0 > /debugfs/tracing/tracing_max_latency
(Note that kernel size and overhead increases with this option
enabled. This option and the irqs-off timing option can be
used together or separately.)
config SYSPROF_TRACER
bool "Sysprof Tracer"
depends on X86
select TRACING
help
This tracer provides the trace needed by the 'Sysprof' userspace
tool.
config SCHED_TRACER
bool "Scheduling Latency Tracer"
depends on HAVE_FTRACE
select TRACING
select CONTEXT_SWITCH_TRACER
select TRACER_MAX_TRACE
help
This tracer tracks the latency of the highest priority task
to be scheduled in, starting from the point it has woken up.
config CONTEXT_SWITCH_TRACER
bool "Trace process context switches"
depends on HAVE_FTRACE
select TRACING
select MARKERS
help
This tracer gets called from the context switch and records
all switching of tasks.
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FTRACE
depends on HAVE_DYNAMIC_FTRACE
default y
help
This option will modify all the calls to ftrace dynamically
(will patch them out of the binary image and replaces them
with a No-Op instruction) as they are called. A table is
created to dynamically enable them again.
This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
has native performance as long as no tracing is active.
The changes to the code are done by a kernel thread that
wakes up once a second and checks to see if any ftrace calls
were made. If so, it runs stop_machine (stops all CPUS)
and modifies the code to jump over the call to ftrace.
config FTRACE_SELFTEST
bool
config FTRACE_STARTUP_TEST
bool "Perform a startup test on ftrace"
depends on TRACING
select FTRACE_SELFTEST
help
This option performs a series of startup tests on ftrace. On bootup
a series of tests are made to verify that the tracer is
functioning properly. It will do tests on all the configured
tracers of ftrace.

24
kernel/trace/Makefile Normal file
View File

@@ -0,0 +1,24 @@
# Do not instrument the tracer itself:
ifdef CONFIG_FTRACE
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
# selftest needs instrumentation
CFLAGS_trace_selftest_dynamic.o = -pg
obj-y += trace_selftest_dynamic.o
endif
obj-$(CONFIG_FTRACE) += libftrace.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
obj-$(CONFIG_FTRACE) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
libftrace-y := ftrace.o

1727
kernel/trace/ftrace.c Normal file

File diff suppressed because it is too large Load Diff

3161
kernel/trace/trace.c Normal file

File diff suppressed because it is too large Load Diff

339
kernel/trace/trace.h Normal file
View File

@@ -0,0 +1,339 @@
#ifndef _LINUX_KERNEL_TRACE_H
#define _LINUX_KERNEL_TRACE_H
#include <linux/fs.h>
#include <asm/atomic.h>
#include <linux/sched.h>
#include <linux/clocksource.h>
#include <linux/mmiotrace.h>
enum trace_type {
__TRACE_FIRST_TYPE = 0,
TRACE_FN,
TRACE_CTX,
TRACE_WAKE,
TRACE_STACK,
TRACE_SPECIAL,
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
__TRACE_LAST_TYPE
};
/*
* Function trace entry - function address and parent function addres:
*/
struct ftrace_entry {
unsigned long ip;
unsigned long parent_ip;
};
/*
* Context switch trace entry - which task (and prio) we switched from/to:
*/
struct ctx_switch_entry {
unsigned int prev_pid;
unsigned char prev_prio;
unsigned char prev_state;
unsigned int next_pid;
unsigned char next_prio;
unsigned char next_state;
};
/*
* Special (free-form) trace entry:
*/
struct special_entry {
unsigned long arg1;
unsigned long arg2;
unsigned long arg3;
};
/*
* Stack-trace entry:
*/
#define FTRACE_STACK_ENTRIES 8
struct stack_entry {
unsigned long caller[FTRACE_STACK_ENTRIES];
};
/*
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
*
* bash-15816 [01] 235.197585: idle_cpu <- irq_enter
*/
struct trace_entry {
char type;
char cpu;
char flags;
char preempt_count;
int pid;
cycle_t t;
union {
struct ftrace_entry fn;
struct ctx_switch_entry ctx;
struct special_entry special;
struct stack_entry stack;
struct mmiotrace_rw mmiorw;
struct mmiotrace_map mmiomap;
};
};
#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
/*
* The CPU trace array - it consists of thousands of trace entries
* plus some other descriptor data: (for example which task started
* the trace, etc.)
*/
struct trace_array_cpu {
struct list_head trace_pages;
atomic_t disabled;
raw_spinlock_t lock;
struct lock_class_key lock_key;
/* these fields get copied into max-trace: */
unsigned trace_head_idx;
unsigned trace_tail_idx;
void *trace_head; /* producer */
void *trace_tail; /* consumer */
unsigned long trace_idx;
unsigned long overrun;
unsigned long saved_latency;
unsigned long critical_start;
unsigned long critical_end;
unsigned long critical_sequence;
unsigned long nice;
unsigned long policy;
unsigned long rt_priority;
cycle_t preempt_timestamp;
pid_t pid;
uid_t uid;
char comm[TASK_COMM_LEN];
};
struct trace_iterator;
/*
* The trace array - an array of per-CPU trace arrays. This is the
* highest level data structure that individual tracers deal with.
* They have on/off state as well:
*/
struct trace_array {
unsigned long entries;
long ctrl;
int cpu;
cycle_t time_start;
struct task_struct *waiter;
struct trace_array_cpu *data[NR_CPUS];
};
/*
* A specific tracer, represented by methods that operate on a trace array:
*/
struct tracer {
const char *name;
void (*init)(struct trace_array *tr);
void (*reset)(struct trace_array *tr);
void (*open)(struct trace_iterator *iter);
void (*pipe_open)(struct trace_iterator *iter);
void (*close)(struct trace_iterator *iter);
void (*start)(struct trace_iterator *iter);
void (*stop)(struct trace_iterator *iter);
ssize_t (*read)(struct trace_iterator *iter,
struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos);
void (*ctrl_update)(struct trace_array *tr);
#ifdef CONFIG_FTRACE_STARTUP_TEST
int (*selftest)(struct tracer *trace,
struct trace_array *tr);
#endif
int (*print_line)(struct trace_iterator *iter);
struct tracer *next;
int print_max;
};
struct trace_seq {
unsigned char buffer[PAGE_SIZE];
unsigned int len;
unsigned int readpos;
};
/*
* Trace iterator - used by printout routines who present trace
* results to users and which routines might sleep, etc:
*/
struct trace_iterator {
struct trace_array *tr;
struct tracer *trace;
void *private;
long last_overrun[NR_CPUS];
long overrun[NR_CPUS];
/* The below is zeroed out in pipe_read */
struct trace_seq seq;
struct trace_entry *ent;
int cpu;
struct trace_entry *prev_ent;
int prev_cpu;
unsigned long iter_flags;
loff_t pos;
unsigned long next_idx[NR_CPUS];
struct list_head *next_page[NR_CPUS];
unsigned next_page_idx[NR_CPUS];
long idx;
};
void tracing_reset(struct trace_array_cpu *data);
int tracing_open_generic(struct inode *inode, struct file *filp);
struct dentry *tracing_init_dentry(void);
void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
void ftrace(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags);
void tracing_sched_switch_trace(struct trace_array *tr,
struct trace_array_cpu *data,
struct task_struct *prev,
struct task_struct *next,
unsigned long flags);
void tracing_record_cmdline(struct task_struct *tsk);
void tracing_sched_wakeup_trace(struct trace_array *tr,
struct trace_array_cpu *data,
struct task_struct *wakee,
struct task_struct *cur,
unsigned long flags);
void trace_special(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long arg1,
unsigned long arg2,
unsigned long arg3);
void trace_function(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
extern unsigned long tracing_max_latency;
extern unsigned long tracing_thresh;
void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
struct task_struct *tsk, int cpu);
extern cycle_t ftrace_now(int cpu);
#ifdef CONFIG_FTRACE
void tracing_start_function_trace(void);
void tracing_stop_function_trace(void);
#else
# define tracing_start_function_trace() do { } while (0)
# define tracing_stop_function_trace() do { } while (0)
#endif
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
typedef void
(*tracer_switch_func_t)(void *private,
void *__rq,
struct task_struct *prev,
struct task_struct *next);
struct tracer_switch_ops {
tracer_switch_func_t func;
void *private;
struct tracer_switch_ops *next;
};
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
extern int DYN_FTRACE_TEST_NAME(void);
#endif
#ifdef CONFIG_MMIOTRACE
extern void __trace_mmiotrace_rw(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_rw *rw);
extern void __trace_mmiotrace_map(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_map *map);
#endif
#ifdef CONFIG_FTRACE_STARTUP_TEST
#ifdef CONFIG_FTRACE
extern int trace_selftest_startup_function(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_IRQSOFF_TRACER
extern int trace_selftest_startup_irqsoff(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_PREEMPT_TRACER
extern int trace_selftest_startup_preemptoff(struct tracer *trace,
struct trace_array *tr);
#endif
#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_SCHED_TRACER
extern int trace_selftest_startup_wakeup(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
extern int trace_selftest_startup_sched_switch(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_SYSPROF_TRACER
extern int trace_selftest_startup_sysprof(struct tracer *trace,
struct trace_array *tr);
#endif
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
size_t cnt);
extern long ns2usecs(cycle_t nsec);
extern unsigned long trace_flags;
/*
* trace_iterator_flags is an enumeration that defines bit
* positions into trace_flags that controls the output.
*
* NOTE: These bits must match the trace_options array in
* trace.c.
*/
enum trace_iterator_flags {
TRACE_ITER_PRINT_PARENT = 0x01,
TRACE_ITER_SYM_OFFSET = 0x02,
TRACE_ITER_SYM_ADDR = 0x04,
TRACE_ITER_VERBOSE = 0x08,
TRACE_ITER_RAW = 0x10,
TRACE_ITER_HEX = 0x20,
TRACE_ITER_BIN = 0x40,
TRACE_ITER_BLOCK = 0x80,
TRACE_ITER_STACKTRACE = 0x100,
TRACE_ITER_SCHED_TREE = 0x200,
};
#endif /* _LINUX_KERNEL_TRACE_H */

View File

@@ -0,0 +1,81 @@
/*
* ring buffer based function tracer
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* Based on code from the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/fs.h>
#include "trace.h"
static void function_reset(struct trace_array *tr)
{
int cpu;
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
tracing_reset(tr->data[cpu]);
}
static void start_function_trace(struct trace_array *tr)
{
tr->cpu = get_cpu();
function_reset(tr);
put_cpu();
tracing_start_cmdline_record();
tracing_start_function_trace();
}
static void stop_function_trace(struct trace_array *tr)
{
tracing_stop_function_trace();
tracing_stop_cmdline_record();
}
static void function_trace_init(struct trace_array *tr)
{
if (tr->ctrl)
start_function_trace(tr);
}
static void function_trace_reset(struct trace_array *tr)
{
if (tr->ctrl)
stop_function_trace(tr);
}
static void function_trace_ctrl_update(struct trace_array *tr)
{
if (tr->ctrl)
start_function_trace(tr);
else
stop_function_trace(tr);
}
static struct tracer function_trace __read_mostly =
{
.name = "ftrace",
.init = function_trace_init,
.reset = function_trace_reset,
.ctrl_update = function_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
#endif
};
static __init int init_function_trace(void)
{
return register_tracer(&function_trace);
}
device_initcall(init_function_trace);

View File

@@ -0,0 +1,486 @@
/*
* trace irqs off criticall timings
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* From code in the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/fs.h>
#include "trace.h"
static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;
static DEFINE_PER_CPU(int, tracing_cpu);
static DEFINE_SPINLOCK(max_trace_lock);
enum {
TRACER_IRQS_OFF = (1 << 1),
TRACER_PREEMPT_OFF = (1 << 2),
};
static int trace_type __read_mostly;
#ifdef CONFIG_PREEMPT_TRACER
static inline int
preempt_trace(void)
{
return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
}
#else
# define preempt_trace() (0)
#endif
#ifdef CONFIG_IRQSOFF_TRACER
static inline int
irq_trace(void)
{
return ((trace_type & TRACER_IRQS_OFF) &&
irqs_disabled());
}
#else
# define irq_trace() (0)
#endif
/*
* Sequence count - we record it when starting a measurement and
* skip the latency if the sequence has changed - some other section
* did a maximum and could disturb our measurement with serial console
* printouts, etc. Truly coinciding maximum latencies should be rare
* and what happens together happens separately as well, so this doesnt
* decrease the validity of the maximum found:
*/
static __cacheline_aligned_in_smp unsigned long max_sequence;
#ifdef CONFIG_FTRACE
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
static void
irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
/*
* Does not matter if we preempt. We test the flags
* afterward, to see if irqs are disabled or not.
* If we preempt and get a false positive, the flags
* test will fail.
*/
cpu = raw_smp_processor_id();
if (likely(!per_cpu(tracing_cpu, cpu)))
return;
local_save_flags(flags);
/* slight chance to get a false positive on tracing_cpu */
if (!irqs_disabled_flags(flags))
return;
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
trace_function(tr, data, ip, parent_ip, flags);
atomic_dec(&data->disabled);
}
static struct ftrace_ops trace_ops __read_mostly =
{
.func = irqsoff_tracer_call,
};
#endif /* CONFIG_FTRACE */
/*
* Should this new latency be reported/recorded?
*/
static int report_latency(cycle_t delta)
{
if (tracing_thresh) {
if (delta < tracing_thresh)
return 0;
} else {
if (delta <= tracing_max_latency)
return 0;
}
return 1;
}
static void
check_critical_timing(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long parent_ip,
int cpu)
{
unsigned long latency, t0, t1;
cycle_t T0, T1, delta;
unsigned long flags;
/*
* usecs conversion is slow so we try to delay the conversion
* as long as possible:
*/
T0 = data->preempt_timestamp;
T1 = ftrace_now(cpu);
delta = T1-T0;
local_save_flags(flags);
if (!report_latency(delta))
goto out;
spin_lock_irqsave(&max_trace_lock, flags);
/* check if we are still the max latency */
if (!report_latency(delta))
goto out_unlock;
trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
latency = nsecs_to_usecs(delta);
if (data->critical_sequence != max_sequence)
goto out_unlock;
tracing_max_latency = delta;
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);
data->critical_end = parent_ip;
update_max_tr_single(tr, current, cpu);
max_sequence++;
out_unlock:
spin_unlock_irqrestore(&max_trace_lock, flags);
out:
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
tracing_reset(data);
trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
}
static inline void
start_critical_timing(unsigned long ip, unsigned long parent_ip)
{
int cpu;
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
if (likely(!tracer_enabled))
return;
cpu = raw_smp_processor_id();
if (per_cpu(tracing_cpu, cpu))
return;
data = tr->data[cpu];
if (unlikely(!data) || atomic_read(&data->disabled))
return;
atomic_inc(&data->disabled);
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
data->critical_start = parent_ip ? : ip;
tracing_reset(data);
local_save_flags(flags);
trace_function(tr, data, ip, parent_ip, flags);
per_cpu(tracing_cpu, cpu) = 1;
atomic_dec(&data->disabled);
}
static inline void
stop_critical_timing(unsigned long ip, unsigned long parent_ip)
{
int cpu;
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
cpu = raw_smp_processor_id();
/* Always clear the tracing cpu on stopping the trace */
if (unlikely(per_cpu(tracing_cpu, cpu)))
per_cpu(tracing_cpu, cpu) = 0;
else
return;
if (!tracer_enabled)
return;
data = tr->data[cpu];
if (unlikely(!data) || unlikely(!head_page(data)) ||
!data->critical_start || atomic_read(&data->disabled))
return;
atomic_inc(&data->disabled);
local_save_flags(flags);
trace_function(tr, data, ip, parent_ip, flags);
check_critical_timing(tr, data, parent_ip ? : ip, cpu);
data->critical_start = 0;
atomic_dec(&data->disabled);
}
/* start and stop critical timings used to for stoppage (in idle) */
void start_critical_timings(void)
{
if (preempt_trace() || irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
void stop_critical_timings(void)
{
if (preempt_trace() || irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
#ifdef CONFIG_IRQSOFF_TRACER
#ifdef CONFIG_PROVE_LOCKING
void time_hardirqs_on(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(a0, a1);
}
void time_hardirqs_off(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(a0, a1);
}
#else /* !CONFIG_PROVE_LOCKING */
/*
* Stubs:
*/
void early_boot_irqs_off(void)
{
}
void early_boot_irqs_on(void)
{
}
void trace_softirqs_on(unsigned long ip)
{
}
void trace_softirqs_off(unsigned long ip)
{
}
inline void print_irqtrace_events(struct task_struct *curr)
{
}
/*
* We are only interested in hardirq on/off events:
*/
void trace_hardirqs_on(void)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
EXPORT_SYMBOL(trace_hardirqs_on);
void trace_hardirqs_off(void)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
EXPORT_SYMBOL(trace_hardirqs_off);
void trace_hardirqs_on_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
void trace_hardirqs_off_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
#endif /* CONFIG_PROVE_LOCKING */
#endif /* CONFIG_IRQSOFF_TRACER */
#ifdef CONFIG_PREEMPT_TRACER
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
stop_critical_timing(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
start_critical_timing(a0, a1);
}
#endif /* CONFIG_PREEMPT_TRACER */
static void start_irqsoff_tracer(struct trace_array *tr)
{
register_ftrace_function(&trace_ops);
tracer_enabled = 1;
}
static void stop_irqsoff_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
}
static void __irqsoff_tracer_init(struct trace_array *tr)
{
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
if (tr->ctrl)
start_irqsoff_tracer(tr);
}
static void irqsoff_tracer_reset(struct trace_array *tr)
{
if (tr->ctrl)
stop_irqsoff_tracer(tr);
}
static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
{
if (tr->ctrl)
start_irqsoff_tracer(tr);
else
stop_irqsoff_tracer(tr);
}
static void irqsoff_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
if (iter->tr->ctrl)
stop_irqsoff_tracer(iter->tr);
}
static void irqsoff_tracer_close(struct trace_iterator *iter)
{
if (iter->tr->ctrl)
start_irqsoff_tracer(iter->tr);
}
#ifdef CONFIG_IRQSOFF_TRACER
static void irqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF;
__irqsoff_tracer_init(tr);
}
static struct tracer irqsoff_tracer __read_mostly =
{
.name = "irqsoff",
.init = irqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_irqsoff,
#endif
};
# define register_irqsoff(trace) register_tracer(&trace)
#else
# define register_irqsoff(trace) do { } while (0)
#endif
#ifdef CONFIG_PREEMPT_TRACER
static void preemptoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_PREEMPT_OFF;
__irqsoff_tracer_init(tr);
}
static struct tracer preemptoff_tracer __read_mostly =
{
.name = "preemptoff",
.init = preemptoff_tracer_init,
.reset = irqsoff_tracer_reset,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptoff,
#endif
};
# define register_preemptoff(trace) register_tracer(&trace)
#else
# define register_preemptoff(trace) do { } while (0)
#endif
#if defined(CONFIG_IRQSOFF_TRACER) && \
defined(CONFIG_PREEMPT_TRACER)
static void preemptirqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
__irqsoff_tracer_init(tr);
}
static struct tracer preemptirqsoff_tracer __read_mostly =
{
.name = "preemptirqsoff",
.init = preemptirqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptirqsoff,
#endif
};
# define register_preemptirqsoff(trace) register_tracer(&trace)
#else
# define register_preemptirqsoff(trace) do { } while (0)
#endif
__init static int init_irqsoff_tracer(void)
{
register_irqsoff(irqsoff_tracer);
register_preemptoff(preemptoff_tracer);
register_preemptirqsoff(preemptirqsoff_tracer);
return 0;
}
device_initcall(init_irqsoff_tracer);

View File

@@ -0,0 +1,295 @@
/*
* Memory mapped I/O tracing
*
* Copyright (C) 2008 Pekka Paalanen <pq@iki.fi>
*/
#define DEBUG 1
#include <linux/kernel.h>
#include <linux/mmiotrace.h>
#include <linux/pci.h>
#include "trace.h"
struct header_iter {
struct pci_dev *dev;
};
static struct trace_array *mmio_trace_array;
static bool overrun_detected;
static void mmio_reset_data(struct trace_array *tr)
{
int cpu;
overrun_detected = false;
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
tracing_reset(tr->data[cpu]);
}
static void mmio_trace_init(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
mmio_trace_array = tr;
if (tr->ctrl) {
mmio_reset_data(tr);
enable_mmiotrace();
}
}
static void mmio_trace_reset(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
if (tr->ctrl)
disable_mmiotrace();
mmio_reset_data(tr);
mmio_trace_array = NULL;
}
static void mmio_trace_ctrl_update(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
if (tr->ctrl) {
mmio_reset_data(tr);
enable_mmiotrace();
} else {
disable_mmiotrace();
}
}
static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
{
int ret = 0;
int i;
resource_size_t start, end;
const struct pci_driver *drv = pci_dev_driver(dev);
/* XXX: incomplete checks for trace_seq_printf() return value */
ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
dev->bus->number, dev->devfn,
dev->vendor, dev->device, dev->irq);
/*
* XXX: is pci_resource_to_user() appropriate, since we are
* supposed to interpret the __ioremap() phys_addr argument based on
* these printed values?
*/
for (i = 0; i < 7; i++) {
pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
ret += trace_seq_printf(s, " %llx",
(unsigned long long)(start |
(dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
}
for (i = 0; i < 7; i++) {
pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
ret += trace_seq_printf(s, " %llx",
dev->resource[i].start < dev->resource[i].end ?
(unsigned long long)(end - start) + 1 : 0);
}
if (drv)
ret += trace_seq_printf(s, " %s\n", drv->name);
else
ret += trace_seq_printf(s, " \n");
return ret;
}
static void destroy_header_iter(struct header_iter *hiter)
{
if (!hiter)
return;
pci_dev_put(hiter->dev);
kfree(hiter);
}
static void mmio_pipe_open(struct trace_iterator *iter)
{
struct header_iter *hiter;
struct trace_seq *s = &iter->seq;
trace_seq_printf(s, "VERSION 20070824\n");
hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
if (!hiter)
return;
hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
iter->private = hiter;
}
/* XXX: This is not called when the pipe is closed! */
static void mmio_close(struct trace_iterator *iter)
{
struct header_iter *hiter = iter->private;
destroy_header_iter(hiter);
iter->private = NULL;
}
static unsigned long count_overruns(struct trace_iterator *iter)
{
int cpu;
unsigned long cnt = 0;
for_each_online_cpu(cpu) {
cnt += iter->overrun[cpu];
iter->overrun[cpu] = 0;
}
return cnt;
}
static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp,
char __user *ubuf, size_t cnt, loff_t *ppos)
{
ssize_t ret;
struct header_iter *hiter = iter->private;
struct trace_seq *s = &iter->seq;
unsigned long n;
n = count_overruns(iter);
if (n) {
/* XXX: This is later than where events were lost. */
trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n);
if (!overrun_detected)
pr_warning("mmiotrace has lost events.\n");
overrun_detected = true;
goto print_out;
}
if (!hiter)
return 0;
mmio_print_pcidev(s, hiter->dev);
hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, hiter->dev);
if (!hiter->dev) {
destroy_header_iter(hiter);
iter->private = NULL;
}
print_out:
ret = trace_seq_to_user(s, ubuf, cnt);
return (ret == -EBUSY) ? 0 : ret;
}
static int mmio_print_rw(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct mmiotrace_rw *rw = &entry->mmiorw;
struct trace_seq *s = &iter->seq;
unsigned long long t = ns2usecs(entry->t);
unsigned long usec_rem = do_div(t, 1000000ULL);
unsigned secs = (unsigned long)t;
int ret = 1;
switch (entry->mmiorw.opcode) {
case MMIO_READ:
ret = trace_seq_printf(s,
"R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
rw->width, secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
rw->value, rw->pc, 0);
break;
case MMIO_WRITE:
ret = trace_seq_printf(s,
"W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
rw->width, secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
rw->value, rw->pc, 0);
break;
case MMIO_UNKNOWN_OP:
ret = trace_seq_printf(s,
"UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n",
secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
(rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
(rw->value >> 0) & 0xff, rw->pc, 0);
break;
default:
ret = trace_seq_printf(s, "rw what?\n");
break;
}
if (ret)
return 1;
return 0;
}
static int mmio_print_map(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct mmiotrace_map *m = &entry->mmiomap;
struct trace_seq *s = &iter->seq;
unsigned long long t = ns2usecs(entry->t);
unsigned long usec_rem = do_div(t, 1000000ULL);
unsigned secs = (unsigned long)t;
int ret = 1;
switch (entry->mmiorw.opcode) {
case MMIO_PROBE:
ret = trace_seq_printf(s,
"MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
secs, usec_rem, m->map_id,
(unsigned long long)m->phys, m->virt, m->len,
0UL, 0);
break;
case MMIO_UNPROBE:
ret = trace_seq_printf(s,
"UNMAP %lu.%06lu %d 0x%lx %d\n",
secs, usec_rem, m->map_id, 0UL, 0);
break;
default:
ret = trace_seq_printf(s, "map what?\n");
break;
}
if (ret)
return 1;
return 0;
}
/* return 0 to abort printing without consuming current entry in pipe mode */
static int mmio_print_line(struct trace_iterator *iter)
{
switch (iter->ent->type) {
case TRACE_MMIO_RW:
return mmio_print_rw(iter);
case TRACE_MMIO_MAP:
return mmio_print_map(iter);
default:
return 1; /* ignore unknown entries */
}
}
static struct tracer mmio_tracer __read_mostly =
{
.name = "mmiotrace",
.init = mmio_trace_init,
.reset = mmio_trace_reset,
.pipe_open = mmio_pipe_open,
.close = mmio_close,
.read = mmio_read,
.ctrl_update = mmio_trace_ctrl_update,
.print_line = mmio_print_line,
};
__init static int init_mmio_trace(void)
{
return register_tracer(&mmio_tracer);
}
device_initcall(init_mmio_trace);
void mmio_trace_rw(struct mmiotrace_rw *rw)
{
struct trace_array *tr = mmio_trace_array;
struct trace_array_cpu *data = tr->data[smp_processor_id()];
__trace_mmiotrace_rw(tr, data, rw);
}
void mmio_trace_mapping(struct mmiotrace_map *map)
{
struct trace_array *tr = mmio_trace_array;
struct trace_array_cpu *data;
preempt_disable();
data = tr->data[smp_processor_id()];
__trace_mmiotrace_map(tr, data, map);
preempt_enable();
}

View File

@@ -0,0 +1,286 @@
/*
* trace context switch
*
* Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
*
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/marker.h>
#include <linux/ftrace.h>
#include "trace.h"
static struct trace_array *ctx_trace;
static int __read_mostly tracer_enabled;
static atomic_t sched_ref;
static void
sched_switch_func(void *private, void *__rq, struct task_struct *prev,
struct task_struct *next)
{
struct trace_array **ptr = private;
struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
tracing_record_cmdline(prev);
tracing_record_cmdline(next);
if (!tracer_enabled)
return;
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
tracing_sched_switch_trace(tr, data, prev, next, flags);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
static notrace void
sched_switch_callback(void *probe_data, void *call_data,
const char *format, va_list *args)
{
struct task_struct *prev;
struct task_struct *next;
struct rq *__rq;
if (!atomic_read(&sched_ref))
return;
/* skip prev_pid %d next_pid %d prev_state %ld */
(void)va_arg(*args, int);
(void)va_arg(*args, int);
(void)va_arg(*args, long);
__rq = va_arg(*args, typeof(__rq));
prev = va_arg(*args, typeof(prev));
next = va_arg(*args, typeof(next));
/*
* If tracer_switch_func only points to the local
* switch func, it still needs the ptr passed to it.
*/
sched_switch_func(probe_data, __rq, prev, next);
}
static void
wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
task_struct *curr)
{
struct trace_array **ptr = private;
struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
if (!tracer_enabled)
return;
tracing_record_cmdline(curr);
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
static notrace void
wake_up_callback(void *probe_data, void *call_data,
const char *format, va_list *args)
{
struct task_struct *curr;
struct task_struct *task;
struct rq *__rq;
if (likely(!tracer_enabled))
return;
/* Skip pid %d state %ld */
(void)va_arg(*args, int);
(void)va_arg(*args, long);
/* now get the meat: "rq %p task %p rq->curr %p" */
__rq = va_arg(*args, typeof(__rq));
task = va_arg(*args, typeof(task));
curr = va_arg(*args, typeof(curr));
tracing_record_cmdline(task);
tracing_record_cmdline(curr);
wakeup_func(probe_data, __rq, task, curr);
}
static void sched_switch_reset(struct trace_array *tr)
{
int cpu;
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
tracing_reset(tr->data[cpu]);
}
static int tracing_sched_register(void)
{
int ret;
ret = marker_probe_register("kernel_sched_wakeup",
"pid %d state %ld ## rq %p task %p rq->curr %p",
wake_up_callback,
&ctx_trace);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup\n");
return ret;
}
ret = marker_probe_register("kernel_sched_wakeup_new",
"pid %d state %ld ## rq %p task %p rq->curr %p",
wake_up_callback,
&ctx_trace);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
ret = marker_probe_register("kernel_sched_schedule",
"prev_pid %d next_pid %d prev_state %ld "
"## rq %p prev %p next %p",
sched_switch_callback,
&ctx_trace);
if (ret) {
pr_info("sched trace: Couldn't add marker"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
return ret;
fail_deprobe_wake_new:
marker_probe_unregister("kernel_sched_wakeup_new",
wake_up_callback,
&ctx_trace);
fail_deprobe:
marker_probe_unregister("kernel_sched_wakeup",
wake_up_callback,
&ctx_trace);
return ret;
}
static void tracing_sched_unregister(void)
{
marker_probe_unregister("kernel_sched_schedule",
sched_switch_callback,
&ctx_trace);
marker_probe_unregister("kernel_sched_wakeup_new",
wake_up_callback,
&ctx_trace);
marker_probe_unregister("kernel_sched_wakeup",
wake_up_callback,
&ctx_trace);
}
static void tracing_start_sched_switch(void)
{
long ref;
ref = atomic_inc_return(&sched_ref);
if (ref == 1)
tracing_sched_register();
}
static void tracing_stop_sched_switch(void)
{
long ref;
ref = atomic_dec_and_test(&sched_ref);
if (ref)
tracing_sched_unregister();
}
void tracing_start_cmdline_record(void)
{
tracing_start_sched_switch();
}
void tracing_stop_cmdline_record(void)
{
tracing_stop_sched_switch();
}
static void start_sched_trace(struct trace_array *tr)
{
sched_switch_reset(tr);
tracing_start_cmdline_record();
tracer_enabled = 1;
}
static void stop_sched_trace(struct trace_array *tr)
{
tracer_enabled = 0;
tracing_stop_cmdline_record();
}
static void sched_switch_trace_init(struct trace_array *tr)
{
ctx_trace = tr;
if (tr->ctrl)
start_sched_trace(tr);
}
static void sched_switch_trace_reset(struct trace_array *tr)
{
if (tr->ctrl)
stop_sched_trace(tr);
}
static void sched_switch_trace_ctrl_update(struct trace_array *tr)
{
/* When starting a new trace, reset the buffers */
if (tr->ctrl)
start_sched_trace(tr);
else
stop_sched_trace(tr);
}
static struct tracer sched_switch_trace __read_mostly =
{
.name = "sched_switch",
.init = sched_switch_trace_init,
.reset = sched_switch_trace_reset,
.ctrl_update = sched_switch_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sched_switch,
#endif
};
__init static int init_sched_switch_trace(void)
{
int ret = 0;
if (atomic_read(&sched_ref))
ret = tracing_sched_register();
if (ret) {
pr_info("error registering scheduler trace\n");
return ret;
}
return register_tracer(&sched_switch_trace);
}
device_initcall(init_sched_switch_trace);

View File

@@ -0,0 +1,448 @@
/*
* trace task wakeup timings
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* Based on code from the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/marker.h>
#include "trace.h"
static struct trace_array *wakeup_trace;
static int __read_mostly tracer_enabled;
static struct task_struct *wakeup_task;
static int wakeup_cpu;
static unsigned wakeup_prio = -1;
static DEFINE_SPINLOCK(wakeup_lock);
static void __wakeup_reset(struct trace_array *tr);
#ifdef CONFIG_FTRACE
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int resched;
int cpu;
if (likely(!wakeup_task))
return;
resched = need_resched();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (unlikely(disabled != 1))
goto out;
spin_lock_irqsave(&wakeup_lock, flags);
if (unlikely(!wakeup_task))
goto unlock;
/*
* The task can't disappear because it needs to
* wake up first, and we have the wakeup_lock.
*/
if (task_cpu(wakeup_task) != cpu)
goto unlock;
trace_function(tr, data, ip, parent_ip, flags);
unlock:
spin_unlock_irqrestore(&wakeup_lock, flags);
out:
atomic_dec(&data->disabled);
/*
* To prevent recursion from the scheduler, if the
* resched flag was set before we entered, then
* don't reschedule.
*/
if (resched)
preempt_enable_no_resched_notrace();
else
preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __read_mostly =
{
.func = wakeup_tracer_call,
};
#endif /* CONFIG_FTRACE */
/*
* Should this new latency be reported/recorded?
*/
static int report_latency(cycle_t delta)
{
if (tracing_thresh) {
if (delta < tracing_thresh)
return 0;
} else {
if (delta <= tracing_max_latency)
return 0;
}
return 1;
}
static void notrace
wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
struct task_struct *next)
{
unsigned long latency = 0, t0 = 0, t1 = 0;
struct trace_array **ptr = private;
struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
long disabled;
int cpu;
if (unlikely(!tracer_enabled))
return;
/*
* When we start a new trace, we set wakeup_task to NULL
* and then set tracer_enabled = 1. We want to make sure
* that another CPU does not see the tracer_enabled = 1
* and the wakeup_task with an older task, that might
* actually be the same as next.
*/
smp_rmb();
if (next != wakeup_task)
return;
/* The task we are waiting for is waking up */
data = tr->data[wakeup_cpu];
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&tr->data[cpu]->disabled);
if (likely(disabled != 1))
goto out;
spin_lock_irqsave(&wakeup_lock, flags);
/* We could race with grabbing wakeup_lock */
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
/*
* usecs conversion is slow so we try to delay the conversion
* as long as possible:
*/
T0 = data->preempt_timestamp;
T1 = ftrace_now(cpu);
delta = T1-T0;
if (!report_latency(delta))
goto out_unlock;
latency = nsecs_to_usecs(delta);
tracing_max_latency = delta;
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);
update_max_tr(tr, wakeup_task, wakeup_cpu);
out_unlock:
__wakeup_reset(tr);
spin_unlock_irqrestore(&wakeup_lock, flags);
out:
atomic_dec(&tr->data[cpu]->disabled);
}
static notrace void
sched_switch_callback(void *probe_data, void *call_data,
const char *format, va_list *args)
{
struct task_struct *prev;
struct task_struct *next;
struct rq *__rq;
/* skip prev_pid %d next_pid %d prev_state %ld */
(void)va_arg(*args, int);
(void)va_arg(*args, int);
(void)va_arg(*args, long);
__rq = va_arg(*args, typeof(__rq));
prev = va_arg(*args, typeof(prev));
next = va_arg(*args, typeof(next));
tracing_record_cmdline(prev);
/*
* If tracer_switch_func only points to the local
* switch func, it still needs the ptr passed to it.
*/
wakeup_sched_switch(probe_data, __rq, prev, next);
}
static void __wakeup_reset(struct trace_array *tr)
{
struct trace_array_cpu *data;
int cpu;
assert_spin_locked(&wakeup_lock);
for_each_possible_cpu(cpu) {
data = tr->data[cpu];
tracing_reset(data);
}
wakeup_cpu = -1;
wakeup_prio = -1;
if (wakeup_task)
put_task_struct(wakeup_task);
wakeup_task = NULL;
}
static void wakeup_reset(struct trace_array *tr)
{
unsigned long flags;
spin_lock_irqsave(&wakeup_lock, flags);
__wakeup_reset(tr);
spin_unlock_irqrestore(&wakeup_lock, flags);
}
static void
wakeup_check_start(struct trace_array *tr, struct task_struct *p,
struct task_struct *curr)
{
int cpu = smp_processor_id();
unsigned long flags;
long disabled;
if (likely(!rt_task(p)) ||
p->prio >= wakeup_prio ||
p->prio >= curr->prio)
return;
disabled = atomic_inc_return(&tr->data[cpu]->disabled);
if (unlikely(disabled != 1))
goto out;
/* interrupts should be off from try_to_wake_up */
spin_lock(&wakeup_lock);
/* check for races. */
if (!tracer_enabled || p->prio >= wakeup_prio)
goto out_locked;
/* reset the trace */
__wakeup_reset(tr);
wakeup_cpu = task_cpu(p);
wakeup_prio = p->prio;
wakeup_task = p;
get_task_struct(wakeup_task);
local_save_flags(flags);
tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
trace_function(tr, tr->data[wakeup_cpu],
CALLER_ADDR1, CALLER_ADDR2, flags);
out_locked:
spin_unlock(&wakeup_lock);
out:
atomic_dec(&tr->data[cpu]->disabled);
}
static notrace void
wake_up_callback(void *probe_data, void *call_data,
const char *format, va_list *args)
{
struct trace_array **ptr = probe_data;
struct trace_array *tr = *ptr;
struct task_struct *curr;
struct task_struct *task;
struct rq *__rq;
if (likely(!tracer_enabled))
return;
/* Skip pid %d state %ld */
(void)va_arg(*args, int);
(void)va_arg(*args, long);
/* now get the meat: "rq %p task %p rq->curr %p" */
__rq = va_arg(*args, typeof(__rq));
task = va_arg(*args, typeof(task));
curr = va_arg(*args, typeof(curr));
tracing_record_cmdline(task);
tracing_record_cmdline(curr);
wakeup_check_start(tr, task, curr);
}
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
ret = marker_probe_register("kernel_sched_wakeup",
"pid %d state %ld ## rq %p task %p rq->curr %p",
wake_up_callback,
&wakeup_trace);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup\n");
return;
}
ret = marker_probe_register("kernel_sched_wakeup_new",
"pid %d state %ld ## rq %p task %p rq->curr %p",
wake_up_callback,
&wakeup_trace);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
ret = marker_probe_register("kernel_sched_schedule",
"prev_pid %d next_pid %d prev_state %ld "
"## rq %p prev %p next %p",
sched_switch_callback,
&wakeup_trace);
if (ret) {
pr_info("sched trace: Couldn't add marker"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
wakeup_reset(tr);
/*
* Don't let the tracer_enabled = 1 show up before
* the wakeup_task is reset. This may be overkill since
* wakeup_reset does a spin_unlock after setting the
* wakeup_task to NULL, but I want to be safe.
* This is a slow path anyway.
*/
smp_wmb();
register_ftrace_function(&trace_ops);
tracer_enabled = 1;
return;
fail_deprobe_wake_new:
marker_probe_unregister("kernel_sched_wakeup_new",
wake_up_callback,
&wakeup_trace);
fail_deprobe:
marker_probe_unregister("kernel_sched_wakeup",
wake_up_callback,
&wakeup_trace);
}
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
marker_probe_unregister("kernel_sched_schedule",
sched_switch_callback,
&wakeup_trace);
marker_probe_unregister("kernel_sched_wakeup_new",
wake_up_callback,
&wakeup_trace);
marker_probe_unregister("kernel_sched_wakeup",
wake_up_callback,
&wakeup_trace);
}
static void wakeup_tracer_init(struct trace_array *tr)
{
wakeup_trace = tr;
if (tr->ctrl)
start_wakeup_tracer(tr);
}
static void wakeup_tracer_reset(struct trace_array *tr)
{
if (tr->ctrl) {
stop_wakeup_tracer(tr);
/* make sure we put back any tasks we are tracing */
wakeup_reset(tr);
}
}
static void wakeup_tracer_ctrl_update(struct trace_array *tr)
{
if (tr->ctrl)
start_wakeup_tracer(tr);
else
stop_wakeup_tracer(tr);
}
static void wakeup_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
if (iter->tr->ctrl)
stop_wakeup_tracer(iter->tr);
}
static void wakeup_tracer_close(struct trace_iterator *iter)
{
/* forget about any processes we were recording */
if (iter->tr->ctrl)
start_wakeup_tracer(iter->tr);
}
static struct tracer wakeup_tracer __read_mostly =
{
.name = "wakeup",
.init = wakeup_tracer_init,
.reset = wakeup_tracer_reset,
.open = wakeup_tracer_open,
.close = wakeup_tracer_close,
.ctrl_update = wakeup_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
};
__init static int init_wakeup_tracer(void)
{
int ret;
ret = register_tracer(&wakeup_tracer);
if (ret)
return ret;
return 0;
}
device_initcall(init_wakeup_tracer);

View File

@@ -0,0 +1,563 @@
/* Include in trace.c */
#include <linux/kthread.h>
#include <linux/delay.h>
static inline int trace_valid_entry(struct trace_entry *entry)
{
switch (entry->type) {
case TRACE_FN:
case TRACE_CTX:
case TRACE_WAKE:
case TRACE_STACK:
case TRACE_SPECIAL:
return 1;
}
return 0;
}
static int
trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
{
struct trace_entry *entries;
struct page *page;
int idx = 0;
int i;
BUG_ON(list_empty(&data->trace_pages));
page = list_entry(data->trace_pages.next, struct page, lru);
entries = page_address(page);
check_pages(data);
if (head_page(data) != entries)
goto failed;
/*
* The starting trace buffer always has valid elements,
* if any element exists.
*/
entries = head_page(data);
for (i = 0; i < tr->entries; i++) {
if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
printk(KERN_CONT ".. invalid entry %d ",
entries[idx].type);
goto failed;
}
idx++;
if (idx >= ENTRIES_PER_PAGE) {
page = virt_to_page(entries);
if (page->lru.next == &data->trace_pages) {
if (i != tr->entries - 1) {
printk(KERN_CONT ".. entries buffer mismatch");
goto failed;
}
} else {
page = list_entry(page->lru.next, struct page, lru);
entries = page_address(page);
}
idx = 0;
}
}
page = virt_to_page(entries);
if (page->lru.next != &data->trace_pages) {
printk(KERN_CONT ".. too many entries");
goto failed;
}
return 0;
failed:
/* disable tracing */
tracing_disabled = 1;
printk(KERN_CONT ".. corrupted trace buffer .. ");
return -1;
}
/*
* Test the trace buffer to see if all the elements
* are still sane.
*/
static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
{
unsigned long flags, cnt = 0;
int cpu, ret = 0;
/* Don't allow flipping of max traces now */
raw_local_irq_save(flags);
__raw_spin_lock(&ftrace_max_lock);
for_each_possible_cpu(cpu) {
if (!head_page(tr->data[cpu]))
continue;
cnt += tr->data[cpu]->trace_idx;
ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
if (ret)
break;
}
__raw_spin_unlock(&ftrace_max_lock);
raw_local_irq_restore(flags);
if (count)
*count = cnt;
return ret;
}
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
#define __STR(x) #x
#define STR(x) __STR(x)
/* Test dynamic code modification and ftrace filters */
int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
struct trace_array *tr,
int (*func)(void))
{
unsigned long count;
int ret;
int save_ftrace_enabled = ftrace_enabled;
int save_tracer_enabled = tracer_enabled;
char *func_name;
/* The ftrace test PASSED */
printk(KERN_CONT "PASSED\n");
pr_info("Testing dynamic ftrace: ");
/* enable tracing, and record the filter function */
ftrace_enabled = 1;
tracer_enabled = 1;
/* passed in by parameter to fool gcc from optimizing */
func();
/* update the records */
ret = ftrace_force_update();
if (ret) {
printk(KERN_CONT ".. ftraced failed .. ");
return ret;
}
/*
* Some archs *cough*PowerPC*cough* add charachters to the
* start of the function names. We simply put a '*' to
* accomodate them.
*/
func_name = "*" STR(DYN_FTRACE_TEST_NAME);
/* filter only on our function */
ftrace_set_filter(func_name, strlen(func_name), 1);
/* enable tracing */
tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* we should have nothing in the buffer */
ret = trace_test_buffer(tr, &count);
if (ret)
goto out;
if (count) {
ret = -1;
printk(KERN_CONT ".. filter did not filter .. ");
goto out;
}
/* call our function again */
func();
/* sleep again */
msleep(100);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
/* we should only have one item */
if (!ret && count != 1) {
printk(KERN_CONT ".. filter failed count=%ld ..", count);
ret = -1;
goto out;
}
out:
ftrace_enabled = save_ftrace_enabled;
tracer_enabled = save_tracer_enabled;
/* Enable tracing on all functions again */
ftrace_set_filter(NULL, 0, 1);
return ret;
}
#else
# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
* Simple verification test of ftrace function tracer.
* Enable ftrace, sleep 1/10 second, and then read the trace
* buffer to see if all is in order.
*/
int
trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
int save_ftrace_enabled = ftrace_enabled;
int save_tracer_enabled = tracer_enabled;
/* make sure msleep has been recorded */
msleep(1);
/* force the recorded functions to be traced */
ret = ftrace_force_update();
if (ret) {
printk(KERN_CONT ".. ftraced failed .. ");
return ret;
}
/* start the tracing */
ftrace_enabled = 1;
tracer_enabled = 1;
tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
goto out;
}
ret = trace_selftest_startup_dynamic_tracing(trace, tr,
DYN_FTRACE_TEST_NAME);
out:
ftrace_enabled = save_ftrace_enabled;
tracer_enabled = save_tracer_enabled;
/* kill ftrace totally if we failed */
if (ret)
ftrace_kill();
return ret;
}
#endif /* CONFIG_FTRACE */
#ifdef CONFIG_IRQSOFF_TRACER
int
trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
{
unsigned long save_max = tracing_max_latency;
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
/* disable interrupts for a bit */
local_irq_disable();
udelay(100);
local_irq_enable();
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
tracing_max_latency = save_max;
return ret;
}
#endif /* CONFIG_IRQSOFF_TRACER */
#ifdef CONFIG_PREEMPT_TRACER
int
trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
{
unsigned long save_max = tracing_max_latency;
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
/* disable preemption for a bit */
preempt_disable();
udelay(100);
preempt_enable();
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
tracing_max_latency = save_max;
return ret;
}
#endif /* CONFIG_PREEMPT_TRACER */
#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
int
trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
{
unsigned long save_max = tracing_max_latency;
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
/* disable preemption and interrupts for a bit */
preempt_disable();
local_irq_disable();
udelay(100);
preempt_enable();
/* reverse the order of preempt vs irqs */
local_irq_enable();
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (ret)
goto out;
ret = trace_test_buffer(&max_tr, &count);
if (ret)
goto out;
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
goto out;
}
/* do the test by disabling interrupts first this time */
tracing_max_latency = 0;
tr->ctrl = 1;
trace->ctrl_update(tr);
preempt_disable();
local_irq_disable();
udelay(100);
preempt_enable();
/* reverse the order of preempt vs irqs */
local_irq_enable();
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (ret)
goto out;
ret = trace_test_buffer(&max_tr, &count);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
goto out;
}
out:
trace->reset(tr);
tracing_max_latency = save_max;
return ret;
}
#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
#ifdef CONFIG_SCHED_TRACER
static int trace_wakeup_test_thread(void *data)
{
/* Make this a RT thread, doesn't need to be too high */
struct sched_param param = { .sched_priority = 5 };
struct completion *x = data;
sched_setscheduler(current, SCHED_FIFO, &param);
/* Make it know we have a new prio */
complete(x);
/* now go to sleep and let the test wake us up */
set_current_state(TASK_INTERRUPTIBLE);
schedule();
/* we are awake, now wait to disappear */
while (!kthread_should_stop()) {
/*
* This is an RT task, do short sleeps to let
* others run.
*/
msleep(100);
}
return 0;
}
int
trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
{
unsigned long save_max = tracing_max_latency;
struct task_struct *p;
struct completion isrt;
unsigned long count;
int ret;
init_completion(&isrt);
/* create a high prio thread */
p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
if (IS_ERR(p)) {
printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
return -1;
}
/* make sure the thread is running at an RT prio */
wait_for_completion(&isrt);
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
/* sleep to let the RT thread sleep too */
msleep(100);
/*
* Yes this is slightly racy. It is possible that for some
* strange reason that the RT thread we created, did not
* call schedule for 100ms after doing the completion,
* and we do a wakeup on a task that already is awake.
* But that is extremely unlikely, and the worst thing that
* happens in such a case, is that we disable tracing.
* Honestly, if this race does happen something is horrible
* wrong with the system.
*/
wake_up_process(p);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
tracing_max_latency = save_max;
/* kill the thread */
kthread_stop(p);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
return ret;
}
#endif /* CONFIG_SCHED_TRACER */
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
int
trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
return ret;
}
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
#ifdef CONFIG_SYSPROF_TRACER
int
trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
return ret;
}
#endif /* CONFIG_SYSPROF_TRACER */

View File

@@ -0,0 +1,7 @@
#include "trace.h"
int DYN_FTRACE_TEST_NAME(void)
{
/* used to call mcount */
return 0;
}

View File

@@ -0,0 +1,363 @@
/*
* trace stack traces
*
* Copyright (C) 2004-2008, Soeren Sandmann
* Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*/
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/hrtimer.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/irq.h>
#include <linux/fs.h>
#include <asm/stacktrace.h>
#include "trace.h"
static struct trace_array *sysprof_trace;
static int __read_mostly tracer_enabled;
/*
* 1 msec sample interval by default:
*/
static unsigned long sample_period = 1000000;
static const unsigned int sample_max_depth = 512;
static DEFINE_MUTEX(sample_timer_lock);
/*
* Per CPU hrtimers that do the profiling:
*/
static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
struct stack_frame {
const void __user *next_fp;
unsigned long return_address;
};
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
int ret;
if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
return 0;
ret = 1;
pagefault_disable();
if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
ret = 0;
pagefault_enable();
return ret;
}
struct backtrace_info {
struct trace_array_cpu *data;
struct trace_array *tr;
int pos;
};
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
/* Ignore warnings */
}
static void backtrace_warning(void *data, char *msg)
{
/* Ignore warnings */
}
static int backtrace_stack(void *data, char *name)
{
/* Don't bother with IRQ stacks for now */
return -1;
}
static void backtrace_address(void *data, unsigned long addr, int reliable)
{
struct backtrace_info *info = data;
if (info->pos < sample_max_depth && reliable) {
__trace_special(info->tr, info->data, 1, addr, 0);
info->pos++;
}
}
const static struct stacktrace_ops backtrace_ops = {
.warning = backtrace_warning,
.warning_symbol = backtrace_warning_symbol,
.stack = backtrace_stack,
.address = backtrace_address,
};
static int
trace_kernel(struct pt_regs *regs, struct trace_array *tr,
struct trace_array_cpu *data)
{
struct backtrace_info info;
unsigned long bp;
char *stack;
info.tr = tr;
info.data = data;
info.pos = 1;
__trace_special(info.tr, info.data, 1, regs->ip, 0);
stack = ((char *)regs + sizeof(struct pt_regs));
#ifdef CONFIG_FRAME_POINTER
bp = regs->bp;
#else
bp = 0;
#endif
dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
return info.pos;
}
static void timer_notify(struct pt_regs *regs, int cpu)
{
struct trace_array_cpu *data;
struct stack_frame frame;
struct trace_array *tr;
const void __user *fp;
int is_user;
int i;
if (!regs)
return;
tr = sysprof_trace;
data = tr->data[cpu];
is_user = user_mode(regs);
if (!current || current->pid == 0)
return;
if (is_user && current->state != TASK_RUNNING)
return;
__trace_special(tr, data, 0, 0, current->pid);
if (!is_user)
i = trace_kernel(regs, tr, data);
else
i = 0;
/*
* Trace user stack if we are not a kernel thread
*/
if (current->mm && i < sample_max_depth) {
regs = (struct pt_regs *)current->thread.sp0 - 1;
fp = (void __user *)regs->bp;
__trace_special(tr, data, 2, regs->ip, 0);
while (i < sample_max_depth) {
frame.next_fp = 0;
frame.return_address = 0;
if (!copy_stack_frame(fp, &frame))
break;
if ((unsigned long)fp < regs->sp)
break;
__trace_special(tr, data, 2, frame.return_address,
(unsigned long)fp);
fp = frame.next_fp;
i++;
}
}
/*
* Special trace entry if we overflow the max depth:
*/
if (i == sample_max_depth)
__trace_special(tr, data, -1, -1, -1);
__trace_special(tr, data, 3, current->pid, i);
}
static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
{
/* trace here */
timer_notify(get_irq_regs(), smp_processor_id());
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
return HRTIMER_RESTART;
}
static void start_stack_timer(int cpu)
{
struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn;
hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
}
static void start_stack_timers(void)
{
cpumask_t saved_mask = current->cpus_allowed;
int cpu;
for_each_online_cpu(cpu) {
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
start_stack_timer(cpu);
}
set_cpus_allowed_ptr(current, &saved_mask);
}
static void stop_stack_timer(int cpu)
{
struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
hrtimer_cancel(hrtimer);
}
static void stop_stack_timers(void)
{
int cpu;
for_each_online_cpu(cpu)
stop_stack_timer(cpu);
}
static void stack_reset(struct trace_array *tr)
{
int cpu;
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
tracing_reset(tr->data[cpu]);
}
static void start_stack_trace(struct trace_array *tr)
{
mutex_lock(&sample_timer_lock);
stack_reset(tr);
start_stack_timers();
tracer_enabled = 1;
mutex_unlock(&sample_timer_lock);
}
static void stop_stack_trace(struct trace_array *tr)
{
mutex_lock(&sample_timer_lock);
stop_stack_timers();
tracer_enabled = 0;
mutex_unlock(&sample_timer_lock);
}
static void stack_trace_init(struct trace_array *tr)
{
sysprof_trace = tr;
if (tr->ctrl)
start_stack_trace(tr);
}
static void stack_trace_reset(struct trace_array *tr)
{
if (tr->ctrl)
stop_stack_trace(tr);
}
static void stack_trace_ctrl_update(struct trace_array *tr)
{
/* When starting a new trace, reset the buffers */
if (tr->ctrl)
start_stack_trace(tr);
else
stop_stack_trace(tr);
}
static struct tracer stack_trace __read_mostly =
{
.name = "sysprof",
.init = stack_trace_init,
.reset = stack_trace_reset,
.ctrl_update = stack_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sysprof,
#endif
};
__init static int init_stack_trace(void)
{
return register_tracer(&stack_trace);
}
device_initcall(init_stack_trace);
#define MAX_LONG_DIGITS 22
static ssize_t
sysprof_sample_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[MAX_LONG_DIGITS];
int r;
r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static ssize_t
sysprof_sample_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[MAX_LONG_DIGITS];
unsigned long val;
if (cnt > MAX_LONG_DIGITS-1)
cnt = MAX_LONG_DIGITS-1;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
val = simple_strtoul(buf, NULL, 10);
/*
* Enforce a minimum sample period of 100 usecs:
*/
if (val < 100)
val = 100;
mutex_lock(&sample_timer_lock);
stop_stack_timers();
sample_period = val * 1000;
start_stack_timers();
mutex_unlock(&sample_timer_lock);
return cnt;
}
static struct file_operations sysprof_sample_fops = {
.read = sysprof_sample_read,
.write = sysprof_sample_write,
};
void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
{
struct dentry *entry;
entry = debugfs_create_file("sysprof_sample_period", 0644,
d_tracer, NULL, &sysprof_sample_fops);
if (entry)
return;
pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n");
}