1f0d69a9fc
Impact: new unlikely/likely profiler Andrew Morton recently suggested having an in-kernel way to profile likely and unlikely macros. This patch achieves that goal. When configured, every(*) likely and unlikely macro gets a counter attached to it. When the condition is hit, the hit and misses of that condition are recorded. These numbers can later be retrieved by: /debugfs/tracing/profile_likely - All likely markers /debugfs/tracing/profile_unlikely - All unlikely markers. # cat /debug/tracing/profile_unlikely | head correct incorrect % Function File Line ------- --------- - -------- ---- ---- 2167 0 0 do_arch_prctl process_64.c 832 0 0 0 do_arch_prctl process_64.c 804 2670 0 0 IS_ERR err.h 34 71230 5693 7 __switch_to process_64.c 673 76919 0 0 __switch_to process_64.c 639 43184 33743 43 __switch_to process_64.c 624 12740 64181 83 __switch_to process_64.c 594 12740 64174 83 __switch_to process_64.c 590 # cat /debug/tracing/profile_unlikely | \ awk '{ if ($3 > 25) print $0; }' |head -20 44963 35259 43 __switch_to process_64.c 624 12762 67454 84 __switch_to process_64.c 594 12762 67447 84 __switch_to process_64.c 590 1478 595 28 syscall_get_error syscall.h 51 0 2821 100 syscall_trace_leave ptrace.c 1567 0 1 100 native_smp_prepare_cpus smpboot.c 1237 86338 265881 75 calc_delta_fair sched_fair.c 408 210410 108540 34 calc_delta_mine sched.c 1267 0 54550 100 sched_info_queued sched_stats.h 222 51899 66435 56 pick_next_task_fair sched_fair.c 1422 6 10 62 yield_task_fair sched_fair.c 982 7325 2692 26 rt_policy sched.c 144 0 1270 100 pre_schedule_rt sched_rt.c 1261 1268 48073 97 pick_next_task_rt sched_rt.c 884 0 45181 100 sched_info_dequeued sched_stats.h 177 0 15 100 sched_move_task sched.c 8700 0 15 100 sched_move_task sched.c 8690 53167 33217 38 schedule sched.c 4457 0 80208 100 sched_info_switch sched_stats.h 270 30585 49631 61 context_switch sched.c 2619 # cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }' 39900 36577 47 pick_next_task sched.c 4397 20824 15233 42 switch_mm mmu_context_64.h 18 0 7 100 __cancel_work_timer workqueue.c 560 617 66484 99 clocksource_adjust timekeeping.c 456 0 346340 100 audit_syscall_exit auditsc.c 1570 38 347350 99 audit_get_context auditsc.c 732 0 345244 100 audit_syscall_entry auditsc.c 1541 38 1017 96 audit_free auditsc.c 1446 0 1090 100 audit_alloc auditsc.c 862 2618 1090 29 audit_alloc auditsc.c 858 0 6 100 move_masked_irq migration.c 9 1 198 99 probe_sched_wakeup trace_sched_switch.c 58 2 2 50 probe_wakeup trace_sched_wakeup.c 227 0 2 100 probe_wakeup_sched_switch trace_sched_wakeup.c 144 4514 2090 31 __grab_cache_page filemap.c 2149 12882 228786 94 mapping_unevictable pagemap.h 50 4 11 73 __flush_cpu_slab slub.c 1466 627757 330451 34 slab_free slub.c 1731 2959 61245 95 dentry_lru_del_init dcache.c 153 946 1217 56 load_elf_binary binfmt_elf.c 904 102 82 44 disk_put_part genhd.h 206 1 1 50 dst_gc_task dst.c 82 0 19 100 tcp_mss_split_point tcp_output.c 1126 As you can see by the above, there's a bit of work to do in rethinking the use of some unlikelys and likelys. Note: the unlikely case had 71 hits that were more than 25%. Note: After submitting my first version of this patch, Andrew Morton showed me a version written by Daniel Walker, where I picked up the following ideas from: 1) Using __builtin_constant_p to avoid profiling fixed values. 2) Using __FILE__ instead of instruction pointers. 3) Using the preprocessor to stop all profiling of likely annotations from vsyscall_64.c. Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar for their feed back on this patch. (*) Not ever unlikely is recorded, those that are used by vsyscalls (a few of them) had to have profiling disabled. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Theodore Tso <tytso@mit.edu> Cc: Arjan van de Ven <arjan@infradead.org> Cc: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
165 lines
3.4 KiB
C
165 lines
3.4 KiB
C
/*
|
|
* unlikely profiler
|
|
*
|
|
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
|
|
*/
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/debugfs.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/module.h>
|
|
#include <linux/ftrace.h>
|
|
#include <linux/hash.h>
|
|
#include <linux/fs.h>
|
|
#include <asm/local.h>
|
|
#include "trace.h"
|
|
|
|
void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect)
|
|
{
|
|
/* FIXME: Make this atomic! */
|
|
if (val == expect)
|
|
f->correct++;
|
|
else
|
|
f->incorrect++;
|
|
}
|
|
EXPORT_SYMBOL(ftrace_likely_update);
|
|
|
|
struct ftrace_pointer {
|
|
void *start;
|
|
void *stop;
|
|
};
|
|
|
|
static void *
|
|
t_next(struct seq_file *m, void *v, loff_t *pos)
|
|
{
|
|
struct ftrace_pointer *f = m->private;
|
|
struct ftrace_likely_data *p = v;
|
|
|
|
(*pos)++;
|
|
|
|
if (v == (void *)1)
|
|
return f->start;
|
|
|
|
++p;
|
|
|
|
if ((void *)p >= (void *)f->stop)
|
|
return NULL;
|
|
|
|
return p;
|
|
}
|
|
|
|
static void *t_start(struct seq_file *m, loff_t *pos)
|
|
{
|
|
void *t = (void *)1;
|
|
loff_t l = 0;
|
|
|
|
for (; t && l < *pos; t = t_next(m, t, &l))
|
|
;
|
|
|
|
return t;
|
|
}
|
|
|
|
static void t_stop(struct seq_file *m, void *p)
|
|
{
|
|
}
|
|
|
|
static int t_show(struct seq_file *m, void *v)
|
|
{
|
|
struct ftrace_likely_data *p = v;
|
|
const char *f;
|
|
unsigned long percent;
|
|
|
|
if (v == (void *)1) {
|
|
seq_printf(m, " correct incorrect %% "
|
|
" Function "
|
|
" File Line\n"
|
|
" ------- --------- - "
|
|
" -------- "
|
|
" ---- ----\n");
|
|
return 0;
|
|
}
|
|
|
|
/* Only print the file, not the path */
|
|
f = p->file + strlen(p->file);
|
|
while (f >= p->file && *f != '/')
|
|
f--;
|
|
f++;
|
|
|
|
if (p->correct) {
|
|
percent = p->incorrect * 100;
|
|
percent /= p->correct + p->incorrect;
|
|
} else
|
|
percent = p->incorrect ? 100 : 0;
|
|
|
|
seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent);
|
|
seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
|
|
return 0;
|
|
}
|
|
|
|
static struct seq_operations tracing_likely_seq_ops = {
|
|
.start = t_start,
|
|
.next = t_next,
|
|
.stop = t_stop,
|
|
.show = t_show,
|
|
};
|
|
|
|
static int tracing_likely_open(struct inode *inode, struct file *file)
|
|
{
|
|
int ret;
|
|
|
|
ret = seq_open(file, &tracing_likely_seq_ops);
|
|
if (!ret) {
|
|
struct seq_file *m = file->private_data;
|
|
m->private = (void *)inode->i_private;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static struct file_operations tracing_likely_fops = {
|
|
.open = tracing_likely_open,
|
|
.read = seq_read,
|
|
.llseek = seq_lseek,
|
|
};
|
|
|
|
extern unsigned long __start_likely_profile[];
|
|
extern unsigned long __stop_likely_profile[];
|
|
extern unsigned long __start_unlikely_profile[];
|
|
extern unsigned long __stop_unlikely_profile[];
|
|
|
|
static struct ftrace_pointer ftrace_likely_pos = {
|
|
.start = __start_likely_profile,
|
|
.stop = __stop_likely_profile,
|
|
};
|
|
|
|
static struct ftrace_pointer ftrace_unlikely_pos = {
|
|
.start = __start_unlikely_profile,
|
|
.stop = __stop_unlikely_profile,
|
|
};
|
|
|
|
static __init int ftrace_unlikely_init(void)
|
|
{
|
|
struct dentry *d_tracer;
|
|
struct dentry *entry;
|
|
|
|
d_tracer = tracing_init_dentry();
|
|
|
|
entry = debugfs_create_file("profile_likely", 0444, d_tracer,
|
|
&ftrace_likely_pos,
|
|
&tracing_likely_fops);
|
|
if (!entry)
|
|
pr_warning("Could not create debugfs 'profile_likely' entry\n");
|
|
|
|
entry = debugfs_create_file("profile_unlikely", 0444, d_tracer,
|
|
&ftrace_unlikely_pos,
|
|
&tracing_likely_fops);
|
|
if (!entry)
|
|
pr_warning("Could not create debugfs"
|
|
" 'profile_unlikely' entry\n");
|
|
|
|
return 0;
|
|
}
|
|
|
|
device_initcall(ftrace_unlikely_init);
|