Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  tracing: Fix sign fields in ftrace_define_fields_##call()
  tracing/syscalls: Fix typo in SYSCALL_DEFINE0
  tracing/kprobe: Show sign of fields in trace_kprobe format files
  ksym_tracer: Remove trace_stat
  ksym_tracer: Fix race when incrementing count
  ksym_tracer: Fix to allow writing newline to ksym_trace_filter
  ksym_tracer: Fix to make the tracer work
  tracing: Kconfig spelling fixes and cleanups
  tracing: Fix setting tracer specific options
  Documentation: Update ftrace-design.txt
  Documentation: Update tracepoint-analysis.txt
  Documentation: Update mmiotrace.txt
This commit is contained in:
@@ -53,14 +53,14 @@ size of the mcount call that is embedded in the function).
|
|||||||
For example, if the function foo() calls bar(), when the bar() function calls
|
For example, if the function foo() calls bar(), when the bar() function calls
|
||||||
mcount(), the arguments mcount() will pass to the tracer are:
|
mcount(), the arguments mcount() will pass to the tracer are:
|
||||||
"frompc" - the address bar() will use to return to foo()
|
"frompc" - the address bar() will use to return to foo()
|
||||||
"selfpc" - the address bar() (with _mcount() size adjustment)
|
"selfpc" - the address bar() (with mcount() size adjustment)
|
||||||
|
|
||||||
Also keep in mind that this mcount function will be called *a lot*, so
|
Also keep in mind that this mcount function will be called *a lot*, so
|
||||||
optimizing for the default case of no tracer will help the smooth running of
|
optimizing for the default case of no tracer will help the smooth running of
|
||||||
your system when tracing is disabled. So the start of the mcount function is
|
your system when tracing is disabled. So the start of the mcount function is
|
||||||
typically the bare min with checking things before returning. That also means
|
typically the bare minimum with checking things before returning. That also
|
||||||
the code flow should usually kept linear (i.e. no branching in the nop case).
|
means the code flow should usually be kept linear (i.e. no branching in the nop
|
||||||
This is of course an optimization and not a hard requirement.
|
case). This is of course an optimization and not a hard requirement.
|
||||||
|
|
||||||
Here is some pseudo code that should help (these functions should actually be
|
Here is some pseudo code that should help (these functions should actually be
|
||||||
implemented in assembly):
|
implemented in assembly):
|
||||||
@@ -131,10 +131,10 @@ some functions to save (hijack) and restore the return address.
|
|||||||
|
|
||||||
The mcount function should check the function pointers ftrace_graph_return
|
The mcount function should check the function pointers ftrace_graph_return
|
||||||
(compare to ftrace_stub) and ftrace_graph_entry (compare to
|
(compare to ftrace_stub) and ftrace_graph_entry (compare to
|
||||||
ftrace_graph_entry_stub). If either of those are not set to the relevant stub
|
ftrace_graph_entry_stub). If either of those is not set to the relevant stub
|
||||||
function, call the arch-specific function ftrace_graph_caller which in turn
|
function, call the arch-specific function ftrace_graph_caller which in turn
|
||||||
calls the arch-specific function prepare_ftrace_return. Neither of these
|
calls the arch-specific function prepare_ftrace_return. Neither of these
|
||||||
function names are strictly required, but you should use them anyways to stay
|
function names is strictly required, but you should use them anyway to stay
|
||||||
consistent across the architecture ports -- easier to compare & contrast
|
consistent across the architecture ports -- easier to compare & contrast
|
||||||
things.
|
things.
|
||||||
|
|
||||||
@@ -144,7 +144,7 @@ but the first argument should be a pointer to the "frompc". Typically this is
|
|||||||
located on the stack. This allows the function to hijack the return address
|
located on the stack. This allows the function to hijack the return address
|
||||||
temporarily to have it point to the arch-specific function return_to_handler.
|
temporarily to have it point to the arch-specific function return_to_handler.
|
||||||
That function will simply call the common ftrace_return_to_handler function and
|
That function will simply call the common ftrace_return_to_handler function and
|
||||||
that will return the original return address with which, you can return to the
|
that will return the original return address with which you can return to the
|
||||||
original call site.
|
original call site.
|
||||||
|
|
||||||
Here is the updated mcount pseudo code:
|
Here is the updated mcount pseudo code:
|
||||||
|
@@ -44,7 +44,8 @@ Check for lost events.
|
|||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
|
|
||||||
Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges)
|
Make sure debugfs is mounted to /sys/kernel/debug.
|
||||||
|
If not (requires root privileges):
|
||||||
$ mount -t debugfs debugfs /sys/kernel/debug
|
$ mount -t debugfs debugfs /sys/kernel/debug
|
||||||
|
|
||||||
Check that the driver you are about to trace is not loaded.
|
Check that the driver you are about to trace is not loaded.
|
||||||
@@ -91,7 +92,7 @@ $ dmesg > dmesg.txt
|
|||||||
$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt
|
$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt
|
||||||
and then send the .tar.gz file. The trace compresses considerably. Replace
|
and then send the .tar.gz file. The trace compresses considerably. Replace
|
||||||
"pciid" and "nick" with the PCI ID or model name of your piece of hardware
|
"pciid" and "nick" with the PCI ID or model name of your piece of hardware
|
||||||
under investigation and your nick name.
|
under investigation and your nickname.
|
||||||
|
|
||||||
|
|
||||||
How Mmiotrace Works
|
How Mmiotrace Works
|
||||||
@@ -100,7 +101,7 @@ How Mmiotrace Works
|
|||||||
Access to hardware IO-memory is gained by mapping addresses from PCI bus by
|
Access to hardware IO-memory is gained by mapping addresses from PCI bus by
|
||||||
calling one of the ioremap_*() functions. Mmiotrace is hooked into the
|
calling one of the ioremap_*() functions. Mmiotrace is hooked into the
|
||||||
__ioremap() function and gets called whenever a mapping is created. Mapping is
|
__ioremap() function and gets called whenever a mapping is created. Mapping is
|
||||||
an event that is recorded into the trace log. Note, that ISA range mappings
|
an event that is recorded into the trace log. Note that ISA range mappings
|
||||||
are not caught, since the mapping always exists and is returned directly.
|
are not caught, since the mapping always exists and is returned directly.
|
||||||
|
|
||||||
MMIO accesses are recorded via page faults. Just before __ioremap() returns,
|
MMIO accesses are recorded via page faults. Just before __ioremap() returns,
|
||||||
@@ -122,11 +123,11 @@ Trace Log Format
|
|||||||
----------------
|
----------------
|
||||||
|
|
||||||
The raw log is text and easily filtered with e.g. grep and awk. One record is
|
The raw log is text and easily filtered with e.g. grep and awk. One record is
|
||||||
one line in the log. A record starts with a keyword, followed by keyword
|
one line in the log. A record starts with a keyword, followed by keyword-
|
||||||
dependant arguments. Arguments are separated by a space, or continue until the
|
dependent arguments. Arguments are separated by a space, or continue until the
|
||||||
end of line. The format for version 20070824 is as follows:
|
end of line. The format for version 20070824 is as follows:
|
||||||
|
|
||||||
Explanation Keyword Space separated arguments
|
Explanation Keyword Space-separated arguments
|
||||||
---------------------------------------------------------------------------
|
---------------------------------------------------------------------------
|
||||||
|
|
||||||
read event R width, timestamp, map id, physical, value, PC, PID
|
read event R width, timestamp, map id, physical, value, PC, PID
|
||||||
@@ -136,7 +137,7 @@ iounmap event UNMAP timestamp, map id, PC, PID
|
|||||||
marker MARK timestamp, text
|
marker MARK timestamp, text
|
||||||
version VERSION the string "20070824"
|
version VERSION the string "20070824"
|
||||||
info for reader LSPCI one line from lspci -v
|
info for reader LSPCI one line from lspci -v
|
||||||
PCI address map PCIDEV space separated /proc/bus/pci/devices data
|
PCI address map PCIDEV space-separated /proc/bus/pci/devices data
|
||||||
unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID
|
unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID
|
||||||
|
|
||||||
Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual
|
Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual
|
||||||
|
@@ -10,8 +10,8 @@ Tracepoints (see Documentation/trace/tracepoints.txt) can be used without
|
|||||||
creating custom kernel modules to register probe functions using the event
|
creating custom kernel modules to register probe functions using the event
|
||||||
tracing infrastructure.
|
tracing infrastructure.
|
||||||
|
|
||||||
Simplistically, tracepoints will represent an important event that when can
|
Simplistically, tracepoints represent important events that can be
|
||||||
be taken in conjunction with other tracepoints to build a "Big Picture" of
|
taken in conjunction with other tracepoints to build a "Big Picture" of
|
||||||
what is going on within the system. There are a large number of methods for
|
what is going on within the system. There are a large number of methods for
|
||||||
gathering and interpreting these events. Lacking any current Best Practises,
|
gathering and interpreting these events. Lacking any current Best Practises,
|
||||||
this document describes some of the methods that can be used.
|
this document describes some of the methods that can be used.
|
||||||
@@ -33,12 +33,12 @@ calling
|
|||||||
|
|
||||||
will give a fair indication of the number of events available.
|
will give a fair indication of the number of events available.
|
||||||
|
|
||||||
2.2 PCL
|
2.2 PCL (Performance Counters for Linux)
|
||||||
-------
|
-------
|
||||||
|
|
||||||
Discovery and enumeration of all counters and events, including tracepoints
|
Discovery and enumeration of all counters and events, including tracepoints,
|
||||||
are available with the perf tool. Getting a list of available events is a
|
are available with the perf tool. Getting a list of available events is a
|
||||||
simple case of
|
simple case of:
|
||||||
|
|
||||||
$ perf list 2>&1 | grep Tracepoint
|
$ perf list 2>&1 | grep Tracepoint
|
||||||
ext4:ext4_free_inode [Tracepoint event]
|
ext4:ext4_free_inode [Tracepoint event]
|
||||||
@@ -49,19 +49,19 @@ simple case of
|
|||||||
[ .... remaining output snipped .... ]
|
[ .... remaining output snipped .... ]
|
||||||
|
|
||||||
|
|
||||||
2. Enabling Events
|
3. Enabling Events
|
||||||
==================
|
==================
|
||||||
|
|
||||||
2.1 System-Wide Event Enabling
|
3.1 System-Wide Event Enabling
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
See Documentation/trace/events.txt for a proper description on how events
|
See Documentation/trace/events.txt for a proper description on how events
|
||||||
can be enabled system-wide. A short example of enabling all events related
|
can be enabled system-wide. A short example of enabling all events related
|
||||||
to page allocation would look something like
|
to page allocation would look something like:
|
||||||
|
|
||||||
$ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done
|
$ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done
|
||||||
|
|
||||||
2.2 System-Wide Event Enabling with SystemTap
|
3.2 System-Wide Event Enabling with SystemTap
|
||||||
---------------------------------------------
|
---------------------------------------------
|
||||||
|
|
||||||
In SystemTap, tracepoints are accessible using the kernel.trace() function
|
In SystemTap, tracepoints are accessible using the kernel.trace() function
|
||||||
@@ -86,7 +86,7 @@ were allocating the pages.
|
|||||||
print_count()
|
print_count()
|
||||||
}
|
}
|
||||||
|
|
||||||
2.3 System-Wide Event Enabling with PCL
|
3.3 System-Wide Event Enabling with PCL
|
||||||
---------------------------------------
|
---------------------------------------
|
||||||
|
|
||||||
By specifying the -a switch and analysing sleep, the system-wide events
|
By specifying the -a switch and analysing sleep, the system-wide events
|
||||||
@@ -107,16 +107,16 @@ for a duration of time can be examined.
|
|||||||
Similarly, one could execute a shell and exit it as desired to get a report
|
Similarly, one could execute a shell and exit it as desired to get a report
|
||||||
at that point.
|
at that point.
|
||||||
|
|
||||||
2.4 Local Event Enabling
|
3.4 Local Event Enabling
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
Documentation/trace/ftrace.txt describes how to enable events on a per-thread
|
Documentation/trace/ftrace.txt describes how to enable events on a per-thread
|
||||||
basis using set_ftrace_pid.
|
basis using set_ftrace_pid.
|
||||||
|
|
||||||
2.5 Local Event Enablement with PCL
|
3.5 Local Event Enablement with PCL
|
||||||
-----------------------------------
|
-----------------------------------
|
||||||
|
|
||||||
Events can be activate and tracked for the duration of a process on a local
|
Events can be activated and tracked for the duration of a process on a local
|
||||||
basis using PCL such as follows.
|
basis using PCL such as follows.
|
||||||
|
|
||||||
$ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
$ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
||||||
@@ -131,18 +131,18 @@ basis using PCL such as follows.
|
|||||||
|
|
||||||
0.973913387 seconds time elapsed
|
0.973913387 seconds time elapsed
|
||||||
|
|
||||||
3. Event Filtering
|
4. Event Filtering
|
||||||
==================
|
==================
|
||||||
|
|
||||||
Documentation/trace/ftrace.txt covers in-depth how to filter events in
|
Documentation/trace/ftrace.txt covers in-depth how to filter events in
|
||||||
ftrace. Obviously using grep and awk of trace_pipe is an option as well
|
ftrace. Obviously using grep and awk of trace_pipe is an option as well
|
||||||
as any script reading trace_pipe.
|
as any script reading trace_pipe.
|
||||||
|
|
||||||
4. Analysing Event Variances with PCL
|
5. Analysing Event Variances with PCL
|
||||||
=====================================
|
=====================================
|
||||||
|
|
||||||
Any workload can exhibit variances between runs and it can be important
|
Any workload can exhibit variances between runs and it can be important
|
||||||
to know what the standard deviation in. By and large, this is left to the
|
to know what the standard deviation is. By and large, this is left to the
|
||||||
performance analyst to do it by hand. In the event that the discrete event
|
performance analyst to do it by hand. In the event that the discrete event
|
||||||
occurrences are useful to the performance analyst, then perf can be used.
|
occurrences are useful to the performance analyst, then perf can be used.
|
||||||
|
|
||||||
@@ -166,7 +166,7 @@ In the event that some higher-level event is required that depends on some
|
|||||||
aggregation of discrete events, then a script would need to be developed.
|
aggregation of discrete events, then a script would need to be developed.
|
||||||
|
|
||||||
Using --repeat, it is also possible to view how events are fluctuating over
|
Using --repeat, it is also possible to view how events are fluctuating over
|
||||||
time on a system wide basis using -a and sleep.
|
time on a system-wide basis using -a and sleep.
|
||||||
|
|
||||||
$ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
$ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
||||||
-e kmem:mm_pagevec_free \
|
-e kmem:mm_pagevec_free \
|
||||||
@@ -180,7 +180,7 @@ time on a system wide basis using -a and sleep.
|
|||||||
|
|
||||||
1.002251757 seconds time elapsed ( +- 0.005% )
|
1.002251757 seconds time elapsed ( +- 0.005% )
|
||||||
|
|
||||||
5. Higher-Level Analysis with Helper Scripts
|
6. Higher-Level Analysis with Helper Scripts
|
||||||
============================================
|
============================================
|
||||||
|
|
||||||
When events are enabled the events that are triggering can be read from
|
When events are enabled the events that are triggering can be read from
|
||||||
@@ -190,11 +190,11 @@ be gathered on-line as appropriate. Examples of post-processing might include
|
|||||||
|
|
||||||
o Reading information from /proc for the PID that triggered the event
|
o Reading information from /proc for the PID that triggered the event
|
||||||
o Deriving a higher-level event from a series of lower-level events.
|
o Deriving a higher-level event from a series of lower-level events.
|
||||||
o Calculate latencies between two events
|
o Calculating latencies between two events
|
||||||
|
|
||||||
Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example
|
Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example
|
||||||
script that can read trace_pipe from STDIN or a copy of a trace. When used
|
script that can read trace_pipe from STDIN or a copy of a trace. When used
|
||||||
on-line, it can be interrupted once to generate a report without existing
|
on-line, it can be interrupted once to generate a report without exiting
|
||||||
and twice to exit.
|
and twice to exit.
|
||||||
|
|
||||||
Simplistically, the script just reads STDIN and counts up events but it
|
Simplistically, the script just reads STDIN and counts up events but it
|
||||||
@@ -212,12 +212,12 @@ also can do more such as
|
|||||||
processes, the parent process responsible for creating all the helpers
|
processes, the parent process responsible for creating all the helpers
|
||||||
can be identified
|
can be identified
|
||||||
|
|
||||||
6. Lower-Level Analysis with PCL
|
7. Lower-Level Analysis with PCL
|
||||||
================================
|
================================
|
||||||
|
|
||||||
There may also be a requirement to identify what functions with a program
|
There may also be a requirement to identify what functions within a program
|
||||||
were generating events within the kernel. To begin this sort of analysis, the
|
were generating events within the kernel. To begin this sort of analysis, the
|
||||||
data must be recorded. At the time of writing, this required root
|
data must be recorded. At the time of writing, this required root:
|
||||||
|
|
||||||
$ perf record -c 1 \
|
$ perf record -c 1 \
|
||||||
-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
||||||
@@ -253,11 +253,11 @@ perf report.
|
|||||||
# (For more details, try: perf report --sort comm,dso,symbol)
|
# (For more details, try: perf report --sort comm,dso,symbol)
|
||||||
#
|
#
|
||||||
|
|
||||||
According to this, the vast majority of events occured triggered on events
|
According to this, the vast majority of events triggered on events
|
||||||
within the VDSO. With simple binaries, this will often be the case so lets
|
within the VDSO. With simple binaries, this will often be the case so let's
|
||||||
take a slightly different example. In the course of writing this, it was
|
take a slightly different example. In the course of writing this, it was
|
||||||
noticed that X was generating an insane amount of page allocations so lets look
|
noticed that X was generating an insane amount of page allocations so let's look
|
||||||
at it
|
at it:
|
||||||
|
|
||||||
$ perf record -c 1 -f \
|
$ perf record -c 1 -f \
|
||||||
-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
||||||
@@ -280,8 +280,8 @@ This was interrupted after a few seconds and
|
|||||||
# (For more details, try: perf report --sort comm,dso,symbol)
|
# (For more details, try: perf report --sort comm,dso,symbol)
|
||||||
#
|
#
|
||||||
|
|
||||||
So, almost half of the events are occuring in a library. To get an idea which
|
So, almost half of the events are occurring in a library. To get an idea which
|
||||||
symbol.
|
symbol:
|
||||||
|
|
||||||
$ perf report --sort comm,dso,symbol
|
$ perf report --sort comm,dso,symbol
|
||||||
# Samples: 27666
|
# Samples: 27666
|
||||||
@@ -297,7 +297,7 @@ symbol.
|
|||||||
0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] get_fast_path
|
0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] get_fast_path
|
||||||
0.00% Xorg [kernel] [k] ftrace_trace_userstack
|
0.00% Xorg [kernel] [k] ftrace_trace_userstack
|
||||||
|
|
||||||
To see where within the function pixmanFillsse2 things are going wrong
|
To see where within the function pixmanFillsse2 things are going wrong:
|
||||||
|
|
||||||
$ perf annotate pixmanFillsse2
|
$ perf annotate pixmanFillsse2
|
||||||
[ ... ]
|
[ ... ]
|
||||||
|
@@ -195,7 +195,7 @@ struct perf_event_attr;
|
|||||||
static const struct syscall_metadata __used \
|
static const struct syscall_metadata __used \
|
||||||
__attribute__((__aligned__(4))) \
|
__attribute__((__aligned__(4))) \
|
||||||
__attribute__((section("__syscalls_metadata"))) \
|
__attribute__((section("__syscalls_metadata"))) \
|
||||||
__syscall_meta_##sname = { \
|
__syscall_meta__##sname = { \
|
||||||
.name = "sys_"#sname, \
|
.name = "sys_"#sname, \
|
||||||
.nb_args = 0, \
|
.nb_args = 0, \
|
||||||
.enter_event = &event_enter__##sname, \
|
.enter_event = &event_enter__##sname, \
|
||||||
|
@@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
|
|||||||
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
|
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
|
||||||
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
|
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
|
||||||
offsetof(typeof(field), item), \
|
offsetof(typeof(field), item), \
|
||||||
sizeof(field.item), 0, FILTER_OTHER); \
|
sizeof(field.item), \
|
||||||
|
is_signed_type(type), FILTER_OTHER); \
|
||||||
if (ret) \
|
if (ret) \
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@@ -422,8 +423,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
|
|||||||
#define __dynamic_array(type, item, len) \
|
#define __dynamic_array(type, item, len) \
|
||||||
ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \
|
ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \
|
||||||
offsetof(typeof(field), __data_loc_##item), \
|
offsetof(typeof(field), __data_loc_##item), \
|
||||||
sizeof(field.__data_loc_##item), 0, \
|
sizeof(field.__data_loc_##item), \
|
||||||
FILTER_OTHER);
|
is_signed_type(type), FILTER_OTHER);
|
||||||
|
|
||||||
#undef __string
|
#undef __string
|
||||||
#define __string(item, src) __dynamic_array(char, item, -1)
|
#define __string(item, src) __dynamic_array(char, item, -1)
|
||||||
|
@@ -40,6 +40,7 @@
|
|||||||
#include <linux/percpu.h>
|
#include <linux/percpu.h>
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
|
#include <linux/cpu.h>
|
||||||
#include <linux/smp.h>
|
#include <linux/smp.h>
|
||||||
|
|
||||||
#include <linux/hw_breakpoint.h>
|
#include <linux/hw_breakpoint.h>
|
||||||
@@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
|
|||||||
if (!cpu_events)
|
if (!cpu_events)
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
for_each_possible_cpu(cpu) {
|
get_online_cpus();
|
||||||
|
for_each_online_cpu(cpu) {
|
||||||
pevent = per_cpu_ptr(cpu_events, cpu);
|
pevent = per_cpu_ptr(cpu_events, cpu);
|
||||||
bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
|
bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
|
||||||
|
|
||||||
@@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
put_online_cpus();
|
||||||
|
|
||||||
return cpu_events;
|
return cpu_events;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
for_each_possible_cpu(cpu) {
|
for_each_online_cpu(cpu) {
|
||||||
pevent = per_cpu_ptr(cpu_events, cpu);
|
pevent = per_cpu_ptr(cpu_events, cpu);
|
||||||
if (IS_ERR(*pevent))
|
if (IS_ERR(*pevent))
|
||||||
break;
|
break;
|
||||||
unregister_hw_breakpoint(*pevent);
|
unregister_hw_breakpoint(*pevent);
|
||||||
}
|
}
|
||||||
|
put_online_cpus();
|
||||||
|
|
||||||
free_percpu(cpu_events);
|
free_percpu(cpu_events);
|
||||||
/* return the error if any */
|
|
||||||
return ERR_PTR(err);
|
return ERR_PTR(err);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
|
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
|
||||||
|
@@ -12,17 +12,17 @@ config NOP_TRACER
|
|||||||
config HAVE_FTRACE_NMI_ENTER
|
config HAVE_FTRACE_NMI_ENTER
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
See Documentation/trace/ftrace-implementation.txt
|
See Documentation/trace/ftrace-design.txt
|
||||||
|
|
||||||
config HAVE_FUNCTION_TRACER
|
config HAVE_FUNCTION_TRACER
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
See Documentation/trace/ftrace-implementation.txt
|
See Documentation/trace/ftrace-design.txt
|
||||||
|
|
||||||
config HAVE_FUNCTION_GRAPH_TRACER
|
config HAVE_FUNCTION_GRAPH_TRACER
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
See Documentation/trace/ftrace-implementation.txt
|
See Documentation/trace/ftrace-design.txt
|
||||||
|
|
||||||
config HAVE_FUNCTION_GRAPH_FP_TEST
|
config HAVE_FUNCTION_GRAPH_FP_TEST
|
||||||
bool
|
bool
|
||||||
@@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST
|
|||||||
config HAVE_FUNCTION_TRACE_MCOUNT_TEST
|
config HAVE_FUNCTION_TRACE_MCOUNT_TEST
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
See Documentation/trace/ftrace-implementation.txt
|
See Documentation/trace/ftrace-design.txt
|
||||||
|
|
||||||
config HAVE_DYNAMIC_FTRACE
|
config HAVE_DYNAMIC_FTRACE
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
See Documentation/trace/ftrace-implementation.txt
|
See Documentation/trace/ftrace-design.txt
|
||||||
|
|
||||||
config HAVE_FTRACE_MCOUNT_RECORD
|
config HAVE_FTRACE_MCOUNT_RECORD
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
See Documentation/trace/ftrace-implementation.txt
|
See Documentation/trace/ftrace-design.txt
|
||||||
|
|
||||||
config HAVE_HW_BRANCH_TRACER
|
config HAVE_HW_BRANCH_TRACER
|
||||||
bool
|
bool
|
||||||
@@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER
|
|||||||
config HAVE_SYSCALL_TRACEPOINTS
|
config HAVE_SYSCALL_TRACEPOINTS
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
See Documentation/trace/ftrace-implementation.txt
|
See Documentation/trace/ftrace-design.txt
|
||||||
|
|
||||||
config TRACER_MAX_TRACE
|
config TRACER_MAX_TRACE
|
||||||
bool
|
bool
|
||||||
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP
|
|||||||
# This allows those options to appear when no other tracer is selected. But the
|
# This allows those options to appear when no other tracer is selected. But the
|
||||||
# options do not appear when something else selects it. We need the two options
|
# options do not appear when something else selects it. We need the two options
|
||||||
# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
|
# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
|
||||||
# hidding of the automatic options.
|
# hiding of the automatic options.
|
||||||
|
|
||||||
config TRACING
|
config TRACING
|
||||||
bool
|
bool
|
||||||
@@ -119,7 +119,7 @@ menuconfig FTRACE
|
|||||||
bool "Tracers"
|
bool "Tracers"
|
||||||
default y if DEBUG_KERNEL
|
default y if DEBUG_KERNEL
|
||||||
help
|
help
|
||||||
Enable the kernel tracing infrastructure.
|
Enable the kernel tracing infrastructure.
|
||||||
|
|
||||||
if FTRACE
|
if FTRACE
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ config FUNCTION_TRACER
|
|||||||
help
|
help
|
||||||
Enable the kernel to trace every kernel function. This is done
|
Enable the kernel to trace every kernel function. This is done
|
||||||
by using a compiler feature to insert a small, 5-byte No-Operation
|
by using a compiler feature to insert a small, 5-byte No-Operation
|
||||||
instruction to the beginning of every kernel function, which NOP
|
instruction at the beginning of every kernel function, which NOP
|
||||||
sequence is then dynamically patched into a tracer call when
|
sequence is then dynamically patched into a tracer call when
|
||||||
tracing is enabled by the administrator. If it's runtime disabled
|
tracing is enabled by the administrator. If it's runtime disabled
|
||||||
(the bootup default), then the overhead of the instructions is very
|
(the bootup default), then the overhead of the instructions is very
|
||||||
@@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER
|
|||||||
and its entry.
|
and its entry.
|
||||||
Its first purpose is to trace the duration of functions and
|
Its first purpose is to trace the duration of functions and
|
||||||
draw a call graph for each thread with some information like
|
draw a call graph for each thread with some information like
|
||||||
the return value. This is done by setting the current return
|
the return value. This is done by setting the current return
|
||||||
address on the current task structure into a stack of calls.
|
address on the current task structure into a stack of calls.
|
||||||
|
|
||||||
|
|
||||||
@@ -173,7 +173,7 @@ config IRQSOFF_TRACER
|
|||||||
|
|
||||||
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
|
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
|
||||||
|
|
||||||
(Note that kernel size and overhead increases with this option
|
(Note that kernel size and overhead increase with this option
|
||||||
enabled. This option and the preempt-off timing option can be
|
enabled. This option and the preempt-off timing option can be
|
||||||
used together or separately.)
|
used together or separately.)
|
||||||
|
|
||||||
@@ -186,7 +186,7 @@ config PREEMPT_TRACER
|
|||||||
select TRACER_MAX_TRACE
|
select TRACER_MAX_TRACE
|
||||||
select RING_BUFFER_ALLOW_SWAP
|
select RING_BUFFER_ALLOW_SWAP
|
||||||
help
|
help
|
||||||
This option measures the time spent in preemption off critical
|
This option measures the time spent in preemption-off critical
|
||||||
sections, with microsecond accuracy.
|
sections, with microsecond accuracy.
|
||||||
|
|
||||||
The default measurement method is a maximum search, which is
|
The default measurement method is a maximum search, which is
|
||||||
@@ -195,7 +195,7 @@ config PREEMPT_TRACER
|
|||||||
|
|
||||||
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
|
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
|
||||||
|
|
||||||
(Note that kernel size and overhead increases with this option
|
(Note that kernel size and overhead increase with this option
|
||||||
enabled. This option and the irqs-off timing option can be
|
enabled. This option and the irqs-off timing option can be
|
||||||
used together or separately.)
|
used together or separately.)
|
||||||
|
|
||||||
@@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS
|
|||||||
depends on !GENERIC_TRACER
|
depends on !GENERIC_TRACER
|
||||||
select TRACING
|
select TRACING
|
||||||
help
|
help
|
||||||
This tracer hooks to various trace points in the kernel
|
This tracer hooks to various trace points in the kernel,
|
||||||
allowing the user to pick and choose which trace point they
|
allowing the user to pick and choose which trace point they
|
||||||
want to trace. It also includes the sched_switch tracer plugin.
|
want to trace. It also includes the sched_switch tracer plugin.
|
||||||
|
|
||||||
@@ -265,19 +265,19 @@ choice
|
|||||||
The likely/unlikely profiler only looks at the conditions that
|
The likely/unlikely profiler only looks at the conditions that
|
||||||
are annotated with a likely or unlikely macro.
|
are annotated with a likely or unlikely macro.
|
||||||
|
|
||||||
The "all branch" profiler will profile every if statement in the
|
The "all branch" profiler will profile every if-statement in the
|
||||||
kernel. This profiler will also enable the likely/unlikely
|
kernel. This profiler will also enable the likely/unlikely
|
||||||
profiler as well.
|
profiler.
|
||||||
|
|
||||||
Either of the above profilers add a bit of overhead to the system.
|
Either of the above profilers adds a bit of overhead to the system.
|
||||||
If unsure choose "No branch profiling".
|
If unsure, choose "No branch profiling".
|
||||||
|
|
||||||
config BRANCH_PROFILE_NONE
|
config BRANCH_PROFILE_NONE
|
||||||
bool "No branch profiling"
|
bool "No branch profiling"
|
||||||
help
|
help
|
||||||
No branch profiling. Branch profiling adds a bit of overhead.
|
No branch profiling. Branch profiling adds a bit of overhead.
|
||||||
Only enable it if you want to analyse the branching behavior.
|
Only enable it if you want to analyse the branching behavior.
|
||||||
Otherwise keep it disabled.
|
Otherwise keep it disabled.
|
||||||
|
|
||||||
config PROFILE_ANNOTATED_BRANCHES
|
config PROFILE_ANNOTATED_BRANCHES
|
||||||
bool "Trace likely/unlikely profiler"
|
bool "Trace likely/unlikely profiler"
|
||||||
@@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES
|
|||||||
|
|
||||||
/sys/kernel/debug/tracing/profile_annotated_branch
|
/sys/kernel/debug/tracing/profile_annotated_branch
|
||||||
|
|
||||||
Note: this will add a significant overhead, only turn this
|
Note: this will add a significant overhead; only turn this
|
||||||
on if you need to profile the system's use of these macros.
|
on if you need to profile the system's use of these macros.
|
||||||
|
|
||||||
config PROFILE_ALL_BRANCHES
|
config PROFILE_ALL_BRANCHES
|
||||||
@@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES
|
|||||||
|
|
||||||
This configuration, when enabled, will impose a great overhead
|
This configuration, when enabled, will impose a great overhead
|
||||||
on the system. This should only be enabled when the system
|
on the system. This should only be enabled when the system
|
||||||
is to be analyzed
|
is to be analyzed in much detail.
|
||||||
endchoice
|
endchoice
|
||||||
|
|
||||||
config TRACING_BRANCHES
|
config TRACING_BRANCHES
|
||||||
@@ -335,7 +335,7 @@ config POWER_TRACER
|
|||||||
depends on X86
|
depends on X86
|
||||||
select GENERIC_TRACER
|
select GENERIC_TRACER
|
||||||
help
|
help
|
||||||
This tracer helps developers to analyze and optimize the kernels
|
This tracer helps developers to analyze and optimize the kernel's
|
||||||
power management decisions, specifically the C-state and P-state
|
power management decisions, specifically the C-state and P-state
|
||||||
behavior.
|
behavior.
|
||||||
|
|
||||||
@@ -391,14 +391,14 @@ config HW_BRANCH_TRACER
|
|||||||
select GENERIC_TRACER
|
select GENERIC_TRACER
|
||||||
help
|
help
|
||||||
This tracer records all branches on the system in a circular
|
This tracer records all branches on the system in a circular
|
||||||
buffer giving access to the last N branches for each cpu.
|
buffer, giving access to the last N branches for each cpu.
|
||||||
|
|
||||||
config KMEMTRACE
|
config KMEMTRACE
|
||||||
bool "Trace SLAB allocations"
|
bool "Trace SLAB allocations"
|
||||||
select GENERIC_TRACER
|
select GENERIC_TRACER
|
||||||
help
|
help
|
||||||
kmemtrace provides tracing for slab allocator functions, such as
|
kmemtrace provides tracing for slab allocator functions, such as
|
||||||
kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
|
kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
|
||||||
data is then fed to the userspace application in order to analyse
|
data is then fed to the userspace application in order to analyse
|
||||||
allocation hotspots, internal fragmentation and so on, making it
|
allocation hotspots, internal fragmentation and so on, making it
|
||||||
possible to see how well an allocator performs, as well as debug
|
possible to see how well an allocator performs, as well as debug
|
||||||
@@ -417,15 +417,15 @@ config WORKQUEUE_TRACER
|
|||||||
bool "Trace workqueues"
|
bool "Trace workqueues"
|
||||||
select GENERIC_TRACER
|
select GENERIC_TRACER
|
||||||
help
|
help
|
||||||
The workqueue tracer provides some statistical informations
|
The workqueue tracer provides some statistical information
|
||||||
about each cpu workqueue thread such as the number of the
|
about each cpu workqueue thread such as the number of the
|
||||||
works inserted and executed since their creation. It can help
|
works inserted and executed since their creation. It can help
|
||||||
to evaluate the amount of work each of them have to perform.
|
to evaluate the amount of work each of them has to perform.
|
||||||
For example it can help a developer to decide whether he should
|
For example it can help a developer to decide whether he should
|
||||||
choose a per cpu workqueue instead of a singlethreaded one.
|
choose a per-cpu workqueue instead of a singlethreaded one.
|
||||||
|
|
||||||
config BLK_DEV_IO_TRACE
|
config BLK_DEV_IO_TRACE
|
||||||
bool "Support for tracing block io actions"
|
bool "Support for tracing block IO actions"
|
||||||
depends on SYSFS
|
depends on SYSFS
|
||||||
depends on BLOCK
|
depends on BLOCK
|
||||||
select RELAY
|
select RELAY
|
||||||
@@ -456,15 +456,15 @@ config KPROBE_EVENT
|
|||||||
select TRACING
|
select TRACING
|
||||||
default y
|
default y
|
||||||
help
|
help
|
||||||
This allows the user to add tracing events (similar to tracepoints) on the fly
|
This allows the user to add tracing events (similar to tracepoints)
|
||||||
via the ftrace interface. See Documentation/trace/kprobetrace.txt
|
on the fly via the ftrace interface. See
|
||||||
for more details.
|
Documentation/trace/kprobetrace.txt for more details.
|
||||||
|
|
||||||
Those events can be inserted wherever kprobes can probe, and record
|
Those events can be inserted wherever kprobes can probe, and record
|
||||||
various register and memory values.
|
various register and memory values.
|
||||||
|
|
||||||
This option is also required by perf-probe subcommand of perf tools. If
|
This option is also required by perf-probe subcommand of perf tools.
|
||||||
you want to use perf tools, this option is strongly recommended.
|
If you want to use perf tools, this option is strongly recommended.
|
||||||
|
|
||||||
config DYNAMIC_FTRACE
|
config DYNAMIC_FTRACE
|
||||||
bool "enable/disable ftrace tracepoints dynamically"
|
bool "enable/disable ftrace tracepoints dynamically"
|
||||||
@@ -472,32 +472,32 @@ config DYNAMIC_FTRACE
|
|||||||
depends on HAVE_DYNAMIC_FTRACE
|
depends on HAVE_DYNAMIC_FTRACE
|
||||||
default y
|
default y
|
||||||
help
|
help
|
||||||
This option will modify all the calls to ftrace dynamically
|
This option will modify all the calls to ftrace dynamically
|
||||||
(will patch them out of the binary image and replaces them
|
(will patch them out of the binary image and replace them
|
||||||
with a No-Op instruction) as they are called. A table is
|
with a No-Op instruction) as they are called. A table is
|
||||||
created to dynamically enable them again.
|
created to dynamically enable them again.
|
||||||
|
|
||||||
This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
|
This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
|
||||||
has native performance as long as no tracing is active.
|
otherwise has native performance as long as no tracing is active.
|
||||||
|
|
||||||
The changes to the code are done by a kernel thread that
|
The changes to the code are done by a kernel thread that
|
||||||
wakes up once a second and checks to see if any ftrace calls
|
wakes up once a second and checks to see if any ftrace calls
|
||||||
were made. If so, it runs stop_machine (stops all CPUS)
|
were made. If so, it runs stop_machine (stops all CPUS)
|
||||||
and modifies the code to jump over the call to ftrace.
|
and modifies the code to jump over the call to ftrace.
|
||||||
|
|
||||||
config FUNCTION_PROFILER
|
config FUNCTION_PROFILER
|
||||||
bool "Kernel function profiler"
|
bool "Kernel function profiler"
|
||||||
depends on FUNCTION_TRACER
|
depends on FUNCTION_TRACER
|
||||||
default n
|
default n
|
||||||
help
|
help
|
||||||
This option enables the kernel function profiler. A file is created
|
This option enables the kernel function profiler. A file is created
|
||||||
in debugfs called function_profile_enabled which defaults to zero.
|
in debugfs called function_profile_enabled which defaults to zero.
|
||||||
When a 1 is echoed into this file profiling begins, and when a
|
When a 1 is echoed into this file profiling begins, and when a
|
||||||
zero is entered, profiling stops. A file in the trace_stats
|
zero is entered, profiling stops. A "functions" file is created in
|
||||||
directory called functions, that show the list of functions that
|
the trace_stats directory; this file shows the list of functions that
|
||||||
have been hit and their counters.
|
have been hit and their counters.
|
||||||
|
|
||||||
If in doubt, say N
|
If in doubt, say N.
|
||||||
|
|
||||||
config FTRACE_MCOUNT_RECORD
|
config FTRACE_MCOUNT_RECORD
|
||||||
def_bool y
|
def_bool y
|
||||||
@@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK
|
|||||||
tristate "Ring buffer benchmark stress tester"
|
tristate "Ring buffer benchmark stress tester"
|
||||||
depends on RING_BUFFER
|
depends on RING_BUFFER
|
||||||
help
|
help
|
||||||
This option creates a test to stress the ring buffer and bench mark it.
|
This option creates a test to stress the ring buffer and benchmark it.
|
||||||
It creates its own ring buffer such that it will not interfer with
|
It creates its own ring buffer such that it will not interfere with
|
||||||
any other users of the ring buffer (such as ftrace). It then creates
|
any other users of the ring buffer (such as ftrace). It then creates
|
||||||
a producer and consumer that will run for 10 seconds and sleep for
|
a producer and consumer that will run for 10 seconds and sleep for
|
||||||
10 seconds. Each interval it will print out the number of events
|
10 seconds. Each interval it will print out the number of events
|
||||||
@@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK
|
|||||||
It does not disable interrupts or raise its priority, so it may be
|
It does not disable interrupts or raise its priority, so it may be
|
||||||
affected by processes that are running.
|
affected by processes that are running.
|
||||||
|
|
||||||
If unsure, say N
|
If unsure, say N.
|
||||||
|
|
||||||
endif # FTRACE
|
endif # FTRACE
|
||||||
|
|
||||||
|
@@ -3949,7 +3949,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
|
|||||||
if (!!(topt->flags->val & topt->opt->bit) != val) {
|
if (!!(topt->flags->val & topt->opt->bit) != val) {
|
||||||
mutex_lock(&trace_types_lock);
|
mutex_lock(&trace_types_lock);
|
||||||
ret = __set_tracer_option(current_trace, topt->flags,
|
ret = __set_tracer_option(current_trace, topt->flags,
|
||||||
topt->opt, val);
|
topt->opt, !val);
|
||||||
mutex_unlock(&trace_types_lock);
|
mutex_unlock(&trace_types_lock);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
@@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
|
|||||||
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
|
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
|
||||||
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
|
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
|
||||||
offsetof(typeof(field), item), \
|
offsetof(typeof(field), item), \
|
||||||
sizeof(field.item), 0, FILTER_OTHER); \
|
sizeof(field.item), \
|
||||||
|
is_signed_type(type), FILTER_OTHER); \
|
||||||
if (ret) \
|
if (ret) \
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
|
|||||||
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
|
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
|
||||||
offsetof(typeof(field), \
|
offsetof(typeof(field), \
|
||||||
container.item), \
|
container.item), \
|
||||||
sizeof(field.container.item), 0, \
|
sizeof(field.container.item), \
|
||||||
FILTER_OTHER); \
|
is_signed_type(type), FILTER_OTHER); \
|
||||||
if (ret) \
|
if (ret) \
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
@@ -1201,10 +1201,11 @@ static int __probe_event_show_format(struct trace_seq *s,
|
|||||||
#undef SHOW_FIELD
|
#undef SHOW_FIELD
|
||||||
#define SHOW_FIELD(type, item, name) \
|
#define SHOW_FIELD(type, item, name) \
|
||||||
do { \
|
do { \
|
||||||
ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
|
ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \
|
||||||
"offset:%u;\tsize:%u;\n", name, \
|
"offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
|
||||||
(unsigned int)offsetof(typeof(field), item),\
|
(unsigned int)offsetof(typeof(field), item),\
|
||||||
(unsigned int)sizeof(type)); \
|
(unsigned int)sizeof(type), \
|
||||||
|
is_signed_type(type)); \
|
||||||
if (!ret) \
|
if (!ret) \
|
||||||
return 0; \
|
return 0; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
@@ -26,12 +26,13 @@
|
|||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
|
|
||||||
#include "trace_output.h"
|
#include "trace_output.h"
|
||||||
#include "trace_stat.h"
|
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
|
|
||||||
#include <linux/hw_breakpoint.h>
|
#include <linux/hw_breakpoint.h>
|
||||||
#include <asm/hw_breakpoint.h>
|
#include <asm/hw_breakpoint.h>
|
||||||
|
|
||||||
|
#include <asm/atomic.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For now, let us restrict the no. of symbols traced simultaneously to number
|
* For now, let us restrict the no. of symbols traced simultaneously to number
|
||||||
* of available hardware breakpoint registers.
|
* of available hardware breakpoint registers.
|
||||||
@@ -44,7 +45,7 @@ struct trace_ksym {
|
|||||||
struct perf_event **ksym_hbp;
|
struct perf_event **ksym_hbp;
|
||||||
struct perf_event_attr attr;
|
struct perf_event_attr attr;
|
||||||
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
||||||
unsigned long counter;
|
atomic64_t counter;
|
||||||
#endif
|
#endif
|
||||||
struct hlist_node ksym_hlist;
|
struct hlist_node ksym_hlist;
|
||||||
};
|
};
|
||||||
@@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
|
|||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
|
hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
|
||||||
if ((entry->attr.bp_addr == hbp_hit_addr) &&
|
if (entry->attr.bp_addr == hbp_hit_addr) {
|
||||||
(entry->counter <= MAX_UL_INT)) {
|
atomic64_inc(&entry->counter);
|
||||||
entry->counter++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
|
|||||||
entry->attr.bp_addr = addr;
|
entry->attr.bp_addr = addr;
|
||||||
entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
|
entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
|
||||||
|
|
||||||
ret = -EAGAIN;
|
|
||||||
entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
|
entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
|
||||||
ksym_hbp_handler);
|
ksym_hbp_handler);
|
||||||
|
|
||||||
@@ -300,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file,
|
|||||||
* 2: echo 0 > ksym_trace_filter
|
* 2: echo 0 > ksym_trace_filter
|
||||||
* 3: echo "*:---" > ksym_trace_filter
|
* 3: echo "*:---" > ksym_trace_filter
|
||||||
*/
|
*/
|
||||||
if (!buf[0] || !strcmp(buf, "0") ||
|
if (!input_string[0] || !strcmp(input_string, "0") ||
|
||||||
!strcmp(buf, "*:---")) {
|
!strcmp(input_string, "*:---")) {
|
||||||
__ksym_trace_reset();
|
__ksym_trace_reset();
|
||||||
ret = 0;
|
ret = 0;
|
||||||
goto out;
|
goto out;
|
||||||
@@ -444,102 +443,77 @@ struct tracer ksym_tracer __read_mostly =
|
|||||||
.print_line = ksym_trace_output
|
.print_line = ksym_trace_output
|
||||||
};
|
};
|
||||||
|
|
||||||
__init static int init_ksym_trace(void)
|
|
||||||
{
|
|
||||||
struct dentry *d_tracer;
|
|
||||||
struct dentry *entry;
|
|
||||||
|
|
||||||
d_tracer = tracing_init_dentry();
|
|
||||||
ksym_filter_entry_count = 0;
|
|
||||||
|
|
||||||
entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
|
|
||||||
NULL, &ksym_tracing_fops);
|
|
||||||
if (!entry)
|
|
||||||
pr_warning("Could not create debugfs "
|
|
||||||
"'ksym_trace_filter' file\n");
|
|
||||||
|
|
||||||
return register_tracer(&ksym_tracer);
|
|
||||||
}
|
|
||||||
device_initcall(init_ksym_trace);
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
||||||
static int ksym_tracer_stat_headers(struct seq_file *m)
|
static int ksym_profile_show(struct seq_file *m, void *v)
|
||||||
{
|
{
|
||||||
seq_puts(m, " Access Type ");
|
struct hlist_node *node;
|
||||||
seq_puts(m, " Symbol Counter\n");
|
|
||||||
seq_puts(m, " ----------- ");
|
|
||||||
seq_puts(m, " ------ -------\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int ksym_tracer_stat_show(struct seq_file *m, void *v)
|
|
||||||
{
|
|
||||||
struct hlist_node *stat = v;
|
|
||||||
struct trace_ksym *entry;
|
struct trace_ksym *entry;
|
||||||
int access_type = 0;
|
int access_type = 0;
|
||||||
char fn_name[KSYM_NAME_LEN];
|
char fn_name[KSYM_NAME_LEN];
|
||||||
|
|
||||||
entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
|
seq_puts(m, " Access Type ");
|
||||||
|
seq_puts(m, " Symbol Counter\n");
|
||||||
|
seq_puts(m, " ----------- ");
|
||||||
|
seq_puts(m, " ------ -------\n");
|
||||||
|
|
||||||
access_type = entry->attr.bp_type;
|
rcu_read_lock();
|
||||||
|
hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
|
||||||
|
|
||||||
switch (access_type) {
|
access_type = entry->attr.bp_type;
|
||||||
case HW_BREAKPOINT_R:
|
|
||||||
seq_puts(m, " R ");
|
switch (access_type) {
|
||||||
break;
|
case HW_BREAKPOINT_R:
|
||||||
case HW_BREAKPOINT_W:
|
seq_puts(m, " R ");
|
||||||
seq_puts(m, " W ");
|
break;
|
||||||
break;
|
case HW_BREAKPOINT_W:
|
||||||
case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
|
seq_puts(m, " W ");
|
||||||
seq_puts(m, " RW ");
|
break;
|
||||||
break;
|
case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
|
||||||
default:
|
seq_puts(m, " RW ");
|
||||||
seq_puts(m, " NA ");
|
break;
|
||||||
|
default:
|
||||||
|
seq_puts(m, " NA ");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
|
||||||
|
seq_printf(m, " %-36s", fn_name);
|
||||||
|
else
|
||||||
|
seq_printf(m, " %-36s", "<NA>");
|
||||||
|
seq_printf(m, " %15llu\n",
|
||||||
|
(unsigned long long)atomic64_read(&entry->counter));
|
||||||
}
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
|
|
||||||
seq_printf(m, " %-36s", fn_name);
|
|
||||||
else
|
|
||||||
seq_printf(m, " %-36s", "<NA>");
|
|
||||||
seq_printf(m, " %15lu\n", entry->counter);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *ksym_tracer_stat_start(struct tracer_stat *trace)
|
static int ksym_profile_open(struct inode *node, struct file *file)
|
||||||
{
|
{
|
||||||
return ksym_filter_head.first;
|
return single_open(file, ksym_profile_show, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *
|
static const struct file_operations ksym_profile_fops = {
|
||||||
ksym_tracer_stat_next(void *v, int idx)
|
.open = ksym_profile_open,
|
||||||
{
|
.read = seq_read,
|
||||||
struct hlist_node *stat = v;
|
.llseek = seq_lseek,
|
||||||
|
.release = single_release,
|
||||||
return stat->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct tracer_stat ksym_tracer_stats = {
|
|
||||||
.name = "ksym_tracer",
|
|
||||||
.stat_start = ksym_tracer_stat_start,
|
|
||||||
.stat_next = ksym_tracer_stat_next,
|
|
||||||
.stat_headers = ksym_tracer_stat_headers,
|
|
||||||
.stat_show = ksym_tracer_stat_show
|
|
||||||
};
|
};
|
||||||
|
|
||||||
__init static int ksym_tracer_stat_init(void)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = register_stat_tracer(&ksym_tracer_stats);
|
|
||||||
if (ret) {
|
|
||||||
printk(KERN_WARNING "Warning: could not register "
|
|
||||||
"ksym tracer stats\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
fs_initcall(ksym_tracer_stat_init);
|
|
||||||
#endif /* CONFIG_PROFILE_KSYM_TRACER */
|
#endif /* CONFIG_PROFILE_KSYM_TRACER */
|
||||||
|
|
||||||
|
__init static int init_ksym_trace(void)
|
||||||
|
{
|
||||||
|
struct dentry *d_tracer;
|
||||||
|
|
||||||
|
d_tracer = tracing_init_dentry();
|
||||||
|
|
||||||
|
trace_create_file("ksym_trace_filter", 0644, d_tracer,
|
||||||
|
NULL, &ksym_tracing_fops);
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
||||||
|
trace_create_file("ksym_profile", 0444, d_tracer,
|
||||||
|
NULL, &ksym_profile_fops);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return register_tracer(&ksym_tracer);
|
||||||
|
}
|
||||||
|
device_initcall(init_ksym_trace);
|
||||||
|
Reference in New Issue
Block a user