perf record: Add support for sampling taken branch
This patch adds a new option to enable taken branch stack sampling, i.e., leverage the PERF_SAMPLE_BRANCH_STACK feature of perf_events. There is a new option to activate this mode: -b. It is possible to pass a set of filters to select the type of branches to sample. The following filters are available: - any : any type of branches - any_call : any function call or system call - any_ret : any function return or system call return - ind_call : any indirect function call - u: only when the branch target is at the user level - k: only when the branch target is in the kernel - hv: only when the branch target is in the hypervisor Filters can be combined by passing a comma separated list to the option: $ perf record -b any_call,u -e cycles:u branchy Signed-off-by: Roberto Agostino Vitillo <ravitillo@lbl.gov> Signed-off-by: Stephane Eranian <eranian@google.com> Cc: peterz@infradead.org Cc: acme@redhat.com Cc: robert.richter@amd.com Cc: ming.m.lin@intel.com Cc: andi@firstfloor.org Cc: asharma@fb.com Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1328826068-11713-13-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
committed by
Ingo Molnar
parent
b5387528f3
commit
bdfebd848f
@@ -152,6 +152,31 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
|
|||||||
corresponding events, i.e., they always refer to events defined earlier on the command
|
corresponding events, i.e., they always refer to events defined earlier on the command
|
||||||
line.
|
line.
|
||||||
|
|
||||||
|
-b::
|
||||||
|
--branch-stack::
|
||||||
|
Enable taken branch stack sampling. Each sample captures a series of consecutive
|
||||||
|
taken branches. The number of branches captured with each sample depends on the
|
||||||
|
underlying hardware, the type of branches of interest, and the executed code.
|
||||||
|
It is possible to select the types of branches captured by enabling filters. The
|
||||||
|
following filters are defined:
|
||||||
|
|
||||||
|
- any : any type of branches
|
||||||
|
- any_call: any function call or system call
|
||||||
|
- any_ret: any function return or system call return
|
||||||
|
- ind_call: any indirect function call
|
||||||
|
- u: only when the branch target is at the user level
|
||||||
|
- k: only when the branch target is in the kernel
|
||||||
|
- hv: only when the branch target is at the hypervisor level
|
||||||
|
|
||||||
|
+
|
||||||
|
At least one of any, any_call, any_ret, ind_call must be provided. The privilege levels may
|
||||||
|
be omitted, in which case, the privilege levels of the associated event are applied to the
|
||||||
|
branch filter. Both kernel (k) and hypervisor (hv) privilege levels are subject to
|
||||||
|
permissions. When sampling on multiple events, branch stack sampling is enabled for all
|
||||||
|
the sampling events. The sampled branch type is the same for all events.
|
||||||
|
Note that taken branch sampling may not be available on all processors.
|
||||||
|
The various filters must be specified as a comma separated list: -b any_ret,u,k
|
||||||
|
|
||||||
SEE ALSO
|
SEE ALSO
|
||||||
--------
|
--------
|
||||||
linkperf:perf-stat[1], linkperf:perf-list[1]
|
linkperf:perf-stat[1], linkperf:perf-list[1]
|
||||||
|
@@ -638,6 +638,77 @@ out_delete_session:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* One row of a branch filter table: a filter name and the mode bits it selects. */
struct branch_mode {
	const char	*name;	/* filter name; NULL in the terminating row */
	int		mode;	/* mode bits associated with the name */
};

/* Initializer for a table row binding filter name @label to mode bits @bits. */
#define BRANCH_OPT(label, bits) { .name = (label), .mode = (bits) }

/* Initializer for the terminating row: the name is NULL, the mode is left zero. */
#define BRANCH_END { .name = NULL }
|
||||||
|
|
||||||
|
static const struct branch_mode branch_modes[] = {
|
||||||
|
BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
|
||||||
|
BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
|
||||||
|
BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
|
||||||
|
BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
|
||||||
|
BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
|
||||||
|
BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
|
||||||
|
BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
|
||||||
|
BRANCH_END
|
||||||
|
};
|
||||||
|
|
||||||
|
static int
|
||||||
|
parse_branch_stack(const struct option *opt, const char *str, int unset __used)
|
||||||
|
{
|
||||||
|
#define ONLY_PLM \
|
||||||
|
(PERF_SAMPLE_BRANCH_USER |\
|
||||||
|
PERF_SAMPLE_BRANCH_KERNEL |\
|
||||||
|
PERF_SAMPLE_BRANCH_HV)
|
||||||
|
|
||||||
|
uint64_t *mode = (uint64_t *)opt->value;
|
||||||
|
const struct branch_mode *br;
|
||||||
|
char *s, *os, *p;
|
||||||
|
int ret = -1;
|
||||||
|
|
||||||
|
*mode = 0;
|
||||||
|
|
||||||
|
/* because str is read-only */
|
||||||
|
s = os = strdup(str);
|
||||||
|
if (!s)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
p = strchr(s, ',');
|
||||||
|
if (p)
|
||||||
|
*p = '\0';
|
||||||
|
|
||||||
|
for (br = branch_modes; br->name; br++) {
|
||||||
|
if (!strcasecmp(s, br->name))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!br->name)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
*mode |= br->mode;
|
||||||
|
|
||||||
|
if (!p)
|
||||||
|
break;
|
||||||
|
|
||||||
|
s = p + 1;
|
||||||
|
}
|
||||||
|
ret = 0;
|
||||||
|
|
||||||
|
if ((*mode & ~ONLY_PLM) == 0) {
|
||||||
|
error("need at least one branch type with -b\n");
|
||||||
|
ret = -1;
|
||||||
|
}
|
||||||
|
error:
|
||||||
|
free(os);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static const char * const record_usage[] = {
|
static const char * const record_usage[] = {
|
||||||
"perf record [<options>] [<command>]",
|
"perf record [<options>] [<command>]",
|
||||||
"perf record [<options>] -- <command> [<options>]",
|
"perf record [<options>] -- <command> [<options>]",
|
||||||
@@ -727,6 +798,9 @@ const struct option record_options[] = {
|
|||||||
"monitor event in cgroup name only",
|
"monitor event in cgroup name only",
|
||||||
parse_cgroups),
|
parse_cgroups),
|
||||||
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
|
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
|
||||||
|
OPT_CALLBACK('b', "branch-stack", &record.opts.branch_stack,
|
||||||
|
"branch mode mask", "branch stack sampling modes",
|
||||||
|
parse_branch_stack),
|
||||||
OPT_END()
|
OPT_END()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -222,6 +222,7 @@ struct perf_record_opts {
|
|||||||
unsigned int freq;
|
unsigned int freq;
|
||||||
unsigned int mmap_pages;
|
unsigned int mmap_pages;
|
||||||
unsigned int user_freq;
|
unsigned int user_freq;
|
||||||
|
int branch_stack;
|
||||||
u64 default_interval;
|
u64 default_interval;
|
||||||
u64 user_interval;
|
u64 user_interval;
|
||||||
const char *cpu_list;
|
const char *cpu_list;
|
||||||
|
@@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
|
|||||||
attr->watermark = 0;
|
attr->watermark = 0;
|
||||||
attr->wakeup_events = 1;
|
attr->wakeup_events = 1;
|
||||||
}
|
}
|
||||||
|
if (opts->branch_stack) {
|
||||||
|
attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
|
||||||
|
attr->branch_sample_type = opts->branch_stack;
|
||||||
|
}
|
||||||
|
|
||||||
attr->mmap = track;
|
attr->mmap = track;
|
||||||
attr->comm = track;
|
attr->comm = track;
|
||||||
|
Reference in New Issue
Block a user