perf/x86: Implement PERF_SAMPLE_BRANCH for Intel CPUs
This patch implements PERF_SAMPLE_BRANCH support for Intel x86 processors. It connects PERF_SAMPLE_BRANCH to the actual LBR.

The patch adds the hooks in the PMU irq handler to save the LBR on counter overflow for both regular and PEBS modes.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-8-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
commit 60ce0fbd07 (parent 88c9a65e13), committed by Ingo Molnar
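For reference, this is how the new sampling mode looks from user space once the series is in place. A minimal sketch assuming the Linux 3.4-era perf_event_open() ABI; the counter, period and branch filter chosen here are illustrative only and are not taken from this patch:

/* hypothetical usage example, not part of the patch */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_branch_sampling_counter(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size          = sizeof(attr);
        attr.type          = PERF_TYPE_HARDWARE;
        attr.config        = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100000;
        /* capture the LBR-backed branch stack with each sample */
        attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
        /* all branch types, user level only (illustrative filter) */
        attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                  PERF_SAMPLE_BRANCH_USER;

        /* monitor the calling thread on any CPU */
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

The branch filter requested through branch_sample_type is what intel_pmu_setup_lbr_filter(), added below, translates into an LBR_SELECT configuration.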
@@ -541,6 +541,8 @@ void intel_pmu_lbr_init_atom(void);
 
 void intel_pmu_lbr_init_snb(void);
 
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
@@ -727,6 +727,19 @@ static __initconst const u64 atom_hw_cache_event_ids
  },
 };
 
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+        /* user explicitly requested branch sampling */
+        if (has_branch_stack(event))
+                return true;
+
+        /* implicit branch sampling to correct PEBS skid */
+        if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+                return true;
+
+        return false;
+}
+
 static void intel_pmu_disable_all(void)
 {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -881,6 +894,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
         cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
         cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 
+        /*
+         * must disable before any actual event
+         * because any event may be combined with LBR
+         */
+        if (intel_pmu_needs_lbr_smpl(event))
+                intel_pmu_lbr_disable(event);
+
         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                 intel_pmu_disable_fixed(hwc);
                 return;
@@ -935,6 +955,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
                 intel_pmu_enable_bts(hwc->config);
                 return;
         }
+        /*
+         * must enabled before any actual event
+         * because any event may be combined with LBR
+         */
+        if (intel_pmu_needs_lbr_smpl(event))
+                intel_pmu_lbr_enable(event);
 
         if (event->attr.exclude_host)
                 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1057,6 +1083,9 @@ again:
 
                 data.period = event->hw.last_period;
 
+                if (has_branch_stack(event))
+                        data.br_stack = &cpuc->lbr_stack;
+
                 if (perf_event_overflow(event, &data, regs))
                         x86_pmu_stop(event, 0);
         }
@@ -1305,6 +1334,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
                 event->hw.config = alt_config;
         }
 
+        if (intel_pmu_needs_lbr_smpl(event)) {
+                ret = intel_pmu_setup_lbr_filter(event);
+                if (ret)
+                        return ret;
+        }
+
         if (event->attr.type != PERF_TYPE_RAW)
                 return 0;
 
@@ -439,9 +439,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
         hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
         cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
-        if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-                intel_pmu_lbr_enable(event);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +451,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
         wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
         hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-        if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-                intel_pmu_lbr_disable(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
@@ -572,6 +566,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
          * both formats and we don't use the other fields in this
          * routine.
          */
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         struct pebs_record_core *pebs = __pebs;
         struct perf_sample_data data;
         struct pt_regs regs;
@@ -602,6 +597,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
         else
                 regs.flags &= ~PERF_EFLAGS_EXACT;
 
+        if (has_branch_stack(event))
+                data.br_stack = &cpuc->lbr_stack;
+
         if (perf_event_overflow(event, &data, &regs))
                 x86_pmu_stop(event, 0);
 }
@@ -56,6 +56,10 @@ enum {
 
 #define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
 
+#define for_each_branch_sample_type(x) \
+        for ((x) = PERF_SAMPLE_BRANCH_USER; \
+             (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
 /*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
  * otherwise it becomes near impossible to get a reliable stack.
@@ -64,6 +68,10 @@ enum {
 static void __intel_pmu_lbr_enable(void)
 {
         u64 debugctl;
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (cpuc->lbr_sel)
+                wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
 
         rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
         debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -119,7 +127,6 @@ void intel_pmu_lbr_enable(struct perf_event *event)
          * Reset the LBR stack if we changed task context to
          * avoid data leaks.
          */
-
         if (event->ctx->task && cpuc->lbr_context != event->ctx) {
                 intel_pmu_lbr_reset();
                 cpuc->lbr_context = event->ctx;
@@ -138,8 +145,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
         cpuc->lbr_users--;
         WARN_ON_ONCE(cpuc->lbr_users < 0);
 
-        if (cpuc->enabled && !cpuc->lbr_users)
+        if (cpuc->enabled && !cpuc->lbr_users) {
                 __intel_pmu_lbr_disable();
+                /* avoid stale pointer */
+                cpuc->lbr_context = NULL;
+        }
 }
 
 void intel_pmu_lbr_enable_all(void)
@@ -158,6 +168,9 @@ void intel_pmu_lbr_disable_all(void)
                 __intel_pmu_lbr_disable();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
 static inline u64 intel_pmu_lbr_tos(void)
 {
         u64 tos;
@@ -241,6 +254,75 @@ void intel_pmu_lbr_read(void)
                 intel_pmu_lbr_read_64(cpuc);
 }
 
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+        struct hw_perf_event_extra *reg;
+        u64 br_type = event->attr.branch_sample_type;
+        u64 mask = 0, m;
+        u64 v;
+
+        for_each_branch_sample_type(m) {
+                if (!(br_type & m))
+                        continue;
+
+                v = x86_pmu.lbr_sel_map[m];
+                if (v == LBR_NOT_SUPP)
+                        return -EOPNOTSUPP;
+                mask |= v;
+
+                if (m == PERF_SAMPLE_BRANCH_ANY)
+                        break;
+        }
+        reg = &event->hw.branch_reg;
+        reg->idx = EXTRA_REG_LBR;
+
+        /* LBR_SELECT operates in suppress mode so invert mask */
+        reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+        return 0;
+}
+
+/*
+ * all the bits supported on some flavor of x86 LBR
+ * we ignore BRANCH_HV because it is not supported
+ */
+#define PERF_SAMPLE_BRANCH_X86_ALL      \
+        (PERF_SAMPLE_BRANCH_ANY         |\
+         PERF_SAMPLE_BRANCH_USER        |\
+         PERF_SAMPLE_BRANCH_KERNEL)
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+        u64 br_type = event->attr.branch_sample_type;
+
+        /*
+         * no LBR on this PMU
+         */
+        if (!x86_pmu.lbr_nr)
+                return -EOPNOTSUPP;
+
+        /*
+         * if no LBR HW filter, users can only
+         * capture all branches
+         */
+        if (!x86_pmu.lbr_sel_map) {
+                if (br_type != PERF_SAMPLE_BRANCH_X86_ALL)
+                        return -EOPNOTSUPP;
+                return 0;
+        }
+        /*
+         * we ignore branch priv levels we do not
+         * know about: BRANCH_HV
+         */
+
+        return intel_pmu_setup_hw_lbr_filter(event);
+}
+
 /*
  * Map interface branch filters onto LBR filters
  */
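A side note on the filter setup added above: LBR_SELECT works in suppress mode, so intel_pmu_setup_hw_lbr_filter() inverts the accumulated "capture these" bits and keeps only the bits covered by lbr_sel_mask. A toy, stand-alone sketch of that inversion (the TOY_* bit positions are made up for illustration and are not the kernel's LBR_* definitions):

#include <stdint.h>
#include <stdio.h>

/* made-up bit positions, for illustration only */
#define TOY_LBR_USER     (1ULL << 0)    /* branches ending at CPL > 0  */
#define TOY_LBR_KERNEL   (1ULL << 1)    /* branches ending at CPL == 0 */
#define TOY_LBR_JCC      (1ULL << 2)    /* conditional branches        */
#define TOY_LBR_SEL_MASK (TOY_LBR_USER | TOY_LBR_KERNEL | TOY_LBR_JCC)

int main(void)
{
        /* caller asked to capture user-level branches only */
        uint64_t mask = TOY_LBR_USER;

        /* suppress everything that was not requested; leave other bits clear */
        uint64_t lbr_select = ~mask & TOY_LBR_SEL_MASK;

        printf("LBR_SELECT = %#llx\n", (unsigned long long)lbr_select);
        return 0;
}

Masking with TOY_LBR_SEL_MASK mirrors the ~mask & x86_pmu.lbr_sel_mask expression in the patch, which keeps bits outside the supported filter field cleared before the value is written to MSR_LBR_SELECT.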