perf_events: Update Intel extra regs shared constraints management
This patch improves the code managing the extra shared registers used for offcore_response events on Intel Nehalem/Westmere. The idea is to use static allocation instead of dynamic allocation. This greatly simplifies the get and put constraint routines for those events.

The patch also renames per_core to shared_regs, because the same data structure is used whether or not HT is on. When HT is off, those events still need coordination, because they use an extra MSR that has to be shared within an event group.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
commit efc9f05df2
parent a7ac67ea02
committed by Ingo Molnar
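The scheme the changelog describes can be summarized in a short standalone sketch before reading the diff. This is a hedged userspace approximation, not kernel code: raw_spinlock_t and atomic_t are replaced with pthread/C11 stand-ins, the MSR-number field is dropped, and the get/put constraint logic is reduced to the sharing rule itself (a statically allocated slot may be taken when it is free or already holds the same config).

/*
 * Hedged userspace sketch of the statically allocated, reference-counted
 * shared-regs scheme described above. Names mirror the patch; primitives
 * and logic are simplified.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum extra_reg_type {
        EXTRA_REG_NONE  = -1,   /* not used */
        EXTRA_REG_RSP_0 =  0,   /* offcore_response_0 */
        EXTRA_REG_RSP_1 =  1,   /* offcore_response_1 */
        EXTRA_REG_MAX           /* number of entries needed */
};

struct er_account {
        pthread_mutex_t lock;   /* protects config/ref below */
        uint64_t        config; /* extra MSR value currently in use */
        atomic_int      ref;    /* #events sharing this slot */
};

/* one statically sized table per core (or per cpu; see the table in the diff) */
struct intel_shared_regs {
        struct er_account regs[EXTRA_REG_MAX];
};

static struct intel_shared_regs core0 = {
        .regs = {
                [EXTRA_REG_RSP_0] = { .lock = PTHREAD_MUTEX_INITIALIZER },
                [EXTRA_REG_RSP_1] = { .lock = PTHREAD_MUTEX_INITIALIZER },
        },
};

/*
 * "get constraint": the slot can be taken if it is free or already
 * programmed with the same config -- the sharing rule enforced for
 * offcore_response events within a group or across HT siblings.
 */
static bool shared_reg_get(struct intel_shared_regs *sr, int idx, uint64_t config)
{
        struct er_account *era = &sr->regs[idx];
        bool ok = false;

        pthread_mutex_lock(&era->lock);
        if (atomic_load(&era->ref) == 0 || era->config == config) {
                era->config = config;
                atomic_fetch_add(&era->ref, 1);
                ok = true;
        }
        pthread_mutex_unlock(&era->lock);
        return ok;
}

/* "put constraint": just drop the reference; nothing to free */
static void shared_reg_put(struct intel_shared_regs *sr, int idx)
{
        atomic_fetch_sub(&sr->regs[idx].ref, 1);
}

int main(void)
{
        printf("%d\n", shared_reg_get(&core0, EXTRA_REG_RSP_0, 0x01)); /* 1: slot free   */
        printf("%d\n", shared_reg_get(&core0, EXTRA_REG_RSP_0, 0x01)); /* 1: same config */
        printf("%d\n", shared_reg_get(&core0, EXTRA_REG_RSP_0, 0x02)); /* 0: conflict    */
        shared_reg_put(&core0, EXTRA_REG_RSP_0);
        shared_reg_put(&core0, EXTRA_REG_RSP_0);
        return 0;
}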
@@ -44,6 +44,29 @@ do { \
 } while (0)
 #endif
 
+/*
+ *          |   NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+        EXTRA_REG_NONE  = -1,   /* not used */
+
+        EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
+        EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
+
+        EXTRA_REG_MAX           /* number of entries needed */
+};
 
 /*
  * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
  */
@@ -132,11 +155,10 @@ struct cpu_hw_events {
         struct perf_branch_entry       lbr_entries[MAX_LBR_ENTRIES];
 
         /*
-        * Intel percore register state.
-        * Coordinate shared resources between HT threads.
+        * manage shared (per-core, per-cpu) registers
+        * used on Intel NHM/WSM/SNB
         */
-        int                            percore_used; /* Used by this CPU? */
-        struct intel_percore           *per_core;
+        struct intel_shared_regs       *shared_regs;
 
         /*
          * AMD specific bits
@@ -186,27 +208,46 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)        \
         for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Per register state.
+ */
+struct er_account {
+        raw_spinlock_t         lock;   /* per-core: protect structure */
+        u64                    config; /* extra MSR config */
+        u64                    reg;    /* extra MSR number */
+        atomic_t               ref;    /* reference count */
+};
+
 /*
  * Extra registers for specific events.
+ *
  * Some events need large masks and require external MSRs.
- * Define a mapping to these extra registers.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMU of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_event.
  */
 struct extra_reg {
         unsigned int           event;
         unsigned int           msr;
         u64                    config_mask;
         u64                    valid_mask;
+        int                    idx;  /* per_xxx->regs[] reg index */
 };
 
-#define EVENT_EXTRA_REG(e, ms, m, vm) {        \
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {     \
         .event = (e),          \
         .msr = (ms),           \
         .config_mask = (m),    \
         .valid_mask = (vm),    \
+        .idx = EXTRA_REG_##i   \
         }
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm)  \
-        EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)     \
+        EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
         struct {
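For reference, the reworked macros are consumed by per-model extra_reg tables elsewhere in this patch series. A plausible Nehalem table looks like the following; this is a reconstruction, and the 0xb7 offcore event code and 0xffff valid mask are assumptions based on the NHM offcore_response layout. Note that the EVENT_EXTRA_END sentinel now needs an index too, hence the RSP_0 placeholder in its definition above.

static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
        /* OFFCORE_RESPONSE_0 (event 0xb7) steers its mask into MSR_OFFCORE_RSP_0 */
        INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
        EVENT_EXTRA_END
};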
@@ -253,7 +294,6 @@ struct x86_pmu {
         void           (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
         struct event_constraint *event_constraints;
-        struct event_constraint *percore_constraints;
         void           (*quirks)(void);
         int            perfctr_second_write;
 
@@ -400,10 +440,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
  */
 static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 {
+        struct hw_perf_event_extra *reg;
         struct extra_reg *er;
 
-        event->hw.extra_reg = 0;
-        event->hw.extra_config = 0;
+        reg = &event->hw.extra_reg;
 
         if (!x86_pmu.extra_regs)
                 return 0;
@@ -413,8 +453,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
                         continue;
                 if (event->attr.config1 & ~er->valid_mask)
                         return -EINVAL;
-                event->hw.extra_reg = er->msr;
-                event->hw.extra_config = event->attr.config1;
+
+                reg->idx = er->idx;
+                reg->config = event->attr.config1;
+                reg->reg = er->msr;
                 break;
         }
         return 0;
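The new reg pointer refers to a struct hw_perf_event_extra embedded in the event, replacing the two loose extra_reg/extra_config scalars. Judging from the three fields assigned in this hunk, its shape is roughly the following; this is a reconstruction, and the alloc field is an assumption not visible in this excerpt.

struct hw_perf_event_extra {
        u64            config; /* register value */
        unsigned int   reg;    /* register address or index */
        int            alloc;  /* extra register already allocated */
        int            idx;    /* index in shared_regs->regs[] */
};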
@@ -713,6 +755,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
         event->hw.last_cpu = -1;
         event->hw.last_tag = ~0ULL;
 
+        /* mark unused */
+        event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
         return x86_pmu.hw_config(event);
 }
 
@@ -754,8 +799,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
                                           u64 enable_mask)
 {
-        if (hwc->extra_reg)
-                wrmsrl(hwc->extra_reg, hwc->extra_config);
+        if (hwc->extra_reg.reg)
+                wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
         wrmsrl(hwc->config_base, hwc->config | enable_mask);
 }
 
@@ -1692,7 +1737,6 @@ static int validate_group(struct perf_event *event)
         fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
         if (!fake_cpuc)
                 goto out;
-
         /*
          * the event is not yet connected with its
          * siblings therefore we must first collect
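The page cuts this last hunk off mid-comment. For context only, the body of validate_group() in kernels of this vintage continues roughly as below; this is a reconstruction, not part of the visible diff: collect the leader's existing siblings into the fake cpuc, add the new event, then dry-run the scheduler on the combined set.

        /* collect the leader's existing siblings into the fake cpuc */
        n = collect_events(fake_cpuc, leader, true);
        if (n < 0)
                goto out_free;

        /* then add the new event on top of them */
        fake_cpuc->n_events = n;
        n = collect_events(fake_cpuc, event, false);
        if (n < 0)
                goto out_free;

        /* simulate scheduling; succeeds only if the whole group fits */
        fake_cpuc->n_events = n;
        ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);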