Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (106 commits)
  perf kvm: Fix copy & paste error in description
  perf script: Kill script_spec__delete
  perf top: Fix a memory leak
  perf stat: Introduce get_ratio_color() helper
  perf session: Remove impossible condition check
  perf tools: Fix feature-bits rework fallout, remove unused variable
  perf script: Add generic perl handler to process events
  perf tools: Use for_each_set_bit() to iterate over feature flags
  perf tools: Unify handling of features when writing feature section
  perf report: Accept fifos as input file
  perf tools: Moving code in some files
  perf tools: Fix out-of-bound access to struct perf_session
  perf tools: Continue processing header on unknown features
  perf tools: Improve macros for struct feature_ops
  perf: builtin-record: Document and check that mmap_pages must be a power of two.
  perf: builtin-record: Provide advice if mmap'ing fails with EPERM.
  perf tools: Fix truncated annotation
  perf script: look up thread using tid instead of pid
  perf tools: Look up thread names for system wide profiling
  perf tools: Fix comm for processes with named threads
  ...
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
        return event->pmu == &pmu;
}

/*
 * Event scheduler state:
 *
 * Assign events iterating over all events and counters, beginning
 * with events with least weights first. Keep the current iterator
 * state in struct sched_state.
 */
struct sched_state {
        int weight;
        int event;      /* event index */
        int counter;    /* counter index */
        int unassigned; /* number of events to be assigned left */
        unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};

/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
#define SCHED_STATES_MAX 2

struct perf_sched {
        int max_weight;
        int max_events;
        struct event_constraint **constraints;
        struct sched_state state;
        int saved_states;
        struct sched_state saved[SCHED_STATES_MAX];
};

/*
 * Initialize iterator that runs through all events and counters.
 */
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
                            int num, int wmin, int wmax)
{
        int idx;

        memset(sched, 0, sizeof(*sched));
        sched->max_events = num;
        sched->max_weight = wmax;
        sched->constraints = c;

        for (idx = 0; idx < num; idx++) {
                if (c[idx]->weight == wmin)
                        break;
        }

        sched->state.event = idx; /* start with min weight */
        sched->state.weight = wmin;
        sched->state.unassigned = num;
}

static void perf_sched_save_state(struct perf_sched *sched)
{
        if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
                return;

        sched->saved[sched->saved_states] = sched->state;
        sched->saved_states++;
}

static bool perf_sched_restore_state(struct perf_sched *sched)
{
        if (!sched->saved_states)
                return false;

        sched->saved_states--;
        sched->state = sched->saved[sched->saved_states];

        /* continue with next counter: */
        clear_bit(sched->state.counter++, sched->state.used);

        return true;
}

/*
 * Select a counter for the current event to schedule. Return true on
 * success.
 */
static bool __perf_sched_find_counter(struct perf_sched *sched)
{
        struct event_constraint *c;
        int idx;

        if (!sched->state.unassigned)
                return false;

        if (sched->state.event >= sched->max_events)
                return false;

        c = sched->constraints[sched->state.event];

        /* Prefer fixed purpose counters */
        if (x86_pmu.num_counters_fixed) {
                idx = X86_PMC_IDX_FIXED;
                for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
                        if (!__test_and_set_bit(idx, sched->state.used))
                                goto done;
                }
        }
        /* Grab the first unused counter starting with idx */
        idx = sched->state.counter;
        for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
                if (!__test_and_set_bit(idx, sched->state.used))
                        goto done;
        }

        return false;

done:
        sched->state.counter = idx;

        if (c->overlap)
                perf_sched_save_state(sched);

        return true;
}

static bool perf_sched_find_counter(struct perf_sched *sched)
{
        while (!__perf_sched_find_counter(sched)) {
                if (!perf_sched_restore_state(sched))
                        return false;
        }

        return true;
}

/*
 * Go through all unassigned events and find the next one to schedule.
 * Take events with the least weight first. Return true on success.
 */
static bool perf_sched_next_event(struct perf_sched *sched)
{
        struct event_constraint *c;

        if (!sched->state.unassigned || !--sched->state.unassigned)
                return false;

        do {
                /* next event */
                sched->state.event++;
                if (sched->state.event >= sched->max_events) {
                        /* next weight */
                        sched->state.event = 0;
                        sched->state.weight++;
                        if (sched->state.weight > sched->max_weight)
                                return false;
                }
                c = sched->constraints[sched->state.event];
        } while (c->weight != sched->state.weight);

        sched->state.counter = 0; /* start with first counter */

        return true;
}

/*
 * Assign a counter for each event.
 */
static int perf_assign_events(struct event_constraint **constraints, int n,
                              int wmin, int wmax, int *assign)
{
        struct perf_sched sched;

        perf_sched_init(&sched, constraints, n, wmin, wmax);

        do {
                if (!perf_sched_find_counter(&sched))
                        break; /* failed */
                if (assign)
                        assign[sched.state.event] = sched.state.counter;
        } while (perf_sched_next_event(&sched));

        return sched.state.unassigned;
}
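The scheduler added above is deliberately simple: events are visited in order of increasing constraint weight, each one takes the first free counter its mask allows, and only constraints flagged as overlapping save their state so the search can back up and retry. The following standalone userspace sketch illustrates that strategy outside the kernel; the types, helper names and the AMD-like example masks are illustrative assumptions, not kernel code.

/*
 * Minimal userspace sketch of weight-ordered greedy assignment with
 * backtracking for "overlap" constraints (illustration only, not kernel code).
 */
#include <stdbool.h>
#include <stdio.h>

#define NCOUNTERS 6

struct constraint {
        unsigned long idxmsk;   /* allowed counters, one bit per counter */
        int weight;             /* number of bits set in idxmsk */
        bool overlap;           /* mask is not a subset of an equal/higher-weight mask */
};

static int assign[16];

/* Place event i and all events after it; events are already sorted by weight. */
static bool place(const struct constraint *c, int n, int i, unsigned long used)
{
        int idx;

        if (i == n)
                return true;

        for (idx = 0; idx < NCOUNTERS; idx++) {
                if (!(c[i].idxmsk & (1UL << idx)) || (used & (1UL << idx)))
                        continue;
                assign[i] = idx;
                if (place(c, n, i + 1, used | (1UL << idx)))
                        return true;
                if (!c[i].overlap)      /* only overlap constraints retry other counters */
                        return false;
        }
        return false;
}

int main(void)
{
        /* One weight-2 overlap constraint plus three weight-3 events. */
        const struct constraint c[] = {
                { 0x09, 2, true  },     /* counters {0,3} */
                { 0x07, 3, false },     /* counters {0,1,2} */
                { 0x07, 3, false },
                { 0x07, 3, false },
        };
        int i;

        if (place(c, 4, 0, 0)) {
                for (i = 0; i < 4; i++)
                        printf("event %d -> counter %d\n", i, assign[i]);
        } else {
                printf("scheduling failed\n");
        }
        return 0;
}

Running the sketch shows the weight-2 event being bounced from counter 0 to counter 3 so that the three weight-3 events can still be placed, which is exactly the situation the overlap flag and the saved/restored sched_state exist for.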

int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
        struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
        unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        int i, j, w, wmax, num = 0;
        int i, wmin, wmax, num = 0;
        struct hw_perf_event *hwc;

        bitmap_zero(used_mask, X86_PMC_IDX_MAX);

        for (i = 0; i < n; i++) {
        for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
                constraints[i] = c;
                wmin = min(wmin, c->weight);
                wmax = max(wmax, c->weight);
        }

        /*
@@ -521,59 +698,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                if (assign)
                        assign[i] = hwc->idx;
        }
        if (i == n)
                goto done;

        /*
         * begin slow path
         */
        /* slow path */
        if (i != n)
                num = perf_assign_events(constraints, n, wmin, wmax, assign);

        bitmap_zero(used_mask, X86_PMC_IDX_MAX);

        /*
         * weight = number of possible counters
         *
         * 1 = most constrained, only works on one counter
         * wmax = least constrained, works on any counter
         *
         * assign events to counters starting with most
         * constrained events.
         */
        wmax = x86_pmu.num_counters;

        /*
         * when fixed event counters are present,
         * wmax is incremented by 1 to account
         * for one more choice
         */
        if (x86_pmu.num_counters_fixed)
                wmax++;

        for (w = 1, num = n; num && w <= wmax; w++) {
                /* for each event */
                for (i = 0; num && i < n; i++) {
                        c = constraints[i];
                        hwc = &cpuc->event_list[i]->hw;

                        if (c->weight != w)
                                continue;

                        for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
                                if (!test_bit(j, used_mask))
                                        break;
                        }

                        if (j == X86_PMC_IDX_MAX)
                                break;

                        __set_bit(j, used_mask);

                        if (assign)
                                assign[i] = j;
                        num--;
                }
        }
done:
        /*
         * scheduling failed or is just a simulation,
         * free resources if necessary
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)

static int __init init_hw_perf_events(void)
{
        struct x86_pmu_quirk *quirk;
        struct event_constraint *c;
        int err;

@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)

        pr_cont("%s PMU driver.\n", x86_pmu.name);

        if (x86_pmu.quirks)
                x86_pmu.quirks();
        for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
                quirk->func();

        if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
                WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)

        unconstrained = (struct event_constraint)
                __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
                                   0, x86_pmu.num_counters);
                                   0, x86_pmu.num_counters, 0);

        if (x86_pmu.event_constraints) {
                /*
                 * event on fixed counter2 (REF_CYCLES) only works on this
                 * counter, so do not extend mask to generic counters
                 */
                for_each_event_constraint(c, x86_pmu.event_constraints) {
                        if (c->cmask != X86_RAW_EVENT_MASK)
                        if (c->cmask != X86_RAW_EVENT_MASK
                            || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
                                continue;
                        }

                        c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
                        c->weight += x86_pmu.num_counters;
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)

        return misc;
}

void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
        cap->version = x86_pmu.version;
        cap->num_counters_gp = x86_pmu.num_counters;
        cap->num_counters_fixed = x86_pmu.num_counters_fixed;
        cap->bit_width_gp = x86_pmu.cntval_bits;
        cap->bit_width_fixed = x86_pmu.cntval_bits;
        cap->events_mask = (unsigned int)x86_pmu.events_maskl;
        cap->events_mask_len = x86_pmu.events_mask_len;
}
EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);

@@ -45,6 +45,7 @@ struct event_constraint {
        u64 code;
        u64 cmask;
        int weight;
        int overlap;
};

struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
        void *kfree_on_online;
};

#define __EVENT_CONSTRAINT(c, n, m, w) {\
#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
        { .idxmsk64 = (n) }, \
        .code = (c), \
        .cmask = (m), \
        .weight = (w), \
        .overlap = (o), \
}

#define EVENT_CONSTRAINT(c, n, m) \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)

/*
 * The overlap flag marks event constraints with overlapping counter
 * masks. This is the case if the counter mask of such an event is not
 * a subset of any other counter mask of a constraint with an equal or
 * higher weight, e.g.:
 *
 * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
 * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
 * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
 *
 * The event scheduler may not select the correct counter in the first
 * cycle because it needs to know which subsequent events will be
 * scheduled. It may fail to schedule the events then. So we set the
 * overlap flag for such constraints to give the scheduler a hint which
 * events to select for counter rescheduling.
 *
 * Care must be taken as the rescheduling algorithm is O(n!) which
 * will increase scheduling cycles for an over-committed system
 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
 * and their counter masks must be kept at a minimum.
 */
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
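The subset rule in the comment above can be checked mechanically. A small standalone sketch (an illustration, not kernel code) applies it to the example masks: 0x09 is contained in neither 0x07 nor 0x38, so it is the constraint that needs EVENT_CONSTRAINT_OVERLAP().

#include <stdbool.h>
#include <stdio.h>

/* a needs no overlap handling with respect to b if every counter in a is also in b */
static bool is_subset(unsigned long a, unsigned long b)
{
        return (a & b) == a;
}

int main(void)
{
        unsigned long overlap = 0x09;   /* counters {0,3} */
        unsigned long other1  = 0x07;   /* counters {0,1,2} */
        unsigned long other2  = 0x38;   /* counters {3,4,5} */

        printf("0x09 subset of 0x07: %d\n", is_subset(overlap, other1));
        printf("0x09 subset of 0x38: %d\n", is_subset(overlap, other2));
        return 0;
}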

/*
 * Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
        u64 capabilities;
};

struct x86_pmu_quirk {
        struct x86_pmu_quirk *next;
        void (*func)(void);
};

/*
 * struct x86_pmu - generic x86 pmu
 */
@@ -259,6 +290,11 @@ struct x86_pmu {
        int num_counters_fixed;
        int cntval_bits;
        u64 cntval_mask;
        union {
                unsigned long events_maskl;
                unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
        };
        int events_mask_len;
        int apic;
        u64 max_period;
        struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
        void (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                      struct perf_event *event);
        struct event_constraint *event_constraints;
        void (*quirks)(void);
        struct x86_pmu_quirk *quirks;
        int perfctr_second_write;

        int (*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
        struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
};

#define x86_add_quirk(func_) \
do { \
        static struct x86_pmu_quirk __quirk __initdata = { \
                .func = func_, \
        }; \
        __quirk.next = x86_pmu.quirks; \
        x86_pmu.quirks = &__quirk; \
} while (0)
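The macro above is the whole registration mechanism: each call site supplies a static __initdata node and pushes it onto a singly linked list that init_hw_perf_events() later walks (see the quirk loop earlier in this diff). Here is a minimal userspace sketch of the same pattern, with hypothetical names:

#include <stdio.h>

struct quirk {
        struct quirk *next;
        void (*func)(void);
};

static struct quirk *quirk_list;

#define add_quirk(func_)                                \
do {                                                    \
        static struct quirk __q = { .func = func_ };    \
        __q.next = quirk_list;                          \
        quirk_list = &__q;                              \
} while (0)

static void quirk_a(void) { puts("quirk A"); }
static void quirk_b(void) { puts("quirk B"); }

int main(void)
{
        struct quirk *q;

        add_quirk(quirk_a);
        add_quirk(quirk_b);     /* registered last, runs first */

        for (q = quirk_list; q; q = q->next)
                q->func();
        return 0;
}

Because nodes are pushed at the head, quirks run in reverse registration order; that is why intel_pmu_init() below installs intel_arch_events_quirk first so that it runs last.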

#define ERF_NO_HT_SHARING 1
#define ERF_HAS_RSP_1 2

@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);

@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
        [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
        [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
        [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
};

static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
        /*
         * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
         * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
         * ratio between these counters.
         */
        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
        INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
        INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
        INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
        INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
{
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
        INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
        INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
        INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        EVENT_CONSTRAINT_END
};

@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
        .guest_get_msrs = intel_guest_get_msrs,
};

static void intel_clovertown_quirks(void)
static __init void intel_clovertown_quirk(void)
{
        /*
         * PEBS is unreliable due to:
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void)
        x86_pmu.pebs_constraints = NULL;
}

static void intel_sandybridge_quirks(void)
static __init void intel_sandybridge_quirk(void)
{
        printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
        x86_pmu.pebs = 0;
        x86_pmu.pebs_constraints = NULL;
}

static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
        { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
        { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
        { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
        { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
        { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
        { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
        { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

static __init void intel_arch_events_quirk(void)
{
        int bit;

        /* disable events that are reported as not present by cpuid */
        for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
                intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
                printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
                       intel_arch_events_map[bit].name);
        }
}
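The quirk above keys off CPUID leaf 0xA, where a set bit in EBX means the corresponding architectural event is not available on this CPU. A userspace sketch that reads the same leaf (assuming an x86 machine and the GCC/Clang <cpuid.h> helper) looks like this:

#include <cpuid.h>
#include <stdio.h>

static const char *arch_events[] = {
        "core cycles", "instructions retired", "reference cycles",
        "LLC references", "LLC misses", "branch instructions", "branch misses",
};

int main(void)
{
        unsigned int eax, ebx, ecx, edx, len, i;

        if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx) || !(eax & 0xff)) {
                puts("architectural perfmon not supported");
                return 1;
        }

        len = (eax >> 24) & 0xff;       /* number of valid bits in EBX */
        for (i = 0; i < len && i < 7; i++)
                printf("%-22s %s\n", arch_events[i],
                       (ebx & (1u << i)) ? "unavailable" : "available");
        return 0;
}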

static __init void intel_nehalem_quirk(void)
{
        union cpuid10_ebx ebx;

        ebx.full = x86_pmu.events_maskl;
        if (ebx.split.no_branch_misses_retired) {
                /*
                 * Erratum AAJ80 detected, we work it around by using
                 * the BR_MISP_EXEC.ANY event. This will over-count
                 * branch-misses, but it's still much better than the
                 * architectural event which is often completely bogus:
                 */
                intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
                ebx.split.no_branch_misses_retired = 0;
                x86_pmu.events_maskl = ebx.full;
                printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
        }
}

__init int intel_pmu_init(void)
{
        union cpuid10_edx edx;
        union cpuid10_eax eax;
        union cpuid10_ebx ebx;
        unsigned int unused;
        unsigned int ebx;
        int version;

        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void)
         * Check whether the Architectural PerfMon supports
         * Branch Misses Retired hw_event or not.
         */
        cpuid(10, &eax.full, &ebx, &unused, &edx.full);
        if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
        cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
        if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
                return -ENODEV;

        version = eax.split.version_id;
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void)
        x86_pmu.cntval_bits = eax.split.bit_width;
        x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;

        x86_pmu.events_maskl = ebx.full;
        x86_pmu.events_mask_len = eax.split.mask_length;

        /*
         * Quirk: v2 perfmon does not report fixed-purpose events, so
         * assume at least 3 events:
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void)

        intel_ds_init();

        x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */

        /*
         * Install the hw-cache-events table:
         */
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void)
                break;

        case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
                x86_pmu.quirks = intel_clovertown_quirks;
                x86_add_quirk(intel_clovertown_quirk);
        case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
        case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
        case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void)
                /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
                intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;

                if (ebx & 0x40) {
                        /*
                         * Erratum AAJ80 detected, we work it around by using
                         * the BR_MISP_EXEC.ANY event. This will over-count
                         * branch-misses, but it's still much better than the
                         * architectural event which is often completely bogus:
                         */
                        intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
                x86_add_quirk(intel_nehalem_quirk);

                        pr_cont("erratum AAJ80 worked around, ");
                }
                pr_cont("Nehalem events, ");
                break;

@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void)
                break;

        case 42: /* SandyBridge */
                x86_pmu.quirks = intel_sandybridge_quirks;
                x86_add_quirk(intel_sandybridge_quirk);
        case 45: /* SandyBridge, "Romely-EP" */
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void)
                break;
        }
        }

        return 0;
}

@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
        put_online_cpus();
}

void arch_jump_label_transform_static(struct jump_entry *entry,
__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
                                      enum jump_label_type type)
{
        __jump_label_transform(entry, type, text_poke_early);