Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (27 commits)
  perf_counter: Zero dead bytes from ftrace raw samples size alignment
  perf_counter: Subtract the buffer size field from the event record size
  perf_counter: Require CAP_SYS_ADMIN for raw tracepoint data
  perf_counter: Correct PERF_SAMPLE_RAW output
  perf tools: callchain: Fix bad rounding of minimum rate
  perf_counter tools: Fix libbfd detection for systems with libz dependency
  perf: "Longum est iter per praecepta, breve et efficax per exempla"
  perf_counter: Fix a race on perf_counter_ctx
  perf_counter: Fix tracepoint sampling to be part of generic sampling
  perf_counter: Work around gcc warning by initializing tracepoint record unconditionally
  perf tools: callchain: Fix sum of percentages to be 100% by displaying amount of ignored chains in fractal mode
  perf tools: callchain: Fix 'perf report' display to be callchain by default
  perf tools: callchain: Fix spurious 'perf report' warnings: ignore empty callchains
  perf record: Fix the -A UI for empty or non-existent perf.data
  perf util: Fix do_read() to fail on EOF instead of busy-looping
  perf list: Fix the output to not include tracepoints without an id
  perf_counter/powerpc: Fix oops on cpus without perf_counter hardware support
  perf stat: Fix tool option consistency: rename -S/--scale to -c/--scale
  perf report: Add debug help for the finding of symbol bugs - show the symtab origin (DSO, build-id, kernel, etc)
  perf report: Fix per task mult-counter stat reporting
  ...
@@ -2646,7 +2646,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		u64 counter;
 	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
-	struct perf_tracepoint_record *tp;
 	int callchain_size = 0;
 	u64 time;
 	struct {
@@ -2715,9 +2714,16 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		header.size += sizeof(u64);
 	}

-	if (sample_type & PERF_SAMPLE_TP_RECORD) {
-		tp = data->private;
-		header.size += tp->size;
+	if (sample_type & PERF_SAMPLE_RAW) {
+		int size = sizeof(u32);
+
+		if (data->raw)
+			size += data->raw->size;
+		else
+			size += sizeof(u32);
+
+		WARN_ON_ONCE(size & (sizeof(u64)-1));
+		header.size += size;
 	}

 	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
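
The raw-sample size accounting above treats the record as a u32 length prefix followed by the payload, and the WARN_ON_ONCE insists the total stays u64-aligned; producers therefore hand in payloads whose length is congruent to 4 mod 8, and the "Zero dead bytes" commit in this merge zeroes the padding used to get there. A minimal user-space sketch of the same arithmetic (plain stdint types and a hypothetical raw_sample_bytes() name, not kernel code):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t raw_sample_bytes(uint32_t payload)
    {
        uint32_t size = sizeof(uint32_t);       /* the u32 length prefix */

        if (payload)
            size += payload;                    /* payload, pre-padded by the producer */
        else
            size += sizeof(uint32_t);           /* zeroed u32 placeholder */

        /* mirrors WARN_ON_ONCE(size & (sizeof(u64)-1)) in the hunk above */
        assert((size & (sizeof(uint64_t) - 1)) == 0);
        return size;
    }

    int main(void)
    {
        printf("%u\n", raw_sample_bytes(0));    /* 8: prefix + placeholder */
        printf("%u\n", raw_sample_bytes(12));   /* 16: 12 == 4 (mod 8), so aligned */
        return 0;
    }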
@@ -2783,8 +2789,21 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		}
 	}

-	if (sample_type & PERF_SAMPLE_TP_RECORD)
-		perf_output_copy(&handle, tp->record, tp->size);
+	if (sample_type & PERF_SAMPLE_RAW) {
+		if (data->raw) {
+			perf_output_put(&handle, data->raw->size);
+			perf_output_copy(&handle, data->raw->data, data->raw->size);
+		} else {
+			struct {
+				u32	size;
+				u32	data;
+			} raw = {
+				.size = sizeof(u32),
+				.data = 0,
+			};
+			perf_output_put(&handle, raw);
+		}
+	}

 	perf_output_end(&handle);
 }
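
The output side emits exactly that layout: a u32 byte count, then the bytes, with a zeroed placeholder u32 when PERF_SAMPLE_RAW was requested but no raw data was attached. A consumer walking the mmap'ed ring buffer could peel the field off as below (hypothetical helper, not part of the perf ABI headers):

    #include <stdint.h>
    #include <string.h>

    /* 'p' points at the PERF_SAMPLE_RAW region inside a sample record;
     * returns a pointer to the payload and stores its length in *len. */
    static const void *read_raw(const void *p, uint32_t *len)
    {
        uint32_t size;

        memcpy(&size, p, sizeof(size));             /* u32 length prefix */
        *len = size;
        return (const uint8_t *)p + sizeof(size);   /* payload follows */
    }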
@@ -2849,7 +2868,8 @@ perf_counter_read_event(struct perf_counter *counter,
  */

 struct perf_task_event {
-	struct task_struct	*task;
+	struct task_struct		*task;
+	struct perf_counter_context	*task_ctx;

 	struct {
 		struct perf_event_header	header;
@@ -2909,24 +2929,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx,
 static void perf_counter_task_event(struct perf_task_event *task_event)
 {
 	struct perf_cpu_context *cpuctx;
-	struct perf_counter_context *ctx;
+	struct perf_counter_context *ctx = task_event->task_ctx;

 	cpuctx = &get_cpu_var(perf_cpu_context);
 	perf_counter_task_ctx(&cpuctx->ctx, task_event);
 	put_cpu_var(perf_cpu_context);

 	rcu_read_lock();
-	/*
-	 * doesn't really matter which of the child contexts the
-	 * events ends up in.
-	 */
-	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (!ctx)
+		ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
 	if (ctx)
 		perf_counter_task_ctx(ctx, task_event);
 	rcu_read_unlock();
 }

-static void perf_counter_task(struct task_struct *task, int new)
+static void perf_counter_task(struct task_struct *task,
+			      struct perf_counter_context *task_ctx,
+			      int new)
 {
 	struct perf_task_event task_event;

@@ -2936,8 +2955,9 @@ static void perf_counter_task(struct task_struct *task, int new)
 		return;

 	task_event = (struct perf_task_event){
-		.task	= task,
-		.event	= {
+		.task     = task,
+		.task_ctx = task_ctx,
+		.event    = {
 			.header = {
 				.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
 				.misc = 0,
@@ -2955,7 +2975,7 @@ static void perf_counter_task(struct task_struct *task, int new)

 void perf_counter_fork(struct task_struct *task)
 {
-	perf_counter_task(task, 1);
+	perf_counter_task(task, NULL, 1);
 }

 /*
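
The thread running through the hunks above is the perf_counter_ctx race fix: an exiting task's perf_counter_ctxp can be cleared, or its context swapped to another task, while the EXIT event is being generated, so the exit path now passes down the context it already holds locked, while perf_counter_fork() passes NULL and the lookup falls back to RCU. Reduced to a sketch with hypothetical names:

    struct ctx;
    struct task;

    /* assumed helper: RCU-protected lookup of a task's context */
    extern struct ctx *task_ctx_lookup_rcu(struct task *t);

    /* Prefer a context the caller already pinned; only re-derive it
     * from the task when the caller had nothing in hand. */
    static struct ctx *resolve_ctx(struct task *t, struct ctx *pinned)
    {
        return pinned ? pinned : task_ctx_lookup_rcu(t);
    }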
@@ -3344,87 +3364,81 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
  * Generic software counter infrastructure
  */

-static void perf_swcounter_update(struct perf_counter *counter)
+/*
+ * We directly increment counter->count and keep a second value in
+ * counter->hw.period_left to count intervals. This period counter
+ * is kept in the range [-sample_period, 0] so that we can use the
+ * sign as trigger.
+ */
+
+static u64 perf_swcounter_set_period(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
-	u64 prev, now;
-	s64 delta;
+	u64 period = hwc->last_period;
+	u64 nr, offset;
+	s64 old, val;

+	hwc->last_period = hwc->sample_period;
+
 again:
-	prev = atomic64_read(&hwc->prev_count);
-	now = atomic64_read(&hwc->count);
-	if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
+	old = val = atomic64_read(&hwc->period_left);
+	if (val < 0)
+		return 0;
+
+	nr = div64_u64(period + val, period);
+	offset = nr * period;
+	val -= offset;
+	if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
 		goto again;

-	delta = now - prev;
-
-	atomic64_add(delta, &counter->count);
-	atomic64_sub(delta, &hwc->period_left);
-}
-
-static void perf_swcounter_set_period(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		atomic64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		atomic64_add(period, &hwc->period_left);
-		hwc->last_period = period;
-	}
-
-	atomic64_set(&hwc->prev_count, -left);
-	atomic64_set(&hwc->count, -left);
-}
-
-static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
-{
-	enum hrtimer_restart ret = HRTIMER_RESTART;
-	struct perf_sample_data data;
-	struct perf_counter *counter;
-	u64 period;
-
-	counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
-	counter->pmu->read(counter);
-
-	data.addr = 0;
-	data.regs = get_irq_regs();
-	/*
-	 * In case we exclude kernel IPs or are somehow not in interrupt
-	 * context, provide the next best thing, the user IP.
-	 */
-	if ((counter->attr.exclude_kernel || !data.regs) &&
-			!counter->attr.exclude_user)
-		data.regs = task_pt_regs(current);
-
-	if (data.regs) {
-		if (perf_counter_overflow(counter, 0, &data))
-			ret = HRTIMER_NORESTART;
-	}
-
-	period = max_t(u64, 10000, counter->hw.sample_period);
-	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
-
-	return ret;
+	return nr;
 }

 static void perf_swcounter_overflow(struct perf_counter *counter,
 				    int nmi, struct perf_sample_data *data)
 {
-	data->period = counter->hw.last_period;
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 overflow;

-	perf_swcounter_update(counter);
-	perf_swcounter_set_period(counter);
-	if (perf_counter_overflow(counter, nmi, data))
-		/* soft-disable the counter */
-		;
+	data->period = counter->hw.last_period;
+	overflow = perf_swcounter_set_period(counter);
+
+	if (hwc->interrupts == MAX_INTERRUPTS)
+		return;
+
+	for (; overflow; overflow--) {
+		if (perf_counter_overflow(counter, nmi, data)) {
+			/*
+			 * We inhibit the overflow from happening when
+			 * hwc->interrupts == MAX_INTERRUPTS.
+			 */
+			break;
+		}
+	}
 }

 static void perf_swcounter_unthrottle(struct perf_counter *counter)
 {
 	/*
 	 * Nothing to do, we already reset hwc->interrupts.
 	 */
 }

+static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
+			       int nmi, struct perf_sample_data *data)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	atomic64_add(nr, &counter->count);
+
+	if (!hwc->sample_period)
+		return;
+
+	if (!data->regs)
+		return;
+
+	if (!atomic64_add_negative(nr, &hwc->period_left))
+		perf_swcounter_overflow(counter, nmi, data);
+}
+
 static int perf_swcounter_is_counting(struct perf_counter *counter)
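
The rewrite above replaces the old prev_count/count delta tracking with a single period_left value kept in [-sample_period, 0]: perf_swcounter_add() accumulates events until the value turns non-negative, and perf_swcounter_set_period() then reports how many whole periods elapsed and re-arms, so a source that jumps by several periods at once generates one overflow per period instead of one per zero crossing. A user-space model of that arithmetic (plain integers stand in for the kernel's atomics; names are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    static int64_t period_left;    /* models hwc->period_left */

    static uint64_t set_period(uint64_t period)
    {
        int64_t val = period_left;
        uint64_t nr;

        if (val < 0)                                /* still inside the period */
            return 0;

        nr = (period + (uint64_t)val) / period;     /* div64_u64() in the kernel */
        period_left = val - (int64_t)(nr * period); /* back into [-period, 0) */
        return nr;
    }

    int main(void)
    {
        const uint64_t period = 100;

        period_left = -(int64_t)period;  /* what enable()/re-arm establishes */
        period_left += 350;              /* a fast source adds 350 events */
        printf("overflows=%llu left=%lld\n",
               (unsigned long long)set_period(period),
               (long long)period_left);  /* overflows=3 left=-50 */
        return 0;
    }

Re-arming by subtracting nr * period keeps the surplus (the -50 above) counted toward the next period, which is also what makes the kernel's cmpxchg retry loop safe.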
@@ -3488,15 +3502,6 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	return 1;
 }

-static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-			       int nmi, struct perf_sample_data *data)
-{
-	int neg = atomic64_add_negative(nr, &counter->hw.count);
-
-	if (counter->hw.sample_period && !neg && data->regs)
-		perf_swcounter_overflow(counter, nmi, data);
-}
-
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     enum perf_type_id type,
 				     u32 event, u64 nr, int nmi,
@@ -3575,26 +3580,65 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,

 static void perf_swcounter_read(struct perf_counter *counter)
 {
-	perf_swcounter_update(counter);
 }

 static int perf_swcounter_enable(struct perf_counter *counter)
 {
-	perf_swcounter_set_period(counter);
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	if (hwc->sample_period) {
+		hwc->last_period = hwc->sample_period;
+		perf_swcounter_set_period(counter);
+	}
 	return 0;
 }

 static void perf_swcounter_disable(struct perf_counter *counter)
 {
-	perf_swcounter_update(counter);
 }

 static const struct pmu perf_ops_generic = {
 	.enable		= perf_swcounter_enable,
 	.disable	= perf_swcounter_disable,
 	.read		= perf_swcounter_read,
 	.unthrottle	= perf_swcounter_unthrottle,
 };

+/*
+ * hrtimer based swcounter callback
+ */
+
+static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+{
+	enum hrtimer_restart ret = HRTIMER_RESTART;
+	struct perf_sample_data data;
+	struct perf_counter *counter;
+	u64 period;
+
+	counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
+	counter->pmu->read(counter);
+
+	data.addr = 0;
+	data.regs = get_irq_regs();
+	/*
+	 * In case we exclude kernel IPs or are somehow not in interrupt
+	 * context, provide the next best thing, the user IP.
+	 */
+	if ((counter->attr.exclude_kernel || !data.regs) &&
+			!counter->attr.exclude_user)
+		data.regs = task_pt_regs(current);
+
+	if (data.regs) {
+		if (perf_counter_overflow(counter, 0, &data))
+			ret = HRTIMER_NORESTART;
+	}
+
+	period = max_t(u64, 10000, counter->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+	return ret;
+}
+
 /*
  * Software counter: cpu wall time clock
  */
@@ -3715,15 +3759,15 @@ static const struct pmu perf_ops_task_clock = {
 void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
 			  int entry_size)
 {
-	struct perf_tracepoint_record tp = {
+	struct perf_raw_record raw = {
 		.size = entry_size,
-		.record = record,
+		.data = record,
 	};

 	struct perf_sample_data data = {
 		.regs = get_irq_regs(),
 		.addr = addr,
-		.private = &tp,
+		.raw = &raw,
 	};

 	if (!data.regs)
@@ -3743,6 +3787,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)

 static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 {
+	/*
+	 * Raw tracepoint data is a severe data leak, only allow root to
+	 * have these.
+	 */
+	if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
+			!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
 	if (ftrace_profile_enable(counter->attr.config))
 		return NULL;

@@ -4285,7 +4337,7 @@ void perf_counter_exit_task(struct task_struct *child)
 	unsigned long flags;

 	if (likely(!child->perf_counter_ctxp)) {
-		perf_counter_task(child, 0);
+		perf_counter_task(child, NULL, 0);
 		return;
 	}

@@ -4305,6 +4357,7 @@ void perf_counter_exit_task(struct task_struct *child)
 	 * incremented the context's refcount before we do put_ctx below.
 	 */
 	spin_lock(&child_ctx->lock);
+	child->perf_counter_ctxp = NULL;
 	/*
 	 * If this context is a clone; unclone it so it can't get
 	 * swapped to another process while we're removing all
@@ -4318,9 +4371,7 @@ void perf_counter_exit_task(struct task_struct *child)
 	 * won't get any samples after PERF_EVENT_EXIT. We can however still
 	 * get a few PERF_EVENT_READ events.
 	 */
-	perf_counter_task(child, 0);
-
-	child->perf_counter_ctxp = NULL;
+	perf_counter_task(child, child_ctx, 0);

 	/*
 	 * We can recurse on the same lock type through: