Merge branch 'perfcounters-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (60 commits) perf tools: Avoid unnecessary work in directory lookups perf stat: Clean up statistics calculations a bit more perf stat: More advanced variance computation perf stat: Use stddev_mean in stead of stddev perf stat: Remove the limit on repeat perf stat: Change noise calculation to use stddev x86, perf_counter, bts: Do not allow kernel BTS tracing for now x86, perf_counter, bts: Correct pointer-to-u64 casts x86, perf_counter, bts: Fail if BTS is not available perf_counter: Fix output-sharing error path perf trace: Fix read_string() perf trace: Print out in nanoseconds perf tools: Seek to the end of the header area perf trace: Fix parsing of perf.data perf trace: Sample timestamps as well perf_counter: Introduce new (non-)paranoia level to allow raw tracepoint access perf trace: Sample the CPU too perf tools: Work around strict aliasing related warnings perf tools: Clean up warnings list in the Makefile perf tools: Complete support for dynamic strings ...
This commit is contained in:
@ -46,12 +46,18 @@ static atomic_t nr_task_counters __read_mostly;
|
||||
|
||||
/*
|
||||
* perf counter paranoia level:
|
||||
* 0 - not paranoid
|
||||
* 1 - disallow cpu counters to unpriv
|
||||
* 2 - disallow kernel profiling to unpriv
|
||||
* -1 - not paranoid at all
|
||||
* 0 - disallow raw tracepoint access for unpriv
|
||||
* 1 - disallow cpu counters for unpriv
|
||||
* 2 - disallow kernel profiling for unpriv
|
||||
*/
|
||||
int sysctl_perf_counter_paranoid __read_mostly = 1;
|
||||
|
||||
static inline bool perf_paranoid_tracepoint_raw(void)
|
||||
{
|
||||
return sysctl_perf_counter_paranoid > -1;
|
||||
}
|
||||
|
||||
static inline bool perf_paranoid_cpu(void)
|
||||
{
|
||||
return sysctl_perf_counter_paranoid > 0;
|
||||
@ -469,7 +475,8 @@ static void update_counter_times(struct perf_counter *counter)
|
||||
struct perf_counter_context *ctx = counter->ctx;
|
||||
u64 run_end;
|
||||
|
||||
if (counter->state < PERF_COUNTER_STATE_INACTIVE)
|
||||
if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
|
||||
counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
|
||||
return;
|
||||
|
||||
counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
|
||||
@ -518,7 +525,7 @@ static void __perf_counter_disable(void *info)
|
||||
*/
|
||||
if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
|
||||
update_context_time(ctx);
|
||||
update_counter_times(counter);
|
||||
update_group_times(counter);
|
||||
if (counter == counter->group_leader)
|
||||
group_sched_out(counter, cpuctx, ctx);
|
||||
else
|
||||
@ -573,7 +580,7 @@ static void perf_counter_disable(struct perf_counter *counter)
|
||||
* in, so we can change the state safely.
|
||||
*/
|
||||
if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
|
||||
update_counter_times(counter);
|
||||
update_group_times(counter);
|
||||
counter->state = PERF_COUNTER_STATE_OFF;
|
||||
}
|
||||
|
||||
@ -850,6 +857,27 @@ retry:
|
||||
spin_unlock_irq(&ctx->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Put a counter into inactive state and update time fields.
|
||||
* Enabling the leader of a group effectively enables all
|
||||
* the group members that aren't explicitly disabled, so we
|
||||
* have to update their ->tstamp_enabled also.
|
||||
* Note: this works for group members as well as group leaders
|
||||
* since the non-leader members' sibling_lists will be empty.
|
||||
*/
|
||||
static void __perf_counter_mark_enabled(struct perf_counter *counter,
|
||||
struct perf_counter_context *ctx)
|
||||
{
|
||||
struct perf_counter *sub;
|
||||
|
||||
counter->state = PERF_COUNTER_STATE_INACTIVE;
|
||||
counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
|
||||
list_for_each_entry(sub, &counter->sibling_list, list_entry)
|
||||
if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
|
||||
sub->tstamp_enabled =
|
||||
ctx->time - sub->total_time_enabled;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cross CPU call to enable a performance counter
|
||||
*/
|
||||
@ -877,8 +905,7 @@ static void __perf_counter_enable(void *info)
|
||||
|
||||
if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
|
||||
goto unlock;
|
||||
counter->state = PERF_COUNTER_STATE_INACTIVE;
|
||||
counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
|
||||
__perf_counter_mark_enabled(counter, ctx);
|
||||
|
||||
/*
|
||||
* If the counter is in a group and isn't the group leader,
|
||||
@ -971,11 +998,9 @@ static void perf_counter_enable(struct perf_counter *counter)
|
||||
* Since we have the lock this context can't be scheduled
|
||||
* in, so we can change the state safely.
|
||||
*/
|
||||
if (counter->state == PERF_COUNTER_STATE_OFF) {
|
||||
counter->state = PERF_COUNTER_STATE_INACTIVE;
|
||||
counter->tstamp_enabled =
|
||||
ctx->time - counter->total_time_enabled;
|
||||
}
|
||||
if (counter->state == PERF_COUNTER_STATE_OFF)
|
||||
__perf_counter_mark_enabled(counter, ctx);
|
||||
|
||||
out:
|
||||
spin_unlock_irq(&ctx->lock);
|
||||
}
|
||||
@ -1479,9 +1504,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
|
||||
counter->attr.enable_on_exec = 0;
|
||||
if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
|
||||
continue;
|
||||
counter->state = PERF_COUNTER_STATE_INACTIVE;
|
||||
counter->tstamp_enabled =
|
||||
ctx->time - counter->total_time_enabled;
|
||||
__perf_counter_mark_enabled(counter, ctx);
|
||||
enabled = 1;
|
||||
}
|
||||
|
||||
@ -1675,6 +1698,11 @@ static void free_counter(struct perf_counter *counter)
|
||||
atomic_dec(&nr_task_counters);
|
||||
}
|
||||
|
||||
if (counter->output) {
|
||||
fput(counter->output->filp);
|
||||
counter->output = NULL;
|
||||
}
|
||||
|
||||
if (counter->destroy)
|
||||
counter->destroy(counter);
|
||||
|
||||
@ -1960,6 +1988,8 @@ unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int perf_counter_set_output(struct perf_counter *counter, int output_fd);
|
||||
|
||||
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct perf_counter *counter = file->private_data;
|
||||
@ -1983,6 +2013,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
case PERF_COUNTER_IOC_PERIOD:
|
||||
return perf_counter_period(counter, (u64 __user *)arg);
|
||||
|
||||
case PERF_COUNTER_IOC_SET_OUTPUT:
|
||||
return perf_counter_set_output(counter, arg);
|
||||
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
@ -2253,6 +2286,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
|
||||
WARN_ON_ONCE(counter->ctx->parent_ctx);
|
||||
mutex_lock(&counter->mmap_mutex);
|
||||
if (counter->output) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (atomic_inc_not_zero(&counter->mmap_count)) {
|
||||
if (nr_pages != counter->data->nr_pages)
|
||||
ret = -EINVAL;
|
||||
@ -2638,6 +2676,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_counter *counter, unsigned int size,
|
||||
int nmi, int sample)
|
||||
{
|
||||
struct perf_counter *output_counter;
|
||||
struct perf_mmap_data *data;
|
||||
unsigned int offset, head;
|
||||
int have_lost;
|
||||
@ -2647,13 +2686,17 @@ static int perf_output_begin(struct perf_output_handle *handle,
|
||||
u64 lost;
|
||||
} lost_event;
|
||||
|
||||
rcu_read_lock();
|
||||
/*
|
||||
* For inherited counters we send all the output towards the parent.
|
||||
*/
|
||||
if (counter->parent)
|
||||
counter = counter->parent;
|
||||
|
||||
rcu_read_lock();
|
||||
output_counter = rcu_dereference(counter->output);
|
||||
if (output_counter)
|
||||
counter = output_counter;
|
||||
|
||||
data = rcu_dereference(counter->data);
|
||||
if (!data)
|
||||
goto out;
|
||||
@ -3934,6 +3977,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
|
||||
* have these.
|
||||
*/
|
||||
if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
|
||||
perf_paranoid_tracepoint_raw() &&
|
||||
!capable(CAP_SYS_ADMIN))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
@ -4202,6 +4246,57 @@ err_size:
|
||||
goto out;
|
||||
}
|
||||
|
||||
int perf_counter_set_output(struct perf_counter *counter, int output_fd)
|
||||
{
|
||||
struct perf_counter *output_counter = NULL;
|
||||
struct file *output_file = NULL;
|
||||
struct perf_counter *old_output;
|
||||
int fput_needed = 0;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!output_fd)
|
||||
goto set;
|
||||
|
||||
output_file = fget_light(output_fd, &fput_needed);
|
||||
if (!output_file)
|
||||
return -EBADF;
|
||||
|
||||
if (output_file->f_op != &perf_fops)
|
||||
goto out;
|
||||
|
||||
output_counter = output_file->private_data;
|
||||
|
||||
/* Don't chain output fds */
|
||||
if (output_counter->output)
|
||||
goto out;
|
||||
|
||||
/* Don't set an output fd when we already have an output channel */
|
||||
if (counter->data)
|
||||
goto out;
|
||||
|
||||
atomic_long_inc(&output_file->f_count);
|
||||
|
||||
set:
|
||||
mutex_lock(&counter->mmap_mutex);
|
||||
old_output = counter->output;
|
||||
rcu_assign_pointer(counter->output, output_counter);
|
||||
mutex_unlock(&counter->mmap_mutex);
|
||||
|
||||
if (old_output) {
|
||||
/*
|
||||
* we need to make sure no existing perf_output_*()
|
||||
* is still referencing this counter.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
fput(old_output->filp);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
fput_light(output_file, fput_needed);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* sys_perf_counter_open - open a performance counter, associate it to a task/cpu
|
||||
*
|
||||
@ -4221,15 +4316,15 @@ SYSCALL_DEFINE5(perf_counter_open,
|
||||
struct file *group_file = NULL;
|
||||
int fput_needed = 0;
|
||||
int fput_needed2 = 0;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
/* for future expandability... */
|
||||
if (flags)
|
||||
if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
|
||||
return -EINVAL;
|
||||
|
||||
ret = perf_copy_attr(attr_uptr, &attr);
|
||||
if (ret)
|
||||
return ret;
|
||||
err = perf_copy_attr(attr_uptr, &attr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!attr.exclude_kernel) {
|
||||
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
|
||||
@ -4252,8 +4347,8 @@ SYSCALL_DEFINE5(perf_counter_open,
|
||||
* Look up the group leader (we will attach this counter to it):
|
||||
*/
|
||||
group_leader = NULL;
|
||||
if (group_fd != -1) {
|
||||
ret = -EINVAL;
|
||||
if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
|
||||
err = -EINVAL;
|
||||
group_file = fget_light(group_fd, &fput_needed);
|
||||
if (!group_file)
|
||||
goto err_put_context;
|
||||
@ -4282,18 +4377,24 @@ SYSCALL_DEFINE5(perf_counter_open,
|
||||
|
||||
counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
|
||||
NULL, GFP_KERNEL);
|
||||
ret = PTR_ERR(counter);
|
||||
err = PTR_ERR(counter);
|
||||
if (IS_ERR(counter))
|
||||
goto err_put_context;
|
||||
|
||||
ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
|
||||
if (ret < 0)
|
||||
err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
|
||||
if (err < 0)
|
||||
goto err_free_put_context;
|
||||
|
||||
counter_file = fget_light(ret, &fput_needed2);
|
||||
counter_file = fget_light(err, &fput_needed2);
|
||||
if (!counter_file)
|
||||
goto err_free_put_context;
|
||||
|
||||
if (flags & PERF_FLAG_FD_OUTPUT) {
|
||||
err = perf_counter_set_output(counter, group_fd);
|
||||
if (err)
|
||||
goto err_fput_free_put_context;
|
||||
}
|
||||
|
||||
counter->filp = counter_file;
|
||||
WARN_ON_ONCE(ctx->parent_ctx);
|
||||
mutex_lock(&ctx->mutex);
|
||||
@ -4307,20 +4408,20 @@ SYSCALL_DEFINE5(perf_counter_open,
|
||||
list_add_tail(&counter->owner_entry, ¤t->perf_counter_list);
|
||||
mutex_unlock(¤t->perf_counter_mutex);
|
||||
|
||||
err_fput_free_put_context:
|
||||
fput_light(counter_file, fput_needed2);
|
||||
|
||||
out_fput:
|
||||
fput_light(group_file, fput_needed);
|
||||
|
||||
return ret;
|
||||
|
||||
err_free_put_context:
|
||||
kfree(counter);
|
||||
if (err < 0)
|
||||
kfree(counter);
|
||||
|
||||
err_put_context:
|
||||
put_ctx(ctx);
|
||||
if (err < 0)
|
||||
put_ctx(ctx);
|
||||
|
||||
goto out_fput;
|
||||
fput_light(group_file, fput_needed);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user