perf_counter: add an mmap method to allow userspace to read hardware counters
Impact: new feature giving performance improvement

This adds the ability for userspace to do an mmap on a hardware counter fd and get access to a read-only page that contains the information needed to translate a hardware counter value to the full 64-bit counter value that would be returned by a read on the fd.

This is useful on architectures that allow user programs to read the hardware counters, such as PowerPC. The mmap will only succeed if the counter is a hardware counter monitoring the current process.

On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter and translate it to the full 64-bit value in about 30ns using the mmapped page, compared to about 830ns for the read syscall on the counter, so this does give a significant performance improvement.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090323172417.297057964@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
committed by
Ingo Molnar
parent
96f6d44443
commit
37d8182838
@@ -417,6 +417,8 @@ void hw_perf_restore(u64 disable)
|
|||||||
atomic64_set(&counter->hw.prev_count, val);
|
atomic64_set(&counter->hw.prev_count, val);
|
||||||
counter->hw.idx = hwc_index[i] + 1;
|
counter->hw.idx = hwc_index[i] + 1;
|
||||||
write_pmc(counter->hw.idx, val);
|
write_pmc(counter->hw.idx, val);
|
||||||
|
if (counter->user_page)
|
||||||
|
perf_counter_update_userpage(counter);
|
||||||
}
|
}
|
||||||
mb();
|
mb();
|
||||||
cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
|
cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
|
||||||
@@ -572,6 +574,8 @@ static void power_perf_disable(struct perf_counter *counter)
|
|||||||
ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
|
ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
|
||||||
write_pmc(counter->hw.idx, 0);
|
write_pmc(counter->hw.idx, 0);
|
||||||
counter->hw.idx = 0;
|
counter->hw.idx = 0;
|
||||||
|
if (counter->user_page)
|
||||||
|
perf_counter_update_userpage(counter);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -698,6 +702,8 @@ static void record_and_restart(struct perf_counter *counter, long val,
|
|||||||
write_pmc(counter->hw.idx, val);
|
write_pmc(counter->hw.idx, val);
|
||||||
atomic64_set(&counter->hw.prev_count, val);
|
atomic64_set(&counter->hw.prev_count, val);
|
||||||
atomic64_set(&counter->hw.period_left, left);
|
atomic64_set(&counter->hw.period_left, left);
|
||||||
|
if (counter->user_page)
|
||||||
|
perf_counter_update_userpage(counter);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Finally record data if requested.
|
* Finally record data if requested.
|
||||||
|
@@ -143,6 +143,17 @@ struct perf_counter_hw_event {
|
|||||||
#define PERF_COUNTER_IOC_ENABLE _IO('$', 0)
|
#define PERF_COUNTER_IOC_ENABLE _IO('$', 0)
|
||||||
#define PERF_COUNTER_IOC_DISABLE _IO('$', 1)
|
#define PERF_COUNTER_IOC_DISABLE _IO('$', 1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Structure of the page that can be mapped via mmap
|
||||||
|
*/
|
||||||
|
struct perf_counter_mmap_page {
|
||||||
|
__u32 version; /* version number of this structure */
|
||||||
|
__u32 compat_version; /* lowest version this is compat with */
|
||||||
|
__u32 lock; /* seqlock for synchronization */
|
||||||
|
__u32 index; /* hardware counter identifier */
|
||||||
|
__s64 offset; /* add to hardware counter value */
|
||||||
|
};
|
||||||
|
|
||||||
#ifdef __KERNEL__
|
#ifdef __KERNEL__
|
||||||
/*
|
/*
|
||||||
* Kernel-internal data types and definitions:
|
* Kernel-internal data types and definitions:
|
||||||
@@ -278,6 +289,9 @@ struct perf_counter {
|
|||||||
int oncpu;
|
int oncpu;
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
||||||
|
/* pointer to page shared with userspace via mmap */
|
||||||
|
unsigned long user_page;
|
||||||
|
|
||||||
/* read() / irq related data */
|
/* read() / irq related data */
|
||||||
wait_queue_head_t waitq;
|
wait_queue_head_t waitq;
|
||||||
/* optional: for NMIs */
|
/* optional: for NMIs */
|
||||||
@@ -361,6 +375,7 @@ extern int perf_counter_task_enable(void);
|
|||||||
extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
|
extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
|
||||||
struct perf_cpu_context *cpuctx,
|
struct perf_cpu_context *cpuctx,
|
||||||
struct perf_counter_context *ctx, int cpu);
|
struct perf_counter_context *ctx, int cpu);
|
||||||
|
extern void perf_counter_update_userpage(struct perf_counter *counter);
|
||||||
|
|
||||||
extern void perf_counter_output(struct perf_counter *counter,
|
extern void perf_counter_output(struct perf_counter *counter,
|
||||||
int nmi, struct pt_regs *regs);
|
int nmi, struct pt_regs *regs);
|
||||||
|
@@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file)
|
|||||||
mutex_unlock(&counter->mutex);
|
mutex_unlock(&counter->mutex);
|
||||||
mutex_unlock(&ctx->mutex);
|
mutex_unlock(&ctx->mutex);
|
||||||
|
|
||||||
|
free_page(counter->user_page);
|
||||||
free_counter(counter);
|
free_counter(counter);
|
||||||
put_context(ctx);
|
put_context(ctx);
|
||||||
|
|
||||||
@@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void perf_counter_update_userpage(struct perf_counter *counter)
|
||||||
|
{
|
||||||
|
struct perf_counter_mmap_page *userpg;
|
||||||
|
|
||||||
|
if (!counter->user_page)
|
||||||
|
return;
|
||||||
|
userpg = (struct perf_counter_mmap_page *) counter->user_page;
|
||||||
|
|
||||||
|
++userpg->lock;
|
||||||
|
smp_wmb();
|
||||||
|
userpg->index = counter->hw.idx;
|
||||||
|
userpg->offset = atomic64_read(&counter->count);
|
||||||
|
if (counter->state == PERF_COUNTER_STATE_ACTIVE)
|
||||||
|
userpg->offset -= atomic64_read(&counter->hw.prev_count);
|
||||||
|
smp_wmb();
|
||||||
|
++userpg->lock;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||||
|
{
|
||||||
|
struct perf_counter *counter = vma->vm_file->private_data;
|
||||||
|
|
||||||
|
if (!counter->user_page)
|
||||||
|
return VM_FAULT_SIGBUS;
|
||||||
|
|
||||||
|
vmf->page = virt_to_page(counter->user_page);
|
||||||
|
get_page(vmf->page);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct vm_operations_struct perf_mmap_vmops = {
|
||||||
|
.fault = perf_mmap_fault,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int perf_mmap(struct file *file, struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
struct perf_counter *counter = file->private_data;
|
||||||
|
unsigned long userpg;
|
||||||
|
|
||||||
|
if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
|
||||||
|
return -EINVAL;
|
||||||
|
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For now, restrict to the case of a hardware counter
|
||||||
|
* on the current task.
|
||||||
|
*/
|
||||||
|
if (is_software_counter(counter) || counter->task != current)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
userpg = counter->user_page;
|
||||||
|
if (!userpg) {
|
||||||
|
userpg = get_zeroed_page(GFP_KERNEL);
|
||||||
|
mutex_lock(&counter->mutex);
|
||||||
|
if (counter->user_page) {
|
||||||
|
free_page(userpg);
|
||||||
|
userpg = counter->user_page;
|
||||||
|
} else {
|
||||||
|
counter->user_page = userpg;
|
||||||
|
}
|
||||||
|
mutex_unlock(&counter->mutex);
|
||||||
|
if (!userpg)
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
perf_counter_update_userpage(counter);
|
||||||
|
|
||||||
|
vma->vm_flags &= ~VM_MAYWRITE;
|
||||||
|
vma->vm_flags |= VM_RESERVED;
|
||||||
|
vma->vm_ops = &perf_mmap_vmops;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static const struct file_operations perf_fops = {
|
static const struct file_operations perf_fops = {
|
||||||
.release = perf_release,
|
.release = perf_release,
|
||||||
.read = perf_read,
|
.read = perf_read,
|
||||||
.poll = perf_poll,
|
.poll = perf_poll,
|
||||||
.unlocked_ioctl = perf_ioctl,
|
.unlocked_ioctl = perf_ioctl,
|
||||||
.compat_ioctl = perf_ioctl,
|
.compat_ioctl = perf_ioctl,
|
||||||
|
.mmap = perf_mmap,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user