perf events: Change perf parameter --pid to process-wide collection instead of thread-wide

Parameter --pid (or -p) of perf currently means a thread-wide
collection. For example, if a process whose id is 8888 has 10
threads, 'perf top -p 8888' just collects the main thread
statistics. That's misleading. Users are used to attaching to a whole
process when debugging it with gdb. To follow that normal usage
style, this patch changes --pid to mean process-wide collection and
adds --tid (-t) to mean thread-wide collection.

Usage examples:

 # perf top -p 8888
 # perf record -p 8888 -f sleep 10
 # perf stat -p 8888 -f sleep 10

The above commands collect statistics for all threads of process
8888.

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sheng Yang <sheng@linux.intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Jes Sorensen <Jes.Sorensen@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: zhiteng.huang@intel.com
Cc: Zachary Amsden <zamsden@redhat.com>
LKML-Reference: <1268922965-14774-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Zhang, Yanmin
2010-03-18 11:36:05 -03:00
committed by Ingo Molnar
parent 46be604b5b
commit d6d901c23a
5 changed files with 366 additions and 187 deletions

View File

@ -46,6 +46,7 @@
#include "util/debug.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include <sys/prctl.h>
#include <math.h>
@ -74,10 +75,13 @@ static int run_count = 1;
static int inherit = 1;
static int scale = 1;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
static pid_t *all_tids = NULL;
static int thread_num = 0;
static pid_t child_pid = -1;
static int null_run = 0;
static int fd[MAX_NR_CPUS][MAX_COUNTERS];
static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
static int event_scaled[MAX_COUNTERS];
@ -140,9 +144,10 @@ struct stats runtime_branches_stats;
#define ERR_PERF_OPEN \
"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
static void create_perf_stat_counter(int counter, int pid)
static void create_perf_stat_counter(int counter)
{
struct perf_event_attr *attr = attrs + counter;
int thread;
if (scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
@ -152,10 +157,11 @@ static void create_perf_stat_counter(int counter, int pid)
unsigned int cpu;
for (cpu = 0; cpu < nr_cpus; cpu++) {
fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0);
if (fd[cpu][counter] < 0 && verbose)
fd[cpu][counter][0] = sys_perf_event_open(attr,
-1, cpumap[cpu], -1, 0);
if (fd[cpu][counter][0] < 0 && verbose)
fprintf(stderr, ERR_PERF_OPEN, counter,
fd[cpu][counter], strerror(errno));
fd[cpu][counter][0], strerror(errno));
}
} else {
attr->inherit = inherit;
@ -163,11 +169,14 @@ static void create_perf_stat_counter(int counter, int pid)
attr->disabled = 1;
attr->enable_on_exec = 1;
}
fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0);
if (fd[0][counter] < 0 && verbose)
fprintf(stderr, ERR_PERF_OPEN, counter,
fd[0][counter], strerror(errno));
for (thread = 0; thread < thread_num; thread++) {
fd[0][counter][thread] = sys_perf_event_open(attr,
all_tids[thread], -1, -1, 0);
if (fd[0][counter][thread] < 0 && verbose)
fprintf(stderr, ERR_PERF_OPEN, counter,
fd[0][counter][thread],
strerror(errno));
}
}
}
@ -192,25 +201,28 @@ static void read_counter(int counter)
unsigned int cpu;
size_t res, nv;
int scaled;
int i;
int i, thread;
count[0] = count[1] = count[2] = 0;
nv = scale ? 3 : 1;
for (cpu = 0; cpu < nr_cpus; cpu++) {
if (fd[cpu][counter] < 0)
continue;
for (thread = 0; thread < thread_num; thread++) {
if (fd[cpu][counter][thread] < 0)
continue;
res = read(fd[cpu][counter], single_count, nv * sizeof(u64));
assert(res == nv * sizeof(u64));
res = read(fd[cpu][counter][thread],
single_count, nv * sizeof(u64));
assert(res == nv * sizeof(u64));
close(fd[cpu][counter]);
fd[cpu][counter] = -1;
close(fd[cpu][counter][thread]);
fd[cpu][counter][thread] = -1;
count[0] += single_count[0];
if (scale) {
count[1] += single_count[1];
count[2] += single_count[2];
count[0] += single_count[0];
if (scale) {
count[1] += single_count[1];
count[2] += single_count[2];
}
}
}
@ -253,7 +265,6 @@ static int run_perf_stat(int argc __used, const char **argv)
unsigned long long t0, t1;
int status = 0;
int counter;
int pid;
int child_ready_pipe[2], go_pipe[2];
const bool forks = (argc > 0);
char buf;
@ -299,6 +310,9 @@ static int run_perf_stat(int argc __used, const char **argv)
exit(-1);
}
if (target_tid == -1 && target_pid == -1 && !system_wide)
all_tids[0] = child_pid;
/*
* Wait for the child to be ready to exec.
*/
@ -309,12 +323,8 @@ static int run_perf_stat(int argc __used, const char **argv)
close(child_ready_pipe[0]);
}
if (target_pid == -1)
pid = child_pid;
else
pid = target_pid;
for (counter = 0; counter < nr_counters; counter++)
create_perf_stat_counter(counter, pid);
create_perf_stat_counter(counter);
/*
* Enable counters and exec the command:
@ -433,12 +443,14 @@ static void print_stat(int argc, const char **argv)
fprintf(stderr, "\n");
fprintf(stderr, " Performance counter stats for ");
if(target_pid == -1) {
if(target_pid == -1 && target_tid == -1) {
fprintf(stderr, "\'%s", argv[0]);
for (i = 1; i < argc; i++)
fprintf(stderr, " %s", argv[i]);
}else
fprintf(stderr, "task pid \'%d", target_pid);
} else if (target_pid != -1)
fprintf(stderr, "process id \'%d", target_pid);
else
fprintf(stderr, "thread id \'%d", target_tid);
fprintf(stderr, "\'");
if (run_count > 1)
@ -493,7 +505,9 @@ static const struct option options[] = {
OPT_BOOLEAN('i', "inherit", &inherit,
"child tasks inherit counters"),
OPT_INTEGER('p', "pid", &target_pid,
"stat events on existing pid"),
"stat events on existing process id"),
OPT_INTEGER('t', "tid", &target_tid,
"stat events on existing thread id"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('c', "scale", &scale,
@ -510,10 +524,11 @@ static const struct option options[] = {
int cmd_stat(int argc, const char **argv, const char *prefix __used)
{
int status;
int i,j;
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target_pid == -1)
if (!argc && target_pid == -1 && target_tid == -1)
usage_with_options(stat_usage, options);
if (run_count <= 0)
usage_with_options(stat_usage, options);
@ -529,6 +544,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
else
nr_cpus = 1;
if (target_pid != -1) {
target_tid = target_pid;
thread_num = find_all_tid(target_pid, &all_tids);
if (thread_num <= 0) {
fprintf(stderr, "Can't find all threads of pid %d\n",
target_pid);
usage_with_options(stat_usage, options);
}
} else {
all_tids=malloc(sizeof(pid_t));
if (!all_tids)
return -ENOMEM;
all_tids[0] = target_tid;
thread_num = 1;
}
for (i = 0; i < MAX_NR_CPUS; i++) {
for (j = 0; j < MAX_COUNTERS; j++) {
fd[i][j] = malloc(sizeof(int)*thread_num);
if (!fd[i][j])
return -ENOMEM;
}
}
/*
* We dont want to block the signals - that would cause
* child tasks to inherit that and Ctrl-C would not work.