perfcounters: generalize the counter scheduler
Impact: clean up and refactor code

Refactor the counter scheduler: separate out in/out functions and
introduce a counter-rotation function as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
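Note: the new scheduling flow is easiest to read from the tick path. The sketch below is condensed from the hunks that follow (the comments are added here for orientation only; rotate_percpu is hard-coded to 0 in this commit, so only the task context is actually rotated for now):

	void perf_counter_task_tick(struct task_struct *curr, int cpu)
	{
		struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
		struct perf_counter_context *ctx = &curr->perf_counter_ctx;
		const int rotate_percpu = 0;	/* per-CPU rotation disabled for now */

		if (rotate_percpu)
			perf_counter_cpu_sched_out(cpuctx);	/* unschedule per-CPU counters */
		perf_counter_task_sched_out(curr, cpu);		/* unschedule the task's counters */

		if (rotate_percpu)
			rotate_ctx(&cpuctx->ctx);		/* round-robin the per-CPU context */
		rotate_ctx(ctx);				/* round-robin the task context */

		if (rotate_percpu)
			perf_counter_cpu_sched_in(cpuctx, cpu);	/* reschedule per-CPU counters */
		perf_counter_task_sched_in(curr, cpu);		/* reschedule the task's counters */
	}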
@@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->hw_ops->disable(counter);
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_ops->disable(counter);
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
 		counter->task = NULL;
+		counter->oncpu = -1;
 	}
 	ctx->nr_counters--;
 
@@ -192,8 +193,36 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
+static int
+counter_sched_in(struct perf_counter *counter,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_counter_context *ctx,
+		 int cpu)
+{
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
+	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	/*
+	 * The new state must be visible before we turn it on in the hardware:
+	 */
+	smp_wmb();
+
+	if (counter->hw_ops->enable(counter)) {
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->oncpu = -1;
+		return -EAGAIN;
+	}
+
+	cpuctx->active_oncpu++;
+	ctx->nr_active++;
+
+	return 0;
+}
+
 /*
- * Cross CPU call to install and enable a preformance counter
+ * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
 {
@@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	perf_flags = hw_perf_save_disable();
-	list_add_counter(counter, ctx);
-	hw_perf_restore(perf_flags);
 
+	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	if (cpuctx->active_oncpu < perf_max_counters) {
-		counter->state = PERF_COUNTER_STATE_ACTIVE;
-		counter->oncpu = cpu;
-		ctx->nr_active++;
-		cpuctx->active_oncpu++;
-		counter->hw_ops->enable(counter);
-	}
+	counter_sched_in(counter, cpuctx, ctx, cpu);
 
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
+	hw_perf_restore(perf_flags);
+
 	spin_unlock(&ctx->lock);
 	curr_rq_unlock_irq_restore(&flags);
 }
@@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
-	counter->hw_ops->disable(counter);
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
 	cpuctx->active_oncpu--;
@@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter,
 		counter_sched_out(counter, cpuctx, ctx);
 }
 
+void __perf_counter_sched_out(struct perf_counter_context *ctx,
+			      struct perf_cpu_context *cpuctx)
+{
+	struct perf_counter *counter;
+
+	if (likely(!ctx->nr_counters))
+		return;
+
+	spin_lock(&ctx->lock);
+	if (ctx->nr_active) {
+		list_for_each_entry(counter, &ctx->counter_list, list_entry)
+			group_sched_out(counter, cpuctx, ctx);
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = &task->perf_counter_ctx;
-	struct perf_counter *counter;
 
 	if (likely(!cpuctx->task_ctx))
 		return;
 
-	spin_lock(&ctx->lock);
-	if (ctx->nr_active) {
-		list_for_each_entry(counter, &ctx->counter_list, list_entry)
-			group_sched_out(counter, cpuctx, ctx);
-	}
-	spin_unlock(&ctx->lock);
+	__perf_counter_sched_out(ctx, cpuctx);
+
 	cpuctx->task_ctx = NULL;
 }
 
-static int
-counter_sched_in(struct perf_counter *counter,
-		 struct perf_cpu_context *cpuctx,
-		 struct perf_counter_context *ctx,
-		 int cpu)
+static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
-		return 0;
-
-	if (counter->hw_ops->enable(counter))
-		return -EAGAIN;
-
-	counter->state = PERF_COUNTER_STATE_ACTIVE;
-	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
-
-	cpuctx->active_oncpu++;
-	ctx->nr_active++;
-
-	return 0;
+	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
 }
 
 static int
@@ -416,21 +435,10 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Called from scheduler to add the counters of the current task
- * with interrupts disabled.
- *
- * We restore the counter value and then enable it.
- *
- * This does not protect us against NMI, but enable()
- * sets the enabled bit in the control field of counter _before_
- * accessing the counter control register. If a NMI hits, then it will
- * keep the counter running.
- */
-void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+static void
+__perf_counter_sched_in(struct perf_counter_context *ctx,
+			struct perf_cpu_context *cpuctx, int cpu)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
 	struct perf_counter *counter;
 
 	if (likely(!ctx->nr_counters))
@@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 			break;
 	}
 	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &task->perf_counter_ctx;
 
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
 
+static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
+}
+
 int perf_counter_task_disable(void)
 {
 	struct task_struct *curr = current;
@@ -514,6 +547,8 @@ int perf_counter_task_enable(void)
 	/* force the update of the task clock: */
 	__task_delta_exec(curr, 1);
 
+	perf_counter_task_sched_out(curr, cpu);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -538,19 +573,18 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
-void perf_counter_task_tick(struct task_struct *curr, int cpu)
+/*
+ * Round-robin a context's counters:
+ */
+static void rotate_ctx(struct perf_counter_context *ctx)
 {
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	u64 perf_flags;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx->nr_counters)
 		return;
 
-	perf_counter_task_sched_out(curr, cpu);
-
 	spin_lock(&ctx->lock);
-
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
@@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
+}
 
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	const int rotate_percpu = 0;
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_out(cpuctx);
+	perf_counter_task_sched_out(curr, cpu);
+
+	if (rotate_percpu)
+		rotate_ctx(&cpuctx->ctx);
+	rotate_ctx(ctx);
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
 }
 
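The rotation body itself ("rotate the first entry last") is pre-existing code that the hunks above only show as context. As rough orientation, a hypothetical equivalent of that step using the generic list helpers would look like the line below; the exact statement is an assumption, not part of this commit:

	/* hypothetical: move the first counter in the context to the tail */
	if (!list_empty(&ctx->counter_list))
		list_move_tail(ctx->counter_list.next, &ctx->counter_list);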
@@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 	struct task_struct *curr = counter->task;
 	u64 delta;
 
-	WARN_ON_ONCE(counter->task != current);
-
 	delta = __task_delta_exec(curr, update);
 
 	return curr->se.sum_exec_runtime + delta;
@@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader = group_leader;
 	counter->hw_ops = NULL;
 
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
@@ -1331,36 +1381,50 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
-	unsigned long flags;
-	u64 perf_flags;
 
 	/*
-	 * Disable and unlink this counter.
-	 *
-	 * Be careful about zapping the list - IRQ/NMI context
-	 * could still be processing it:
+	 * If we do not self-reap then we have to wait for the
+	 * child task to unschedule (it will happen for sure),
+	 * so that its counter is at its final count. (This
+	 * condition triggers rarely - child tasks usually get
+	 * off their CPU before the parent has a chance to
+	 * get this far into the reaping action)
 	 */
-	curr_rq_lock_irq_save(&flags);
-	perf_flags = hw_perf_save_disable();
-
-	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+	if (child != current) {
+		wait_task_inactive(child, 0);
+		list_del_init(&child_counter->list_entry);
+	} else {
 		struct perf_cpu_context *cpuctx;
+		unsigned long flags;
+		u64 perf_flags;
+
+		/*
+		 * Disable and unlink this counter.
+		 *
+		 * Be careful about zapping the list - IRQ/NMI context
+		 * could still be processing it:
+		 */
+		curr_rq_lock_irq_save(&flags);
+		perf_flags = hw_perf_save_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		child_counter->hw_ops->disable(child_counter);
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-		child_counter->oncpu = -1;
-
-		cpuctx->active_oncpu--;
-		child_ctx->nr_active--;
-	}
+		if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+			child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+			child_counter->hw_ops->disable(child_counter);
+			cpuctx->active_oncpu--;
+			child_ctx->nr_active--;
+			child_counter->oncpu = -1;
+		}
 
-	list_del_init(&child_counter->list_entry);
+		list_del_init(&child_counter->list_entry);
 
-	hw_perf_restore(perf_flags);
-	curr_rq_unlock_irq_restore(&flags);
+		child_ctx->nr_counters--;
+
+		hw_perf_restore(perf_flags);
+		curr_rq_unlock_irq_restore(&flags);
+	}
 
 	parent_counter = child_counter->parent;
 	/*
 	 * It can happen that parent exits first, and has counters