irq_work: Add generic hardirq context callbacks

Provide a mechanism that allows running code in IRQ context. It is
most useful for NMI code that needs to interact with the rest of the
system -- for example, to wake up a task to drain buffers.

Perf currently has such a mechanism, so extract that and provide it as
a generic feature, independent of perf so that others may also
benefit.

The IRQ context callback is generated through self-IPIs where
possible, or on architectures like powerpc the decrementer (the
built-in timer facility) is set to generate an interrupt immediately.

Architectures that don't have anything like this have to make do with a
callback from the timer tick. These architectures can call
irq_work_run() at the tail of any IRQ handlers that might enqueue such
work (like the perf IRQ handler) to avoid undue latencies in
processing the work.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
[ various fixes ]
Signed-off-by: Huang Ying <ying.huang@intel.com>
LKML-Reference: <1287036094.7768.291.camel@yhuang-dev>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Zijlstra
2010-10-14 14:01:34 +08:00
committed by Ingo Molnar
parent 8e5fc1a732
commit e360adbe29
39 changed files with 311 additions and 242 deletions

View File

@@ -2206,12 +2206,11 @@ static void free_event_rcu(struct rcu_head *head)
kfree(event);
}
static void perf_pending_sync(struct perf_event *event);
static void perf_buffer_put(struct perf_buffer *buffer);
static void free_event(struct perf_event *event)
{
perf_pending_sync(event);
irq_work_sync(&event->pending);
if (!event->parent) {
atomic_dec(&nr_events);
@@ -3162,16 +3161,7 @@ void perf_event_wakeup(struct perf_event *event)
}
}
/*
* Pending wakeups
*
* Handle the case where we need to wake up from NMI (or rq->lock) context.
*
* The NMI bit means we cannot possibly take locks. Therefore, maintain a
* single linked list and use cmpxchg() to add entries lockless.
*/
static void perf_pending_event(struct perf_pending_entry *entry)
static void perf_pending_event(struct irq_work *entry)
{
struct perf_event *event = container_of(entry,
struct perf_event, pending);
@@ -3187,89 +3177,6 @@ static void perf_pending_event(struct perf_pending_entry *entry)
}
}
/*
 * End-of-list sentinel for the per-CPU pending list. It is deliberately
 * not NULL: a NULL ->next is what perf_pending_queue() uses to recognise
 * an entry that is not currently queued.
 */
#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
/*
 * Per-CPU head of the singly linked pending list. It starts out as
 * PENDING_TAIL, i.e. an empty list.
 */
static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
PENDING_TAIL,
};
/*
 * Add @entry to the current CPU's pending list without taking a lock and
 * record @func as the callback to invoke when the list is drained.
 *
 * An entry that is already queued (its ->next is non-NULL) is left alone,
 * so each entry appears on a list at most once.
 */
static void perf_pending_queue(struct perf_pending_entry *entry,
void (*func)(struct perf_pending_entry *))
{
struct perf_pending_entry **head;
/*
 * Claim the entry: move ->next from NULL to PENDING_TAIL atomically.
 * If it was not NULL, the entry is already queued and we are done.
 */
if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
return;
entry->func = func;
head = &get_cpu_var(perf_pending_head);
/* Push onto the list head; retry if the head changed under us. */
do {
entry->next = *head;
} while (cmpxchg(head, entry->next, entry) != entry->next);
/*
 * NOTE(review): set_perf_event_pending() is defined outside this view;
 * presumably it requests the callback that ends up draining the list
 * via perf_event_do_pending() -- confirm.
 */
set_perf_event_pending();
put_cpu_var(perf_pending_head);
}
/*
 * Drain the current CPU's pending list and invoke each entry's callback.
 *
 * The whole list is detached in one xchg(), leaving an empty list
 * (PENDING_TAIL) behind, and is then walked privately.
 *
 * Returns the number of entries whose callback was run.
 */
static int __perf_pending_run(void)
{
struct perf_pending_entry *list;
int nr = 0;
list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
while (list != PENDING_TAIL) {
void (*func)(struct perf_pending_entry *);
struct perf_pending_entry *entry = list;
/* Read ->next and ->func before the entry is marked unqueued below. */
list = list->next;
func = entry->func;
/* NULL ->next marks the entry as no longer queued. */
entry->next = NULL;
/*
 * Ensure we observe the unqueue before we issue the wakeup,
 * so that we won't be waiting forever.
 * -- see perf_not_pending().
 */
smp_wmb();
func(entry);
nr++;
}
return nr;
}
/*
 * Wait condition for perf_pending_sync(): returns non-zero once @event's
 * pending entry is no longer queued (its ->next is NULL).
 *
 * As a side effect, drains the pending list of whatever CPU this happens
 * to run on.
 */
static inline int perf_not_pending(struct perf_event *event)
{
/*
 * If we flush on whatever cpu we run, there is a chance we don't
 * need to wait.
 */
get_cpu();
__perf_pending_run();
put_cpu();
/*
 * Ensure we see the proper queue state before going to sleep
 * so that we do not miss the wakeup. -- pairs with the smp_wmb()
 * in __perf_pending_run().
 */
smp_rmb();
return event->pending.next == NULL;
}
/*
 * Block on @event->waitq until perf_not_pending() reports that @event's
 * pending entry is no longer queued.
 */
static void perf_pending_sync(struct perf_event *event)
{
wait_event(event->waitq, perf_not_pending(event));
}
/*
 * Non-static entry point that drains the current CPU's pending list.
 * The count returned by __perf_pending_run() is discarded.
 */
void perf_event_do_pending(void)
{
__perf_pending_run();
}
/*
* We assume there is only KVM supporting the callbacks.
* Later on, we might change it to a list if there is
@@ -3319,8 +3226,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
if (handle->nmi) {
handle->event->pending_wakeup = 1;
perf_pending_queue(&handle->event->pending,
perf_pending_event);
irq_work_queue(&handle->event->pending);
} else
perf_event_wakeup(handle->event);
}
@@ -4356,8 +4262,7 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
event->pending_kill = POLL_HUP;
if (nmi) {
event->pending_disable = 1;
perf_pending_queue(&event->pending,
perf_pending_event);
irq_work_queue(&event->pending);
} else
perf_event_disable(event);
}
@@ -5374,6 +5279,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
INIT_LIST_HEAD(&event->event_entry);
INIT_LIST_HEAD(&event->sibling_list);
init_waitqueue_head(&event->waitq);
init_irq_work(&event->pending, perf_pending_event);
mutex_init(&event->mmap_mutex);