irq_work: Add generic hardirq context callbacks
Provide a mechanism that allows running code in IRQ context. It is most useful for NMI code that needs to interact with the rest of the system -- like waking up a task to drain buffers.

Perf currently has such a mechanism, so extract that and provide it as a generic feature, independent of perf, so that others may benefit as well.

The IRQ-context callback is generated through self-IPIs where possible; on architectures like powerpc, the decrementer (the built-in timer facility) is instead programmed to fire an interrupt immediately. Architectures that have neither make do with a callback from the timer tick. Such architectures can call irq_work_run() at the tail of any IRQ handler that might enqueue such work (like the perf IRQ handler) to avoid undue latencies in processing the work.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
[ various fixes ]
Signed-off-by: Huang Ying <ying.huang@intel.com>
LKML-Reference: <1287036094.7768.291.camel@yhuang-dev>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
commit e360adbe29
parent 8e5fc1a732
committed by Ingo Molnar
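For context, the pattern perf adopts in the diff below boils down to: embed a struct irq_work in the object, initialize it with a callback, queue it from NMI/hardirq context, and sync against it on teardown. Below is a minimal sketch of that pattern using the API this commit adds (init_irq_work(), irq_work_queue(), irq_work_sync()); the my_buffer structure and function names are hypothetical, for illustration only.

/* Hypothetical usage sketch -- not part of this commit. */
#include <linux/irq_work.h>
#include <linux/wait.h>
#include <linux/kernel.h>

struct my_buffer {
	struct irq_work		drain_work;	/* embedded, like perf_event::pending */
	wait_queue_head_t	waitq;
	int			need_wakeup;
};

/* Runs later in hardirq context (self-IPI, decrementer, or timer tick). */
static void my_drain_callback(struct irq_work *work)
{
	struct my_buffer *buf = container_of(work, struct my_buffer, drain_work);

	if (buf->need_wakeup) {
		buf->need_wakeup = 0;
		wake_up(&buf->waitq);		/* taking locks is safe again here */
	}
}

static void my_buffer_init(struct my_buffer *buf)
{
	init_waitqueue_head(&buf->waitq);
	init_irq_work(&buf->drain_work, my_drain_callback);
}

/* Called from NMI context: no locks allowed, so only queue the work. */
static void my_nmi_handler(struct my_buffer *buf)
{
	buf->need_wakeup = 1;
	irq_work_queue(&buf->drain_work);
}

/* On teardown, wait for any in-flight callback (cf. free_event() below). */
static void my_buffer_destroy(struct my_buffer *buf)
{
	irq_work_sync(&buf->drain_work);
}

On architectures without self-IPIs or a programmable decrementer, the callback is run from the timer tick via irq_work_run(), so the wakeup may be delayed by up to one tick unless an IRQ handler runs the pending work itself.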
@@ -2206,12 +2206,11 @@ static void free_event_rcu(struct rcu_head *head)
 	kfree(event);
 }
 
-static void perf_pending_sync(struct perf_event *event);
 static void perf_buffer_put(struct perf_buffer *buffer);
 
 static void free_event(struct perf_event *event)
 {
-	perf_pending_sync(event);
+	irq_work_sync(&event->pending);
 
 	if (!event->parent) {
 		atomic_dec(&nr_events);
@@ -3162,16 +3161,7 @@ void perf_event_wakeup(struct perf_event *event)
 	}
 }
 
-/*
- * Pending wakeups
- *
- * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
- *
- * The NMI bit means we cannot possibly take locks. Therefore, maintain a
- * single linked list and use cmpxchg() to add entries lockless.
- */
-
-static void perf_pending_event(struct perf_pending_entry *entry)
+static void perf_pending_event(struct irq_work *entry)
 {
 	struct perf_event *event = container_of(entry,
 			struct perf_event, pending);
@@ -3187,89 +3177,6 @@ static void perf_pending_event(struct perf_pending_entry *entry)
 	}
 }
 
-#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
-
-static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
-	PENDING_TAIL,
-};
-
-static void perf_pending_queue(struct perf_pending_entry *entry,
-			       void (*func)(struct perf_pending_entry *))
-{
-	struct perf_pending_entry **head;
-
-	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
-		return;
-
-	entry->func = func;
-
-	head = &get_cpu_var(perf_pending_head);
-
-	do {
-		entry->next = *head;
-	} while (cmpxchg(head, entry->next, entry) != entry->next);
-
-	set_perf_event_pending();
-
-	put_cpu_var(perf_pending_head);
-}
-
-static int __perf_pending_run(void)
-{
-	struct perf_pending_entry *list;
-	int nr = 0;
-
-	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
-	while (list != PENDING_TAIL) {
-		void (*func)(struct perf_pending_entry *);
-		struct perf_pending_entry *entry = list;
-
-		list = list->next;
-
-		func = entry->func;
-		entry->next = NULL;
-		/*
-		 * Ensure we observe the unqueue before we issue the wakeup,
-		 * so that we won't be waiting forever.
-		 * -- see perf_not_pending().
-		 */
-		smp_wmb();
-
-		func(entry);
-		nr++;
-	}
-
-	return nr;
-}
-
-static inline int perf_not_pending(struct perf_event *event)
-{
-	/*
-	 * If we flush on whatever cpu we run, there is a chance we don't
-	 * need to wait.
-	 */
-	get_cpu();
-	__perf_pending_run();
-	put_cpu();
-
-	/*
-	 * Ensure we see the proper queue state before going to sleep
-	 * so that we do not miss the wakeup. -- see perf_pending_handle()
-	 */
-	smp_rmb();
-	return event->pending.next == NULL;
-}
-
-static void perf_pending_sync(struct perf_event *event)
-{
-	wait_event(event->waitq, perf_not_pending(event));
-}
-
-void perf_event_do_pending(void)
-{
-	__perf_pending_run();
-}
-
 /*
  * We assume there is only KVM supporting the callbacks.
  * Later on, we might change it to a list if there is
@@ -3319,8 +3226,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 
 	if (handle->nmi) {
 		handle->event->pending_wakeup = 1;
-		perf_pending_queue(&handle->event->pending,
-				   perf_pending_event);
+		irq_work_queue(&handle->event->pending);
 	} else
 		perf_event_wakeup(handle->event);
 }
@@ -4356,8 +4262,7 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 		event->pending_kill = POLL_HUP;
 		if (nmi) {
 			event->pending_disable = 1;
-			perf_pending_queue(&event->pending,
-					   perf_pending_event);
+			irq_work_queue(&event->pending);
 		} else
 			perf_event_disable(event);
 	}
@@ -5374,6 +5279,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	INIT_LIST_HEAD(&event->event_entry);
 	INIT_LIST_HEAD(&event->sibling_list);
 	init_waitqueue_head(&event->waitq);
+	init_irq_work(&event->pending, perf_pending_event);
 
 	mutex_init(&event->mmap_mutex);