Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  perf python scripting: Add futex-contention script
  perf python scripting: Fixup cut'n'paste error in sctop script
  perf scripting: Shut up 'perf record' final status
  perf record: Remove newline character from perror() argument
  perf python scripting: Support fedora 11 (audit 1.7.17)
  perf python scripting: Improve the syscalls-by-pid script
  perf python scripting: print the syscall name on sctop
  perf python scripting: Improve the syscalls-counts script
  perf python scripting: Improve the failed-syscalls-by-pid script
  kprobes: Remove redundant text_mutex lock in optimize
  x86/oprofile: Fix uninitialized variable use in debug printk
  tracing: Fix 'faild' -> 'failed' typo
  perf probe: Fix format specified for Dwarf_Off parameter
  perf trace: Fix detection of script extension
  perf trace: Use $PERF_EXEC_PATH in canned report scripts
  perf tools: Document event modifiers
  perf tools: Remove direct slang.h include
  perf_events: Fix for transaction recovery in group_sched_in()
  perf_events: Revert: Fix transaction recovery in group_sched_in()
  perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
  ...
@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
/* NOTE: change this value only with kprobe_mutex held */
static bool kprobes_all_disarmed;

static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
/* This protects kprobe_table and optimizing_list */
static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
spinlock_t lock ____cacheline_aligned_in_smp;

@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
}

#ifdef CONFIG_SYSCTL
/* This should be called with kprobe_mutex locked */
static void __kprobes optimize_all_kprobes(void)
{
struct hlist_head *head;

@@ -607,17 +609,16 @@ static __kprobes void optimize_all_kprobes(void)
return;

kprobes_allow_optimization = true;
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
mutex_unlock(&text_mutex);
printk(KERN_INFO "Kprobes globally optimized\n");
}

/* This should be called with kprobe_mutex locked */
static void __kprobes unoptimize_all_kprobes(void)
{
struct hlist_head *head;
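The kprobes hunks above drop the text_mutex locking from optimize_all_kprobes() and instead document that callers must hold kprobe_mutex, which now also guards the optimizing list; the heavy text-patching work is left to the optimizer worker. A rough userspace analogy of that split, assuming invented pthread names and a toy queue rather than the kernel's data structures:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~kprobe_mutex */
static pthread_mutex_t text_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~text_mutex */
static int pending[16];
static int npending;

/* Must be called with list_lock held -- mirrors the added comment
   "This should be called with kprobe_mutex locked". */
static void optimize_all(void)
{
	for (int i = 0; i < 8; i++)
		pending[npending++] = i;	/* only queues work; no text_lock here */
}

/* The deferred worker takes the heavier lock when it applies the changes. */
static void *optimizer_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&list_lock);
	pthread_mutex_lock(&text_lock);
	printf("patching %d queued probes\n", npending);
	npending = 0;
	pthread_mutex_unlock(&text_lock);
	pthread_mutex_unlock(&list_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&list_lock);
	optimize_all();			/* caller holds the list lock */
	pthread_mutex_unlock(&list_lock);

	pthread_create(&t, NULL, optimizer_worker, NULL);
	pthread_join(t, NULL);
	return 0;
}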
@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event)
return event->cpu == -1 || event->cpu == smp_processor_id();
}

static int
__event_sched_out(struct perf_event *event,
static void
event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{

@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event,
}

if (event->state != PERF_EVENT_STATE_ACTIVE)
return 0;
return;

event->state = PERF_EVENT_STATE_INACTIVE;
if (event->pending_disable) {
event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF;
}
event->tstamp_stopped = ctx->time;
event->pmu->del(event, 0);
event->oncpu = -1;

@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event,
ctx->nr_active--;
if (event->attr.exclusive || !cpuctx->active_oncpu)
cpuctx->exclusive = 0;
return 1;
}

static void
event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
int ret;

ret = __event_sched_out(event, cpuctx, ctx);
if (ret)
event->tstamp_stopped = ctx->time;
}

static void

@@ -664,7 +652,7 @@ retry:
}

static int
__event_sched_in(struct perf_event *event,
event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{

@@ -684,6 +672,8 @@ __event_sched_in(struct perf_event *event,
return -EAGAIN;
}

event->tstamp_running += ctx->time - event->tstamp_stopped;

if (!is_software_event(event))
cpuctx->active_oncpu++;
ctx->nr_active++;

@@ -694,35 +684,6 @@ __event_sched_in(struct perf_event *event,
return 0;
}

static inline int
event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
int ret = __event_sched_in(event, cpuctx, ctx);
if (ret)
return ret;
event->tstamp_running += ctx->time - event->tstamp_stopped;
return 0;
}

static void
group_commit_event_sched_in(struct perf_event *group_event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
struct perf_event *event;
u64 now = ctx->time;

group_event->tstamp_running += now - group_event->tstamp_stopped;
/*
* Schedule in siblings as one group (if any):
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
event->tstamp_running += now - event->tstamp_stopped;
}
}

static int
group_sched_in(struct perf_event *group_event,
struct perf_cpu_context *cpuctx,

@@ -730,19 +691,15 @@ group_sched_in(struct perf_event *group_event,
{
struct perf_event *event, *partial_group = NULL;
struct pmu *pmu = group_event->pmu;
u64 now = ctx->time;
bool simulate = false;

if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;

pmu->start_txn(pmu);

/*
* use __event_sched_in() to delay updating tstamp_running
* until the transaction is committed. In case of failure
* we will keep an unmodified tstamp_running which is a
* requirement to get correct timing information
*/
if (__event_sched_in(group_event, cpuctx, ctx)) {
if (event_sched_in(group_event, cpuctx, ctx)) {
pmu->cancel_txn(pmu);
return -EAGAIN;
}

@@ -751,31 +708,42 @@ group_sched_in(struct perf_event *group_event,
* Schedule in siblings as one group (if any):
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
if (__event_sched_in(event, cpuctx, ctx)) {
if (event_sched_in(event, cpuctx, ctx)) {
partial_group = event;
goto group_error;
}
}

if (!pmu->commit_txn(pmu)) {
/* commit tstamp_running */
group_commit_event_sched_in(group_event, cpuctx, ctx);
if (!pmu->commit_txn(pmu))
return 0;
}

group_error:
/*
* Groups can be scheduled in as one unit only, so undo any
* partial group before returning:
* The events up to the failed event are scheduled out normally,
* tstamp_stopped will be updated.
*
* use __event_sched_out() to avoid updating tstamp_stopped
* because the event never actually ran
* The failed events and the remaining siblings need to have
* their timings updated as if they had gone thru event_sched_in()
* and event_sched_out(). This is required to get consistent timings
* across the group. This also takes care of the case where the group
* could never be scheduled by ensuring tstamp_stopped is set to mark
* the time the event was actually stopped, such that time delta
* calculation in update_event_times() is correct.
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
if (event == partial_group)
break;
__event_sched_out(event, cpuctx, ctx);
simulate = true;

if (simulate) {
event->tstamp_running += now - event->tstamp_stopped;
event->tstamp_stopped = now;
} else {
event_sched_out(event, cpuctx, ctx);
}
}
__event_sched_out(group_event, cpuctx, ctx);
event_sched_out(group_event, cpuctx, ctx);

pmu->cancel_txn(pmu);
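The perf_event.c hunks above revert the __event_sched_in()/__event_sched_out() split and instead, when a group member fails to schedule, update the timings of the failed event and the remaining siblings as if they had gone through event_sched_in() and event_sched_out(). A minimal userspace sketch of that recovery step, with a simplified stand-in struct and made-up values rather than the real perf_event layout:

#include <stdio.h>

/* Simplified stand-in for the timing fields touched in the hunks above. */
struct fake_event {
	const char *name;
	unsigned long long tstamp_running;
	unsigned long long tstamp_stopped;
	int sched_in_fails;	/* pretend the PMU rejects this sibling */
};

/* Member really ran: schedule it out normally, stop time moves to now. */
static void sched_out(struct fake_event *e, unsigned long long now)
{
	e->tstamp_stopped = now;
}

/* Member never ran: simulate sched_in + sched_out so the running/stopped
   deltas stay consistent for later time accounting. */
static void simulate_out(struct fake_event *e, unsigned long long now)
{
	e->tstamp_running += now - e->tstamp_stopped;
	e->tstamp_stopped = now;
}

int main(void)
{
	struct fake_event sibling[] = {
		{ "cycles",       0, 100, 0 },
		{ "instructions", 0, 100, 1 },	/* fails: the partial_group member */
		{ "branches",     0, 100, 0 },
	};
	unsigned long long now = 1000;
	int simulate = 0;

	for (int i = 0; i < 3; i++) {
		if (sibling[i].sched_in_fails)
			simulate = 1;	/* from here on, nothing actually ran */
		if (simulate)
			simulate_out(&sibling[i], now);
		else
			sched_out(&sibling[i], now);
	}

	for (int i = 0; i < 3; i++)
		printf("%-12s running=%llu stopped=%llu\n", sibling[i].name,
		       sibling[i].tstamp_running, sibling[i].tstamp_stopped);
	return 0;
}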
@@ -229,18 +229,20 @@ restart:

do {
if (pending & 1) {
unsigned int vec_nr = h - softirq_vec;
int prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(h - softirq_vec);

trace_softirq_entry(h, softirq_vec);
kstat_incr_softirqs_this_cpu(vec_nr);

trace_softirq_entry(vec_nr);
h->action(h);
trace_softirq_exit(h, softirq_vec);
trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
printk(KERN_ERR "huh, entered softirq %td %s %p"
printk(KERN_ERR "huh, entered softirq %u %s %p"
"with preempt_count %08x,"
" exited with %08x?\n", h - softirq_vec,
softirq_to_name[h - softirq_vec],
h->action, prev_count, preempt_count());
" exited with %08x?\n", vec_nr,
softirq_to_name[vec_nr], h->action,
prev_count, preempt_count());
preempt_count() = prev_count;
}
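The softirq hunk above computes the pointer difference h - softirq_vec once, caches it in an unsigned int, and reuses it for the per-CPU statistics, the tracepoints and the printk (hence the %td format becoming %u). A tiny standalone illustration of that pointer arithmetic, using local names rather than the kernel's tables:

#include <stdio.h>

struct softirq_action { void (*action)(struct softirq_action *); };

static struct softirq_action vec[10];

int main(void)
{
	struct softirq_action *h = &vec[3];

	/* h - vec is a ptrdiff_t, hence the old "%td" format... */
	printf("as ptrdiff_t: %td\n", h - vec);

	/* ...caching it once as an unsigned index (vec_nr) avoids repeating
	   the subtraction and lets the message use plain "%u". */
	unsigned int vec_nr = h - vec;
	printf("as vec_nr:    %u\n", vec_nr);
	return 0;
}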
@@ -224,6 +224,9 @@ enum {
RB_LEN_TIME_STAMP = 16,
};

#define skip_time_extend(event) \
((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;

@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
return length + RB_EVNT_HDR_SIZE;
}

/* inline for ring buffer fast paths */
static unsigned
/*
* Return the length of the given event. Will return
* the length of the time extend if the event is a
* time extend.
*/
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
switch (event->type_len) {

@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
return 0;
}

/*
* Return total length of time extend and data,
* or just the event length for all other events.
*/
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
unsigned len = 0;

if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
/* time extends include the data event after it */
len = RB_LEN_TIME_EXTEND;
event = skip_time_extend(event);
}
return len + rb_event_length(event);
}
/**
* ring_buffer_event_length - return the length of the event
* @event: the event to get the length of
*
* Returns the size of the data load of a data event.
* If the event is something other than a data event, it
* returns the size of the event itself. With the exception
* of a TIME EXTEND, where it still returns the size of the
* data load of the data event after it.
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
unsigned length = rb_event_length(event);
unsigned length;

if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);

length = rb_event_length(event);
if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
return length;
length -= RB_EVNT_HDR_SIZE;

@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
static void *
rb_event_data(struct ring_buffer_event *event)
{
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
if (event->type_len)

@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

/* Max number of timestamps that can fit on a page */
#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)

int ring_buffer_print_page_header(struct trace_seq *s)
{
struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->head = 0;
}

/* Slow path, do not inline */
static noinline struct ring_buffer_event *
rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{
event->type_len = RINGBUF_TYPE_TIME_EXTEND;

/* Not the first event on the page? */
if (rb_event_index(event)) {
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
} else {
/* nope, just zero it */
event->time_delta = 0;
event->array[0] = 0;
}

return skip_time_extend(event);
}
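rb_add_time_stamp() above stores a delta that no longer fits the event's time field by splitting it: the low bits go into time_delta and the remainder into array[0], and skip_time_extend() then steps over the extend to reach the data event behind it. A rough userspace sketch of that split and of the reverse step used later in rb_update_write_stamp(); a TS_SHIFT of 27 bits and the simplified struct are assumptions for illustration:

#include <stdio.h>
#include <stdint.h>

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)

struct fake_event {
	uint32_t time_delta;	/* stands in for the in-event time bitfield */
	uint32_t array[1];
};

/* Mirrors the encode side in rb_add_time_stamp(). */
static void encode(struct fake_event *e, uint64_t delta)
{
	e->time_delta = delta & TS_MASK;	/* low 27 bits */
	e->array[0]   = delta >> TS_SHIFT;	/* remaining high bits */
}

/* Mirrors the decode side in the rb_update_write_stamp() hunk further down. */
static uint64_t decode(const struct fake_event *e)
{
	return ((uint64_t)e->array[0] << TS_SHIFT) + e->time_delta;
}

int main(void)
{
	struct fake_event e;
	uint64_t delta = (1ULL << 40) + 12345;	/* too big for 27 bits */

	encode(&e, delta);
	printf("time_delta=%u array[0]=%u decoded=%llu\n",
	       (unsigned)e.time_delta, (unsigned)e.array[0],
	       (unsigned long long)decode(&e));
	return 0;
}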
/**
* ring_buffer_update_event - update event type and data
* @event: the even to update

@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
* data field.
*/
static void
rb_update_event(struct ring_buffer_event *event,
unsigned type, unsigned length)
rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event, unsigned length,
int add_timestamp, u64 delta)
{
event->type_len = type;
/* Only a commit updates the timestamp */
if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
delta = 0;

switch (type) {

case RINGBUF_TYPE_PADDING:
case RINGBUF_TYPE_TIME_EXTEND:
case RINGBUF_TYPE_TIME_STAMP:
break;

case 0:
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
event->array[0] = length;
else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
break;
default:
BUG();
/*
* If we need to add a timestamp, then we
* add it to the start of the resevered space.
*/
if (unlikely(add_timestamp)) {
event = rb_add_time_stamp(event, delta);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}

event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
event->array[0] = length;
} else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
}

/*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
local_sub(length, &tail_page->write);
}

static struct ring_buffer_event *
/*
* This is the slow path, force gcc not to inline it.
*/
static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length, unsigned long tail,
struct buffer_page *tail_page, u64 *ts)
struct buffer_page *tail_page, u64 ts)
{
struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer;

@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
* Nested commits always have zero deltas, so
* just reread the time stamp
*/
*ts = rb_time_stamp(buffer);
next_page->page->time_stamp = *ts;
ts = rb_time_stamp(buffer);
next_page->page->time_stamp = ts;
}

out_again:

@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,

static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
unsigned type, unsigned long length, u64 *ts)
unsigned long length, u64 ts,
u64 delta, int add_timestamp)
{
struct buffer_page *tail_page;
struct ring_buffer_event *event;
unsigned long tail, write;

/*
* If the time delta since the last event is too big to
* hold in the time field of the event, then we append a
* TIME EXTEND event ahead of the data event.
*/
if (unlikely(add_timestamp))
length += RB_LEN_TIME_EXTEND;

tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);

@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
tail = write - length;

/* See if we shot pass the end of this buffer page */
if (write > BUF_PAGE_SIZE)
if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail,
tail_page, ts);

@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,

event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
rb_update_event(event, type, length);
rb_update_event(cpu_buffer, event, length, add_timestamp, delta);

/* The passed in type is zero for DATA */
if (likely(!type))
local_inc(&tail_page->entries);
local_inc(&tail_page->entries);

/*
* If this is the first commit on the page, then update
* its timestamp.
*/
if (!tail)
tail_page->page->time_stamp = *ts;
tail_page->page->time_stamp = ts;

return event;
}

@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long addr;

new_index = rb_event_index(event);
old_index = new_index + rb_event_length(event);
old_index = new_index + rb_event_ts_length(event);
addr = (unsigned long)event;
addr &= PAGE_MASK;
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
return 0;
}

static int
rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
u64 *ts, u64 *delta)
{
struct ring_buffer_event *event;
int ret;

WARN_ONCE(*delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
(unsigned long long)*delta,
(unsigned long long)*ts,
(unsigned long long)cpu_buffer->write_stamp);

/*
* The delta is too big, we to add a
* new timestamp.
*/
event = __rb_reserve_next(cpu_buffer,
RINGBUF_TYPE_TIME_EXTEND,
RB_LEN_TIME_EXTEND,
ts);
if (!event)
return -EBUSY;

if (PTR_ERR(event) == -EAGAIN)
return -EAGAIN;

/* Only a commited time event can update the write stamp */
if (rb_event_is_commit(cpu_buffer, event)) {
/*
* If this is the first on the page, then it was
* updated with the page itself. Try to discard it
* and if we can't just make it zero.
*/
if (rb_event_index(event)) {
event->time_delta = *delta & TS_MASK;
event->array[0] = *delta >> TS_SHIFT;
} else {
/* try to discard, since we do not need this */
if (!rb_try_to_discard(cpu_buffer, event)) {
/* nope, just zero it */
event->time_delta = 0;
event->array[0] = 0;
}
}
cpu_buffer->write_stamp = *ts;
/* let the caller know this was the commit */
ret = 1;
} else {
/* Try to discard the event */
if (!rb_try_to_discard(cpu_buffer, event)) {
/* Darn, this is just wasted space */
event->time_delta = 0;
event->array[0] = 0;
}
ret = 0;
}

*delta = 0;

return ret;
}

static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
local_inc(&cpu_buffer->committing);
local_inc(&cpu_buffer->commits);
}

static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
unsigned long length)
{
struct ring_buffer_event *event;
u64 ts, delta = 0;
int commit = 0;
u64 ts, delta;
int nr_loops = 0;
int add_timestamp;
u64 diff;

rb_start_commit(cpu_buffer);

@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,

length = rb_calculate_event_length(length);
again:
add_timestamp = 0;
delta = 0;

/*
* We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop

@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
goto out_fail;

ts = rb_time_stamp(cpu_buffer->buffer);
diff = ts - cpu_buffer->write_stamp;

/*
* Only the first commit can update the timestamp.
* Yes there is a race here. If an interrupt comes in
* just after the conditional and it traces too, then it
* will also check the deltas. More than one timestamp may
* also be made. But only the entry that did the actual
* commit will be something other than zero.
*/
if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
rb_page_write(cpu_buffer->tail_page) ==
rb_commit_index(cpu_buffer))) {
u64 diff;

diff = ts - cpu_buffer->write_stamp;

/* make sure this diff is calculated here */
barrier();

/* Did the write stamp get updated already? */
if (unlikely(ts < cpu_buffer->write_stamp))
goto get_event;
/* make sure this diff is calculated here */
barrier();

/* Did the write stamp get updated already? */
if (likely(ts >= cpu_buffer->write_stamp)) {
delta = diff;
if (unlikely(test_time_stamp(delta))) {

commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
if (commit == -EBUSY)
goto out_fail;

if (commit == -EAGAIN)
goto again;

RB_WARN_ON(cpu_buffer, commit < 0);
WARN_ONCE(delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
(unsigned long long)delta,
(unsigned long long)ts,
(unsigned long long)cpu_buffer->write_stamp);
add_timestamp = 1;
}
}

get_event:
event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
event = __rb_reserve_next(cpu_buffer, length, ts,
delta, add_timestamp);
if (unlikely(PTR_ERR(event) == -EAGAIN))
goto again;

if (!event)
goto out_fail;

if (!rb_event_is_commit(cpu_buffer, event))
delta = 0;

event->time_delta = delta;

return event;

out_fail:
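In the rewritten rb_reserve_next_event() above, the writer computes the delta against the write stamp up front and, only when it cannot be represented in the event's own time field, sets add_timestamp so that __rb_reserve_next() reserves extra room for a time-extend event. A small sketch of that decision; the constant values here are assumptions chosen to match the hunks, not taken verbatim from the header:

#include <stdio.h>
#include <stdint.h>

#define TS_SHIFT		27
#define TS_MASK			((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST		(~TS_MASK)
#define RB_LEN_TIME_EXTEND	8

/* Delta needs an extend if any bit above the in-event field is set. */
static int test_time_stamp(uint64_t delta)
{
	return (delta & TS_DELTA_TEST) != 0;
}

int main(void)
{
	uint64_t write_stamp = 1000;
	uint64_t ts = write_stamp + (1ULL << 30);	/* a big gap since the last write */
	uint64_t delta = ts - write_stamp;
	unsigned long length = 32;			/* size of the data event */
	int add_timestamp = 0;

	if (test_time_stamp(delta)) {
		add_timestamp = 1;
		length += RB_LEN_TIME_EXTEND;	/* reserve room for the extend too */
	}
	printf("add_timestamp=%d reserved=%lu bytes\n", add_timestamp, length);
	return 0;
}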
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,

#define TRACE_RECURSIVE_DEPTH 16

static int trace_recursive_lock(void)
/* Keep this code out of the fast path cache */
static noinline void trace_recursive_fail(void)
{
current->trace_recursion++;

if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
return 0;

/* Disable all tracing before we do anything else */
tracing_off_permanent();

@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
in_nmi());

WARN_ON_ONCE(1);
}

static inline int trace_recursive_lock(void)
{
current->trace_recursion++;

if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
return 0;

trace_recursive_fail();

return -1;
}

static void trace_recursive_unlock(void)
static inline void trace_recursive_unlock(void)
{
WARN_ON_ONCE(!current->trace_recursion);
@@ -2308,12 +2298,28 @@ static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
u64 delta;

/*
* The event first in the commit queue updates the
* time stamp.
*/
if (rb_event_is_commit(cpu_buffer, event))
cpu_buffer->write_stamp += event->time_delta;
if (rb_event_is_commit(cpu_buffer, event)) {
/*
* A commit event that is first on a page
* updates the write timestamp with the page stamp
*/
if (!rb_event_index(event))
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
delta = event->array[0];
delta <<= TS_SHIFT;
delta += event->time_delta;
cpu_buffer->write_stamp += delta;
} else
cpu_buffer->write_stamp += event->time_delta;
}
}

static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);

static inline void rb_event_discard(struct ring_buffer_event *event)
{
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);

/* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING;

@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,

again:
/*
* We repeat when a timestamp is encountered. It is possible
* to get multiple timestamps from an interrupt entering just
* as one timestamp is about to be written, or from discarded
* commits. The most that we can have is the number on a single page.
* We repeat when a time extend is encountered.
* Since the time extend is always attached to a data event,
* we should never loop more than once.
* (We never hit the following condition more than twice).
*/
if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;

reader = rb_get_reader_page(cpu_buffer);

@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return NULL;

/*
* We repeat when a timestamp is encountered.
* We can get multiple timestamps by nested interrupts or also
* if filtering is on (discarding commits). Since discarding
* commits can be frequent we can get a lot of timestamps.
* But we limit them by not adding timestamps if they begin
* at the start of a page.
* We repeat when a time extend is encountered.
* Since the time extend is always attached to a data event,
* we should never loop more than once.
* (We never hit the following condition more than twice).
*/
if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;

if (rb_per_cpu_empty(cpu_buffer))
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
if (len > (commit - read))
len = (commit - read);

size = rb_event_length(event);
/* Always keep the time extend and data together */
size = rb_event_ts_length(event);

if (len < size)
goto out_unlock;

@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
break;

event = rb_reader_event(cpu_buffer);
size = rb_event_length(event);
/* Always keep the time extend and data together */
size = rb_event_ts_length(event);
} while (len > size);

/* update bpage */
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
{
struct dentry *d_percpu = tracing_dentry_percpu();
struct dentry *d_cpu;
/* strlen(cpu) + MAX(log10(cpu)) + '\0' */
char cpu_dir[7];
char cpu_dir[30]; /* 30 characters should be more than enough */

if (cpu > 999 || cpu < 0)
return;

sprintf(cpu_dir, "cpu%ld", cpu);
snprintf(cpu_dir, 30, "cpu%ld", cpu);
d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) {
pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
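The tracing_init_debugfs_percpu() hunk above replaces a 7-byte stack buffer plus an explicit range check with a 30-byte buffer and a bounded snprintf(). A quick standalone check of the sizing concern, with values chosen purely for illustration:

#include <stdio.h>

int main(void)
{
	long cpu = 1234567;	/* "cpu%ld" for this value would overflow char[7] */
	char cpu_dir[30];

	/* snprintf() stays within the buffer regardless of the value. */
	int n = snprintf(cpu_dir, sizeof(cpu_dir), "cpu%ld", cpu);
	printf("wrote \"%s\" (%d chars, buffer of %zu)\n",
	       cpu_dir, n, sizeof(cpu_dir));
	return 0;
}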