Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  kernel/profile.c: Switch /proc/irq/prof_cpu_mask to seq_file
  tracing: Export trace_profile_buf symbols
  tracing/events: use list_for_entry_continue
  tracing: remove max_tracer_type_len
  function-graph: use ftrace_graph_funcs directly
  tracing: Remove markers
  tracing: Allocate the ftrace event profile buffer dynamically
  tracing: Factorize the events profile accounting
This commit is contained in:
Linus Torvalds
2009-09-21 09:05:47 -07:00
26 changed files with 298 additions and 1659 deletions

View File

@@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/

View File

@@ -1,930 +0,0 @@
/*
* Copyright (C) 2007 Mathieu Desnoyers
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/marker.h>
#include <linux/err.h>
#include <linux/slab.h>
extern struct marker __start___markers[];
extern struct marker __stop___markers[];
/* Set to 1 to enable marker debug output */
static const int marker_debug;
/*
* markers_mutex nests inside module_mutex. Markers mutex protects the builtin
* and module markers and the hash table.
*/
static DEFINE_MUTEX(markers_mutex);
/*
* Marker hash table, containing the active markers.
* Protected by module_mutex.
*/
#define MARKER_HASH_BITS 6
#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
static struct hlist_head marker_table[MARKER_TABLE_SIZE];
/*
* Note about RCU :
* It is used to make sure every handler has finished using its private data
* between two consecutive operation (add or remove) on a given marker. It is
* also used to delay the free of multiple probes array until a quiescent state
* is reached.
* marker entries modifications are protected by the markers_mutex.
*/
struct marker_entry {
struct hlist_node hlist;
char *format;
/* Probe wrapper */
void (*call)(const struct marker *mdata, void *call_private, ...);
struct marker_probe_closure single;
struct marker_probe_closure *multi;
int refcount; /* Number of times armed. 0 if disarmed. */
struct rcu_head rcu;
void *oldptr;
int rcu_pending;
unsigned char ptype:1;
unsigned char format_allocated:1;
char name[0]; /* Contains name'\0'format'\0' */
};
/**
* __mark_empty_function - Empty probe callback
* @probe_private: probe private data
* @call_private: call site private data
* @fmt: format string
* @...: variable argument list
*
* Empty callback provided as a probe to the markers. By providing this to a
* disabled marker, we make sure the execution flow is always valid even
* though the function pointer change and the marker enabling are two distinct
* operations that modifies the execution flow of preemptible code.
*/
notrace void __mark_empty_function(void *probe_private, void *call_private,
const char *fmt, va_list *args)
{
}
EXPORT_SYMBOL_GPL(__mark_empty_function);
/*
* marker_probe_cb Callback that prepares the variable argument list for probes.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
* @...: Variable argument list.
*
* Since we do not use "typical" pointer based RCU in the 1 argument case, we
* need to put a full smp_rmb() in this branch. This is why we do not use
* rcu_dereference() for the pointer read.
*/
notrace void marker_probe_cb(const struct marker *mdata,
void *call_private, ...)
{
va_list args;
char ptype;
/*
* rcu_read_lock_sched does two things : disabling preemption to make
* sure the teardown of the callbacks can be done correctly when they
* are in modules and they insure RCU read coherency.
*/
rcu_read_lock_sched_notrace();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
/* Must read the ptype before ptr. They are not data dependant,
* so we put an explicit smp_rmb() here. */
smp_rmb();
func = mdata->single.func;
/* Must read the ptr before private data. They are not data
* dependant, so we put an explicit smp_rmb() here. */
smp_rmb();
va_start(args, call_private);
func(mdata->single.probe_private, call_private, mdata->format,
&args);
va_end(args);
} else {
struct marker_probe_closure *multi;
int i;
/*
* Read mdata->ptype before mdata->multi.
*/
smp_rmb();
multi = mdata->multi;
/*
* multi points to an array, therefore accessing the array
* depends on reading multi. However, even in this case,
* we must insure that the pointer is read _before_ the array
* data. Same as rcu_dereference, but we need a full smp_rmb()
* in the fast path, so put the explicit barrier here.
*/
smp_read_barrier_depends();
for (i = 0; multi[i].func; i++) {
va_start(args, call_private);
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
va_end(args);
}
}
rcu_read_unlock_sched_notrace();
}
EXPORT_SYMBOL_GPL(marker_probe_cb);
/*
* marker_probe_cb Callback that does not prepare the variable argument list.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
* @...: Variable argument list.
*
* Should be connected to markers "MARK_NOARGS".
*/
static notrace void marker_probe_cb_noarg(const struct marker *mdata,
void *call_private, ...)
{
va_list args; /* not initialized */
char ptype;
rcu_read_lock_sched_notrace();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
/* Must read the ptype before ptr. They are not data dependant,
* so we put an explicit smp_rmb() here. */
smp_rmb();
func = mdata->single.func;
/* Must read the ptr before private data. They are not data
* dependant, so we put an explicit smp_rmb() here. */
smp_rmb();
func(mdata->single.probe_private, call_private, mdata->format,
&args);
} else {
struct marker_probe_closure *multi;
int i;
/*
* Read mdata->ptype before mdata->multi.
*/
smp_rmb();
multi = mdata->multi;
/*
* multi points to an array, therefore accessing the array
* depends on reading multi. However, even in this case,
* we must insure that the pointer is read _before_ the array
* data. Same as rcu_dereference, but we need a full smp_rmb()
* in the fast path, so put the explicit barrier here.
*/
smp_read_barrier_depends();
for (i = 0; multi[i].func; i++)
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
}
rcu_read_unlock_sched_notrace();
}
static void free_old_closure(struct rcu_head *head)
{
struct marker_entry *entry = container_of(head,
struct marker_entry, rcu);
kfree(entry->oldptr);
/* Make sure we free the data before setting the pending flag to 0 */
smp_wmb();
entry->rcu_pending = 0;
}
static void debug_print_probes(struct marker_entry *entry)
{
int i;
if (!marker_debug)
return;
if (!entry->ptype) {
printk(KERN_DEBUG "Single probe : %p %p\n",
entry->single.func,
entry->single.probe_private);
} else {
for (i = 0; entry->multi[i].func; i++)
printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
entry->multi[i].func,
entry->multi[i].probe_private);
}
}
static struct marker_probe_closure *
marker_entry_add_probe(struct marker_entry *entry,
marker_probe_func *probe, void *probe_private)
{
int nr_probes = 0;
struct marker_probe_closure *old, *new;
WARN_ON(!probe);
debug_print_probes(entry);
old = entry->multi;
if (!entry->ptype) {
if (entry->single.func == probe &&
entry->single.probe_private == probe_private)
return ERR_PTR(-EBUSY);
if (entry->single.func == __mark_empty_function) {
/* 0 -> 1 probes */
entry->single.func = probe;
entry->single.probe_private = probe_private;
entry->refcount = 1;
entry->ptype = 0;
debug_print_probes(entry);
return NULL;
} else {
/* 1 -> 2 probes */
nr_probes = 1;
old = NULL;
}
} else {
/* (N -> N+1), (N != 0, 1) probes */
for (nr_probes = 0; old[nr_probes].func; nr_probes++)
if (old[nr_probes].func == probe
&& old[nr_probes].probe_private
== probe_private)
return ERR_PTR(-EBUSY);
}
/* + 2 : one for new probe, one for NULL func */
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
GFP_KERNEL);
if (new == NULL)
return ERR_PTR(-ENOMEM);
if (!old)
new[0] = entry->single;
else
memcpy(new, old,
nr_probes * sizeof(struct marker_probe_closure));
new[nr_probes].func = probe;
new[nr_probes].probe_private = probe_private;
entry->refcount = nr_probes + 1;
entry->multi = new;
entry->ptype = 1;
debug_print_probes(entry);
return old;
}
static struct marker_probe_closure *
marker_entry_remove_probe(struct marker_entry *entry,
marker_probe_func *probe, void *probe_private)
{
int nr_probes = 0, nr_del = 0, i;
struct marker_probe_closure *old, *new;
old = entry->multi;
debug_print_probes(entry);
if (!entry->ptype) {
/* 0 -> N is an error */
WARN_ON(entry->single.func == __mark_empty_function);
/* 1 -> 0 probes */
WARN_ON(probe && entry->single.func != probe);
WARN_ON(entry->single.probe_private != probe_private);
entry->single.func = __mark_empty_function;
entry->refcount = 0;
entry->ptype = 0;
debug_print_probes(entry);
return NULL;
} else {
/* (N -> M), (N > 1, M >= 0) probes */
for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
if ((!probe || old[nr_probes].func == probe)
&& old[nr_probes].probe_private
== probe_private)
nr_del++;
}
}
if (nr_probes - nr_del == 0) {
/* N -> 0, (N > 1) */
entry->single.func = __mark_empty_function;
entry->refcount = 0;
entry->ptype = 0;
} else if (nr_probes - nr_del == 1) {
/* N -> 1, (N > 1) */
for (i = 0; old[i].func; i++)
if ((probe && old[i].func != probe) ||
old[i].probe_private != probe_private)
entry->single = old[i];
entry->refcount = 1;
entry->ptype = 0;
} else {
int j = 0;
/* N -> M, (N > 1, M > 1) */
/* + 1 for NULL */
new = kzalloc((nr_probes - nr_del + 1)
* sizeof(struct marker_probe_closure), GFP_KERNEL);
if (new == NULL)
return ERR_PTR(-ENOMEM);
for (i = 0; old[i].func; i++)
if ((probe && old[i].func != probe) ||
old[i].probe_private != probe_private)
new[j++] = old[i];
entry->refcount = nr_probes - nr_del;
entry->ptype = 1;
entry->multi = new;
}
debug_print_probes(entry);
return old;
}
/*
* Get marker if the marker is present in the marker hash table.
* Must be called with markers_mutex held.
* Returns NULL if not present.
*/
static struct marker_entry *get_marker(const char *name)
{
struct hlist_head *head;
struct hlist_node *node;
struct marker_entry *e;
u32 hash = jhash(name, strlen(name), 0);
head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
hlist_for_each_entry(e, node, head, hlist) {
if (!strcmp(name, e->name))
return e;
}
return NULL;
}
/*
* Add the marker to the marker hash table. Must be called with markers_mutex
* held.
*/
static struct marker_entry *add_marker(const char *name, const char *format)
{
struct hlist_head *head;
struct hlist_node *node;
struct marker_entry *e;
size_t name_len = strlen(name) + 1;
size_t format_len = 0;
u32 hash = jhash(name, name_len-1, 0);
if (format)
format_len = strlen(format) + 1;
head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
hlist_for_each_entry(e, node, head, hlist) {
if (!strcmp(name, e->name)) {
printk(KERN_NOTICE
"Marker %s busy\n", name);
return ERR_PTR(-EBUSY); /* Already there */
}
}
/*
* Using kmalloc here to allocate a variable length element. Could
* cause some memory fragmentation if overused.
*/
e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
GFP_KERNEL);
if (!e)
return ERR_PTR(-ENOMEM);
memcpy(&e->name[0], name, name_len);
if (format) {
e->format = &e->name[name_len];
memcpy(e->format, format, format_len);
if (strcmp(e->format, MARK_NOARGS) == 0)
e->call = marker_probe_cb_noarg;
else
e->call = marker_probe_cb;
trace_mark(core_marker_format, "name %s format %s",
e->name, e->format);
} else {
e->format = NULL;
e->call = marker_probe_cb;
}
e->single.func = __mark_empty_function;
e->single.probe_private = NULL;
e->multi = NULL;
e->ptype = 0;
e->format_allocated = 0;
e->refcount = 0;
e->rcu_pending = 0;
hlist_add_head(&e->hlist, head);
return e;
}
/*
* Remove the marker from the marker hash table. Must be called with mutex_lock
* held.
*/
static int remove_marker(const char *name)
{
struct hlist_head *head;
struct hlist_node *node;
struct marker_entry *e;
int found = 0;
size_t len = strlen(name) + 1;
u32 hash = jhash(name, len-1, 0);
head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
hlist_for_each_entry(e, node, head, hlist) {
if (!strcmp(name, e->name)) {
found = 1;
break;
}
}
if (!found)
return -ENOENT;
if (e->single.func != __mark_empty_function)
return -EBUSY;
hlist_del(&e->hlist);
if (e->format_allocated)
kfree(e->format);
/* Make sure the call_rcu has been executed */
if (e->rcu_pending)
rcu_barrier_sched();
kfree(e);
return 0;
}
/*
* Set the mark_entry format to the format found in the element.
*/
static int marker_set_format(struct marker_entry *entry, const char *format)
{
entry->format = kstrdup(format, GFP_KERNEL);
if (!entry->format)
return -ENOMEM;
entry->format_allocated = 1;
trace_mark(core_marker_format, "name %s format %s",
entry->name, entry->format);
return 0;
}
/*
* Sets the probe callback corresponding to one marker.
*/
static int set_marker(struct marker_entry *entry, struct marker *elem,
int active)
{
int ret = 0;
WARN_ON(strcmp(entry->name, elem->name) != 0);
if (entry->format) {
if (strcmp(entry->format, elem->format) != 0) {
printk(KERN_NOTICE
"Format mismatch for probe %s "
"(%s), marker (%s)\n",
entry->name,
entry->format,
elem->format);
return -EPERM;
}
} else {
ret = marker_set_format(entry, elem->format);
if (ret)
return ret;
}
/*
* probe_cb setup (statically known) is done here. It is
* asynchronous with the rest of execution, therefore we only
* pass from a "safe" callback (with argument) to an "unsafe"
* callback (does not set arguments).
*/
elem->call = entry->call;
/*
* Sanity check :
* We only update the single probe private data when the ptr is
* set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
*/
WARN_ON(elem->single.func != __mark_empty_function
&& elem->single.probe_private != entry->single.probe_private
&& !elem->ptype);
elem->single.probe_private = entry->single.probe_private;
/*
* Make sure the private data is valid when we update the
* single probe ptr.
*/
smp_wmb();
elem->single.func = entry->single.func;
/*
* We also make sure that the new probe callbacks array is consistent
* before setting a pointer to it.
*/
rcu_assign_pointer(elem->multi, entry->multi);
/*
* Update the function or multi probe array pointer before setting the
* ptype.
*/
smp_wmb();
elem->ptype = entry->ptype;
if (elem->tp_name && (active ^ elem->state)) {
WARN_ON(!elem->tp_cb);
/*
* It is ok to directly call the probe registration because type
* checking has been done in the __trace_mark_tp() macro.
*/
if (active) {
/*
* try_module_get should always succeed because we hold
* lock_module() to get the tp_cb address.
*/
ret = try_module_get(__module_text_address(
(unsigned long)elem->tp_cb));
BUG_ON(!ret);
ret = tracepoint_probe_register_noupdate(
elem->tp_name,
elem->tp_cb);
} else {
ret = tracepoint_probe_unregister_noupdate(
elem->tp_name,
elem->tp_cb);
/*
* tracepoint_probe_update_all() must be called
* before the module containing tp_cb is unloaded.
*/
module_put(__module_text_address(
(unsigned long)elem->tp_cb));
}
}
elem->state = active;
return ret;
}
/*
* Disable a marker and its probe callback.
* Note: only waiting an RCU period after setting elem->call to the empty
* function insures that the original callback is not used anymore. This insured
* by rcu_read_lock_sched around the call site.
*/
static void disable_marker(struct marker *elem)
{
int ret;
/* leave "call" as is. It is known statically. */
if (elem->tp_name && elem->state) {
WARN_ON(!elem->tp_cb);
/*
* It is ok to directly call the probe registration because type
* checking has been done in the __trace_mark_tp() macro.
*/
ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
elem->tp_cb);
WARN_ON(ret);
/*
* tracepoint_probe_update_all() must be called
* before the module containing tp_cb is unloaded.
*/
module_put(__module_text_address((unsigned long)elem->tp_cb));
}
elem->state = 0;
elem->single.func = __mark_empty_function;
/* Update the function before setting the ptype */
smp_wmb();
elem->ptype = 0; /* single probe */
/*
* Leave the private data and id there, because removal is racy and
* should be done only after an RCU period. These are never used until
* the next initialization anyway.
*/
}
/**
* marker_update_probe_range - Update a probe range
* @begin: beginning of the range
* @end: end of the range
*
* Updates the probe callback corresponding to a range of markers.
*/
void marker_update_probe_range(struct marker *begin,
struct marker *end)
{
struct marker *iter;
struct marker_entry *mark_entry;
mutex_lock(&markers_mutex);
for (iter = begin; iter < end; iter++) {
mark_entry = get_marker(iter->name);
if (mark_entry) {
set_marker(mark_entry, iter, !!mark_entry->refcount);
/*
* ignore error, continue
*/
} else {
disable_marker(iter);
}
}
mutex_unlock(&markers_mutex);
}
/*
* Update probes, removing the faulty probes.
*
* Internal callback only changed before the first probe is connected to it.
* Single probe private data can only be changed on 0 -> 1 and 2 -> 1
* transitions. All other transitions will leave the old private data valid.
* This makes the non-atomicity of the callback/private data updates valid.
*
* "special case" updates :
* 0 -> 1 callback
* 1 -> 0 callback
* 1 -> 2 callbacks
* 2 -> 1 callbacks
* Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
* Site effect : marker_set_format may delete the marker entry (creating a
* replacement).
*/
static void marker_update_probes(void)
{
/* Core kernel markers */
marker_update_probe_range(__start___markers, __stop___markers);
/* Markers in modules. */
module_update_markers();
tracepoint_probe_update_all();
}
/**
* marker_probe_register - Connect a probe to a marker
* @name: marker name
* @format: format string
* @probe: probe handler
* @probe_private: probe private data
*
* private data must be a valid allocated memory address, or NULL.
* Returns 0 if ok, error value on error.
* The probe address must at least be aligned on the architecture pointer size.
*/
int marker_probe_register(const char *name, const char *format,
marker_probe_func *probe, void *probe_private)
{
struct marker_entry *entry;
int ret = 0;
struct marker_probe_closure *old;
mutex_lock(&markers_mutex);
entry = get_marker(name);
if (!entry) {
entry = add_marker(name, format);
if (IS_ERR(entry))
ret = PTR_ERR(entry);
} else if (format) {
if (!entry->format)
ret = marker_set_format(entry, format);
else if (strcmp(entry->format, format))
ret = -EPERM;
}
if (ret)
goto end;
/*
* If we detect that a call_rcu is pending for this marker,
* make sure it's executed now.
*/
if (entry->rcu_pending)
rcu_barrier_sched();
old = marker_entry_add_probe(entry, probe, probe_private);
if (IS_ERR(old)) {
ret = PTR_ERR(old);
goto end;
}
mutex_unlock(&markers_mutex);
marker_update_probes();
mutex_lock(&markers_mutex);
entry = get_marker(name);
if (!entry)
goto end;
if (entry->rcu_pending)
rcu_barrier_sched();
entry->oldptr = old;
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
call_rcu_sched(&entry->rcu, free_old_closure);
end:
mutex_unlock(&markers_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(marker_probe_register);
/**
* marker_probe_unregister - Disconnect a probe from a marker
* @name: marker name
* @probe: probe function pointer
* @probe_private: probe private data
*
* Returns the private data given to marker_probe_register, or an ERR_PTR().
* We do not need to call a synchronize_sched to make sure the probes have
* finished running before doing a module unload, because the module unload
* itself uses stop_machine(), which insures that every preempt disabled section
* have finished.
*/
int marker_probe_unregister(const char *name,
marker_probe_func *probe, void *probe_private)
{
struct marker_entry *entry;
struct marker_probe_closure *old;
int ret = -ENOENT;
mutex_lock(&markers_mutex);
entry = get_marker(name);
if (!entry)
goto end;
if (entry->rcu_pending)
rcu_barrier_sched();
old = marker_entry_remove_probe(entry, probe, probe_private);
mutex_unlock(&markers_mutex);
marker_update_probes();
mutex_lock(&markers_mutex);
entry = get_marker(name);
if (!entry)
goto end;
if (entry->rcu_pending)
rcu_barrier_sched();
entry->oldptr = old;
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
call_rcu_sched(&entry->rcu, free_old_closure);
remove_marker(name); /* Ignore busy error message */
ret = 0;
end:
mutex_unlock(&markers_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(marker_probe_unregister);
static struct marker_entry *
get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
{
struct marker_entry *entry;
unsigned int i;
struct hlist_head *head;
struct hlist_node *node;
for (i = 0; i < MARKER_TABLE_SIZE; i++) {
head = &marker_table[i];
hlist_for_each_entry(entry, node, head, hlist) {
if (!entry->ptype) {
if (entry->single.func == probe
&& entry->single.probe_private
== probe_private)
return entry;
} else {
struct marker_probe_closure *closure;
closure = entry->multi;
for (i = 0; closure[i].func; i++) {
if (closure[i].func == probe &&
closure[i].probe_private
== probe_private)
return entry;
}
}
}
}
return NULL;
}
/**
* marker_probe_unregister_private_data - Disconnect a probe from a marker
* @probe: probe function
* @probe_private: probe private data
*
* Unregister a probe by providing the registered private data.
* Only removes the first marker found in hash table.
* Return 0 on success or error value.
* We do not need to call a synchronize_sched to make sure the probes have
* finished running before doing a module unload, because the module unload
* itself uses stop_machine(), which insures that every preempt disabled section
* have finished.
*/
int marker_probe_unregister_private_data(marker_probe_func *probe,
void *probe_private)
{
struct marker_entry *entry;
int ret = 0;
struct marker_probe_closure *old;
mutex_lock(&markers_mutex);
entry = get_marker_from_private_data(probe, probe_private);
if (!entry) {
ret = -ENOENT;
goto end;
}
if (entry->rcu_pending)
rcu_barrier_sched();
old = marker_entry_remove_probe(entry, NULL, probe_private);
mutex_unlock(&markers_mutex);
marker_update_probes();
mutex_lock(&markers_mutex);
entry = get_marker_from_private_data(probe, probe_private);
if (!entry)
goto end;
if (entry->rcu_pending)
rcu_barrier_sched();
entry->oldptr = old;
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
call_rcu_sched(&entry->rcu, free_old_closure);
remove_marker(entry->name); /* Ignore busy error message */
end:
mutex_unlock(&markers_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
/**
* marker_get_private_data - Get a marker's probe private data
* @name: marker name
* @probe: probe to match
* @num: get the nth matching probe's private data
*
* Returns the nth private data pointer (starting from 0) matching, or an
* ERR_PTR.
* Returns the private data pointer, or an ERR_PTR.
* The private data pointer should _only_ be dereferenced if the caller is the
* owner of the data, or its content could vanish. This is mostly used to
* confirm that a caller is the owner of a registered probe.
*/
void *marker_get_private_data(const char *name, marker_probe_func *probe,
int num)
{
struct hlist_head *head;
struct hlist_node *node;
struct marker_entry *e;
size_t name_len = strlen(name) + 1;
u32 hash = jhash(name, name_len-1, 0);
int i;
head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
hlist_for_each_entry(e, node, head, hlist) {
if (!strcmp(name, e->name)) {
if (!e->ptype) {
if (num == 0 && e->single.func == probe)
return e->single.probe_private;
} else {
struct marker_probe_closure *closure;
int match = 0;
closure = e->multi;
for (i = 0; closure[i].func; i++) {
if (closure[i].func != probe)
continue;
if (match++ == num)
return closure[i].probe_private;
}
}
break;
}
}
return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL_GPL(marker_get_private_data);
#ifdef CONFIG_MODULES
int marker_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct module *mod = data;
switch (val) {
case MODULE_STATE_COMING:
marker_update_probe_range(mod->markers,
mod->markers + mod->num_markers);
break;
case MODULE_STATE_GOING:
marker_update_probe_range(mod->markers,
mod->markers + mod->num_markers);
break;
}
return 0;
}
struct notifier_block marker_module_nb = {
.notifier_call = marker_module_notify,
.priority = 0,
};
static int init_markers(void)
{
return register_module_notifier(&marker_module_nb);
}
__initcall(init_markers);
#endif /* CONFIG_MODULES */

View File

@@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod,
sizeof(*mod->ctors), &mod->num_ctors);
#endif
#ifdef CONFIG_MARKERS
mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
sizeof(*mod->markers), &mod->num_markers);
#endif
#ifdef CONFIG_TRACEPOINTS
mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
"__tracepoints",
@@ -2958,20 +2954,6 @@ void module_layout(struct module *mod,
EXPORT_SYMBOL(module_layout);
#endif
#ifdef CONFIG_MARKERS
void module_update_markers(void)
{
struct module *mod;
mutex_lock(&module_mutex);
list_for_each_entry(mod, &modules, list)
if (!mod->taints)
marker_update_probe_range(mod->markers,
mod->markers + mod->num_markers);
mutex_unlock(&module_mutex);
}
#endif
#ifdef CONFIG_TRACEPOINTS
void module_update_tracepoints(void)
{

View File

@@ -442,48 +442,51 @@ void profile_tick(int type)
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <asm/uaccess.h>
static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
{
int len = cpumask_scnprintf(page, count, data);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
return len;
seq_cpumask(m, prof_cpu_mask);
seq_putc(m, '\n');
return 0;
}
static int prof_cpu_mask_write_proc(struct file *file,
const char __user *buffer, unsigned long count, void *data)
static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, prof_cpu_mask_proc_show, NULL);
}
static ssize_t prof_cpu_mask_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
struct cpumask *mask = data;
unsigned long full_count = count, err;
cpumask_var_t new_value;
int err;
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
if (!err) {
cpumask_copy(mask, new_value);
err = full_count;
cpumask_copy(prof_cpu_mask, new_value);
err = count;
}
free_cpumask_var(new_value);
return err;
}
static const struct file_operations prof_cpu_mask_proc_fops = {
.open = prof_cpu_mask_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = prof_cpu_mask_proc_write,
};
void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
{
struct proc_dir_entry *entry;
/* create /proc/irq/prof_cpu_mask */
entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
if (!entry)
return;
entry->data = prof_cpu_mask;
entry->read_proc = prof_cpu_mask_read_proc;
entry->write_proc = prof_cpu_mask_write_proc;
proc_create("prof_cpu_mask", 0600, root_irq_dir, &prof_cpu_mask_proc_fops);
}
/*

View File

@@ -2414,11 +2414,9 @@ unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
static void *
__g_next(struct seq_file *m, loff_t *pos)
{
unsigned long *array = m->private;
if (*pos >= ftrace_graph_count)
return NULL;
return &array[*pos];
return &ftrace_graph_funcs[*pos];
}
static void *
@@ -2482,17 +2480,11 @@ ftrace_graph_open(struct inode *inode, struct file *file)
ftrace_graph_count = 0;
memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
}
if (file->f_mode & FMODE_READ) {
ret = seq_open(file, &ftrace_graph_seq_ops);
if (!ret) {
struct seq_file *m = file->private_data;
m->private = ftrace_graph_funcs;
}
} else
file->private_data = ftrace_graph_funcs;
mutex_unlock(&graph_lock);
if (file->f_mode & FMODE_READ)
ret = seq_open(file, &ftrace_graph_seq_ops);
return ret;
}
@@ -2560,7 +2552,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_parser parser;
unsigned long *array;
size_t read = 0;
ssize_t ret;
@@ -2574,12 +2565,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
goto out;
}
if (file->f_mode & FMODE_READ) {
struct seq_file *m = file->private_data;
array = m->private;
} else
array = file->private_data;
if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
ret = -ENOMEM;
goto out;
@@ -2591,7 +2576,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
parser.buffer[parser.idx] = 0;
/* we allow only one expression at a time */
ret = ftrace_set_func(array, &ftrace_graph_count,
ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
parser.buffer);
if (ret)
goto out;

View File

@@ -125,13 +125,13 @@ int ftrace_dump_on_oops;
static int tracing_set_tracer(const char *buf);
#define BOOTUP_TRACER_SIZE 100
static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
#define MAX_TRACER_SIZE 100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;
static int __init set_ftrace(char *str)
{
strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
default_bootup_tracer = bootup_tracer_buf;
/* We are using ftrace early, expand it */
ring_buffer_expanded = 1;
@@ -241,13 +241,6 @@ static struct tracer *trace_types __read_mostly;
/* current_trace points to the tracer that is currently active */
static struct tracer *current_trace __read_mostly;
/*
* max_tracer_type_len is used to simplify the allocating of
* buffers to read userspace tracer names. We keep track of
* the longest tracer name registered.
*/
static int max_tracer_type_len;
/*
* trace_types_lock is used to protect the trace_types list.
* This lock is also used to keep user access serialized.
@@ -619,7 +612,6 @@ __releases(kernel_lock)
__acquires(kernel_lock)
{
struct tracer *t;
int len;
int ret = 0;
if (!type->name) {
@@ -627,6 +619,11 @@ __acquires(kernel_lock)
return -1;
}
if (strlen(type->name) > MAX_TRACER_SIZE) {
pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
return -1;
}
/*
* When this gets called we hold the BKL which means that
* preemption is disabled. Various trace selftests however
@@ -641,7 +638,7 @@ __acquires(kernel_lock)
for (t = trace_types; t; t = t->next) {
if (strcmp(type->name, t->name) == 0) {
/* already found */
pr_info("Trace %s already registered\n",
pr_info("Tracer %s already registered\n",
type->name);
ret = -1;
goto out;
@@ -692,9 +689,6 @@ __acquires(kernel_lock)
type->next = trace_types;
trace_types = type;
len = strlen(type->name);
if (len > max_tracer_type_len)
max_tracer_type_len = len;
out:
tracing_selftest_running = false;
@@ -703,7 +697,7 @@ __acquires(kernel_lock)
if (ret || !default_bootup_tracer)
goto out_unlock;
if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
goto out_unlock;
printk(KERN_INFO "Starting tracer '%s'\n", type->name);
@@ -725,14 +719,13 @@ __acquires(kernel_lock)
void unregister_tracer(struct tracer *type)
{
struct tracer **t;
int len;
mutex_lock(&trace_types_lock);
for (t = &trace_types; *t; t = &(*t)->next) {
if (*t == type)
goto found;
}
pr_info("Trace %s not registered\n", type->name);
pr_info("Tracer %s not registered\n", type->name);
goto out;
found:
@@ -745,17 +738,7 @@ void unregister_tracer(struct tracer *type)
current_trace->stop(&global_trace);
current_trace = &nop_trace;
}
if (strlen(type->name) != max_tracer_type_len)
goto out;
max_tracer_type_len = 0;
for (t = &trace_types; *t; t = &(*t)->next) {
len = strlen((*t)->name);
if (len > max_tracer_type_len)
max_tracer_type_len = len;
}
out:
out:
mutex_unlock(&trace_types_lock);
}
@@ -2604,7 +2587,7 @@ static ssize_t
tracing_set_trace_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[max_tracer_type_len+2];
char buf[MAX_TRACER_SIZE+2];
int r;
mutex_lock(&trace_types_lock);
@@ -2754,15 +2737,15 @@ static ssize_t
tracing_set_trace_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[max_tracer_type_len+1];
char buf[MAX_TRACER_SIZE+1];
int i;
size_t ret;
int err;
ret = cnt;
if (cnt > max_tracer_type_len)
cnt = max_tracer_type_len;
if (cnt > MAX_TRACER_SIZE)
cnt = MAX_TRACER_SIZE;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;

View File

@@ -8,6 +8,57 @@
#include <linux/module.h>
#include "trace.h"
/*
* We can't use a size but a type in alloc_percpu()
* So let's create a dummy type that matches the desired size
*/
typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
char *trace_profile_buf;
EXPORT_SYMBOL_GPL(trace_profile_buf);
char *trace_profile_buf_nmi;
EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
/* Count the events in use (per event id, not per instance) */
static int total_profile_count;
static int ftrace_profile_enable_event(struct ftrace_event_call *event)
{
char *buf;
int ret = -ENOMEM;
if (atomic_inc_return(&event->profile_count))
return 0;
if (!total_profile_count++) {
buf = (char *)alloc_percpu(profile_buf_t);
if (!buf)
goto fail_buf;
rcu_assign_pointer(trace_profile_buf, buf);
buf = (char *)alloc_percpu(profile_buf_t);
if (!buf)
goto fail_buf_nmi;
rcu_assign_pointer(trace_profile_buf_nmi, buf);
}
ret = event->profile_enable();
if (!ret)
return 0;
kfree(trace_profile_buf_nmi);
fail_buf_nmi:
kfree(trace_profile_buf);
fail_buf:
total_profile_count--;
atomic_dec(&event->profile_count);
return ret;
}
int ftrace_profile_enable(int event_id)
{
struct ftrace_event_call *event;
@@ -17,7 +68,7 @@ int ftrace_profile_enable(int event_id)
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id && event->profile_enable &&
try_module_get(event->mod)) {
ret = event->profile_enable(event);
ret = ftrace_profile_enable_event(event);
break;
}
}
@@ -26,6 +77,33 @@ int ftrace_profile_enable(int event_id)
return ret;
}
static void ftrace_profile_disable_event(struct ftrace_event_call *event)
{
char *buf, *nmi_buf;
if (!atomic_add_negative(-1, &event->profile_count))
return;
event->profile_disable();
if (!--total_profile_count) {
buf = trace_profile_buf;
rcu_assign_pointer(trace_profile_buf, NULL);
nmi_buf = trace_profile_buf_nmi;
rcu_assign_pointer(trace_profile_buf_nmi, NULL);
/*
* Ensure every events in profiling have finished before
* releasing the buffers
*/
synchronize_sched();
free_percpu(buf);
free_percpu(nmi_buf);
}
}
void ftrace_profile_disable(int event_id)
{
struct ftrace_event_call *event;
@@ -33,7 +111,7 @@ void ftrace_profile_disable(int event_id)
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id) {
event->profile_disable(event);
ftrace_profile_disable_event(event);
module_put(event->mod);
break;
}

View File

@@ -271,42 +271,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
struct list_head *list = m->private;
struct ftrace_event_call *call;
struct ftrace_event_call *call = v;
(*pos)++;
for (;;) {
if (list == &ftrace_events)
return NULL;
call = list_entry(list, struct ftrace_event_call, list);
list_for_each_entry_continue(call, &ftrace_events, list) {
/*
* The ftrace subsystem is for showing formats only.
* They can not be enabled or disabled via the event files.
*/
if (call->regfunc)
break;
list = list->next;
return call;
}
m->private = list->next;
return call;
return NULL;
}
static void *t_start(struct seq_file *m, loff_t *pos)
{
struct ftrace_event_call *call = NULL;
struct ftrace_event_call *call;
loff_t l;
mutex_lock(&event_mutex);
m->private = ftrace_events.next;
call = list_entry(&ftrace_events, struct ftrace_event_call, list);
for (l = 0; l <= *pos; ) {
call = t_next(m, NULL, &l);
call = t_next(m, call, &l);
if (!call)
break;
}
@@ -316,37 +306,28 @@ static void *t_start(struct seq_file *m, loff_t *pos)
static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
struct list_head *list = m->private;
struct ftrace_event_call *call;
struct ftrace_event_call *call = v;
(*pos)++;
retry:
if (list == &ftrace_events)
return NULL;
call = list_entry(list, struct ftrace_event_call, list);
if (!call->enabled) {
list = list->next;
goto retry;
list_for_each_entry_continue(call, &ftrace_events, list) {
if (call->enabled)
return call;
}
m->private = list->next;
return call;
return NULL;
}
static void *s_start(struct seq_file *m, loff_t *pos)
{
struct ftrace_event_call *call = NULL;
struct ftrace_event_call *call;
loff_t l;
mutex_lock(&event_mutex);
m->private = ftrace_events.next;
call = list_entry(&ftrace_events, struct ftrace_event_call, list);
for (l = 0; l <= *pos; ) {
call = s_next(m, NULL, &l);
call = s_next(m, call, &l);
if (!call)
break;
}

View File

@@ -11,7 +11,6 @@
#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/marker.h>
#include <linux/mutex.h>
#include <linux/ctype.h>
#include <linux/list.h>

View File

@@ -384,10 +384,13 @@ static int sys_prof_refcount_exit;
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
struct syscall_trace_enter *rec;
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
unsigned long flags;
char *raw_data;
int syscall_nr;
int size;
int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
size = ALIGN(size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
do {
char raw_data[size];
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
return;
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
rec = (struct syscall_trace_enter *) raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->enter_id;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
} while(0);
cpu = smp_processor_id();
if (in_nmi())
raw_data = rcu_dereference(trace_profile_buf_nmi);
else
raw_data = rcu_dereference(trace_profile_buf);
if (!raw_data)
goto end;
raw_data = per_cpu_ptr(raw_data, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_enter *) raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->enter_id;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
end:
local_irq_restore(flags);
}
int reg_prof_syscall_enter(char *name)
@@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name)
static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit rec;
struct syscall_trace_exit *rec;
unsigned long flags;
int syscall_nr;
char *raw_data;
int size;
int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
if (!sys_data)
return;
tracing_generic_entry_update(&rec.ent, 0, 0);
rec.ent.type = sys_data->exit_id;
rec.nr = syscall_nr;
rec.ret = syscall_get_return_value(current, regs);
/* We can probably do that at build time */
size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
/*
* Impossible, but be paranoid with the future
* How to put this check outside runtime?
*/
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"exit event has grown above profile buffer size"))
return;
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
cpu = smp_processor_id();
if (in_nmi())
raw_data = rcu_dereference(trace_profile_buf_nmi);
else
raw_data = rcu_dereference(trace_profile_buf);
if (!raw_data)
goto end;
raw_data = per_cpu_ptr(raw_data, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_exit *)raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->exit_id;
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size);
end:
local_irq_restore(flags);
}
int reg_prof_syscall_exit(char *name)