oprofile: Implement performance counter multiplexing

The number of hardware counters is limited. The multiplexing feature
enables OProfile to gather more events than the hardware provides
counters for. This is realized by switching between events at a
user-specified time interval.

A new file (/dev/oprofile/time_slice) is added for the user to specify
the timer interval in ms. If the number of events to profile is higher
than the number of hardware counters available, the patch will
schedule a work queue that switches the event counter and re-writes
the different sets of values into it. The switching mechanism needs to
be implemented for each architecture to support multiplexing. This
patch only implements AMD CPU support, but multiplexing can be easily
extended for other models and architectures.

There are follow-on patches that rework parts of this patch.

Signed-off-by: Jason Yeh <jason.yeh@amd.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
This commit is contained in:
Jason Yeh
2009-07-08 13:49:38 +02:00
committed by Robert Richter
parent 6e63ea4b0b
commit 4d4036e0e7
12 changed files with 415 additions and 20 deletions

View File

@@ -12,6 +12,8 @@
#include <linux/init.h>
#include <linux/oprofile.h>
#include <linux/moduleparam.h>
#include <linux/workqueue.h>
#include <linux/time.h>
#include <asm/mutex.h>
#include "oprof.h"
@@ -27,6 +29,15 @@ unsigned long oprofile_backtrace_depth;
static unsigned long is_setup;
static DEFINE_MUTEX(start_mutex);
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
/* Handler run by the delayed work item below; defined later in this file. */
static void switch_worker(struct work_struct *work);
/* Delayed work item used to trigger periodic event switching. */
static DECLARE_DELAYED_WORK(switch_work, switch_worker);
/*
 * Delay, in jiffies, passed to schedule_delayed_work() for switch_work.
 * NOTE(review): not static, presumably referenced from another file - confirm.
 */
unsigned long timeout_jiffies;
/* Default switching interval in milliseconds (fed to msecs_to_jiffies()). */
#define MULTIPLEXING_TIMER_DEFAULT 1
#endif
/* timer
0 - use performance monitoring hardware if available
1 - use the timer int mechanism regardless
@@ -87,6 +98,20 @@ out:
return err;
}
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
/*
 * Queue the event-switch work item so that it runs after the currently
 * configured multiplexing interval (timeout_jiffies) has elapsed.
 */
static void start_switch_worker(void)
{
	schedule_delayed_work(&switch_work, timeout_jiffies);
}
/*
 * Delayed-work handler: ask the architecture code to switch events and,
 * if switch_events() returns zero, re-arm the work item for the next
 * interval. A non-zero return ends the rotation (nothing is rescheduled).
 */
static void switch_worker(struct work_struct *work)
{
	if (oprofile_ops.switch_events())
		return;

	start_switch_worker();
}
#endif
/* Actually start profiling (echo 1>/dev/oprofile/enable) */
int oprofile_start(void)
@@ -108,6 +133,11 @@ int oprofile_start(void)
if ((err = oprofile_ops.start()))
goto out;
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
if (oprofile_ops.switch_events)
start_switch_worker();
#endif
oprofile_started = 1;
out:
mutex_unlock(&start_mutex);
@@ -123,6 +153,11 @@ void oprofile_stop(void)
goto out;
oprofile_ops.stop();
oprofile_started = 0;
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
cancel_delayed_work_sync(&switch_work);
#endif
/* wake up the daemon to read what remains */
wake_up_buffer_waiter();
out:
@@ -155,6 +190,36 @@ post_sync:
mutex_unlock(&start_mutex);
}
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
/*
 * Set the event multiplexing interval.
 *
 * @val_msec: requested interval in milliseconds; stored in jiffies.
 *
 * Returns 0 on success, -EBUSY while profiling is running, or -EINVAL
 * when the active oprofile_ops provides no switch_events callback.
 * If the converted value equals MAX_JIFFY_OFFSET, the default interval
 * (MULTIPLEXING_TIMER_DEFAULT ms) is stored instead.
 */
int oprofile_set_timeout(unsigned long val_msec)
{
	unsigned long slice;
	int ret;

	mutex_lock(&start_mutex);

	if (oprofile_started) {
		ret = -EBUSY;
	} else if (!oprofile_ops.switch_events) {
		ret = -EINVAL;
	} else {
		slice = msecs_to_jiffies(val_msec);
		if (slice == MAX_JIFFY_OFFSET)
			slice = msecs_to_jiffies(MULTIPLEXING_TIMER_DEFAULT);
		timeout_jiffies = slice;
		ret = 0;
	}

	mutex_unlock(&start_mutex);
	return ret;
}
#endif
int oprofile_set_backtrace(unsigned long val)
{
@@ -179,10 +244,23 @@ out:
return err;
}
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
/*
 * Seed timeout_jiffies with the default multiplexing interval
 * (MULTIPLEXING_TIMER_DEFAULT milliseconds, converted to jiffies).
 */
static void __init oprofile_multiplexing_init(void)
{
	timeout_jiffies = msecs_to_jiffies(MULTIPLEXING_TIMER_DEFAULT);
}
#endif
static int __init oprofile_init(void)
{
int err;
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
oprofile_multiplexing_init();
#endif
err = oprofile_arch_init(&oprofile_ops);
if (err < 0 || timer) {