sched: sched_clock_idle_[sleep|wakeup]_event()
Construct a more-or-less wall-clock time out of sched_clock(), by using ACPI-idle's existing knowledge about how much time we spent idling. This allows the rq clock to work around TSC-stops-in-C2, TSC-gets-corrupted-in-C3 types of problems. (Besides the scheduler's statistics, this also benefits blktrace and printk-timestamps.) Furthermore, the precise before-C2/C3-sleep and after-C2/C3-wakeup callbacks allow the scheduler to get the most out of the period where the CPU has a reliable TSC. This results in slightly more precise task statistics. The ACPI bits were acked by Len. Signed-off-by: Ingo Molnar <mingo@elte.hu> Acked-by: Len Brown <len.brown@intel.com>
This commit is contained in:
@@ -292,7 +292,6 @@ static struct clocksource clocksource_tsc = {
|
|||||||
|
|
||||||
void mark_tsc_unstable(char *reason)
|
void mark_tsc_unstable(char *reason)
|
||||||
{
|
{
|
||||||
sched_clock_unstable_event();
|
|
||||||
if (!tsc_unstable) {
|
if (!tsc_unstable) {
|
||||||
tsc_unstable = 1;
|
tsc_unstable = 1;
|
||||||
tsc_enabled = 0;
|
tsc_enabled = 0;
|
||||||
|
@@ -63,6 +63,7 @@
|
|||||||
ACPI_MODULE_NAME("processor_idle");
|
ACPI_MODULE_NAME("processor_idle");
|
||||||
#define ACPI_PROCESSOR_FILE_POWER "power"
|
#define ACPI_PROCESSOR_FILE_POWER "power"
|
||||||
#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
|
#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
|
||||||
|
#define PM_TIMER_TICK_NS (1000000000ULL/PM_TIMER_FREQUENCY)
|
||||||
#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
|
#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
|
||||||
#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
|
#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
|
||||||
static void (*pm_idle_save) (void) __read_mostly;
|
static void (*pm_idle_save) (void) __read_mostly;
|
||||||
@@ -462,6 +463,9 @@ static void acpi_processor_idle(void)
|
|||||||
* TBD: Can't get time duration while in C1, as resumes
|
* TBD: Can't get time duration while in C1, as resumes
|
||||||
* go to an ISR rather than here. Need to instrument
|
* go to an ISR rather than here. Need to instrument
|
||||||
* base interrupt handler.
|
* base interrupt handler.
|
||||||
|
*
|
||||||
|
* Note: the TSC better not stop in C1, sched_clock() will
|
||||||
|
* skew otherwise.
|
||||||
*/
|
*/
|
||||||
sleep_ticks = 0xFFFFFFFF;
|
sleep_ticks = 0xFFFFFFFF;
|
||||||
break;
|
break;
|
||||||
@@ -469,6 +473,8 @@ static void acpi_processor_idle(void)
|
|||||||
case ACPI_STATE_C2:
|
case ACPI_STATE_C2:
|
||||||
/* Get start time (ticks) */
|
/* Get start time (ticks) */
|
||||||
t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
|
t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
|
||||||
|
/* Tell the scheduler that we are going deep-idle: */
|
||||||
|
sched_clock_idle_sleep_event();
|
||||||
/* Invoke C2 */
|
/* Invoke C2 */
|
||||||
acpi_state_timer_broadcast(pr, cx, 1);
|
acpi_state_timer_broadcast(pr, cx, 1);
|
||||||
acpi_cstate_enter(cx);
|
acpi_cstate_enter(cx);
|
||||||
@@ -479,17 +485,22 @@ static void acpi_processor_idle(void)
|
|||||||
/* TSC halts in C2, so notify users */
|
/* TSC halts in C2, so notify users */
|
||||||
mark_tsc_unstable("possible TSC halt in C2");
|
mark_tsc_unstable("possible TSC halt in C2");
|
||||||
#endif
|
#endif
|
||||||
|
/* Compute time (ticks) that we were actually asleep */
|
||||||
|
sleep_ticks = ticks_elapsed(t1, t2);
|
||||||
|
|
||||||
|
/* Tell the scheduler how much we idled: */
|
||||||
|
sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
|
||||||
|
|
||||||
/* Re-enable interrupts */
|
/* Re-enable interrupts */
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
|
/* Do not account our idle-switching overhead: */
|
||||||
|
sleep_ticks -= cx->latency_ticks + C2_OVERHEAD;
|
||||||
|
|
||||||
current_thread_info()->status |= TS_POLLING;
|
current_thread_info()->status |= TS_POLLING;
|
||||||
/* Compute time (ticks) that we were actually asleep */
|
|
||||||
sleep_ticks =
|
|
||||||
ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
|
|
||||||
acpi_state_timer_broadcast(pr, cx, 0);
|
acpi_state_timer_broadcast(pr, cx, 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ACPI_STATE_C3:
|
case ACPI_STATE_C3:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* disable bus master
|
* disable bus master
|
||||||
* bm_check implies we need ARB_DIS
|
* bm_check implies we need ARB_DIS
|
||||||
@@ -518,6 +529,8 @@ static void acpi_processor_idle(void)
|
|||||||
t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
|
t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
|
||||||
/* Invoke C3 */
|
/* Invoke C3 */
|
||||||
acpi_state_timer_broadcast(pr, cx, 1);
|
acpi_state_timer_broadcast(pr, cx, 1);
|
||||||
|
/* Tell the scheduler that we are going deep-idle: */
|
||||||
|
sched_clock_idle_sleep_event();
|
||||||
acpi_cstate_enter(cx);
|
acpi_cstate_enter(cx);
|
||||||
/* Get end time (ticks) */
|
/* Get end time (ticks) */
|
||||||
t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
|
t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
|
||||||
@@ -531,12 +544,17 @@ static void acpi_processor_idle(void)
|
|||||||
/* TSC halts in C3, so notify users */
|
/* TSC halts in C3, so notify users */
|
||||||
mark_tsc_unstable("TSC halts in C3");
|
mark_tsc_unstable("TSC halts in C3");
|
||||||
#endif
|
#endif
|
||||||
|
/* Compute time (ticks) that we were actually asleep */
|
||||||
|
sleep_ticks = ticks_elapsed(t1, t2);
|
||||||
|
/* Tell the scheduler how much we idled: */
|
||||||
|
sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
|
||||||
|
|
||||||
/* Re-enable interrupts */
|
/* Re-enable interrupts */
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
|
/* Do not account our idle-switching overhead: */
|
||||||
|
sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
|
||||||
|
|
||||||
current_thread_info()->status |= TS_POLLING;
|
current_thread_info()->status |= TS_POLLING;
|
||||||
/* Compute time (ticks) that we were actually asleep */
|
|
||||||
sleep_ticks =
|
|
||||||
ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
|
|
||||||
acpi_state_timer_broadcast(pr, cx, 0);
|
acpi_state_timer_broadcast(pr, cx, 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@@ -1388,7 +1388,8 @@ extern void sched_exec(void);
|
|||||||
#define sched_exec() {}
|
#define sched_exec() {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern void sched_clock_unstable_event(void);
|
extern void sched_clock_idle_sleep_event(void);
|
||||||
|
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
|
||||||
|
|
||||||
#ifdef CONFIG_HOTPLUG_CPU
|
#ifdef CONFIG_HOTPLUG_CPU
|
||||||
extern void idle_task_exit(void);
|
extern void idle_task_exit(void);
|
||||||
|
@@ -262,7 +262,8 @@ struct rq {
|
|||||||
s64 clock_max_delta;
|
s64 clock_max_delta;
|
||||||
|
|
||||||
unsigned int clock_warps, clock_overflows;
|
unsigned int clock_warps, clock_overflows;
|
||||||
unsigned int clock_unstable_events;
|
u64 idle_clock;
|
||||||
|
unsigned int clock_deep_idle_events;
|
||||||
u64 tick_timestamp;
|
u64 tick_timestamp;
|
||||||
|
|
||||||
atomic_t nr_iowait;
|
atomic_t nr_iowait;
|
||||||
@@ -556,18 +557,40 @@ static inline struct rq *this_rq_lock(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CPU frequency is/was unstable - start new by setting prev_clock_raw:
|
* We are going deep-idle (irqs are disabled):
|
||||||
*/
|
*/
|
||||||
void sched_clock_unstable_event(void)
|
void sched_clock_idle_sleep_event(void)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
struct rq *rq = cpu_rq(smp_processor_id());
|
||||||
struct rq *rq;
|
|
||||||
|
|
||||||
rq = task_rq_lock(current, &flags);
|
spin_lock(&rq->lock);
|
||||||
rq->prev_clock_raw = sched_clock();
|
__update_rq_clock(rq);
|
||||||
rq->clock_unstable_events++;
|
spin_unlock(&rq->lock);
|
||||||
task_rq_unlock(rq, &flags);
|
rq->clock_deep_idle_events++;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We just idled delta nanoseconds (called with irqs disabled):
|
||||||
|
*/
|
||||||
|
void sched_clock_idle_wakeup_event(u64 delta_ns)
|
||||||
|
{
|
||||||
|
struct rq *rq = cpu_rq(smp_processor_id());
|
||||||
|
u64 now = sched_clock();
|
||||||
|
|
||||||
|
rq->idle_clock += delta_ns;
|
||||||
|
/*
|
||||||
|
* Override the previous timestamp and ignore all
|
||||||
|
* sched_clock() deltas that occurred while we idled,
|
||||||
|
* and use the PM-provided delta_ns to advance the
|
||||||
|
* rq clock:
|
||||||
|
*/
|
||||||
|
spin_lock(&rq->lock);
|
||||||
|
rq->prev_clock_raw = now;
|
||||||
|
rq->clock += delta_ns;
|
||||||
|
spin_unlock(&rq->lock);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* resched_task - mark a task 'to be rescheduled now'.
|
* resched_task - mark a task 'to be rescheduled now'.
|
||||||
|
@@ -154,10 +154,11 @@ static void print_cpu(struct seq_file *m, int cpu)
|
|||||||
P(next_balance);
|
P(next_balance);
|
||||||
P(curr->pid);
|
P(curr->pid);
|
||||||
P(clock);
|
P(clock);
|
||||||
|
P(idle_clock);
|
||||||
P(prev_clock_raw);
|
P(prev_clock_raw);
|
||||||
P(clock_warps);
|
P(clock_warps);
|
||||||
P(clock_overflows);
|
P(clock_overflows);
|
||||||
P(clock_unstable_events);
|
P(clock_deep_idle_events);
|
||||||
P(clock_max_delta);
|
P(clock_max_delta);
|
||||||
P(cpu_load[0]);
|
P(cpu_load[0]);
|
||||||
P(cpu_load[1]);
|
P(cpu_load[1]);
|
||||||
|
Reference in New Issue
Block a user