Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: powerpc/booke: Fix breakpoint/watchpoint one-shot behavior powerpc: Reduce printk from pseries_mach_cpu_die() powerpc: Move checks in pseries_mach_cpu_die() powerpc: Reset kernel stack on cpu online from cede state powerpc: Fix G5 thermal shutdown powerpc/pseries: Pass CPPR value to H_XIRR hcall powerpc/booke: Fix a couple typos in the advanced ptrace code powerpc: Fix SMP build with disabled CPU hotplugging. powerpc: Dynamically allocate pacas powerpc/perf: e500 support powerpc/perf: Build callchain code regardless of hardware event support. powerpc/cpm2: Checkpatch cleanup powerpc/86xx: Renaming following split of GE Fanuc joint venture powerpc/86xx: Convert gef_pic_lock to raw_spinlock powerpc/qe: Convert qe_ic_lock to raw_spinlock powerpc/82xx: Convert pci_pic_lock to raw_spinlock powerpc/85xx: Convert socrates_fpga_pic_lock to raw_spinlock
This commit is contained in:
@@ -98,11 +98,16 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
|
||||
|
||||
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
|
||||
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
|
||||
obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o
|
||||
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
|
||||
|
||||
obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o
|
||||
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
|
||||
power5+-pmu.o power6-pmu.o power7-pmu.o
|
||||
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
|
||||
|
||||
obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o
|
||||
obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o
|
||||
|
||||
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
|
||||
|
||||
ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
|
||||
|
@@ -1808,7 +1808,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
|
||||
.icache_bsize = 64,
|
||||
.dcache_bsize = 64,
|
||||
.num_pmcs = 4,
|
||||
.oprofile_cpu_type = "ppc/e500", /* xxx - galak, e500mc? */
|
||||
.oprofile_cpu_type = "ppc/e500mc",
|
||||
.oprofile_type = PPC_OPROFILE_FSL_EMB,
|
||||
.cpu_setup = __setup_cpu_e500mc,
|
||||
.machine_check = machine_check_e500,
|
||||
|
129
arch/powerpc/kernel/e500-pmu.c
Normal file
129
arch/powerpc/kernel/e500-pmu.c
Normal file
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Performance counter support for e500 family processors.
|
||||
*
|
||||
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
|
||||
* Copyright 2010 Freescale Semiconductor, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
#include <linux/string.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/reg.h>
|
||||
#include <asm/cputable.h>
|
||||
|
||||
/*
|
||||
* Map of generic hardware event types to hardware events
|
||||
* Zero if unsupported
|
||||
*/
|
||||
static int e500_generic_events[] = {
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = 1,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = 2,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = 15,
|
||||
};
|
||||
|
||||
#define C(x) PERF_COUNT_HW_CACHE_##x
|
||||
|
||||
/*
|
||||
* Table of generalized cache-related events.
|
||||
* 0 means not supported, -1 means nonsensical, other values
|
||||
* are event codes.
|
||||
*/
|
||||
static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
|
||||
/*
|
||||
* D-cache misses are not split into read/write/prefetch;
|
||||
* use raw event 41.
|
||||
*/
|
||||
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
|
||||
[C(OP_READ)] = { 27, 0 },
|
||||
[C(OP_WRITE)] = { 28, 0 },
|
||||
[C(OP_PREFETCH)] = { 29, 0 },
|
||||
},
|
||||
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
|
||||
[C(OP_READ)] = { 2, 60 },
|
||||
[C(OP_WRITE)] = { -1, -1 },
|
||||
[C(OP_PREFETCH)] = { 0, 0 },
|
||||
},
|
||||
/*
|
||||
* Assuming LL means L2, it's not a good match for this model.
|
||||
* It allocates only on L1 castout or explicit prefetch, and
|
||||
* does not have separate read/write events (but it does have
|
||||
* separate instruction/data events).
|
||||
*/
|
||||
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
|
||||
[C(OP_READ)] = { 0, 0 },
|
||||
[C(OP_WRITE)] = { 0, 0 },
|
||||
[C(OP_PREFETCH)] = { 0, 0 },
|
||||
},
|
||||
/*
|
||||
* There are data/instruction MMU misses, but that's a miss on
|
||||
* the chip's internal level-one TLB which is probably not
|
||||
* what the user wants. Instead, unified level-two TLB misses
|
||||
* are reported here.
|
||||
*/
|
||||
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
|
||||
[C(OP_READ)] = { 26, 66 },
|
||||
[C(OP_WRITE)] = { -1, -1 },
|
||||
[C(OP_PREFETCH)] = { -1, -1 },
|
||||
},
|
||||
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
|
||||
[C(OP_READ)] = { 12, 15 },
|
||||
[C(OP_WRITE)] = { -1, -1 },
|
||||
[C(OP_PREFETCH)] = { -1, -1 },
|
||||
},
|
||||
};
|
||||
|
||||
static int num_events = 128;
|
||||
|
||||
/* Upper half of event id is PMLCb, for threshold events */
|
||||
static u64 e500_xlate_event(u64 event_id)
|
||||
{
|
||||
u32 event_low = (u32)event_id;
|
||||
u64 ret;
|
||||
|
||||
if (event_low >= num_events)
|
||||
return 0;
|
||||
|
||||
ret = FSL_EMB_EVENT_VALID;
|
||||
|
||||
if (event_low >= 76 && event_low <= 81) {
|
||||
ret |= FSL_EMB_EVENT_RESTRICTED;
|
||||
ret |= event_id &
|
||||
(FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH);
|
||||
} else if (event_id &
|
||||
(FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) {
|
||||
/* Threshold requested on non-threshold event */
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct fsl_emb_pmu e500_pmu = {
|
||||
.name = "e500 family",
|
||||
.n_counter = 4,
|
||||
.n_restricted = 2,
|
||||
.xlate_event = e500_xlate_event,
|
||||
.n_generic = ARRAY_SIZE(e500_generic_events),
|
||||
.generic_events = e500_generic_events,
|
||||
.cache_events = &e500_cache_events,
|
||||
};
|
||||
|
||||
static int init_e500_pmu(void)
|
||||
{
|
||||
if (!cur_cpu_spec->oprofile_cpu_type)
|
||||
return -ENODEV;
|
||||
|
||||
if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc"))
|
||||
num_events = 256;
|
||||
else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500"))
|
||||
return -ENODEV;
|
||||
|
||||
return register_fsl_emb_pmu(&e500_pmu);
|
||||
}
|
||||
|
||||
arch_initcall(init_e500_pmu);
|
@@ -219,7 +219,8 @@ generic_secondary_common_init:
|
||||
* physical cpu id in r24, we need to search the pacas to find
|
||||
* which logical id maps to our physical one.
|
||||
*/
|
||||
LOAD_REG_ADDR(r13, paca) /* Get base vaddr of paca array */
|
||||
LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
|
||||
ld r13,0(r13) /* Get base vaddr of paca array */
|
||||
li r5,0 /* logical cpu id */
|
||||
1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
|
||||
cmpw r6,r24 /* Compare to our id */
|
||||
@@ -536,7 +537,8 @@ _GLOBAL(pmac_secondary_start)
|
||||
mtmsrd r3 /* RI on */
|
||||
|
||||
/* Set up a paca value for this processor. */
|
||||
LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */
|
||||
LOAD_REG_ADDR(r4,paca) /* Load paca pointer */
|
||||
ld r4,0(r4) /* Get base vaddr of paca array */
|
||||
mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
|
||||
add r13,r13,r4 /* for this processor. */
|
||||
mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/
|
||||
@@ -615,6 +617,17 @@ _GLOBAL(start_secondary_prolog)
|
||||
std r3,0(r1) /* Zero the stack frame pointer */
|
||||
bl .start_secondary
|
||||
b .
|
||||
/*
|
||||
* Reset stack pointer and call start_secondary
|
||||
* to continue with online operation when woken up
|
||||
* from cede in cpu offline.
|
||||
*/
|
||||
_GLOBAL(start_secondary_resume)
|
||||
ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */
|
||||
li r3,0
|
||||
std r3,0(r1) /* Zero the stack frame pointer */
|
||||
bl .start_secondary
|
||||
b .
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@@ -9,11 +9,15 @@
|
||||
|
||||
#include <linux/threads.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/lmb.h>
|
||||
|
||||
#include <asm/firmware.h>
|
||||
#include <asm/lppaca.h>
|
||||
#include <asm/paca.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/iseries/lpar_map.h>
|
||||
#include <asm/iseries/hv_types.h>
|
||||
|
||||
/* This symbol is provided by the linker - let it fill in the paca
|
||||
* field correctly */
|
||||
@@ -70,37 +74,82 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = {
|
||||
* processors. The processor VPD array needs one entry per physical
|
||||
* processor (not thread).
|
||||
*/
|
||||
struct paca_struct paca[NR_CPUS];
|
||||
struct paca_struct *paca;
|
||||
EXPORT_SYMBOL(paca);
|
||||
|
||||
void __init initialise_pacas(void)
|
||||
{
|
||||
int cpu;
|
||||
struct paca_struct boot_paca;
|
||||
|
||||
/* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
|
||||
* of the TOC can be addressed using a single machine instruction.
|
||||
*/
|
||||
void __init initialise_paca(struct paca_struct *new_paca, int cpu)
|
||||
{
|
||||
/* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
|
||||
* of the TOC can be addressed using a single machine instruction.
|
||||
*/
|
||||
unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL;
|
||||
|
||||
/* Can't use for_each_*_cpu, as they aren't functional yet */
|
||||
for (cpu = 0; cpu < NR_CPUS; cpu++) {
|
||||
struct paca_struct *new_paca = &paca[cpu];
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
new_paca->lppaca_ptr = &lppaca[cpu];
|
||||
new_paca->lppaca_ptr = &lppaca[cpu];
|
||||
#else
|
||||
new_paca->kernel_pgd = swapper_pg_dir;
|
||||
new_paca->kernel_pgd = swapper_pg_dir;
|
||||
#endif
|
||||
new_paca->lock_token = 0x8000;
|
||||
new_paca->paca_index = cpu;
|
||||
new_paca->kernel_toc = kernel_toc;
|
||||
new_paca->kernelbase = (unsigned long) _stext;
|
||||
new_paca->kernel_msr = MSR_KERNEL;
|
||||
new_paca->hw_cpu_id = 0xffff;
|
||||
new_paca->__current = &init_task;
|
||||
new_paca->lock_token = 0x8000;
|
||||
new_paca->paca_index = cpu;
|
||||
new_paca->kernel_toc = kernel_toc;
|
||||
new_paca->kernelbase = (unsigned long) _stext;
|
||||
new_paca->kernel_msr = MSR_KERNEL;
|
||||
new_paca->hw_cpu_id = 0xffff;
|
||||
new_paca->__current = &init_task;
|
||||
#ifdef CONFIG_PPC_STD_MMU_64
|
||||
new_paca->slb_shadow_ptr = &slb_shadow[cpu];
|
||||
new_paca->slb_shadow_ptr = &slb_shadow[cpu];
|
||||
#endif /* CONFIG_PPC_STD_MMU_64 */
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static int __initdata paca_size;
|
||||
|
||||
void __init allocate_pacas(void)
|
||||
{
|
||||
int nr_cpus, cpu, limit;
|
||||
|
||||
/*
|
||||
* We can't take SLB misses on the paca, and we want to access them
|
||||
* in real mode, so allocate them within the RMA and also within
|
||||
* the first segment. On iSeries they must be within the area mapped
|
||||
* by the HV, which is HvPagesToMap * HVPAGESIZE bytes.
|
||||
*/
|
||||
limit = min(0x10000000ULL, lmb.rmo_size);
|
||||
if (firmware_has_feature(FW_FEATURE_ISERIES))
|
||||
limit = min(limit, HvPagesToMap * HVPAGESIZE);
|
||||
|
||||
nr_cpus = NR_CPUS;
|
||||
/* On iSeries we know we can never have more than 64 cpus */
|
||||
if (firmware_has_feature(FW_FEATURE_ISERIES))
|
||||
nr_cpus = min(64, nr_cpus);
|
||||
|
||||
paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus);
|
||||
|
||||
paca = __va(lmb_alloc_base(paca_size, PAGE_SIZE, limit));
|
||||
memset(paca, 0, paca_size);
|
||||
|
||||
printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
|
||||
paca_size, nr_cpus, paca);
|
||||
|
||||
/* Can't use for_each_*_cpu, as they aren't functional yet */
|
||||
for (cpu = 0; cpu < nr_cpus; cpu++)
|
||||
initialise_paca(&paca[cpu], cpu);
|
||||
}
|
||||
|
||||
/*
 * Give back the tail of the paca allocation that is not needed for the
 * number of possible cpus.  Does nothing when the page-aligned size
 * required is not smaller than what is currently allocated.
 */
void __init free_unused_pacas(void)
{
	int needed = PAGE_ALIGN(sizeof(struct paca_struct) *
				num_possible_cpus());
	int surplus = paca_size - needed;

	if (surplus <= 0)
		return;

	lmb_free(__pa(paca) + needed, surplus);

	printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
		surplus);

	paca_size = needed;
}
|
||||
|
654
arch/powerpc/kernel/perf_event_fsl_emb.c
Normal file
654
arch/powerpc/kernel/perf_event_fsl_emb.c
Normal file
@@ -0,0 +1,654 @@
|
||||
/*
|
||||
* Performance event support - Freescale Embedded Performance Monitor
|
||||
*
|
||||
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
|
||||
* Copyright 2010 Freescale Semiconductor, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <asm/reg_fsl_emb.h>
|
||||
#include <asm/pmc.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/firmware.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
struct cpu_hw_events {
|
||||
int n_events;
|
||||
int disabled;
|
||||
u8 pmcs_enabled;
|
||||
struct perf_event *event[MAX_HWEVENTS];
|
||||
};
|
||||
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
|
||||
|
||||
static struct fsl_emb_pmu *ppmu;
|
||||
|
||||
/* Number of perf_events counting hardware events */
|
||||
static atomic_t num_events;
|
||||
/* Used to avoid races in calling reserve/release_pmc_hardware */
|
||||
static DEFINE_MUTEX(pmc_reserve_mutex);
|
||||
|
||||
/*
 * A PMU interrupt that arrives while interrupts are soft-disabled is
 * handled as an NMI.  Only 64-bit builds have the soft-enable state in
 * pt_regs; on other builds this always returns 0.
 */
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
#ifdef __powerpc64__
	return regs->softe == 0;
#else
	return 0;
#endif
}
|
||||
|
||||
static void perf_event_interrupt(struct pt_regs *regs);
|
||||
|
||||
/*
|
||||
* Read one performance monitor counter (PMC).
|
||||
*/
|
||||
static unsigned long read_pmc(int idx)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
switch (idx) {
|
||||
case 0:
|
||||
val = mfpmr(PMRN_PMC0);
|
||||
break;
|
||||
case 1:
|
||||
val = mfpmr(PMRN_PMC1);
|
||||
break;
|
||||
case 2:
|
||||
val = mfpmr(PMRN_PMC2);
|
||||
break;
|
||||
case 3:
|
||||
val = mfpmr(PMRN_PMC3);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR "oops trying to read PMC%d\n", idx);
|
||||
val = 0;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write one PMC.
|
||||
*/
|
||||
static void write_pmc(int idx, unsigned long val)
|
||||
{
|
||||
switch (idx) {
|
||||
case 0:
|
||||
mtpmr(PMRN_PMC0, val);
|
||||
break;
|
||||
case 1:
|
||||
mtpmr(PMRN_PMC1, val);
|
||||
break;
|
||||
case 2:
|
||||
mtpmr(PMRN_PMC2, val);
|
||||
break;
|
||||
case 3:
|
||||
mtpmr(PMRN_PMC3, val);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR "oops trying to write PMC%d\n", idx);
|
||||
}
|
||||
|
||||
isync();
|
||||
}
|
||||
|
||||
/*
|
||||
* Write one local control A register
|
||||
*/
|
||||
static void write_pmlca(int idx, unsigned long val)
|
||||
{
|
||||
switch (idx) {
|
||||
case 0:
|
||||
mtpmr(PMRN_PMLCA0, val);
|
||||
break;
|
||||
case 1:
|
||||
mtpmr(PMRN_PMLCA1, val);
|
||||
break;
|
||||
case 2:
|
||||
mtpmr(PMRN_PMLCA2, val);
|
||||
break;
|
||||
case 3:
|
||||
mtpmr(PMRN_PMLCA3, val);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR "oops trying to write PMLCA%d\n", idx);
|
||||
}
|
||||
|
||||
isync();
|
||||
}
|
||||
|
||||
/*
|
||||
* Write one local control B register
|
||||
*/
|
||||
static void write_pmlcb(int idx, unsigned long val)
|
||||
{
|
||||
switch (idx) {
|
||||
case 0:
|
||||
mtpmr(PMRN_PMLCB0, val);
|
||||
break;
|
||||
case 1:
|
||||
mtpmr(PMRN_PMLCB1, val);
|
||||
break;
|
||||
case 2:
|
||||
mtpmr(PMRN_PMLCB2, val);
|
||||
break;
|
||||
case 3:
|
||||
mtpmr(PMRN_PMLCB3, val);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR "oops trying to write PMLCB%d\n", idx);
|
||||
}
|
||||
|
||||
isync();
|
||||
}
|
||||
|
||||
static void fsl_emb_pmu_read(struct perf_event *event)
|
||||
{
|
||||
s64 val, delta, prev;
|
||||
|
||||
/*
|
||||
* Performance monitor interrupts come even when interrupts
|
||||
* are soft-disabled, as long as interrupts are hard-enabled.
|
||||
* Therefore we treat them like NMIs.
|
||||
*/
|
||||
do {
|
||||
prev = atomic64_read(&event->hw.prev_count);
|
||||
barrier();
|
||||
val = read_pmc(event->hw.idx);
|
||||
} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
|
||||
|
||||
/* The counters are only 32 bits wide */
|
||||
delta = (val - prev) & 0xfffffffful;
|
||||
atomic64_add(delta, &event->count);
|
||||
atomic64_sub(delta, &event->hw.period_left);
|
||||
}
|
||||
|
||||
/*
|
||||
* Disable all events to prevent PMU interrupts and to allow
|
||||
* events to be added or removed.
|
||||
*/
|
||||
void hw_perf_disable(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuhw;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
cpuhw = &__get_cpu_var(cpu_hw_events);
|
||||
|
||||
if (!cpuhw->disabled) {
|
||||
cpuhw->disabled = 1;
|
||||
|
||||
/*
|
||||
* Check if we ever enabled the PMU on this cpu.
|
||||
*/
|
||||
if (!cpuhw->pmcs_enabled) {
|
||||
ppc_enable_pmcs();
|
||||
cpuhw->pmcs_enabled = 1;
|
||||
}
|
||||
|
||||
if (atomic_read(&num_events)) {
|
||||
/*
|
||||
* Set the 'freeze all counters' bit, and disable
|
||||
* interrupts. The barrier is to make sure the
|
||||
* mtpmr has been executed and the PMU has frozen
|
||||
* the events before we return.
|
||||
*/
|
||||
|
||||
mtpmr(PMRN_PMGC0, PMGC0_FAC);
|
||||
isync();
|
||||
}
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-enable all events if disable == 0.
|
||||
* If we were previously disabled and events were added, then
|
||||
* put the new config on the PMU.
|
||||
*/
|
||||
void hw_perf_enable(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuhw;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
cpuhw = &__get_cpu_var(cpu_hw_events);
|
||||
if (!cpuhw->disabled)
|
||||
goto out;
|
||||
|
||||
cpuhw->disabled = 0;
|
||||
ppc_set_pmu_inuse(cpuhw->n_events != 0);
|
||||
|
||||
if (cpuhw->n_events > 0) {
|
||||
mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
|
||||
isync();
|
||||
}
|
||||
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static int collect_events(struct perf_event *group, int max_count,
|
||||
struct perf_event *ctrs[])
|
||||
{
|
||||
int n = 0;
|
||||
struct perf_event *event;
|
||||
|
||||
if (!is_software_event(group)) {
|
||||
if (n >= max_count)
|
||||
return -1;
|
||||
ctrs[n] = group;
|
||||
n++;
|
||||
}
|
||||
list_for_each_entry(event, &group->sibling_list, group_entry) {
|
||||
if (!is_software_event(event) &&
|
||||
event->state != PERF_EVENT_STATE_OFF) {
|
||||
if (n >= max_count)
|
||||
return -1;
|
||||
ctrs[n] = event;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/* perf must be disabled, context locked on entry */
|
||||
static int fsl_emb_pmu_enable(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuhw;
|
||||
int ret = -EAGAIN;
|
||||
int num_counters = ppmu->n_counter;
|
||||
u64 val;
|
||||
int i;
|
||||
|
||||
cpuhw = &get_cpu_var(cpu_hw_events);
|
||||
|
||||
if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
|
||||
num_counters = ppmu->n_restricted;
|
||||
|
||||
/*
|
||||
* Allocate counters from top-down, so that restricted-capable
|
||||
* counters are kept free as long as possible.
|
||||
*/
|
||||
for (i = num_counters - 1; i >= 0; i--) {
|
||||
if (cpuhw->event[i])
|
||||
continue;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (i < 0)
|
||||
goto out;
|
||||
|
||||
event->hw.idx = i;
|
||||
cpuhw->event[i] = event;
|
||||
++cpuhw->n_events;
|
||||
|
||||
val = 0;
|
||||
if (event->hw.sample_period) {
|
||||
s64 left = atomic64_read(&event->hw.period_left);
|
||||
if (left < 0x80000000L)
|
||||
val = 0x80000000L - left;
|
||||
}
|
||||
atomic64_set(&event->hw.prev_count, val);
|
||||
write_pmc(i, val);
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
write_pmlcb(i, event->hw.config >> 32);
|
||||
write_pmlca(i, event->hw.config_base);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
put_cpu_var(cpu_hw_events);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* perf must be disabled, context locked on entry */
|
||||
static void fsl_emb_pmu_disable(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuhw;
|
||||
int i = event->hw.idx;
|
||||
|
||||
if (i < 0)
|
||||
goto out;
|
||||
|
||||
fsl_emb_pmu_read(event);
|
||||
|
||||
cpuhw = &get_cpu_var(cpu_hw_events);
|
||||
|
||||
WARN_ON(event != cpuhw->event[event->hw.idx]);
|
||||
|
||||
write_pmlca(i, 0);
|
||||
write_pmlcb(i, 0);
|
||||
write_pmc(i, 0);
|
||||
|
||||
cpuhw->event[i] = NULL;
|
||||
event->hw.idx = -1;
|
||||
|
||||
/*
|
||||
* TODO: if at least one restricted event exists, and we
|
||||
* just freed up a non-restricted-capable counter, and
|
||||
* there is a restricted-capable counter occupied by
|
||||
* a non-restricted event, migrate that event to the
|
||||
* vacated counter.
|
||||
*/
|
||||
|
||||
cpuhw->n_events--;
|
||||
|
||||
out:
|
||||
put_cpu_var(cpu_hw_events);
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-enable interrupts on a event after they were throttled
|
||||
* because they were coming too fast.
|
||||
*
|
||||
* Context is locked on entry, but perf is not disabled.
|
||||
*/
|
||||
static void fsl_emb_pmu_unthrottle(struct perf_event *event)
|
||||
{
|
||||
s64 val, left;
|
||||
unsigned long flags;
|
||||
|
||||
if (event->hw.idx < 0 || !event->hw.sample_period)
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
perf_disable();
|
||||
fsl_emb_pmu_read(event);
|
||||
left = event->hw.sample_period;
|
||||
event->hw.last_period = left;
|
||||
val = 0;
|
||||
if (left < 0x80000000L)
|
||||
val = 0x80000000L - left;
|
||||
write_pmc(event->hw.idx, val);
|
||||
atomic64_set(&event->hw.prev_count, val);
|
||||
atomic64_set(&event->hw.period_left, left);
|
||||
perf_event_update_userpage(event);
|
||||
perf_enable();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static struct pmu fsl_emb_pmu = {
|
||||
.enable = fsl_emb_pmu_enable,
|
||||
.disable = fsl_emb_pmu_disable,
|
||||
.read = fsl_emb_pmu_read,
|
||||
.unthrottle = fsl_emb_pmu_unthrottle,
|
||||
};
|
||||
|
||||
/*
|
||||
* Release the PMU if this is the last perf_event.
|
||||
*/
|
||||
static void hw_perf_event_destroy(struct perf_event *event)
|
||||
{
|
||||
if (!atomic_add_unless(&num_events, -1, 1)) {
|
||||
mutex_lock(&pmc_reserve_mutex);
|
||||
if (atomic_dec_return(&num_events) == 0)
|
||||
release_pmc_hardware();
|
||||
mutex_unlock(&pmc_reserve_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Translate a generic cache event_id config to a raw event_id code.
|
||||
*/
|
||||
static int hw_perf_cache_event(u64 config, u64 *eventp)
|
||||
{
|
||||
unsigned long type, op, result;
|
||||
int ev;
|
||||
|
||||
if (!ppmu->cache_events)
|
||||
return -EINVAL;
|
||||
|
||||
/* unpack config */
|
||||
type = config & 0xff;
|
||||
op = (config >> 8) & 0xff;
|
||||
result = (config >> 16) & 0xff;
|
||||
|
||||
if (type >= PERF_COUNT_HW_CACHE_MAX ||
|
||||
op >= PERF_COUNT_HW_CACHE_OP_MAX ||
|
||||
result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
ev = (*ppmu->cache_events)[type][op][result];
|
||||
if (ev == 0)
|
||||
return -EOPNOTSUPP;
|
||||
if (ev == -1)
|
||||
return -EINVAL;
|
||||
*eventp = ev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct pmu *hw_perf_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 ev;
|
||||
struct perf_event *events[MAX_HWEVENTS];
|
||||
int n;
|
||||
int err;
|
||||
int num_restricted;
|
||||
int i;
|
||||
|
||||
switch (event->attr.type) {
|
||||
case PERF_TYPE_HARDWARE:
|
||||
ev = event->attr.config;
|
||||
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
ev = ppmu->generic_events[ev];
|
||||
break;
|
||||
|
||||
case PERF_TYPE_HW_CACHE:
|
||||
err = hw_perf_cache_event(event->attr.config, &ev);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
break;
|
||||
|
||||
case PERF_TYPE_RAW:
|
||||
ev = event->attr.config;
|
||||
break;
|
||||
|
||||
default:
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
event->hw.config = ppmu->xlate_event(ev);
|
||||
if (!(event->hw.config & FSL_EMB_EVENT_VALID))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/*
|
||||
* If this is in a group, check if it can go on with all the
|
||||
* other hardware events in the group. We assume the event
|
||||
* hasn't been linked into its leader's sibling list at this point.
|
||||
*/
|
||||
n = 0;
|
||||
if (event->group_leader != event) {
|
||||
n = collect_events(event->group_leader,
|
||||
ppmu->n_counter - 1, events);
|
||||
if (n < 0)
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
|
||||
num_restricted = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED)
|
||||
num_restricted++;
|
||||
}
|
||||
|
||||
if (num_restricted >= ppmu->n_restricted)
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
event->hw.idx = -1;
|
||||
|
||||
event->hw.config_base = PMLCA_CE | PMLCA_FCM1 |
|
||||
(u32)((ev << 16) & PMLCA_EVENT_MASK);
|
||||
|
||||
if (event->attr.exclude_user)
|
||||
event->hw.config_base |= PMLCA_FCU;
|
||||
if (event->attr.exclude_kernel)
|
||||
event->hw.config_base |= PMLCA_FCS;
|
||||
if (event->attr.exclude_idle)
|
||||
return ERR_PTR(-ENOTSUPP);
|
||||
|
||||
event->hw.last_period = event->hw.sample_period;
|
||||
atomic64_set(&event->hw.period_left, event->hw.last_period);
|
||||
|
||||
/*
|
||||
* See if we need to reserve the PMU.
|
||||
* If no events are currently in use, then we have to take a
|
||||
* mutex to ensure that we don't race with another task doing
|
||||
* reserve_pmc_hardware or release_pmc_hardware.
|
||||
*/
|
||||
err = 0;
|
||||
if (!atomic_inc_not_zero(&num_events)) {
|
||||
mutex_lock(&pmc_reserve_mutex);
|
||||
if (atomic_read(&num_events) == 0 &&
|
||||
reserve_pmc_hardware(perf_event_interrupt))
|
||||
err = -EBUSY;
|
||||
else
|
||||
atomic_inc(&num_events);
|
||||
mutex_unlock(&pmc_reserve_mutex);
|
||||
|
||||
mtpmr(PMRN_PMGC0, PMGC0_FAC);
|
||||
isync();
|
||||
}
|
||||
event->destroy = hw_perf_event_destroy;
|
||||
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
return &fsl_emb_pmu;
|
||||
}
|
||||
|
||||
/*
|
||||
* A counter has overflowed; update its count and record
|
||||
* things if requested. Note that interrupts are hard-disabled
|
||||
* here so there is no possibility of being interrupted.
|
||||
*/
|
||||
static void record_and_restart(struct perf_event *event, unsigned long val,
|
||||
struct pt_regs *regs, int nmi)
|
||||
{
|
||||
u64 period = event->hw.sample_period;
|
||||
s64 prev, delta, left;
|
||||
int record = 0;
|
||||
|
||||
/* we don't have to worry about interrupts here */
|
||||
prev = atomic64_read(&event->hw.prev_count);
|
||||
delta = (val - prev) & 0xfffffffful;
|
||||
atomic64_add(delta, &event->count);
|
||||
|
||||
/*
|
||||
* See if the total period for this event has expired,
|
||||
* and update for the next period.
|
||||
*/
|
||||
val = 0;
|
||||
left = atomic64_read(&event->hw.period_left) - delta;
|
||||
if (period) {
|
||||
if (left <= 0) {
|
||||
left += period;
|
||||
if (left <= 0)
|
||||
left = period;
|
||||
record = 1;
|
||||
}
|
||||
if (left < 0x80000000LL)
|
||||
val = 0x80000000LL - left;
|
||||
}
|
||||
|
||||
/*
|
||||
* Finally record data if requested.
|
||||
*/
|
||||
if (record) {
|
||||
struct perf_sample_data data = {
|
||||
.period = event->hw.last_period,
|
||||
};
|
||||
|
||||
if (perf_event_overflow(event, nmi, &data, regs)) {
|
||||
/*
|
||||
* Interrupts are coming too fast - throttle them
|
||||
* by setting the event to 0, so it will be
|
||||
* at least 2^30 cycles until the next interrupt
|
||||
* (assuming each event counts at most 2 counts
|
||||
* per cycle).
|
||||
*/
|
||||
val = 0;
|
||||
left = ~0ULL >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
write_pmc(event->hw.idx, val);
|
||||
atomic64_set(&event->hw.prev_count, val);
|
||||
atomic64_set(&event->hw.period_left, left);
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static void perf_event_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
int i;
|
||||
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
|
||||
struct perf_event *event;
|
||||
unsigned long val;
|
||||
int found = 0;
|
||||
int nmi;
|
||||
|
||||
nmi = perf_intr_is_nmi(regs);
|
||||
if (nmi)
|
||||
nmi_enter();
|
||||
else
|
||||
irq_enter();
|
||||
|
||||
for (i = 0; i < ppmu->n_counter; ++i) {
|
||||
event = cpuhw->event[i];
|
||||
|
||||
val = read_pmc(i);
|
||||
if ((int)val < 0) {
|
||||
if (event) {
|
||||
/* event has overflowed */
|
||||
found = 1;
|
||||
record_and_restart(event, val, regs, nmi);
|
||||
} else {
|
||||
/*
|
||||
* Disabled counter is negative,
|
||||
* reset it just in case.
|
||||
*/
|
||||
write_pmc(i, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* PMM will keep counters frozen until we return from the interrupt. */
|
||||
mtmsr(mfmsr() | MSR_PMM);
|
||||
mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
|
||||
isync();
|
||||
|
||||
if (nmi)
|
||||
nmi_exit();
|
||||
else
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
void hw_perf_event_setup(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
memset(cpuhw, 0, sizeof(*cpuhw));
|
||||
}
|
||||
|
||||
int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
|
||||
{
|
||||
if (ppmu)
|
||||
return -EBUSY; /* something's already registered */
|
||||
|
||||
ppmu = pmu;
|
||||
pr_info("%s performance monitor hardware support registered\n",
|
||||
pmu->name);
|
||||
|
||||
return 0;
|
||||
}
|
@@ -43,6 +43,7 @@
|
||||
#include <asm/smp.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/paca.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pci.h>
|
||||
#include <asm/iommu.h>
|
||||
@@ -721,6 +722,8 @@ void __init early_init_devtree(void *params)
|
||||
* FIXME .. and the initrd too? */
|
||||
move_device_tree();
|
||||
|
||||
allocate_pacas();
|
||||
|
||||
DBG("Scanning CPUs ...\n");
|
||||
|
||||
/* Retrieve CPU related information from the flat tree
|
||||
|
@@ -940,7 +940,7 @@ static int del_instruction_bp(struct task_struct *child, int slot)
|
||||
{
|
||||
switch (slot) {
|
||||
case 1:
|
||||
if (child->thread.iac1 == 0)
|
||||
if ((child->thread.dbcr0 & DBCR0_IAC1) == 0)
|
||||
return -ENOENT;
|
||||
|
||||
if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
|
||||
@@ -952,7 +952,7 @@ static int del_instruction_bp(struct task_struct *child, int slot)
|
||||
child->thread.dbcr0 &= ~DBCR0_IAC1;
|
||||
break;
|
||||
case 2:
|
||||
if (child->thread.iac2 == 0)
|
||||
if ((child->thread.dbcr0 & DBCR0_IAC2) == 0)
|
||||
return -ENOENT;
|
||||
|
||||
if (dbcr_iac_range(child) & DBCR_IAC12MODE)
|
||||
@@ -963,7 +963,7 @@ static int del_instruction_bp(struct task_struct *child, int slot)
|
||||
break;
|
||||
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
|
||||
case 3:
|
||||
if (child->thread.iac3 == 0)
|
||||
if ((child->thread.dbcr0 & DBCR0_IAC3) == 0)
|
||||
return -ENOENT;
|
||||
|
||||
if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
|
||||
@@ -975,7 +975,7 @@ static int del_instruction_bp(struct task_struct *child, int slot)
|
||||
child->thread.dbcr0 &= ~DBCR0_IAC3;
|
||||
break;
|
||||
case 4:
|
||||
if (child->thread.iac4 == 0)
|
||||
if ((child->thread.dbcr0 & DBCR0_IAC4) == 0)
|
||||
return -ENOENT;
|
||||
|
||||
if (dbcr_iac_range(child) & DBCR_IAC34MODE)
|
||||
@@ -1054,7 +1054,7 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
|
||||
static int del_dac(struct task_struct *child, int slot)
|
||||
{
|
||||
if (slot == 1) {
|
||||
if (child->thread.dac1 == 0)
|
||||
if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
|
||||
return -ENOENT;
|
||||
|
||||
child->thread.dac1 = 0;
|
||||
@@ -1070,7 +1070,7 @@ static int del_dac(struct task_struct *child, int slot)
|
||||
child->thread.dvc1 = 0;
|
||||
#endif
|
||||
} else if (slot == 2) {
|
||||
if (child->thread.dac1 == 0)
|
||||
if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
|
||||
return -ENOENT;
|
||||
|
||||
#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
|
||||
|
@@ -36,6 +36,7 @@
|
||||
#include <linux/lmb.h>
|
||||
#include <linux/of_platform.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/paca.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/vdso_datapage.h>
|
||||
@@ -493,6 +494,8 @@ void __init smp_setup_cpu_maps(void)
|
||||
* here will have to be reworked
|
||||
*/
|
||||
cpu_init_thread_core_maps(nthreads);
|
||||
|
||||
free_unused_pacas();
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
|
@@ -144,9 +144,9 @@ early_param("smt-enabled", early_smt_enabled);
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/* Put the paca pointer into r13 and SPRG_PACA */
|
||||
void __init setup_paca(int cpu)
|
||||
static void __init setup_paca(struct paca_struct *new_paca)
|
||||
{
|
||||
local_paca = &paca[cpu];
|
||||
local_paca = new_paca;
|
||||
mtspr(SPRN_SPRG_PACA, local_paca);
|
||||
#ifdef CONFIG_PPC_BOOK3E
|
||||
mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
|
||||
@@ -176,14 +176,12 @@ void __init early_setup(unsigned long dt_ptr)
|
||||
{
|
||||
/* -------- printk is _NOT_ safe to use here ! ------- */
|
||||
|
||||
/* Fill in any unititialised pacas */
|
||||
initialise_pacas();
|
||||
|
||||
/* Identify CPU type */
|
||||
identify_cpu(0, mfspr(SPRN_PVR));
|
||||
|
||||
/* Assume we're on cpu 0 for now. Don't write to the paca yet! */
|
||||
setup_paca(0);
|
||||
initialise_paca(&boot_paca, 0);
|
||||
setup_paca(&boot_paca);
|
||||
|
||||
/* Initialize lockdep early or else spinlocks will blow */
|
||||
lockdep_init();
|
||||
@@ -203,7 +201,7 @@ void __init early_setup(unsigned long dt_ptr)
|
||||
early_init_devtree(__va(dt_ptr));
|
||||
|
||||
/* Now we know the logical id of our boot cpu, setup the paca. */
|
||||
setup_paca(boot_cpuid);
|
||||
setup_paca(&paca[boot_cpuid]);
|
||||
|
||||
/* Fix up paca fields required for the boot cpu */
|
||||
get_paca()->cpu_start = 1;
|
||||
|
Reference in New Issue
Block a user