[IA64] implement ticket locks for Itanium

Back in January 2008 Nick Piggin implemented "ticket" spinlocks
for X86 (See commit 314cdbefd1).

IA64 implementation has a couple of differences because of the
available atomic operations ... e.g. we have no fetchadd2 instruction
that operates on a 16-bit quantity so we make ticket locks use
a 32-bit word for each of the current ticket and now-serving values.

Performance on uncontended locks is about 8% worse than the previous
implementation, but this seems a good trade for determinism in the
contended case. Performance impact on macro-level benchmarks is in
the noise.

Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
Tony Luck
2009-09-25 08:42:16 -07:00
parent 53cddfcc0e
commit 2c86963b09
6 changed files with 91 additions and 217 deletions

View File

@@ -1130,95 +1130,6 @@ SET_REG(b5);
#endif /* CONFIG_IA64_BRL_EMU */
#ifdef CONFIG_SMP
/*
* This routine handles spinlock contention. It uses a non-standard calling
* convention to avoid converting leaf routines into interior routines. Because
* of this special convention, there are several restrictions:
*
* - do not use gp relative variables, this code is called from the kernel
* and from modules, r1 is undefined.
* - do not use stacked registers, the caller owns them.
* - do not use the scratch stack space, the caller owns it.
* - do not use any registers other than the ones listed below
*
* Inputs:
* ar.pfs - saved CFM of caller
* ar.ccv - 0 (and available for use)
* r27 - flags from spin_lock_irqsave or 0. Must be preserved.
* r28 - available for use.
* r29 - available for use.
* r30 - available for use.
* r31 - address of lock, available for use.
* b6 - return address
* p14 - available for use.
* p15 - used to track flag status.
*
* If you patch this code to use more registers, do not forget to update
* the clobber lists for spin_lock() in arch/ia64/include/asm/spinlock.h.
*/
#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
.prologue
.save ar.pfs, r0 // this code effectively has a zero frame size
.save rp, r28
.body
nop 0
tbit.nz p15,p0=r27,IA64_PSR_I_BIT
.restore sp // pop existing prologue after next insn
mov b6 = r28
.prologue
.save ar.pfs, r0
.altrp b6
.body
;;
(p15) ssm psr.i // reenable interrupts if they were on
// DavidM says that srlz.d is slow and is not required in this case
.wait:
// exponential backoff, kdb, lockmeter etc. go in here
hint @pause
ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
nop 0
;;
cmp4.ne p14,p0=r30,r0
(p14) br.cond.sptk.few .wait
(p15) rsm psr.i // disable interrupts if we reenabled them
br.cond.sptk.few b6 // lock is now free, try to acquire
.global ia64_spinlock_contention_pre3_4_end // for kernprof
ia64_spinlock_contention_pre3_4_end:
END(ia64_spinlock_contention_pre3_4)
#else
GLOBAL_ENTRY(ia64_spinlock_contention)
.prologue
.altrp b6
.body
tbit.nz p15,p0=r27,IA64_PSR_I_BIT
;;
.wait:
(p15) ssm psr.i // reenable interrupts if they were on
// DavidM says that srlz.d is slow and is not required in this case
.wait2:
// exponential backoff, kdb, lockmeter etc. go in here
hint @pause
ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
;;
cmp4.ne p14,p0=r30,r0
mov r30 = 1
(p14) br.cond.sptk.few .wait2
(p15) rsm psr.i // disable interrupts if we reenabled them
;;
cmpxchg4.acq r30=[r31], r30, ar.ccv
;;
cmp4.ne p14,p0=r0,r30
(p14) br.cond.sptk.few .wait
br.ret.sptk.many b6 // lock is now taken
END(ia64_spinlock_contention)
#endif
#ifdef CONFIG_HOTPLUG_CPU
GLOBAL_ENTRY(ia64_jump_to_sal)