MIPS: Optimize spinlocks.
The current locking mechanism uses an ll/sc sequence to release a spinlock. This is slower than a wmb() followed by a store to unlock. The branching forward to .subsection 2 on sc failure slows down the contended case. So we get rid of that part too. Since we are now working on naturally aligned u16 values, we can get rid of a masking operation as the LHU already does the right thing. The ANDI instructions are reversed for better scheduling on multi-issue CPUs. On a 12-CPU 750MHz Octeon cn5750 this patch improves ipv4 UDP packet forwarding rates from 3.58*10^6 PPS to 3.99*10^6 PPS, or about 11%. Signed-off-by: David Daney <ddaney@caviumnetworks.com> To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/937/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
This commit is contained in:
committed by
Ralf Baechle
parent
e275ed5ee9
commit
500c2e1fdb
@@ -168,8 +168,14 @@
|
|||||||
|
|
||||||
#ifdef CONFIG_CPU_CAVIUM_OCTEON
|
#ifdef CONFIG_CPU_CAVIUM_OCTEON
|
||||||
#define smp_mb__before_llsc() smp_wmb()
|
#define smp_mb__before_llsc() smp_wmb()
|
||||||
|
/* Cause previous writes to become visible on all CPUs as soon as possible */
|
||||||
|
#define nudge_writes() __asm__ __volatile__(".set push\n\t" \
|
||||||
|
".set arch=octeon\n\t" \
|
||||||
|
"syncw\n\t" \
|
||||||
|
".set pop" : : : "memory")
|
||||||
#else
|
#else
|
||||||
#define smp_mb__before_llsc() smp_llsc_mb()
|
#define smp_mb__before_llsc() smp_llsc_mb()
|
||||||
|
#define nudge_writes() mb()
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* __ASM_BARRIER_H */
|
#endif /* __ASM_BARRIER_H */
|
||||||
|
@@ -36,9 +36,9 @@
|
|||||||
|
|
||||||
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
unsigned int counters = ACCESS_ONCE(lock->lock);
|
u32 counters = ACCESS_ONCE(lock->lock);
|
||||||
|
|
||||||
return ((counters >> 14) ^ counters) & 0x1fff;
|
return ((counters >> 16) ^ counters) & 0xffff;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
|
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
|
||||||
@@ -47,9 +47,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
|||||||
|
|
||||||
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
unsigned int counters = ACCESS_ONCE(lock->lock);
|
u32 counters = ACCESS_ONCE(lock->lock);
|
||||||
|
|
||||||
return (((counters >> 14) - counters) & 0x1fff) > 1;
|
return (((counters >> 16) - counters) & 0xffff) > 1;
|
||||||
}
|
}
|
||||||
#define arch_spin_is_contended arch_spin_is_contended
|
#define arch_spin_is_contended arch_spin_is_contended
|
||||||
|
|
||||||
@@ -57,6 +57,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
|
|||||||
{
|
{
|
||||||
int my_ticket;
|
int my_ticket;
|
||||||
int tmp;
|
int tmp;
|
||||||
|
int inc = 0x10000;
|
||||||
|
|
||||||
if (R10000_LLSC_WAR) {
|
if (R10000_LLSC_WAR) {
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
@@ -64,25 +65,24 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
|
|||||||
" .set noreorder \n"
|
" .set noreorder \n"
|
||||||
" \n"
|
" \n"
|
||||||
"1: ll %[ticket], %[ticket_ptr] \n"
|
"1: ll %[ticket], %[ticket_ptr] \n"
|
||||||
" addiu %[my_ticket], %[ticket], 0x4000 \n"
|
" addu %[my_ticket], %[ticket], %[inc] \n"
|
||||||
" sc %[my_ticket], %[ticket_ptr] \n"
|
" sc %[my_ticket], %[ticket_ptr] \n"
|
||||||
" beqzl %[my_ticket], 1b \n"
|
" beqzl %[my_ticket], 1b \n"
|
||||||
" nop \n"
|
" nop \n"
|
||||||
" srl %[my_ticket], %[ticket], 14 \n"
|
" srl %[my_ticket], %[ticket], 16 \n"
|
||||||
" andi %[my_ticket], %[my_ticket], 0x1fff \n"
|
" andi %[ticket], %[ticket], 0xffff \n"
|
||||||
" andi %[ticket], %[ticket], 0x1fff \n"
|
" andi %[my_ticket], %[my_ticket], 0xffff \n"
|
||||||
" bne %[ticket], %[my_ticket], 4f \n"
|
" bne %[ticket], %[my_ticket], 4f \n"
|
||||||
" subu %[ticket], %[my_ticket], %[ticket] \n"
|
" subu %[ticket], %[my_ticket], %[ticket] \n"
|
||||||
"2: \n"
|
"2: \n"
|
||||||
" .subsection 2 \n"
|
" .subsection 2 \n"
|
||||||
"4: andi %[ticket], %[ticket], 0x1fff \n"
|
"4: andi %[ticket], %[ticket], 0xffff \n"
|
||||||
" sll %[ticket], 5 \n"
|
" sll %[ticket], 5 \n"
|
||||||
" \n"
|
" \n"
|
||||||
"6: bnez %[ticket], 6b \n"
|
"6: bnez %[ticket], 6b \n"
|
||||||
" subu %[ticket], 1 \n"
|
" subu %[ticket], 1 \n"
|
||||||
" \n"
|
" \n"
|
||||||
" lw %[ticket], %[ticket_ptr] \n"
|
" lhu %[ticket], %[serving_now_ptr] \n"
|
||||||
" andi %[ticket], %[ticket], 0x1fff \n"
|
|
||||||
" beq %[ticket], %[my_ticket], 2b \n"
|
" beq %[ticket], %[my_ticket], 2b \n"
|
||||||
" subu %[ticket], %[my_ticket], %[ticket] \n"
|
" subu %[ticket], %[my_ticket], %[ticket] \n"
|
||||||
" b 4b \n"
|
" b 4b \n"
|
||||||
@@ -90,36 +90,33 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
|
|||||||
" .previous \n"
|
" .previous \n"
|
||||||
" .set pop \n"
|
" .set pop \n"
|
||||||
: [ticket_ptr] "+m" (lock->lock),
|
: [ticket_ptr] "+m" (lock->lock),
|
||||||
|
[serving_now_ptr] "+m" (lock->h.serving_now),
|
||||||
[ticket] "=&r" (tmp),
|
[ticket] "=&r" (tmp),
|
||||||
[my_ticket] "=&r" (my_ticket));
|
[my_ticket] "=&r" (my_ticket)
|
||||||
|
: [inc] "r" (inc));
|
||||||
} else {
|
} else {
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
" .set push # arch_spin_lock \n"
|
" .set push # arch_spin_lock \n"
|
||||||
" .set noreorder \n"
|
" .set noreorder \n"
|
||||||
" \n"
|
" \n"
|
||||||
" ll %[ticket], %[ticket_ptr] \n"
|
"1: ll %[ticket], %[ticket_ptr] \n"
|
||||||
"1: addiu %[my_ticket], %[ticket], 0x4000 \n"
|
" addu %[my_ticket], %[ticket], %[inc] \n"
|
||||||
" sc %[my_ticket], %[ticket_ptr] \n"
|
" sc %[my_ticket], %[ticket_ptr] \n"
|
||||||
" beqz %[my_ticket], 3f \n"
|
" beqz %[my_ticket], 1b \n"
|
||||||
" nop \n"
|
" srl %[my_ticket], %[ticket], 16 \n"
|
||||||
" srl %[my_ticket], %[ticket], 14 \n"
|
" andi %[ticket], %[ticket], 0xffff \n"
|
||||||
" andi %[my_ticket], %[my_ticket], 0x1fff \n"
|
" andi %[my_ticket], %[my_ticket], 0xffff \n"
|
||||||
" andi %[ticket], %[ticket], 0x1fff \n"
|
|
||||||
" bne %[ticket], %[my_ticket], 4f \n"
|
" bne %[ticket], %[my_ticket], 4f \n"
|
||||||
" subu %[ticket], %[my_ticket], %[ticket] \n"
|
" subu %[ticket], %[my_ticket], %[ticket] \n"
|
||||||
"2: \n"
|
"2: \n"
|
||||||
" .subsection 2 \n"
|
" .subsection 2 \n"
|
||||||
"3: b 1b \n"
|
|
||||||
" ll %[ticket], %[ticket_ptr] \n"
|
|
||||||
" \n"
|
|
||||||
"4: andi %[ticket], %[ticket], 0x1fff \n"
|
"4: andi %[ticket], %[ticket], 0x1fff \n"
|
||||||
" sll %[ticket], 5 \n"
|
" sll %[ticket], 5 \n"
|
||||||
" \n"
|
" \n"
|
||||||
"6: bnez %[ticket], 6b \n"
|
"6: bnez %[ticket], 6b \n"
|
||||||
" subu %[ticket], 1 \n"
|
" subu %[ticket], 1 \n"
|
||||||
" \n"
|
" \n"
|
||||||
" lw %[ticket], %[ticket_ptr] \n"
|
" lhu %[ticket], %[serving_now_ptr] \n"
|
||||||
" andi %[ticket], %[ticket], 0x1fff \n"
|
|
||||||
" beq %[ticket], %[my_ticket], 2b \n"
|
" beq %[ticket], %[my_ticket], 2b \n"
|
||||||
" subu %[ticket], %[my_ticket], %[ticket] \n"
|
" subu %[ticket], %[my_ticket], %[ticket] \n"
|
||||||
" b 4b \n"
|
" b 4b \n"
|
||||||
@@ -127,8 +124,10 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
|
|||||||
" .previous \n"
|
" .previous \n"
|
||||||
" .set pop \n"
|
" .set pop \n"
|
||||||
: [ticket_ptr] "+m" (lock->lock),
|
: [ticket_ptr] "+m" (lock->lock),
|
||||||
|
[serving_now_ptr] "+m" (lock->h.serving_now),
|
||||||
[ticket] "=&r" (tmp),
|
[ticket] "=&r" (tmp),
|
||||||
[my_ticket] "=&r" (my_ticket));
|
[my_ticket] "=&r" (my_ticket)
|
||||||
|
: [inc] "r" (inc));
|
||||||
}
|
}
|
||||||
|
|
||||||
smp_llsc_mb();
|
smp_llsc_mb();
|
||||||
@@ -136,47 +135,16 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
|
|||||||
|
|
||||||
static inline void arch_spin_unlock(arch_spinlock_t *lock)
|
static inline void arch_spin_unlock(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
int tmp;
|
unsigned int serving_now = lock->h.serving_now + 1;
|
||||||
|
wmb();
|
||||||
smp_mb__before_llsc();
|
lock->h.serving_now = (u16)serving_now;
|
||||||
|
nudge_writes();
|
||||||
if (R10000_LLSC_WAR) {
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
" # arch_spin_unlock \n"
|
|
||||||
"1: ll %[ticket], %[ticket_ptr] \n"
|
|
||||||
" addiu %[ticket], %[ticket], 1 \n"
|
|
||||||
" ori %[ticket], %[ticket], 0x2000 \n"
|
|
||||||
" xori %[ticket], %[ticket], 0x2000 \n"
|
|
||||||
" sc %[ticket], %[ticket_ptr] \n"
|
|
||||||
" beqzl %[ticket], 1b \n"
|
|
||||||
: [ticket_ptr] "+m" (lock->lock),
|
|
||||||
[ticket] "=&r" (tmp));
|
|
||||||
} else {
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
" .set push # arch_spin_unlock \n"
|
|
||||||
" .set noreorder \n"
|
|
||||||
" \n"
|
|
||||||
" ll %[ticket], %[ticket_ptr] \n"
|
|
||||||
"1: addiu %[ticket], %[ticket], 1 \n"
|
|
||||||
" ori %[ticket], %[ticket], 0x2000 \n"
|
|
||||||
" xori %[ticket], %[ticket], 0x2000 \n"
|
|
||||||
" sc %[ticket], %[ticket_ptr] \n"
|
|
||||||
" beqz %[ticket], 2f \n"
|
|
||||||
" nop \n"
|
|
||||||
" \n"
|
|
||||||
" .subsection 2 \n"
|
|
||||||
"2: b 1b \n"
|
|
||||||
" ll %[ticket], %[ticket_ptr] \n"
|
|
||||||
" .previous \n"
|
|
||||||
" .set pop \n"
|
|
||||||
: [ticket_ptr] "+m" (lock->lock),
|
|
||||||
[ticket] "=&r" (tmp));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
|
static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
int tmp, tmp2, tmp3;
|
int tmp, tmp2, tmp3;
|
||||||
|
int inc = 0x10000;
|
||||||
|
|
||||||
if (R10000_LLSC_WAR) {
|
if (R10000_LLSC_WAR) {
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
@@ -184,11 +152,11 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
|
|||||||
" .set noreorder \n"
|
" .set noreorder \n"
|
||||||
" \n"
|
" \n"
|
||||||
"1: ll %[ticket], %[ticket_ptr] \n"
|
"1: ll %[ticket], %[ticket_ptr] \n"
|
||||||
" srl %[my_ticket], %[ticket], 14 \n"
|
" srl %[my_ticket], %[ticket], 16 \n"
|
||||||
" andi %[my_ticket], %[my_ticket], 0x1fff \n"
|
" andi %[my_ticket], %[my_ticket], 0xffff \n"
|
||||||
" andi %[now_serving], %[ticket], 0x1fff \n"
|
" andi %[now_serving], %[ticket], 0xffff \n"
|
||||||
" bne %[my_ticket], %[now_serving], 3f \n"
|
" bne %[my_ticket], %[now_serving], 3f \n"
|
||||||
" addiu %[ticket], %[ticket], 0x4000 \n"
|
" addu %[ticket], %[ticket], %[inc] \n"
|
||||||
" sc %[ticket], %[ticket_ptr] \n"
|
" sc %[ticket], %[ticket_ptr] \n"
|
||||||
" beqzl %[ticket], 1b \n"
|
" beqzl %[ticket], 1b \n"
|
||||||
" li %[ticket], 1 \n"
|
" li %[ticket], 1 \n"
|
||||||
@@ -201,33 +169,33 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
|
|||||||
: [ticket_ptr] "+m" (lock->lock),
|
: [ticket_ptr] "+m" (lock->lock),
|
||||||
[ticket] "=&r" (tmp),
|
[ticket] "=&r" (tmp),
|
||||||
[my_ticket] "=&r" (tmp2),
|
[my_ticket] "=&r" (tmp2),
|
||||||
[now_serving] "=&r" (tmp3));
|
[now_serving] "=&r" (tmp3)
|
||||||
|
: [inc] "r" (inc));
|
||||||
} else {
|
} else {
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
" .set push # arch_spin_trylock \n"
|
" .set push # arch_spin_trylock \n"
|
||||||
" .set noreorder \n"
|
" .set noreorder \n"
|
||||||
" \n"
|
" \n"
|
||||||
" ll %[ticket], %[ticket_ptr] \n"
|
"1: ll %[ticket], %[ticket_ptr] \n"
|
||||||
"1: srl %[my_ticket], %[ticket], 14 \n"
|
" srl %[my_ticket], %[ticket], 16 \n"
|
||||||
" andi %[my_ticket], %[my_ticket], 0x1fff \n"
|
" andi %[my_ticket], %[my_ticket], 0xffff \n"
|
||||||
" andi %[now_serving], %[ticket], 0x1fff \n"
|
" andi %[now_serving], %[ticket], 0xffff \n"
|
||||||
" bne %[my_ticket], %[now_serving], 3f \n"
|
" bne %[my_ticket], %[now_serving], 3f \n"
|
||||||
" addiu %[ticket], %[ticket], 0x4000 \n"
|
" addu %[ticket], %[ticket], %[inc] \n"
|
||||||
" sc %[ticket], %[ticket_ptr] \n"
|
" sc %[ticket], %[ticket_ptr] \n"
|
||||||
" beqz %[ticket], 4f \n"
|
" beqz %[ticket], 1b \n"
|
||||||
" li %[ticket], 1 \n"
|
" li %[ticket], 1 \n"
|
||||||
"2: \n"
|
"2: \n"
|
||||||
" .subsection 2 \n"
|
" .subsection 2 \n"
|
||||||
"3: b 2b \n"
|
"3: b 2b \n"
|
||||||
" li %[ticket], 0 \n"
|
" li %[ticket], 0 \n"
|
||||||
"4: b 1b \n"
|
|
||||||
" ll %[ticket], %[ticket_ptr] \n"
|
|
||||||
" .previous \n"
|
" .previous \n"
|
||||||
" .set pop \n"
|
" .set pop \n"
|
||||||
: [ticket_ptr] "+m" (lock->lock),
|
: [ticket_ptr] "+m" (lock->lock),
|
||||||
[ticket] "=&r" (tmp),
|
[ticket] "=&r" (tmp),
|
||||||
[my_ticket] "=&r" (tmp2),
|
[my_ticket] "=&r" (tmp2),
|
||||||
[now_serving] "=&r" (tmp3));
|
[now_serving] "=&r" (tmp3)
|
||||||
|
: [inc] "r" (inc));
|
||||||
}
|
}
|
||||||
|
|
||||||
smp_llsc_mb();
|
smp_llsc_mb();
|
||||||
|
@@ -5,16 +5,28 @@
|
|||||||
# error "please don't include this file directly"
|
# error "please don't include this file directly"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#include <asm/byteorder.h>
|
||||||
|
|
||||||
|
typedef union {
|
||||||
/*
|
/*
|
||||||
* bits 0..13: serving_now
|
* bits 0..15 : serving_now
|
||||||
* bits 14 : junk data
|
* bits 16..31 : ticket
|
||||||
* bits 15..28: ticket
|
|
||||||
*/
|
*/
|
||||||
unsigned int lock;
|
u32 lock;
|
||||||
|
struct {
|
||||||
|
#ifdef __BIG_ENDIAN
|
||||||
|
u16 ticket;
|
||||||
|
u16 serving_now;
|
||||||
|
#else
|
||||||
|
u16 serving_now;
|
||||||
|
u16 ticket;
|
||||||
|
#endif
|
||||||
|
} h;
|
||||||
} arch_spinlock_t;
|
} arch_spinlock_t;
|
||||||
|
|
||||||
#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
|
#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0 }
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
volatile unsigned int lock;
|
volatile unsigned int lock;
|
||||||
|
Reference in New Issue
Block a user