x86: atomic64 assembly improvements
In the "xchg" implementation, %ebx and %ecx don't need to be copied into %eax and %edx respectively (this is only necessary when the goal is merely to read the stored value).

In the "add_unless" implementation, swapping the use of %ecx and %esi for passing arguments allows %esi to become an input only (i.e. permitting the register to be re-used to address the same object without reload).

In "{add,sub}_return", doing the initial read64 through the passed-in %ecx decreases a register dependency.

In "inc_not_zero", a branch can be eliminated by or-ing together the two halves of the current (64-bit) value, and code size can be further reduced by adjusting the arithmetic slightly.

v2: Undo the folding of "xchg" and "set".

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4F19A2BC020000780006E0DC@nat28.tlf.novell.com
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
committed by
H. Peter Anvin
parent
819165fb34
commit
cb8095bba6
@@ -288,9 +288,8 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
|
|||||||
unsigned low = (unsigned)u;
|
unsigned low = (unsigned)u;
|
||||||
unsigned high = (unsigned)(u >> 32);
|
unsigned high = (unsigned)(u >> 32);
|
||||||
alternative_atomic64(add_unless,
|
alternative_atomic64(add_unless,
|
||||||
ASM_OUTPUT2("+A" (a), "+c" (v),
|
ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
|
||||||
"+S" (low), "+D" (high)),
|
"S" (v) : "memory");
|
||||||
ASM_NO_INPUT_CLOBBER("memory"));
|
|
||||||
return (int)a;
|
return (int)a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -137,13 +137,13 @@ BEGIN(dec_return)
|
|||||||
RET_ENDP
|
RET_ENDP
|
||||||
#undef v
|
#undef v
|
||||||
|
|
||||||
#define v %ecx
|
#define v %esi
|
||||||
BEGIN(add_unless)
|
BEGIN(add_unless)
|
||||||
addl %eax, %esi
|
addl %eax, %ecx
|
||||||
adcl %edx, %edi
|
adcl %edx, %edi
|
||||||
addl (v), %eax
|
addl (v), %eax
|
||||||
adcl 4(v), %edx
|
adcl 4(v), %edx
|
||||||
cmpl %eax, %esi
|
cmpl %eax, %ecx
|
||||||
je 3f
|
je 3f
|
||||||
1:
|
1:
|
||||||
movl %eax, (v)
|
movl %eax, (v)
|
||||||
|
@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
|
|||||||
ENTRY(atomic64_xchg_cx8)
|
ENTRY(atomic64_xchg_cx8)
|
||||||
CFI_STARTPROC
|
CFI_STARTPROC
|
||||||
|
|
||||||
movl %ebx, %eax
|
|
||||||
movl %ecx, %edx
|
|
||||||
1:
|
1:
|
||||||
LOCK_PREFIX
|
LOCK_PREFIX
|
||||||
cmpxchg8b (%esi)
|
cmpxchg8b (%esi)
|
||||||
@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
|
|||||||
movl %edx, %edi
|
movl %edx, %edi
|
||||||
movl %ecx, %ebp
|
movl %ecx, %ebp
|
||||||
|
|
||||||
read64 %ebp
|
read64 %ecx
|
||||||
1:
|
1:
|
||||||
movl %eax, %ebx
|
movl %eax, %ebx
|
||||||
movl %edx, %ecx
|
movl %edx, %ecx
|
||||||
@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
|
|||||||
SAVE ebx
|
SAVE ebx
|
||||||
/* these just push these two parameters on the stack */
|
/* these just push these two parameters on the stack */
|
||||||
SAVE edi
|
SAVE edi
|
||||||
SAVE esi
|
SAVE ecx
|
||||||
|
|
||||||
movl %ecx, %ebp
|
movl %eax, %ebp
|
||||||
movl %eax, %esi
|
|
||||||
movl %edx, %edi
|
movl %edx, %edi
|
||||||
|
|
||||||
read64 %ebp
|
read64 %esi
|
||||||
1:
|
1:
|
||||||
cmpl %eax, 0(%esp)
|
cmpl %eax, 0(%esp)
|
||||||
je 4f
|
je 4f
|
||||||
2:
|
2:
|
||||||
movl %eax, %ebx
|
movl %eax, %ebx
|
||||||
movl %edx, %ecx
|
movl %edx, %ecx
|
||||||
addl %esi, %ebx
|
addl %ebp, %ebx
|
||||||
adcl %edi, %ecx
|
adcl %edi, %ecx
|
||||||
LOCK_PREFIX
|
LOCK_PREFIX
|
||||||
cmpxchg8b (%ebp)
|
cmpxchg8b (%esi)
|
||||||
jne 1b
|
jne 1b
|
||||||
|
|
||||||
movl $1, %eax
|
movl $1, %eax
|
||||||
@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
|
|||||||
|
|
||||||
read64 %esi
|
read64 %esi
|
||||||
1:
|
1:
|
||||||
testl %eax, %eax
|
movl %eax, %ecx
|
||||||
je 4f
|
orl %edx, %ecx
|
||||||
2:
|
jz 3f
|
||||||
movl %eax, %ebx
|
movl %eax, %ebx
|
||||||
movl %edx, %ecx
|
xorl %ecx, %ecx
|
||||||
addl $1, %ebx
|
addl $1, %ebx
|
||||||
adcl $0, %ecx
|
adcl %edx, %ecx
|
||||||
LOCK_PREFIX
|
LOCK_PREFIX
|
||||||
cmpxchg8b (%esi)
|
cmpxchg8b (%esi)
|
||||||
jne 1b
|
jne 1b
|
||||||
@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
|
|||||||
3:
|
3:
|
||||||
RESTORE ebx
|
RESTORE ebx
|
||||||
ret
|
ret
|
||||||
4:
|
|
||||||
testl %edx, %edx
|
|
||||||
jne 2b
|
|
||||||
jmp 3b
|
|
||||||
CFI_ENDPROC
|
CFI_ENDPROC
|
||||||
ENDPROC(atomic64_inc_not_zero_cx8)
|
ENDPROC(atomic64_inc_not_zero_cx8)
|
||||||
|
Reference in New Issue
Block a user