x86-64: Modify memcpy()/memset() alternatives mechanism
In order to avoid unnecessary chains of branches, rather than implementing memcpy()/memset()'s access to their alternative implementations via a jump, patch the (larger) original function directly.

The memcpy() part of this is slightly subtle: while alternative instruction patching does itself use memcpy(), with the replacement block being less than 64 bytes in size, the main loop of the original function doesn't get used for copying memcpy_c() over memcpy(), and hence we can safely write over its beginning.

Also note that the CFI annotations are fine for both variants of each of the functions.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4B2BB8D30200007800026AF2@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
@ -20,12 +20,11 @@
|
|||||||
/*
|
/*
|
||||||
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
|
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
|
||||||
*
|
*
|
||||||
* Calls to this get patched into the kernel image via the
|
* This gets patched over the unrolled variant (below) via the
|
||||||
* alternative instructions framework:
|
* alternative instructions framework:
|
||||||
*/
|
*/
|
||||||
ALIGN
|
.section .altinstr_replacement, "ax", @progbits
|
||||||
memcpy_c:
|
.Lmemcpy_c:
|
||||||
CFI_STARTPROC
|
|
||||||
movq %rdi, %rax
|
movq %rdi, %rax
|
||||||
|
|
||||||
movl %edx, %ecx
|
movl %edx, %ecx
|
||||||
@ -35,8 +34,8 @@ memcpy_c:
|
|||||||
movl %edx, %ecx
|
movl %edx, %ecx
|
||||||
rep movsb
|
rep movsb
|
||||||
ret
|
ret
|
||||||
CFI_ENDPROC
|
.Lmemcpy_e:
|
||||||
ENDPROC(memcpy_c)
|
.previous
|
||||||
|
|
||||||
ENTRY(__memcpy)
|
ENTRY(__memcpy)
|
||||||
ENTRY(memcpy)
|
ENTRY(memcpy)
|
||||||
@ -128,16 +127,10 @@ ENDPROC(__memcpy)
|
|||||||
* It is also a lot simpler. Use this when possible:
|
* It is also a lot simpler. Use this when possible:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
.section .altinstr_replacement, "ax"
|
|
||||||
1: .byte 0xeb /* jmp <disp8> */
|
|
||||||
.byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
|
|
||||||
2:
|
|
||||||
.previous
|
|
||||||
|
|
||||||
.section .altinstructions, "a"
|
.section .altinstructions, "a"
|
||||||
.align 8
|
.align 8
|
||||||
.quad memcpy
|
.quad memcpy
|
||||||
.quad 1b
|
.quad .Lmemcpy_c
|
||||||
.byte X86_FEATURE_REP_GOOD
|
.byte X86_FEATURE_REP_GOOD
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -145,6 +138,6 @@ ENDPROC(__memcpy)
|
|||||||
* so it is silly to overwrite itself with nops - reboot is the
|
* so it is silly to overwrite itself with nops - reboot is the
|
||||||
* only outcome...
|
* only outcome...
|
||||||
*/
|
*/
|
||||||
.byte 2b - 1b
|
.byte .Lmemcpy_e - .Lmemcpy_c
|
||||||
.byte 2b - 1b
|
.byte .Lmemcpy_e - .Lmemcpy_c
|
||||||
.previous
|
.previous
|
||||||
|
@ -12,9 +12,8 @@
|
|||||||
*
|
*
|
||||||
* rax original destination
|
* rax original destination
|
||||||
*/
|
*/
|
||||||
ALIGN
|
.section .altinstr_replacement, "ax", @progbits
|
||||||
memset_c:
|
.Lmemset_c:
|
||||||
CFI_STARTPROC
|
|
||||||
movq %rdi,%r9
|
movq %rdi,%r9
|
||||||
movl %edx,%r8d
|
movl %edx,%r8d
|
||||||
andl $7,%r8d
|
andl $7,%r8d
|
||||||
@ -29,8 +28,8 @@ memset_c:
|
|||||||
rep stosb
|
rep stosb
|
||||||
movq %r9,%rax
|
movq %r9,%rax
|
||||||
ret
|
ret
|
||||||
CFI_ENDPROC
|
.Lmemset_e:
|
||||||
ENDPROC(memset_c)
|
.previous
|
||||||
|
|
||||||
ENTRY(memset)
|
ENTRY(memset)
|
||||||
ENTRY(__memset)
|
ENTRY(__memset)
|
||||||
@ -118,16 +117,11 @@ ENDPROC(__memset)
|
|||||||
|
|
||||||
#include <asm/cpufeature.h>
|
#include <asm/cpufeature.h>
|
||||||
|
|
||||||
.section .altinstr_replacement,"ax"
|
|
||||||
1: .byte 0xeb /* jmp <disp8> */
|
|
||||||
.byte (memset_c - memset) - (2f - 1b) /* offset */
|
|
||||||
2:
|
|
||||||
.previous
|
|
||||||
.section .altinstructions,"a"
|
.section .altinstructions,"a"
|
||||||
.align 8
|
.align 8
|
||||||
.quad memset
|
.quad memset
|
||||||
.quad 1b
|
.quad .Lmemset_c
|
||||||
.byte X86_FEATURE_REP_GOOD
|
.byte X86_FEATURE_REP_GOOD
|
||||||
.byte .Lfinal - memset
|
.byte .Lfinal - memset
|
||||||
.byte 2b - 1b
|
.byte .Lmemset_e - .Lmemset_c
|
||||||
.previous
|
.previous
|
||||||
|
Reference in New Issue
Block a user