ARC: String library
Hand optimised asm code for ARC700 pipeline. Originally written/optimized by Joern Rennecke Signed-off-by: Vineet Gupta <vgupta@synopsys.com> Cc: Joern Rennecke <joern.rennecke@embecosm.com>
This commit is contained in:
40
arch/arc/include/asm/string.h
Normal file
40
arch/arc/include/asm/string.h
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* vineetg: May 2011
|
||||||
|
* -We had half-optimised memset/memcpy, got better versions of those
|
||||||
|
* -Added memcmp, strchr, strcpy, strcmp, strlen
|
||||||
|
*
|
||||||
|
* Amit Bhor: Codito Technologies 2004
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Include guard for the ARC string declarations. */
#ifndef _ASM_ARC_STRING_H
|
||||||
|
#define _ASM_ARC_STRING_H
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#ifdef __KERNEL__
|
||||||
|
|
||||||
|
/*
 * One __HAVE_ARCH_xxx per routine declared below (memzero has none).
 * Presumably these tell the generic string code not to supply its own
 * version of xxx - confirm against the generic string headers.
 */
#define __HAVE_ARCH_MEMSET
|
||||||
|
#define __HAVE_ARCH_MEMCPY
|
||||||
|
#define __HAVE_ARCH_MEMCMP
|
||||||
|
#define __HAVE_ARCH_STRCHR
|
||||||
|
#define __HAVE_ARCH_STRCPY
|
||||||
|
#define __HAVE_ARCH_STRCMP
|
||||||
|
#define __HAVE_ARCH_STRLEN
|
||||||
|
|
||||||
|
/* Prototypes of the routines implemented in assembly under arch/arc/lib. */
extern void *memset(void *ptr, int, __kernel_size_t);
|
||||||
|
extern void *memcpy(void *, const void *, __kernel_size_t);
|
||||||
|
/* memzero(ptr, n): implemented in memset.S as a tail call to memset with a zero fill byte. */
extern void memzero(void *ptr, __kernel_size_t n);
|
||||||
|
extern int memcmp(const void *, const void *, __kernel_size_t);
|
||||||
|
extern char *strchr(const char *s, int c);
|
||||||
|
extern char *strcpy(char *dest, const char *src);
|
||||||
|
extern int strcmp(const char *cs, const char *ct);
|
||||||
|
extern __kernel_size_t strlen(const char *);
|
||||||
|
|
||||||
|
#endif /* __KERNEL__ */
|
||||||
|
#endif /* _ASM_ARC_STRING_H */
|
124
arch/arc/lib/memcmp.S
Normal file
124
arch/arc/lib/memcmp.S
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <asm/linkage.h>
|
||||||
|
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
#define WORD2 r2
|
||||||
|
#define SHIFT r3
|
||||||
|
#else /* BIG ENDIAN */
|
||||||
|
#define WORD2 r3
|
||||||
|
#define SHIFT r2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
; memcmp: r0 = s1, r1 = s2, r2 = n (byte count); result returned in r0.
; Two paths, both unrolled by two and run as zero-overhead loops:
;   - a word-at-a-time path, used when both pointers are word aligned;
;   - a byte-at-a-time path (.Lbytewise) otherwise, and for n == 0.
; The first differing byte decides the sign of the result. The byte path
; returns the exact byte difference; the word paths return a value that
; only has the right sign.
; WORD2 and SHIFT are the endian-dependent register aliases defined above.
ARC_ENTRY memcmp
|
||||||
|
or r12,r0,r1 ; merge the low address bits of both pointers
|
||||||
|
asl_s r12,r12,30 ; alignment bits moved to the top; zero iff both pointers are word aligned
|
||||||
|
sub r3,r2,1 ; r3 = n - 1
|
||||||
|
brls r2,r12,.Lbytewise ; n lower-or-same as r12 (unsigned): n == 0, or a pointer is not word aligned
|
||||||
|
ld r4,[r0,0] ; preload the first word of s1
|
||||||
|
ld r5,[r1,0] ; ... and of s2
|
||||||
|
lsr.f lp_count,r3,3 ; loop count = (n - 1) / 8; the flags are consumed by lpne and by bhs_s below
|
||||||
|
lpne .Loop_end ; run the loop body only when the count is non-zero
|
||||||
|
ld_s WORD2,[r0,4] ; second word of this 8-byte step
|
||||||
|
ld_s r12,[r1,4]
|
||||||
|
brne r4,r5,.Leven ; first word of the pair differs
|
||||||
|
ld.a r4,[r0,8] ; advance both pointers by 8 and preload the next pair
|
||||||
|
ld.a r5,[r1,8]
|
||||||
|
brne WORD2,r12,.Lodd ; second word of the pair differs
|
||||||
|
.Loop_end:
|
||||||
|
asl_s SHIFT,SHIFT,3 ; byte count to bit count (SHIFT holds n - 1 on little endian, n on big endian)
|
||||||
|
bhs_s .Last_cmp ; carry clear (presumably bit 2 of n - 1, left by the lsr.f): r4/r5 already hold the last word
|
||||||
|
brne r4,r5,.Leven ; otherwise there is one more full word to compare first
|
||||||
|
ld r4,[r0,4] ; then fetch the final, possibly partial, word
|
||||||
|
ld r5,[r1,4]
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
nop_s
|
||||||
|
; one more load latency cycle
|
||||||
|
.Last_cmp:
|
||||||
|
xor r0,r4,r5 ; r0 = bits that differ
|
||||||
|
bset r0,r0,SHIFT ; force a difference inside the last valid byte (bit 8*(n-1), presumably mod 32) so bytes past n cannot decide
|
||||||
|
sub_s r1,r0,1
|
||||||
|
bic_s r1,r1,r0 ; r1 = mask of the bits below the lowest differing bit
|
||||||
|
norm r1,r1
|
||||||
|
b.d .Leven_cmp
|
||||||
|
and r1,r1,24 ; (delay slot) shift count that moves the first differing byte to the top byte
|
||||||
|
.Leven:
|
||||||
|
xor r0,r4,r5 ; same search for the lowest differing byte, on a full word
|
||||||
|
sub_s r1,r0,1
|
||||||
|
bic_s r1,r1,r0
|
||||||
|
norm r1,r1
|
||||||
|
; slow track insn
|
||||||
|
and r1,r1,24
|
||||||
|
.Leven_cmp:
|
||||||
|
asl r2,r4,r1 ; discard the bytes above the first differing one
|
||||||
|
asl r12,r5,r1
|
||||||
|
lsr_s r2,r2,1 ; clear the top bit of both so the subtraction cannot overflow
|
||||||
|
lsr_s r12,r12,1
|
||||||
|
j_s.d [blink]
|
||||||
|
sub r0,r2,r12 ; (delay slot) result carries the sign of the comparison
|
||||||
|
.balign 4
|
||||||
|
.Lodd:
|
||||||
|
xor r0,WORD2,r12 ; as .Leven, but for the second word of the pair (WORD2 is r2 here)
|
||||||
|
sub_s r1,r0,1
|
||||||
|
bic_s r1,r1,r0
|
||||||
|
norm r1,r1
|
||||||
|
; slow track insn
|
||||||
|
and r1,r1,24
|
||||||
|
asl_s r2,r2,r1
|
||||||
|
asl_s r12,r12,r1
|
||||||
|
lsr_s r2,r2,1
|
||||||
|
lsr_s r12,r12,1
|
||||||
|
j_s.d [blink]
|
||||||
|
sub r0,r2,r12
|
||||||
|
#else /* BIG ENDIAN */
|
||||||
|
.Last_cmp:
|
||||||
|
neg_s SHIFT,SHIFT
|
||||||
|
lsr r4,r4,SHIFT ; shift out the bytes beyond n (count is -8n, presumably taken mod 32)
|
||||||
|
lsr r5,r5,SHIFT
|
||||||
|
; slow track insn
|
||||||
|
.Leven:
|
||||||
|
sub.f r0,r4,r5 ; r0 = 0 when the words are equal
|
||||||
|
mov.ne r0,1 ; otherwise 1 ...
|
||||||
|
j_s.d [blink]
|
||||||
|
bset.cs r0,r0,31 ; (delay slot) ... with the sign bit added when the sub.f borrowed (s1 word below s2 word)
|
||||||
|
.Lodd:
|
||||||
|
cmp_s WORD2,r12 ; the two words are known to differ here
|
||||||
|
|
||||||
|
mov_s r0,1
|
||||||
|
j_s.d [blink]
|
||||||
|
bset.cs r0,r0,31 ; (delay slot) make the result negative when the s1 word is the lower one
|
||||||
|
#endif /* ENDIAN */
|
||||||
|
.balign 4
|
||||||
|
.Lbytewise:
|
||||||
|
breq r2,0,.Lnil ; n == 0: return 0
|
||||||
|
ldb r4,[r0,0] ; preload the first byte of each buffer
|
||||||
|
ldb r5,[r1,0]
|
||||||
|
lsr.f lp_count,r3 ; loop count = (n - 1) / 2; carry = low bit of n - 1, tested at .Lbyte_end
|
||||||
|
lpne .Lbyte_end
|
||||||
|
ldb_s r3,[r0,1] ; second byte of this 2-byte step
|
||||||
|
ldb r12,[r1,1]
|
||||||
|
brne r4,r5,.Lbyte_even
|
||||||
|
ldb.a r4,[r0,2] ; advance both pointers by 2 and preload the next pair
|
||||||
|
ldb.a r5,[r1,2]
|
||||||
|
brne r3,r12,.Lbyte_odd
|
||||||
|
.Lbyte_end:
|
||||||
|
bcc .Lbyte_even ; carry clear: r4/r5 hold the last byte to compare
|
||||||
|
brne r4,r5,.Lbyte_even
|
||||||
|
ldb_s r3,[r0,1] ; one more byte is left after r4/r5
|
||||||
|
ldb_s r12,[r1,1]
|
||||||
|
.Lbyte_odd:
|
||||||
|
j_s.d [blink]
|
||||||
|
sub r0,r3,r12 ; (delay slot) difference of the bytes held in r3/r12
|
||||||
|
.Lbyte_even:
|
||||||
|
j_s.d [blink]
|
||||||
|
sub r0,r4,r5 ; (delay slot) difference of the bytes held in r4/r5
|
||||||
|
.Lnil:
|
||||||
|
j_s.d [blink]
|
||||||
|
mov r0,0 ; (delay slot) zero-length compare returns 0
|
||||||
|
ARC_EXIT memcmp
|
66
arch/arc/lib/memcpy-700.S
Normal file
66
arch/arc/lib/memcpy-700.S
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <asm/linkage.h>
|
||||||
|
|
||||||
|
; memcpy: r0 = dst, r1 = src, r2 = n (byte count).
; r0 is never written, so it still holds dst on return; r5 is the running
; destination pointer.
; Word path (both pointers word aligned): copies whole words, two per loop
; iteration, and merges a partial last word into the existing destination
; word. Note that the last source word is loaded whole even when only part
; of it is inside n.
; Byte path (.Lcopy_bytewise): used otherwise, and for n == 0.
ARC_ENTRY memcpy
|
||||||
|
or r3,r0,r1 ; merge the low address bits of both pointers
|
||||||
|
asl_s r3,r3,30 ; alignment bits moved to the top; zero iff both pointers are word aligned
|
||||||
|
mov_s r5,r0 ; r5 = running dst
|
||||||
|
brls.d r2,r3,.Lcopy_bytewise ; n lower-or-same as r3 (unsigned): n == 0, or a pointer is not word aligned
|
||||||
|
sub.f r3,r2,1 ; (delay slot) r3 = n - 1; presumably sets carry only for n == 0 (see jcs below)
|
||||||
|
ld_s r12,[r1,0] ; preload the first source word
|
||||||
|
asr.f lp_count,r3,3 ; loop count = (n - 1) / 8; flags are consumed by lppnz below
|
||||||
|
bbit0.d r3,2,.Lnox4 ; bit 2 of n - 1 clear: no odd word to copy ahead of the loop
|
||||||
|
bmsk_s r2,r2,1 ; (delay slot) r2 = n mod 4 = bytes in a partial last word
|
||||||
|
st.ab r12,[r5,4] ; copy one word ahead of the unrolled loop
|
||||||
|
ld.a r12,[r1,4]
|
||||||
|
.Lnox4:
|
||||||
|
lppnz .Lendloop ; two words per iteration; r12 always holds the next word to store
|
||||||
|
ld_s r3,[r1,4]
|
||||||
|
st.ab r12,[r5,4]
|
||||||
|
ld.a r12,[r1,8]
|
||||||
|
st.ab r3,[r5,4]
|
||||||
|
.Lendloop:
|
||||||
|
breq r2,0,.Last_store ; n is a multiple of 4: store the last word whole
|
||||||
|
ld r3,[r5,0] ; partial word: fetch the current destination word to merge with
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
add3 r2,-1,r2 ; r2 = 8 * (n mod 4) - 1 = highest bit that must come from the source
|
||||||
|
; uses long immediate
|
||||||
|
xor_s r12,r12,r3 ; xor / mask / xor: low bits from the source word, the rest from the destination
|
||||||
|
bmsk r12,r12,r2
|
||||||
|
xor_s r12,r12,r3
|
||||||
|
#else /* BIG ENDIAN */
|
||||||
|
sub3 r2,31,r2 ; r2 = 31 - 8 * (n mod 4) = highest bit that must stay from the destination
|
||||||
|
; uses long immediate
|
||||||
|
xor_s r3,r3,r12 ; xor / mask / xor: high bits from the source word, the rest from the destination
|
||||||
|
bmsk r3,r3,r2
|
||||||
|
xor_s r12,r12,r3
|
||||||
|
#endif /* ENDIAN */
|
||||||
|
.Last_store:
|
||||||
|
j_s.d [blink]
|
||||||
|
st r12,[r5,0] ; (delay slot) store the last (possibly merged) word
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.Lcopy_bytewise:
|
||||||
|
jcs [blink] ; carry left by the sub.f above: presumably n == 0, nothing to copy
|
||||||
|
ldb_s r12,[r1,0] ; preload the first source byte
|
||||||
|
lsr.f lp_count,r3 ; loop count = (n - 1) / 2; carry = low bit of n - 1
|
||||||
|
bhs_s .Lnox1 ; carry clear: no odd byte to copy ahead of the loop
|
||||||
|
stb.ab r12,[r5,1] ; copy one byte ahead of the unrolled loop
|
||||||
|
ldb.a r12,[r1,1]
|
||||||
|
.Lnox1:
|
||||||
|
lppnz .Lendbloop ; two bytes per iteration; r12 always holds the next byte to store
|
||||||
|
ldb_s r3,[r1,1]
|
||||||
|
stb.ab r12,[r5,1]
|
||||||
|
ldb.a r12,[r1,2]
|
||||||
|
stb.ab r3,[r5,1]
|
||||||
|
.Lendbloop:
|
||||||
|
j_s.d [blink]
|
||||||
|
stb r12,[r5,0] ; (delay slot) store the last byte
|
||||||
|
ARC_EXIT memcpy
|
59
arch/arc/lib/memset.S
Normal file
59
arch/arc/lib/memset.S
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <asm/linkage.h>
|
||||||
|
|
||||||
|
#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
|
||||||
|
|
||||||
|
; memset: r0 = ptr, r1 = fill byte, r2 = n (byte count).
; r0 is never written, so it still holds ptr on return; r4 is the running
; store pointer.
; Three cases:
;   - ptr and n both multiples of 4: straight word loop (.Laligned);
;   - n at most SMALL: plain byte loop (.Ltiny);
;   - otherwise: the ragged head and tail are filled with byte and halfword
;     stores (which may overlap each other and the word region), then the
;     word loop fills the aligned middle.
ARC_ENTRY memset
|
||||||
|
mov_s r4,r0 ; r4 = running store pointer
|
||||||
|
or r12,r0,r2
|
||||||
|
bmsk.f r12,r12,1 ; low two bits of (ptr or n); Z set iff both are multiples of 4
|
||||||
|
extb_s r1,r1 ; keep only the low byte of the fill value
|
||||||
|
asl r3,r1,8
|
||||||
|
beq.d .Laligned ; fully aligned request: go straight to the word loop
|
||||||
|
or_s r1,r1,r3 ; (delay slot) r1 = fill byte replicated into the low halfword
|
||||||
|
brls r2,SMALL,.Ltiny ; short request: byte loop
|
||||||
|
add r3,r2,r0 ; r3 = ptr + n (one past the end)
|
||||||
|
stb r1,[r3,-1] ; tail: last byte
|
||||||
|
bclr_s r3,r3,0 ; round the end down to an even address
|
||||||
|
stw r1,[r3,-2] ; tail: the halfword just below that
|
||||||
|
bmsk.f r12,r0,1 ; r12 = ptr mod 4; Z set when ptr is word aligned
|
||||||
|
add_s r2,r2,r12
|
||||||
|
sub.ne r2,r2,4 ; ptr unaligned: r2 = bytes left counted from the next word boundary
|
||||||
|
stb.ab r1,[r4,1] ; head: first byte
|
||||||
|
and r4,r4,-2 ; round down to an even address
|
||||||
|
stw.ab r1,[r4,2] ; head: one halfword
|
||||||
|
and r4,r4,-4 ; round down to the word boundary where the word loop starts
|
||||||
|
.Laligned: ; This code address should be aligned for speed.
|
||||||
|
asl r3,r1,16
|
||||||
|
lsr.f lp_count,r2,2 ; loop count = number of whole words
|
||||||
|
or_s r1,r1,r3 ; r1 = fill byte replicated into all four bytes
|
||||||
|
lpne .Loop_end ; skipped when the word count is zero
|
||||||
|
st.ab r1,[r4,4]
|
||||||
|
.Loop_end:
|
||||||
|
j_s [blink]
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.Ltiny:
|
||||||
|
mov.f lp_count,r2 ; loop count = n
|
||||||
|
lpne .Ltiny_end ; skipped when n == 0
|
||||||
|
stb.ab r1,[r4,1]
|
||||||
|
.Ltiny_end:
|
||||||
|
j_s [blink]
|
||||||
|
ARC_EXIT memset
|
||||||
|
|
||||||
|
; memzero: @r0 = mem, @r1 = size_t
|
||||||
|
; memset: @r0 = mem, @r1 = char, @r2 = size_t
|
||||||
|
|
||||||
|
; memzero(mem, n): rearranges its two arguments into memset form and jumps
; into memset with a fill value of 0.
ARC_ENTRY memzero
|
||||||
|
; adjust bzero args to memset args
|
||||||
|
mov r2, r1 ; size moves from the 2nd to the 3rd argument register
|
||||||
|
mov r1, 0 ; fill value = 0
|
||||||
|
b memset ; tail call (plain branch), so no need to tinker with blink: memset returns through the blink of our caller
|
||||||
|
ARC_EXIT memzero
|
123
arch/arc/lib/strchr-700.S
Normal file
123
arch/arc/lib/strchr-700.S
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* ARC700 has a relatively long pipeline and branch prediction, so we want
|
||||||
|
to avoid branches that are hard to predict. On the other hand, the
|
||||||
|
presence of the norm instruction makes it easier to operate on whole
|
||||||
|
words branch-free. */
|
||||||
|
|
||||||
|
#include <asm/linkage.h>
|
||||||
|
|
||||||
|
; strchr: r0 = s, r1 = c.
; Returns in r0 the address of the first byte of s equal to the low byte of
; c, or 0 when the string ends before such a byte is found. The terminating
; NUL itself counts as a match when the low byte of c is 0.
; The string is scanned one aligned word at a time. Constants held during
; the scan: r3 = 0x01010101, r4 = 0x80808080, r5 = c in all four bytes.
; For a word x, ((x - 0x01010101) and not x and 0x80808080) is non-zero when
; x contains a zero byte; applied to (x xor r5) it detects a byte equal to c.
; The big-endian tail code is annotated only lightly; treat those notes as
; a reading aid, not as a specification.
ARC_ENTRY strchr
|
||||||
|
extb_s r1,r1 ; keep only the low byte of c
|
||||||
|
asl r5,r1,8
|
||||||
|
bmsk r2,r0,1 ; r2 = s mod 4
|
||||||
|
or r5,r5,r1 ; r5 = c in the low two bytes
|
||||||
|
mov_s r3,0x01010101
|
||||||
|
breq.d r2,r0,.Laligned ; NOTE(review): compares (s mod 4) with s itself, not with 0, so this is only taken when s is below 4 - confirm intent; the fall-through path also handles aligned s
|
||||||
|
asl r4,r5,16 ; (delay slot) r4 = c in the high two bytes
|
||||||
|
sub_s r0,r0,r2 ; round s down to a word boundary
|
||||||
|
asl r7,r2,3 ; r7 = 8 * (s mod 4)
|
||||||
|
ld_s r2,[r0] ; first aligned word; it may contain bytes that precede s
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
asl r7,r3,r7 ; r7 = 0x01010101 with the lanes of the bytes before s shifted out
|
||||||
|
#else
|
||||||
|
lsr r7,r3,r7 ; r7 = 0x01010101 with the lanes of the bytes before s shifted out
|
||||||
|
#endif
|
||||||
|
or r5,r5,r4 ; r5 = c in all four bytes
|
||||||
|
ror r4,r3 ; r4 = 0x80808080
|
||||||
|
sub r12,r2,r7 ; zero-byte test on the first word, using r7 so bytes before s are not tested
|
||||||
|
bic_s r12,r12,r2
|
||||||
|
and r12,r12,r4
|
||||||
|
brne.d r12,0,.Lfound0_ua ; a NUL was flagged in the first word
|
||||||
|
xor r6,r2,r5 ; (delay slot) r6 has a zero byte wherever the word matches c
|
||||||
|
ld.a r2,[r0,4] ; advance to and load the next word
|
||||||
|
sub r12,r6,r7 ; match test on the first word, again limited by r7
|
||||||
|
bic r12,r12,r6
|
||||||
|
and r7,r12,r4 ; r7 = match indicator bits
|
||||||
|
breq r7,0,.Loop ; For speed, we want this branch to be unaligned.
|
||||||
|
b .Lfound_char ; Likewise this one.
|
||||||
|
; /* We require this code address to be unaligned for speed... */
|
||||||
|
.Laligned:
|
||||||
|
ld_s r2,[r0] ; first word
|
||||||
|
or r5,r5,r4 ; r5 = c in all four bytes
|
||||||
|
ror r4,r3 ; r4 = 0x80808080
|
||||||
|
; /* ... so that this code address is aligned, for itself and ... */
|
||||||
|
.Loop:
|
||||||
|
sub r12,r2,r3 ; zero-byte test on the current word
|
||||||
|
bic_s r12,r12,r2
|
||||||
|
and r12,r12,r4
|
||||||
|
brne.d r12,0,.Lfound0 ; the word contains a NUL: finish in .Lfound0 (r0 not yet advanced)
|
||||||
|
xor r6,r2,r5 ; (delay slot) r6 has a zero byte wherever the word matches c
|
||||||
|
ld.a r2,[r0,4] ; advance to and load the next word
|
||||||
|
sub r12,r6,r3 ; match test on the word just examined
|
||||||
|
bic r12,r12,r6
|
||||||
|
and r7,r12,r4 ; r7 = match indicator bits
|
||||||
|
breq r7,0,.Loop /* ... so that this branch is unaligned. */
|
||||||
|
; Found searched-for character. r0 has already advanced to next word.
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
/* We only need the information about the first matching byte
|
||||||
|
(i.e. the least significant matching byte) to be exact,
|
||||||
|
hence there is no problem with carry effects. */
|
||||||
|
.Lfound_char:
|
||||||
|
sub r3,r7,1
|
||||||
|
bic r3,r3,r7 ; r3 = mask of the bits below the lowest indicator bit
|
||||||
|
norm r2,r3 ; presumably 24 - 8*b for a first match in byte b
|
||||||
|
sub_s r0,r0,1
|
||||||
|
asr_s r2,r2,3 ; 3 - b
|
||||||
|
j.d [blink]
|
||||||
|
sub_s r0,r0,r2 ; (delay slot) r0 = (advanced pointer) - 4 + b = address of the match
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.Lfound0_ua:
|
||||||
|
mov r3,r7 ; first word: use the constant that ignores the bytes before s
|
||||||
|
.Lfound0:
|
||||||
|
sub r3,r6,r3 ; match test on the word that contains the NUL
|
||||||
|
bic r3,r3,r6
|
||||||
|
and r2,r3,r4 ; r2 = match indicator bits
|
||||||
|
or_s r12,r12,r2 ; r12 = NUL indicators or match indicators
|
||||||
|
sub_s r3,r12,1
|
||||||
|
bic_s r3,r3,r12 ; mask of the bits below the first event (NUL or match)
|
||||||
|
norm r3,r3 ; presumably 24 - 8*b for a first event in byte b
|
||||||
|
add_s r0,r0,3
|
||||||
|
asr_s r12,r3,3 ; 3 - b
|
||||||
|
asl.f 0,r2,r3 ; move the match indicator of byte b to bit 31; negative means the first event is a match
|
||||||
|
sub_s r0,r0,r12 ; r0 = word address + b
|
||||||
|
j_s.d [blink]
|
||||||
|
mov.pl r0,0 ; (delay slot) the NUL came first: return 0
|
||||||
|
#else /* BIG ENDIAN */
|
||||||
|
.Lfound_char:
|
||||||
|
lsr r7,r7,7 ; move each indicator from bit 7 to bit 0 of its byte
|
||||||
|
|
||||||
|
bic r2,r7,r6 ; drop indicators where bit 0 of r6 is set (presumably false hits caused by borrow - confirm)
|
||||||
|
norm r2,r2
|
||||||
|
sub_s r0,r0,4 ; back to the word that was examined
|
||||||
|
asr_s r2,r2,3 ; byte index of the first match
|
||||||
|
j.d [blink]
|
||||||
|
add_s r0,r0,r2 ; (delay slot) r0 = address of the match
|
||||||
|
|
||||||
|
.Lfound0_ua:
|
||||||
|
mov_s r3,r7 ; first word: use the constant that ignores the bytes before s
|
||||||
|
.Lfound0:
|
||||||
|
asl_s r2,r2,7 ; NOTE(review): from here to the bic.f the NUL and match indicators are combined and corrected for borrow effects; exact derivation not re-verified
|
||||||
|
or r7,r6,r4
|
||||||
|
bic_s r12,r12,r2
|
||||||
|
sub r2,r7,r3
|
||||||
|
or r2,r2,r6
|
||||||
|
bic r12,r2,r12
|
||||||
|
bic.f r3,r4,r12
|
||||||
|
norm r3,r3
|
||||||
|
|
||||||
|
add.pl r3,r3,1
|
||||||
|
asr_s r12,r3,3 ; byte index of the first event
|
||||||
|
asl.f 0,r2,r3 ; sets the flags used by mov.mi below
|
||||||
|
add_s r0,r0,r12 ; r0 = word address + byte index
|
||||||
|
j_s.d [blink]
|
||||||
|
mov.mi r0,0 ; (delay slot) return 0 when the flags say the first event was not a match
|
||||||
|
#endif /* ENDIAN */
|
||||||
|
ARC_EXIT strchr
|
96
arch/arc/lib/strcmp.S
Normal file
96
arch/arc/lib/strcmp.S
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This is optimized primarily for the ARC700.
|
||||||
|
It would be possible to speed up the loops by one cycle / word
|
||||||
|
respective one cycle / byte by forcing double source 1 alignment, unrolling
|
||||||
|
by a factor of two, and speculatively loading the second word / byte of
|
||||||
|
source 1; however, that would increase the overhead for loop setup / finish,
|
||||||
|
and strcmp might often terminate early. */
|
||||||
|
|
||||||
|
#include <asm/linkage.h>
|
||||||
|
|
||||||
|
; strcmp: r0 = s1, r1 = s2; result returned in r0 (zero when equal,
; positive when s1 sorts after s2, negative when it sorts before).
; When both pointers are word aligned the strings are compared a word at a
; time (.Lwordloop); otherwise a byte at a time (.Lcharloop).
; r12 = 0x01010101 and r5 = 0x80808080 are the constants of the zero-byte
; test ((x - r12) and not x and r5), which is applied to the s1 word only.
ARC_ENTRY strcmp
|
||||||
|
or r2,r0,r1
|
||||||
|
bmsk_s r2,r2,1 ; low two address bits of both pointers combined
|
||||||
|
brne r2,0,.Lcharloop ; either pointer not word aligned: compare byte by byte
|
||||||
|
mov_s r12,0x01010101
|
||||||
|
ror r5,r12 ; r5 = 0x80808080
|
||||||
|
.Lwordloop:
|
||||||
|
ld.ab r2,[r0,4] ; next word of s1, post-increment
|
||||||
|
ld.ab r3,[r1,4] ; next word of s2, post-increment
|
||||||
|
nop_s
|
||||||
|
sub r4,r2,r12 ; zero-byte test on the s1 word
|
||||||
|
bic r4,r4,r2
|
||||||
|
and r4,r4,r5 ; r4 = zero indicator bits
|
||||||
|
brne r4,0,.Lfound0 ; s1 word contains a NUL: finish in .Lfound0
|
||||||
|
breq r2,r3,.Lwordloop ; words equal and no NUL: keep going
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
xor r0,r2,r3 ; mask for difference
|
||||||
|
sub_s r1,r0,1
|
||||||
|
bic_s r0,r0,r1 ; mask for least significant difference bit
|
||||||
|
sub r1,r5,r0
|
||||||
|
xor r0,r5,r1 ; mask for least significant difference byte
|
||||||
|
and_s r2,r2,r0 ; keep only the first differing byte of each word
|
||||||
|
and_s r3,r3,r0
|
||||||
|
#endif /* LITTLE ENDIAN */
|
||||||
|
cmp_s r2,r3 ; the words are known to differ here
|
||||||
|
mov_s r0,1 ; result 1 ...
|
||||||
|
j_s.d [blink]
|
||||||
|
bset.lo r0,r0,31 ; (delay slot) ... made negative when the s1 value is the lower one (unsigned)
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
.Lfound0:
|
||||||
|
xor r0,r2,r3 ; mask for difference
|
||||||
|
or r0,r0,r4 ; or in zero indicator
|
||||||
|
sub_s r1,r0,1
|
||||||
|
bic_s r0,r0,r1 ; mask for least significant difference bit
|
||||||
|
sub r1,r5,r0
|
||||||
|
xor r0,r5,r1 ; mask for least significant difference byte
|
||||||
|
and_s r2,r2,r0 ; keep only the byte of the first difference or NUL
|
||||||
|
and_s r3,r3,r0
|
||||||
|
sub.f r0,r2,r3 ; 0 when that byte is the same in both strings
|
||||||
|
mov.hi r0,1 ; s1 byte higher: result 1
|
||||||
|
j_s.d [blink]
|
||||||
|
bset.lo r0,r0,31 ; (delay slot) s1 byte lower: force the sign bit
|
||||||
|
#else /* BIG ENDIAN */
|
||||||
|
/* The zero-detection above can mis-detect 0x01 bytes as zeroes
|
||||||
|
because of carry-propagation from a lower significant zero byte.
|
||||||
|
We can compensate for this by checking that bit0 is zero.
|
||||||
|
This compensation is not necessary in the step where we
|
||||||
|
get a low estimate for r2, because in any affected bytes
|
||||||
|
we already have 0x00 or 0x01, which will remain unchanged
|
||||||
|
when bit 7 is cleared. */
|
||||||
|
.balign 4
|
||||||
|
.Lfound0:
|
||||||
|
lsr r0,r4,8
|
||||||
|
lsr_s r1,r2
|
||||||
|
bic_s r2,r2,r0 ; get low estimate for r2 and get ...
|
||||||
|
bic_s r0,r0,r1 ; <this is the adjusted mask for zeros>
|
||||||
|
or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ...
|
||||||
|
cmp_s r3,r2 ; ... be independent of trailing garbage
|
||||||
|
or_s r2,r2,r0 ; likewise for r3 > r2
|
||||||
|
bic_s r3,r3,r0
|
||||||
|
rlc r0,0 ; r0 := r2 > r3 ? 1 : 0
|
||||||
|
cmp_s r2,r3
|
||||||
|
j_s.d [blink]
|
||||||
|
bset.lo r0,r0,31 ; (delay slot) force the sign bit when r2 is the lower one (unsigned)
|
||||||
|
#endif /* ENDIAN */
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.Lcharloop:
|
||||||
|
ldb.ab r2,[r0,1] ; next byte of s1, post-increment
|
||||||
|
ldb.ab r3,[r1,1] ; next byte of s2, post-increment
|
||||||
|
nop_s
|
||||||
|
breq r2,0,.Lcmpend ; end of s1
|
||||||
|
breq r2,r3,.Lcharloop ; bytes equal: keep going
|
||||||
|
.Lcmpend:
|
||||||
|
j_s.d [blink]
|
||||||
|
sub r0,r2,r3 ; (delay slot) difference of the last bytes read
|
||||||
|
ARC_EXIT strcmp
|
70
arch/arc/lib/strcpy-700.S
Normal file
70
arch/arc/lib/strcpy-700.S
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
|
||||||
|
If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
|
||||||
|
it 8 byte aligned. Thus, we can do a little read-ahead, without
|
||||||
|
dereferencing a cache line that we should not touch.
|
||||||
|
Note that short and long instructions have been scheduled to avoid
|
||||||
|
branch stalls.
|
||||||
|
The bne_s to r3z could be made unaligned & long to avoid a stall
|
||||||
|
there, but it is not likely to be taken often, and it
|
||||||
|
would also be likely to cost an unaligned mispredict at the next call. */
|
||||||
|
|
||||||
|
#include <asm/linkage.h>
|
||||||
|
|
||||||
|
; strcpy: r0 = dst, r1 = src.
; r0 is never written, so it still holds dst on return; r10 is the running
; destination pointer.
; When both pointers are word aligned, words are copied two per loop
; iteration until a word containing a NUL is seen; that last word is then
; stored byte by byte up to and including the NUL (r3z).
; Otherwise the whole string is copied byte by byte (charloop).
; r8 = 0x01010101 and r12 = 0x80808080 are the constants of the zero-byte
; test ((x - r8) and not x and r12).
ARC_ENTRY strcpy
|
||||||
|
or r2,r0,r1
|
||||||
|
bmsk_s r2,r2,1 ; low two address bits of both pointers combined
|
||||||
|
brne.d r2,0,charloop ; either pointer not word aligned: copy byte by byte
|
||||||
|
mov_s r10,r0 ; (delay slot) r10 = running dst
|
||||||
|
ld_s r3,[r1,0] ; first source word
|
||||||
|
mov r8,0x01010101
|
||||||
|
bbit0.d r1,2,loop_start ; src is 8-byte aligned: enter the loop at its midpoint
|
||||||
|
ror r12,r8 ; (delay slot) r12 = 0x80808080
|
||||||
|
sub r2,r3,r8 ; src is 4 but not 8 byte aligned: test the first word on its own
|
||||||
|
bic_s r2,r2,r3
|
||||||
|
tst_s r2,r12
|
||||||
|
bne r3z ; it contains a NUL: finish byte by byte
|
||||||
|
mov_s r4,r3 ; otherwise hand it to the loop as the pending word to store
|
||||||
|
.balign 4
|
||||||
|
loop:
|
||||||
|
ld.a r3,[r1,4] ; advance src and load the next word
|
||||||
|
st.ab r4,[r10,4] ; store the word already known to be NUL-free
|
||||||
|
loop_start:
|
||||||
|
ld.a r4,[r1,4] ; read ahead one more word
|
||||||
|
sub r2,r3,r8 ; zero-byte test on r3
|
||||||
|
bic_s r2,r2,r3
|
||||||
|
tst_s r2,r12
|
||||||
|
bne_s r3z ; r3 contains a NUL: finish byte by byte
|
||||||
|
st.ab r3,[r10,4]
|
||||||
|
sub r2,r4,r8 ; zero-byte test on r4
|
||||||
|
bic r2,r2,r4
|
||||||
|
tst r2,r12
|
||||||
|
beq loop ; no NUL in r4: keep going
|
||||||
|
mov_s r3,r4 ; r4 contains a NUL: finish it through r3z
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
r3z: bmsk.f r1,r3,7 ; r1 = next byte in memory order (lowest byte); Z set when it is the NUL
|
||||||
|
lsr_s r3,r3,8 ; bring the following byte down
|
||||||
|
#else
|
||||||
|
r3z: lsr.f r1,r3,24 ; r1 = next byte in memory order (highest byte); Z set when it is the NUL
|
||||||
|
asl_s r3,r3,8 ; bring the following byte up
|
||||||
|
#endif
|
||||||
|
bne.d r3z ; repeat until the NUL has been stored
|
||||||
|
stb.ab r1,[r10,1] ; (delay slot) store the byte, NUL included
|
||||||
|
j_s [blink]
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
charloop:
|
||||||
|
ldb.ab r3,[r1,1] ; next source byte, post-increment
|
||||||
|
|
||||||
|
|
||||||
|
brne.d r3,0,charloop ; loop until the NUL has been copied
|
||||||
|
stb.ab r3,[r10,1] ; (delay slot) store the byte, NUL included
|
||||||
|
j [blink]
|
||||||
|
ARC_EXIT strcpy
|
83
arch/arc/lib/strlen.S
Normal file
83
arch/arc/lib/strlen.S
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <asm/linkage.h>
|
||||||
|
|
||||||
|
; strlen: r0 = s; the length is returned in r0.
; The string is scanned in aligned 8-byte blocks, two words per step. The
; first block is the one containing s, read from its 8-byte boundary, so
; bytes located before s are read; they are excluded from the NUL test.
; r4 = 0x01010101 and r5 = 0x80808080 are the constants of the zero-byte
; test ((x - r4) and not x and r5). r3 points at the second word of the
; current block. The big-endian tail is annotated only lightly.
ARC_ENTRY strlen
|
||||||
|
or r3,r0,7 ; r3 = address of the last byte of the 8-byte block containing s
|
||||||
|
ld r2,[r3,-7] ; r2 = first word of that block
|
||||||
|
ld.a r6,[r3,-3] ; r6 = second word; r3 now points at the second word
|
||||||
|
mov r4,0x01010101
|
||||||
|
; uses long immediate
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
asl_s r1,r0,3 ; 8 * s; presumably only the low 5 bits matter as a shift count
|
||||||
|
btst_s r0,2 ; Z set when s lies in the first word of the block
|
||||||
|
asl r7,r4,r1 ; 0x01010101 with the lanes of the bytes before s shifted out
|
||||||
|
ror r5,r4 ; r5 = 0x80808080
|
||||||
|
sub r1,r2,r7 ; zero-byte test on the first word, ignoring bytes before s
|
||||||
|
bic_s r1,r1,r2
|
||||||
|
mov.eq r7,r4 ; s in the first word: the second word is tested in full
|
||||||
|
sub r12,r6,r7 ; zero-byte test on the second word
|
||||||
|
bic r12,r12,r6
|
||||||
|
or.eq r12,r12,r1 ; include the first word only when s lies in it
|
||||||
|
and r12,r12,r5
|
||||||
|
brne r12,0,.Learly_end ; NUL already in the first block
|
||||||
|
#else /* BIG ENDIAN */
|
||||||
|
ror r5,r4 ; r5 = 0x80808080
|
||||||
|
btst_s r0,2 ; Z set when s lies in the first word of the block
|
||||||
|
mov_s r1,31
|
||||||
|
sub3 r7,r1,r0 ; r7 = 31 - 8 * s; presumably only the low 5 bits matter as a bit index
|
||||||
|
sub r1,r2,r4 ; zero-byte test on the first word
|
||||||
|
bic_s r1,r1,r2
|
||||||
|
bmsk r1,r1,r7 ; drop the indicators of the bytes before s
|
||||||
|
sub r12,r6,r4 ; zero-byte test on the second word
|
||||||
|
bic r12,r12,r6
|
||||||
|
bmsk.ne r12,r12,r7 ; s in the second word: drop the indicators of the bytes before s
|
||||||
|
or.eq r12,r12,r1 ; include the first word only when s lies in it
|
||||||
|
and r12,r12,r5
|
||||||
|
brne r12,0,.Learly_end ; NUL already in the first block
|
||||||
|
#endif /* ENDIAN */
|
||||||
|
|
||||||
|
.Loop:
|
||||||
|
ld_s r2,[r3,4] ; first word of the next block
|
||||||
|
ld.a r6,[r3,8] ; second word; r3 advances by 8
|
||||||
|
; stall for load result
|
||||||
|
sub r1,r2,r4 ; zero-byte test on the first word
|
||||||
|
bic_s r1,r1,r2
|
||||||
|
sub r12,r6,r4 ; zero-byte test on the second word
|
||||||
|
bic r12,r12,r6
|
||||||
|
or r12,r12,r1 ; combined result for the block
|
||||||
|
and r12,r12,r5
|
||||||
|
breq r12,0,.Loop ; no NUL in this block: keep going
|
||||||
|
.Lend:
|
||||||
|
and.f r1,r1,r5 ; Z set when the first word has no NUL indicator
|
||||||
|
sub.ne r3,r3,4 ; NUL is in the first word: point r3 at it
|
||||||
|
mov.eq r1,r12 ; otherwise take the indicators of the second word
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
sub_s r2,r1,1
|
||||||
|
bic_s r2,r2,r1 ; mask of the bits below the lowest indicator bit
|
||||||
|
norm r1,r2 ; presumably 24 - 8*b for a NUL in byte b
|
||||||
|
sub_s r0,r0,3
|
||||||
|
lsr_s r1,r1,3 ; 3 - b
|
||||||
|
sub r0,r3,r0 ; (word address) - s + 3
|
||||||
|
j_s.d [blink]
|
||||||
|
sub r0,r0,r1 ; (delay slot) length = (word address) - s + b
|
||||||
|
#else /* BIG ENDIAN */
|
||||||
|
lsr_s r1,r1,7 ; move each indicator from bit 7 to bit 0 of its byte
|
||||||
|
mov.eq r2,r6 ; NUL is in the second word: use that word for the correction below
|
||||||
|
bic_s r1,r1,r2 ; drop indicators where bit 0 of the data byte is set (presumably false hits caused by borrow - confirm)
|
||||||
|
norm r1,r1
|
||||||
|
sub r0,r3,r0 ; (word address) - s
|
||||||
|
lsr_s r1,r1,3 ; byte index of the NUL within the word
|
||||||
|
j_s.d [blink]
|
||||||
|
add r0,r0,r1 ; (delay slot) length = (word address) - s + byte index
|
||||||
|
#endif /* ENDIAN */
|
||||||
|
.Learly_end:
|
||||||
|
b.d .Lend
|
||||||
|
sub_s.ne r1,r1,r1 ; (delay slot) s lies in the second word (flags from btst_s): clear the first-word indicators
|
||||||
|
ARC_EXIT strlen
|
Reference in New Issue
Block a user