string: add and test an optimized memchr (DONE)

This version works on both SH3 and SH4.
This commit is contained in:
Lephenixnoir 2021-05-23 14:30:35 +02:00
parent b96970e26d
commit d6f606fa5c
No known key found for this signature in database
GPG key ID: 1BBA026E13FC0495
3 changed files with 87 additions and 3 deletions

View file

@ -167,7 +167,8 @@ endif()
if(sh-generic IN_LIST TARGET_FOLDERS) if(sh-generic IN_LIST TARGET_FOLDERS)
list(APPEND SOURCES list(APPEND SOURCES
src/libc/setjmp/target/sh-generic/setjmp.S src/libc/setjmp/target/sh-generic/setjmp.S
src/libc/setjmp/target/sh-generic/longjmp.S) src/libc/setjmp/target/sh-generic/longjmp.S
src/libc/string/target/sh-generic/memchr.S)
endif() endif()
if(casiowin-fx IN_LIST TARGET_FOLDERS) if(casiowin-fx IN_LIST TARGET_FOLDERS)

19
STATUS
View file

@ -14,6 +14,14 @@ taken from the C99 standard (ISO/IEC 9899:1999), section 7 ("Library").
address of the function can be taken; don't rely on the macro being defined, address of the function can be taken; don't rely on the macro being defined,
as the user can remove it except in some special cases as the user can remove it except in some special cases
String functions (mainly in <string.h>) can use 4-byte accesses, and in doing
so read up to 3 bytes after the end of the string if it is not padded (which
malloc'd strings and literal strings both are, leaving only stack-allocated and
statically-allocated ones). This allows important speed optimizations. The
extra access cannot trigger memory protection because protection regions start
and end on 4-byte boundaries, so an aligned 4-byte access that begins inside a
valid region never crosses into the next one. The extra access
might trigger the UBC in very specific scenarios, but we don't really care.
# Status # Status
In this file, every definition is classified in one of several implementation In this file, every definition is classified in one of several implementation
@ -102,7 +110,7 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested
7.21 <string.h> 7.21 <string.h>
7.21.2.1 memcpy: DONE 7.21.2.1 memcpy: DONE
7.21.2.2 memmove: DONE 7.21.2.2 memmove: DONE (Unoptimized: byte-by-byte)
! 7.21.2.3 strcpy: TODO ! 7.21.2.3 strcpy: TODO
! 7.21.2.4 strncpy: TODO ! 7.21.2.4 strncpy: TODO
! 7.21.3.1 strcat: TODO ! 7.21.3.1 strcat: TODO
@ -112,7 +120,7 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested
! 7.21.4.3 strcoll: TODO ! 7.21.4.3 strcoll: TODO
! 7.21.4.4 strncmp: TODO ! 7.21.4.4 strncmp: TODO
! 7.21.4.5 strxfrm: TODO ! 7.21.4.5 strxfrm: TODO
! 7.21.5.1 memchr: TODO 7.21.5.1 memchr: DONE
! 7.21.5.2 strchr: TODO ! 7.21.5.2 strchr: TODO
! 7.21.5.3 strcspn: TODO ! 7.21.5.3 strcspn: TODO
! 7.21.5.4 strpbrk: TODO ! 7.21.5.4 strpbrk: TODO
@ -123,6 +131,13 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested
7.21.6.1 memset: DONE 7.21.6.1 memset: DONE
! 7.21.6.2 strerror: TODO ! 7.21.6.2 strerror: TODO
! 7.21.6.3 strlen: TODO ! 7.21.6.3 strlen: TODO
Extensions:
- strnlen: TODO
- strchrnul: TODO
- strcasecmp: TODO
- strncasecmp: TODO
- strdup: TODO
- strndup: TODO
7.22 <tgmath.h> => GCC 7.22 <tgmath.h> => GCC

View file

@ -0,0 +1,68 @@
/*
 * memchr for SuperH: find the first occurrence of a byte in a memory area.
 *
 * Register usage, as read and written by the code below:
 *   In:   r4 = start address of the area
 *         r5 = byte to look for (only the low 8 bits are significant)
 *         r6 = number of bytes to examine
 *   Out:  r0 = address of the first matching byte, or 0 if there is none
 *   Also written: r1, r2, r3, r5, r6, r7 and the T bit
 *
 * Areas shorter than 64 bytes are scanned one byte at a time. Longer areas
 * are scanned one longword at a time with cmp/str, after which the bytes of
 * the matching longword (or the 0-3 trailing bytes) are scanned one by one.
 *
 * Reminder: rts and bra are delayed branches, so the instruction written
 * right after them executes before control reaches the branch target.
 */
.global _memchr
.type _memchr, @function
_memchr:
/* r0 is the running read pointer */
mov r4, r0
/* mov.b sign-extends the bytes it loads, so sign-extend the needle as well
   to make the cmp/eq comparisons below meaningful */
exts.b r5, r5
/* For small inputs, simply check bytes individually */
/* T = (64 > r6), unsigned; if set, jump to the byte loop with r0 = start
   and r6 = full byte count */
mov #64, r2
cmp/hi r6, r2
bt .last
.large: /* Make a 4-byte version of r5 for cmp/str */
/* r3 = 0x000000cc, then r2 = 0x0000cccc, then r2 = 0xcccccccc */
extu.b r5, r3
swap.b r3, r2
or r3, r2
swap.w r2, r3
or r3, r2
/* First check 3 bytes to ensure we don't skip bytes when aligning */
/* After these three reads r0 = start + 3, so rounding r0 down to a multiple
   of 4 below lands somewhere in [start, start + 3]: bytes may be checked
   twice, but none are skipped */
mov.b @r0+, r1
cmp/eq r1, r5
bt .end
mov.b @r0+, r1
cmp/eq r1, r5
bt .end
mov.b @r0+, r1
cmp/eq r1, r5
bt .end
/* Align to a 4-byte boundary */
/* Clear the two low bits of r0 */
shlr2 r0
shll2 r0
/* r6 = (start + count) - r0 = bytes left from the aligned pointer */
add r4, r6
sub r0, r6
/* r7 = number of whole longwords left; nonzero here since count >= 64 */
mov r6, r7
shlr2 r7
/* r6 = number of trailing bytes (0 to 3) after the last whole longword */
mov #3, r3
and r3, r6
/* Read longwords */
/* cmp/str sets T when at least one byte of r1 equals the byte at the same
   position in r2; dt decrements r7 and sets T when it reaches zero */
1: mov.l @r0+, r1
cmp/str r1, r2
bt .found
dt r7
bf 1b
.last: /* Don't read if there are no bytes left */
/* Here r0 points at the next byte to check and r6 is how many to check */
tst r6, r6
bt .none
/* Byte loop: on a match r0 has already been post-incremented past it */
2: mov.b @r0+, r1
cmp/eq r1, r5
bt .end
dt r6
bf 2b
/* No match: return 0 (the mov sits in the rts delay slot) */
.none: rts
mov #0, r0
.found: /* Go back to find out which of the last 4 bytes is r5 */
/* Rewind r0 to the start of the matching longword and rescan it with the
   byte loop; r6 = 4 is set in the bra delay slot, before the loop runs */
add #-4, r0
bra 2b
mov #4, r6
/* Match: undo the post-increment in the rts delay slot so that r0 is the
   address of the matching byte */
.end: rts
add #-1, r0