mirror of
https://git.planet-casio.com/Vhex-Kernel-Core/fxlibc.git
synced 2024-12-28 04:23:38 +01:00
string: use gint's optimized memcpy (DONE)
This commit is contained in:
parent
b69e0fd299
commit
a354e38ccf
4 changed files with 135 additions and 8 deletions
|
@ -172,6 +172,7 @@ if(sh-generic IN_LIST TARGET_FOLDERS)
|
|||
src/libc/setjmp/target/sh-generic/setjmp.S
|
||||
src/libc/setjmp/target/sh-generic/longjmp.S
|
||||
src/libc/string/target/sh-generic/memchr.S
|
||||
src/libc/string/target/sh-generic/memcpy.S
|
||||
src/libc/string/target/sh-generic/memset.S
|
||||
src/libc/string/target/sh-generic/strlen.S
|
||||
src/target/sh-generic/cpucap.c)
|
||||
|
|
|
@ -1,17 +1,13 @@
|
|||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
** The memcpy() function copies n bytes from memory area src to memory area dest.
|
||||
** The memory areas must not overlap. Use memmove(3) if the memory areas do
|
||||
** overlap.
|
||||
**
|
||||
** TODO: use DMA ?
|
||||
** TODO: use DSP ?
|
||||
*/
|
||||
#ifndef __SUPPORT_ARCH_SH
|
||||
|
||||
/*
** Portable fallback: copy `count` bytes from `src` to `dest` one byte at a
** time, in increasing address order, and return `dest`.
**
** Nothing is copied when `count` is zero. The two areas must not overlap.
*/
void *memcpy(void *dest, const void *src, size_t count)
{
	/* Typed views of the buffers; the source view keeps its const
	   qualifier instead of casting it away on every access. */
	uint8_t *out = dest;
	const uint8_t *in = src;

	for (size_t i = 0; i < count; i++)
		out[i] = in[i];

	return (dest);
}
|
||||
|
||||
#endif /*__SUPPORT_ARCH_SH*/
|
||||
|
|
|
@ -119,5 +119,7 @@ _memchr:
|
|||
rts
|
||||
add #-1, r0
|
||||
|
||||
.align 4
|
||||
|
||||
.___cpucap:
|
||||
.long ___cpucap
|
||||
|
|
128
src/libc/string/target/sh-generic/memcpy.S
Normal file
128
src/libc/string/target/sh-generic/memcpy.S
Normal file
|
@ -0,0 +1,128 @@
|
|||
#include <bits/asm/cpucap.h>
|
||||
|
||||
.global _memcpy
|
||||
.text
|
||||
|
||||
/*
** void *memcpy(void *dest, const void *src, size_t count)
**
** In:    r4 = dest, r5 = src, r6 = count
** Out:   r0 = dest (value of r4 on entry)
** Uses:  r0-r3, r7 and the T bit; no stack
**
** Strategy:
**   count == 0   return immediately
**   count <= 64  byte loop only
**   otherwise    copy single bytes until dest is 4-aligned (1 to 4 bytes),
**                then choose a bulk loop from the source alignment:
**                  src 4-aligned               mov.l, 32 then 4 bytes a pass
**                  unaligned + SH4AL-DSP cap   movua.l reads, 4 bytes a pass
**                  src 2-aligned               two mov.w, 4 bytes a pass
**                  anything else               byte loop
**
** Every bulk loop stops with 1 to 4 bytes left, so the final byte loop
** (which copies before it tests) always has at least one byte to copy.
** The size comparisons use cmp/ge, which is a signed compare.
*/
_memcpy:
	tst	r6, r6
	bt	.Lzero

	mov	r4, r3			/* r3 = dest, kept for the return value */
	mov	#3, r2			/* r2 = mask of the low two address bits */

	/* When copying at most 64 bytes, use the naive method */
	mov	#64, r0
	cmp/ge	r6, r0			/* T = (64 >= count) */
	bt	.Lnaive

.Lalign_dst:
	/* 4-align the destination. The alignment test comes after the copy,
	   so a destination that is already aligned still gets 4 bytes copied
	   here. */
	mov.b	@r5+, r0
	mov.b	r0, @r4
	add	#1, r4
	tst	r2, r4
	bf/s	.Lalign_dst
	dt	r6			/* delay slot: one byte less to copy */

	/* If source is 4-aligned, use mov.l */
	tst	r2, r5
	bt/s	.Laligned4_32
	mov	#4, r2			/* delay slot: r2 = 4 for the 4-byte loops */

	/* If unaligned but the SH4AL-DSP capability is set, use movua.l */
	mov.l	.___cpucap, r0
	mov.l	@r0, r0
	tst	#__CPUCAP_SH4ALDSP, r0
	bf	.Lunaligned4

	/* If source is 2-aligned, use mov.w */
	mov	r5, r0
	tst	#1, r0
	bt	.Laligned2

	/* Otherwise use a naive copy */
	bra	.Lnaive
	nop

.Laligned4_32:
	mov	#36, r2			/* loop threshold, loaded once */

.Laligned4_32_loop:
	/* Copy 32 bytes at a time until at most 36 bytes are left. At least
	   5 bytes remain on exit, which the 4-byte loop below relies on. */
	mov.l	@r5+, r0
	mov.l	@r5+, r1
	mov.l	@r5+, r7
	mov.l	r0, @r4
	mov.l	r1, @(4,r4)
	mov.l	r7, @(8,r4)
	mov.l	@r5+, r0
	mov.l	@r5+, r1
	mov.l	@r5+, r7
	mov.l	r0, @(12,r4)
	mov.l	r1, @(16,r4)
	mov.l	r7, @(20,r4)
	mov.l	@r5+, r0
	mov.l	@r5+, r1
	add	#-32, r6
	mov.l	r0, @(24,r4)
	mov.l	r1, @(28,r4)
	cmp/ge	r6, r2			/* T = (36 >= count) */
	bf/s	.Laligned4_32_loop
	add	#32, r4			/* delay slot */

	mov	#4, r2			/* loop threshold, loaded once */

.Laligned4_4_loop:
	/* Copy 4 bytes at a time until at most 4 bytes are left */
	mov.l	@r5+, r0
	mov.l	r0, @r4
	add	#-4, r6
	cmp/ge	r6, r2			/* T = (4 >= count) */
	bf/s	.Laligned4_4_loop
	add	#4, r4			/* delay slot */

	bra	.Lnaive
	nop

.Lunaligned4:
	/* Copy 4 bytes but read with movua.l since source is unaligned;
	   r2 = 4 was set in the delay slot before the capability test */
	movua.l	@r5+, r0
	mov.l	r0, @r4
	add	#-4, r6
	cmp/ge	r6, r2
	bf/s	.Lunaligned4
	add	#4, r4			/* delay slot */

	bra	.Lnaive
	nop

.Laligned2:
	/* Copy 4 bytes as two 16-bit halves; r2 = 4 as above */
	mov.w	@r5+, r0
	mov.w	r0, @r4
	mov.w	@r5+, r0
	mov.w	r0, @(2,r4)
	add	#-4, r6
	cmp/ge	r6, r2
	bf/s	.Laligned2
	add	#4, r4			/* delay slot */

	bra	.Lnaive
	nop

.Lnaive:
	/* Byte loop; requires count >= 1 on entry because it copies first
	   and tests afterwards */
	mov.b	@r5+, r0
	dt	r6
	mov.b	r0, @r4
	bf/s	.Lnaive
	add	#1, r4			/* delay slot */

	rts
	mov	r3, r0			/* delay slot: return the original dest */

.Lzero:
	rts
	mov	r4, r0			/* delay slot: dest is untouched here */

	.align 4

.___cpucap:
	.long ___cpucap
|
Loading…
Reference in a new issue