std: move memcmp, memcpy, memmove, memset to fxlibc

2025-07-08 13:27:34 +02:00 · 2021-05-23 16:50:57 +02:00 · 2021-05-23 16:50:57 +02:00 · 6c12217777
commit 6c12217777
parent 7e0ccc3f69
5 changed files with 0 additions and 373 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -67,10 +67,6 @@ set(SOURCES_COMMON
  src/spu/spu.c
  src/std/aprint.c
  src/std/malloc.c
  src/std/memcmp.s
  src/std/memcpy.s
  src/std/memmove.s
  src/std/memset.s
  src/std/print.c
  src/std/string.c
  src/std/string-ext.c
--- a/src/std/memcmp.s
+++ b/src/std/memcmp.s
@ -1,114 +0,0 @@
 .global _memcmp
 .text
 /* int memcmp(const void *lhs, const void *rhs, size_t n)
    In:      r4 = lhs, r5 = rhs, r6 = n
    Out:     r0 = 0 when the first n bytes are equal, otherwise the
             difference (lhs byte - rhs byte, both zero-extended) at the
             first mismatching position
    Scratch: r0, r1, r2, r3, r7
    Invariant in the word loops: r2 = 4 (exit threshold), r3 = 3 (mask),
    r6 = bytes left to compare (always > 0 when the byte loop is entered) */
 _memcmp:
 	tst	r6, r6
 	bt	.zero
 	/* When comparing at most 64 bytes, use the naive method */
 	mov	#64, r0
 	cmp/ge	r6, r0
 	bt	_naive_memcmp
 	mov	#4, r2
 	mov	#3, r3
 _memcmp_align_rhs:
 	/* 4-align the right-hand side (always consumes at least one byte) */
 	mov.b	@r4+, r0
 	mov.b	@r5+, r1
 	cmp/eq	r0, r1
 	bf/s	.end
 	dt	r6
 	tst	r3, r5
 	bf	_memcmp_align_rhs
 	/* If left-hand side is 4-aligned, use mov.l */
 	tst	r3, r4
 	bt	.aligned4
 	/* If unaligned but SH4, use movua.l (taken when bit 0 of the first
 	   longword stored at _gint is clear) */
 	mov.l	.gint, r0
 	mov.l	@r0, r0
 	tst	#1, r0
 	bt	.unaligned4
 	/* If left-hand side is 2-aligned, use mov.w and mov.l */
 	mov	r4, r0
 	tst	#1, r0
 	bt	.aligned2
 	/* Otherwise use a naive comparison */
 	bra	_naive_memcmp
 	nop
 .aligned4:
 	/* Compare 4 bytes at a time until at most 4 bytes are left */
 	mov.l	@r4+, r0
 	mov.l	@r5+, r1
 	cmp/eq	r0, r1
 	bf/s	_fail
 	add	#-4, r6
 	cmp/ge	r6, r2
 	bf	.aligned4
 	bra	_naive_memcmp
 	nop
 .unaligned4:
 	/* Compare 4 bytes at a time until at most 4 bytes are left. Since
 	   left-hand side is unaligned, read it with movua.l */
 	movua.l	@r4+, r0
 	mov.l	@r5+, r1
 	cmp/eq	r0, r1
 	bf/s	_fail
 	add	#-4, r6
 	cmp/ge	r6, r2
 	bf	.unaligned4
 	bra	_naive_memcmp
 	nop
 .aligned2:
 	/* Read 4 bytes from r4 in two steps. The low halfword goes through
 	   r7, not r2: r2 must keep the value 4 for the loop-exit test below.
 	   mov.w sign-extends, so the low halfword is zero-extended before
 	   being merged, otherwise it would overwrite the high halfword. */
 	mov.w	@r4+, r0
 	mov.l	@r5+, r1
 	mov.w	@r4+, r7
 	shll16	r0
 	extu.w	r7, r7
 	or	r7, r0
 	cmp/eq	r0, r1
 	bf/s	_fail
 	add	#-4, r6
 	cmp/ge	r6, r2
 	bf	.aligned2
 	bra	_naive_memcmp
 	nop
 _fail:
 	/* Rewind 4 bytes to compare manually (r6 was already decremented in
 	   the delay slot of the branch that came here) */
 	add	#-4, r4
 	add	#-4, r5
 	add	#4, r6
 _naive_memcmp:
 	/* Byte-by-byte comparison of the r6 remaining bytes */
 	mov.b	@r4+, r0
 	mov.b	@r5+, r1
 	cmp/eq	r0, r1
 	bf/s	.end
 	dt	r6
 	bf	_naive_memcmp
 .end:
 	/* r0/r1 hold the last pair of bytes read: equal if the whole range
 	   matched, otherwise the first mismatch */
 	extu.b	r0, r0
 	extu.b	r1, r1
 	rts
 	sub	r1, r0
 .zero:
 	rts
 	mov	#0, r0
 .align 4
 .gint:
 	.long _gint
--- a/src/std/memcpy.s
+++ b/src/std/memcpy.s
@ -1,125 +0,0 @@
 .global _memcpy
 .text
 /* void *memcpy(void *dst, const void *src, size_t n)
    On entry: r4 = dst, r5 = src, r6 = n.
    Returns the original dst in r0.
    Scratch registers: r0, r1, r2, r3, r7.
    r3 holds the return value throughout; r6 counts the bytes still to
    copy and is never 0 when the byte loop is entered. */
 _memcpy:
 	/* n == 0: nothing to copy, hand dst straight back */
 	tst	r6, r6
 	bt	.no_bytes
 	/* r2 = mask for 4-byte alignment, r3 = saved dst */
 	mov	#3, r2
 	mov	r4, r3
 	/* Copies of 64 bytes or fewer go directly to the byte loop */
 	mov	#64, r0
 	cmp/ge	r6, r0
 	bt	.byte_loop
 .align_dst:
 	/* Move single bytes until dst is a multiple of 4; at least one byte
 	   is always moved here */
 	mov.b	@r5+, r0
 	dt	r6
 	mov.b	r0, @r4
 	add	#1, r4
 	tst	r2, r4
 	bf	.align_dst
 	/* src is a multiple of 4 as well: longword copy */
 	tst	r2, r5
 	bt	.src_aligned
 	/* All the remaining word/halfword loops stop once r6 <= 4 */
 	mov	#4, r2
 	/* Bit 0 of the first longword stored at _gint is clear: read the
 	   unaligned source with movua.l */
 	mov.l	.gint_addr, r0
 	mov.l	@r0, r0
 	tst	#1, r0
 	bt	.src_movua
 	/* src is even: copy halfword by halfword */
 	mov	r5, r0
 	tst	#1, r0
 	bt	.src_halfword
 	/* src is odd: only the byte loop is left */
 	bra	.byte_loop
 	nop
 .src_aligned:
 	/* Threshold 36 guarantees that at least 5 bytes remain when this
 	   loop ends, so the 4-byte loop below leaves between 1 and 4 bytes */
 	mov	#36, r2
 .copy_32:
 	/* 32 bytes per pass, repeated while more than 36 bytes remain */
 	mov.l	@r5+, r0
 	mov.l	@r5+, r1
 	mov.l	@r5+, r7
 	add	#-32, r6
 	mov.l	r0, @r4
 	mov.l	r1, @(4,r4)
 	mov.l	r7, @(8,r4)
 	mov.l	@r5+, r0
 	mov.l	@r5+, r1
 	mov.l	@r5+, r7
 	mov.l	r0, @(12,r4)
 	mov.l	r1, @(16,r4)
 	mov.l	r7, @(20,r4)
 	mov.l	@r5+, r0
 	mov.l	@r5+, r1
 	mov.l	r0, @(24,r4)
 	mov.l	r1, @(28,r4)
 	cmp/ge	r6, r2
 	bf/s	.copy_32
 	add	#32, r4
 	mov	#4, r2
 .copy_4:
 	/* One longword per pass, repeated while more than 4 bytes remain */
 	mov.l	@r5+, r0
 	add	#-4, r6
 	mov.l	r0, @r4
 	cmp/ge	r6, r2
 	bf/s	.copy_4
 	add	#4, r4
 	bra	.byte_loop
 	nop
 .src_movua:
 	/* Unaligned longword read, aligned longword write */
 	movua.l	@r5+, r0
 	add	#-4, r6
 	mov.l	r0, @r4
 	cmp/ge	r6, r2
 	bf/s	.src_movua
 	add	#4, r4
 	bra	.byte_loop
 	nop
 .src_halfword:
 	/* Two halfword moves per pass, repeated while more than 4 bytes
 	   remain; then falls through to the byte loop */
 	mov.w	@r5+, r0
 	mov.w	r0, @r4
 	mov.w	@r5+, r0
 	mov.w	r0, @(2,r4)
 	add	#-4, r6
 	cmp/ge	r6, r2
 	bf/s	.src_halfword
 	add	#4, r4
 .byte_loop:
 	/* Copy the r6 remaining bytes one at a time */
 	mov.b	@r5+, r0
 	mov.b	r0, @r4
 	add	#1, r4
 	dt	r6
 	bf	.byte_loop
 	rts
 	mov	r3, r0
 .no_bytes:
 	rts
 	mov	r4, r0
 .align 4
 .gint_addr:
 	.long _gint
--- a/src/std/memmove.s
+++ b/src/std/memmove.s
@ -1,60 +0,0 @@
 .global _memmove
 .text
 /* void *memmove(void *dst, const void *src, size_t n)
    In:      r4 = dst, r5 = src, r6 = n
    Out:     r0 = dst
    Scratch: r0, r1, r3
    Disjoint regions are handed to memcpy() with a tail jump; overlapping
    regions are copied byte by byte in the direction that never overwrites
    source bytes that are still to be read. */
 _memmove:
 	tst	r6, r6
 	bt	.zero
 	/* Simple optimization: if regions do not overlap, use memcpy().
 	   Addresses are unsigned quantities, hence cmp/hs (unsigned >=)
 	   rather than the signed cmp/ge. */
 	mov	r4, r0
 	add	r6, r0
 	/* src >= dst + n ? */
 	cmp/hs	r0, r5
 	bt	_memmove_memcpy
 	mov	r5, r0
 	add	r6, r0
 	/* dst >= src + n ? */
 	cmp/hs	r0, r4
 	bt	_memmove_memcpy
 	/* Overlap: remember dst for the return value and pick a direction */
 	mov	r4, r3
 	cmp/hs	r4, r5
 	bf	.backwards
 .forwards:
 	/* If the destination starts before the source, copy forwards */
 	mov.b	@r5+, r0
 	mov.b	r0, @r4
 	dt	r6
 	bf/s	.forwards
 	add	#1, r4
 	rts
 	mov	r3, r0
 .backwards:
 	/* Otherwise, copy backwards, starting from one past the last byte */
 	add	r6, r4
 	add	r6, r5
 .backwards_loop:
 	add	#-1, r5
 	mov.b	@r5, r0
 	dt	r6
 	bf/s	.backwards_loop
 	mov.b	r0, @-r4
 	rts
 	mov	r3, r0
 _memmove_memcpy:
 	/* Tail call: r4, r5 and r6 still hold the original arguments */
 	mov.l	.memcpy, r1
 	jmp	@r1
 	nop
 .zero:
 	rts
 	mov	r4, r0
 .align 4
 .memcpy:
 	.long _memcpy
--- a/src/std/memset.s
+++ b/src/std/memset.s
@ -1,70 +0,0 @@
 .global _memset
 .text
 /* void *memset(void *s, int c, size_t n)
    In:      r4 = s, r5 = c (only the low byte is used), r6 = n
    Out:     r0 = s
    Scratch: r0, r1, r2, r3
    The buffer is filled from its end towards its start. r3 keeps s for
    the return value, r4 is the write cursor, r6 the bytes left to fill. */
 _memset:
 	tst	r6, r6
 	bt	.zero
 	/* We'll fill from the end */
 	mov	r4, r3
 	add	r6, r4
 	/* When setting at most 64 bytes, use the naive method */
 	mov	#64, r0
 	cmp/ge	r6, r0
 	bt	_naive_memset
 	mov	#3, r2
 	/* Make a 4-byte filler in r0. c is first reduced to a byte so that
 	   stray upper bits (e.g. a sign-extended char) cannot leak into the
 	   filler, and r5 is left untouched because the tail loop at
 	   _naive_memset still stores its low byte. */
 	extu.b	r5, r5
 	mov	r5, r0
 	shll8	r0
 	or	r5, r0
 	mov	r0, r1
 	shll16	r1
 	or	r1, r0
 _memset_align:
 	/* 4-align the destination (always writes at least one byte) */
 	mov.b	r0, @-r4
 	tst	r2, r4
 	bf/s	_memset_align
 	dt	r6
 	mov	#40, r2
 .aligned4_32:
 	/* Fill 32 bytes per pass while more than 40 bytes remain */
 	add	#-32, r4
 	add	#-32, r6
 	mov.l	r0, @(28,r4)
 	mov.l	r0, @(24,r4)
 	mov.l	r0, @(20,r4)
 	mov.l	r0, @(16,r4)
 	mov.l	r0, @(12,r4)
 	mov.l	r0, @(8,r4)
 	mov.l	r0, @(4,r4)
 	cmp/ge	r6, r2
 	bf/s	.aligned4_32
 	mov.l	r0, @r4
 	mov	#8, r2
 .aligned4_4:
 	/* Fill 4 bytes per pass; r6 is tested before being decremented in
 	   the delay slot, so the loop leaves between 1 and 4 bytes */
 	mov.l	r0, @-r4
 	cmp/ge	r6, r2
 	bf/s	.aligned4_4
 	add	#-4, r6
 _naive_memset:
 	/* Tight loop, store one byte per pass (r6 > 0 on entry) */
 	dt	r6
 	bf/s	_naive_memset
 	mov.b	r5, @-r4
 .end:
 	rts
 	mov	r3, r0
 .zero:
 	rts
 	mov	r4, r0