From 5cfd2a7d857da15bb757152ffc6a2b918a783364 Mon Sep 17 00:00:00 2001
From: Lephenixnoir
Date: Sun, 23 May 2021 16:40:21 +0200
Subject: [PATCH] string: use gint's optimized memcmp (DONE)

---
 CMakeLists.txt                             |   1 +
 src/libc/string/target/sh-generic/memcmp.S | 117 +++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 src/libc/string/target/sh-generic/memcmp.S

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 79b511a..128ade2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -172,6 +172,7 @@ if(sh-generic IN_LIST TARGET_FOLDERS)
     src/libc/setjmp/target/sh-generic/setjmp.S
     src/libc/setjmp/target/sh-generic/longjmp.S
     src/libc/string/target/sh-generic/memchr.S
+    src/libc/string/target/sh-generic/memcmp.S
     src/libc/string/target/sh-generic/memcpy.S
     src/libc/string/target/sh-generic/memset.S
     src/libc/string/target/sh-generic/strlen.S
diff --git a/src/libc/string/target/sh-generic/memcmp.S b/src/libc/string/target/sh-generic/memcmp.S
new file mode 100644
index 0000000..1d222dc
--- /dev/null
+++ b/src/libc/string/target/sh-generic/memcmp.S
@@ -0,0 +1,117 @@
+#include
+
+.global _memcmp
+.text
+
+_memcmp:
+	tst	r6, r6
+	bt	.zero
+
+	/* When comparing 64 bytes or less, use the naive method */
+	mov	#64, r0
+	cmp/ge	r6, r0
+	bt	_naive_memcmp
+
+	mov	#4, r2
+	mov	#3, r3
+
+_memcmp_align_rhs:
+	/* 4-align the right-hand side */
+	mov.b	@r4+, r0
+	mov.b	@r5+, r1
+	cmp/eq	r0, r1
+	bf/s	.end
+	dt	r6
+	tst	r3, r5
+	bf	_memcmp_align_rhs
+
+	/* If the left-hand side is 4-aligned, use mov.l */
+	tst	r3, r4
+	bt	.aligned4
+
+	/* If unaligned but running on SH4, use movua.l */
+	mov.l	.___cpucap, r0
+	mov.l	@r0, r0
+	tst	#__CPUCAP_SH4ALDSP, r0
+	bf	.unaligned4
+
+	/* If the left-hand side is 2-aligned, use mov.w and mov.l */
+	mov	r4, r0
+	tst	#1, r0
+	bt	.aligned2
+
+	/* Otherwise use a naive comparison */
+	bra	_naive_memcmp
+	nop
+
+.aligned4:
+	/* Compare 4 bytes at a time until at most 4 bytes are left */
+	mov.l	@r4+, r0
+	mov.l	@r5+, r1
+	cmp/eq	r0, r1
+	bf/s	_fail
+	add	#-4, r6
+	cmp/ge	r6, r2
+	bf	.aligned4
+
+	bra	_naive_memcmp
+	nop
+
+.unaligned4:
+	/* Compare 4 bytes at a time until at most 4 bytes are left. Only the
+	   left-hand side is unaligned here, so read it with movua.l */
+	movua.l	@r4+, r0
+	mov.l	@r5+, r1
+	cmp/eq	r0, r1
+	bf/s	_fail
+	add	#-4, r6
+	cmp/ge	r6, r2
+	bf	.unaligned4
+
+	bra	_naive_memcmp
+	nop
+
+.aligned2:
+	/* Read 4 bytes from r4 in two steps (r3 is scratch; r2 still holds 4) */
+	mov.w	@r4+, r0
+	mov.l	@r5+, r1
+	mov.w	@r4+, r3
+	shll16	r0
+	or	r3, r0
+	cmp/eq	r0, r1
+	bf/s	_fail
+	add	#-4, r6
+	cmp/ge	r6, r2
+	bf	.aligned2
+
+	bra	_naive_memcmp
+	nop
+
+_fail:
+	/* Rewind 4 bytes to compare manually */
+	add	#-4, r4
+	add	#-4, r5
+	add	#4, r6
+
+_naive_memcmp:
+	mov.b	@r4+, r0
+	mov.b	@r5+, r1
+	cmp/eq	r0, r1
+	bf/s	.end
+	dt	r6
+	bf	_naive_memcmp
+
+.end:
+	extu.b	r0, r0
+	extu.b	r1, r1
+	rts
+	sub	r1, r0
+
+.zero:
+	rts
+	mov	#0, r0
+
+.align 4
+
+.___cpucap:
+	.long	___cpucap
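
For reference, below is a rough C model of the comparison strategy the assembly implements: byte-compare until the right-hand pointer is 4-aligned, then compare one 32-bit word at a time while more than 4 bytes remain, and finish (or resolve a word mismatch) with a plain byte loop. This is an illustrative sketch only, not code from the patch; the names memcmp_model and THRESHOLD are invented for the example, and memcpy stands in for the mov.l/movua.l/mov.w loads.

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define THRESHOLD 64  /* at or below this size, use the plain byte loop */

static int memcmp_model(const void *lhs, const void *rhs, size_t n)
{
    const unsigned char *l = lhs, *r = rhs;

    if(n > THRESHOLD) {
        /* Byte-compare until the right-hand side is 4-aligned */
        while((uintptr_t)r & 3) {
            if(*l != *r) return *l - *r;
            l++, r++, n--;
        }
        /* Compare 4 bytes at a time while more than 4 bytes remain; on a
           mismatch, fall through to the byte loop, which locates the
           differing byte (this mirrors the _fail rewind) */
        while(n > 4) {
            uint32_t wl, wr;
            memcpy(&wl, l, 4);
            memcpy(&wr, r, 4);
            if(wl != wr) break;
            l += 4, r += 4, n -= 4;
        }
    }
    /* Naive byte-by-byte comparison for the tail or after a mismatch */
    while(n--) {
        if(*l != *r) return *l - *r;
        l++, r++;
    }
    return 0;
}

int main(void)
{
    unsigned char a[200], b[200];
    for(int i = 0; i < 200; i++) a[i] = b[i] = (unsigned char)(i * 7);
    b[150] ^= 0x40;

    /* The model should agree with the C library's memcmp on the sign */
    printf("model<0: %d, libc<0: %d\n",
        memcmp_model(a, b, 200) < 0, memcmp(a, b, 200) < 0);
    return 0;
}

As in the assembly, the word loop is only used to detect equality quickly; the ordering of a mismatch is always decided by the byte loop, so the result does not depend on endianness or on how the words are assembled.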