mirror of
https://git.planet-casio.com/Lephenixnoir/gint.git
synced 2024-12-29 13:03:36 +01:00
render-cg: restore bopti method on P4 and defined p4_clearbg_alt
This commit is contained in:
parent
a4df076214
commit
ede19fc878
6 changed files with 241 additions and 113 deletions
|
@ -181,6 +181,7 @@ set(SOURCES_CG
|
||||||
src/render-cg/image/image_p4.S
|
src/render-cg/image/image_p4.S
|
||||||
src/render-cg/image/image_p4_normal.S
|
src/render-cg/image/image_p4_normal.S
|
||||||
src/render-cg/image/image_p4_clearbg.S
|
src/render-cg/image/image_p4_clearbg.S
|
||||||
|
src/render-cg/image/image_p4_clearbg_alt.S
|
||||||
src/render-cg/image/image_p4_swapcolor.S
|
src/render-cg/image/image_p4_swapcolor.S
|
||||||
src/render-cg/image/image_p4_dye.S
|
src/render-cg/image/image_p4_dye.S
|
||||||
# Interface to the fast image renderer
|
# Interface to the fast image renderer
|
||||||
|
@ -193,6 +194,7 @@ set(SOURCES_CG
|
||||||
src/render-cg/image/image_p8_swapcolor.c
|
src/render-cg/image/image_p8_swapcolor.c
|
||||||
src/render-cg/image/image_p8_dye.c
|
src/render-cg/image/image_p8_dye.c
|
||||||
src/render-cg/image/image_p4.c
|
src/render-cg/image/image_p4.c
|
||||||
|
src/render-cg/image/image_p4_clearbg_alt.c
|
||||||
src/render-cg/image/image_p4_effect.c
|
src/render-cg/image/image_p4_effect.c
|
||||||
src/render-cg/image/image_p4_swapcolor.c
|
src/render-cg/image/image_p4_swapcolor.c
|
||||||
src/render-cg/image/image_p4_dye.c
|
src/render-cg/image/image_p4_dye.c
|
||||||
|
|
|
@ -197,6 +197,12 @@ DIMAGE_SIG(_addbg, int effects, int bg_color)
|
||||||
/* d[sub]image_{rgb16,p8,p4}_dye(..., effects, dye_color) */
|
/* d[sub]image_{rgb16,p8,p4}_dye(..., effects, dye_color) */
|
||||||
DIMAGE_SIG(_dye, int effects, int dye_color)
|
DIMAGE_SIG(_dye, int effects, int dye_color)
|
||||||
|
|
||||||
|
/* d[sub]image_p4_clearbg_alt(..., effects, bg_index)
|
||||||
|
This is functionally identical to CLEARBG, but it uses an alternative
|
||||||
|
rendering method that is faster for larger images with wide transparent
|
||||||
|
areas. You can swap it with the normal CLEARBG freely. */
|
||||||
|
DIMAGE_SIG1(p4_clearbg_alt, int effects, int bg_index)
|
||||||
|
|
||||||
#define dimage_rgb16_effect(x, y, img, eff, ...) \
|
#define dimage_rgb16_effect(x, y, img, eff, ...) \
|
||||||
dsubimage_rgb16_effect(x, y, img, 0, 0, (img)->width, (img)->height, \
|
dsubimage_rgb16_effect(x, y, img, 0, 0, (img)->width, (img)->height, \
|
||||||
eff, ##__VA_ARGS__)
|
eff, ##__VA_ARGS__)
|
||||||
|
@ -353,6 +359,7 @@ void gint_image_p8_dye(void);
|
||||||
|
|
||||||
void gint_image_p4_normal(void);
|
void gint_image_p4_normal(void);
|
||||||
void gint_image_p4_clearbg(void);
|
void gint_image_p4_clearbg(void);
|
||||||
|
void gint_image_p4_clearbg_alt(void);
|
||||||
void gint_image_p4_swapcolor(void);
|
void gint_image_p4_swapcolor(void);
|
||||||
void gint_image_p4_dye(void);
|
void gint_image_p4_dye(void);
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ void dsubimage_p4_clearbg(int x, int y, image_t const *img,
|
||||||
struct gint_image_box box = { x, y, w, h, left, top };
|
struct gint_image_box box = { x, y, w, h, left, top };
|
||||||
struct gint_image_cmd cmd;
|
struct gint_image_cmd cmd;
|
||||||
|
|
||||||
if(!gint_image_mkcmd(&box, img, eff, true, true, &cmd, DWIDTH,
|
if(!gint_image_mkcmd(&box, img, eff, false, false, &cmd, DWIDTH,
|
||||||
DHEIGHT)) return;
|
DHEIGHT)) return;
|
||||||
cmd.effect += 4;
|
cmd.effect += 4;
|
||||||
cmd.color_1 = bg_color;
|
cmd.color_1 = bg_color;
|
||||||
|
|
|
@ -1,146 +1,90 @@
|
||||||
.global _gint_image_p4_clearbg
|
.global _gint_image_p4_clearbg
|
||||||
#include "image_macros.S"
|
#include "image_macros.S"
|
||||||
|
|
||||||
/* P4 CLEARBG, RAM version: by NULL canceling.
|
/* P4 CLEARBG, RAM version: trivial.
|
||||||
|
|
||||||
This function is similar to P8 CLEARBG. Transparent pixels are not limited
|
This is the bopti algorithm. Azur's is faster when there are enough
|
||||||
by RAM writing speed, so a tight CPU loop is used. See P8 CLEARBG for an
|
transparent pixels, but very limiting for quasi-opaque images.
|
||||||
explanation of NULL canceling.
|
|
||||||
|
|
||||||
r0: [temporary]
|
r0: [temporary]
|
||||||
r7: Right edge pointer
|
r7: Current x position
|
||||||
r8: Alpha value
|
r8: Alpha value
|
||||||
r9: Palette
|
r9: Palette
|
||||||
r10: Left edge pointer
|
r10: Initial x position
|
||||||
r11: Nullable output pointer
|
r11: Column counter
|
||||||
r12: 0 (in outer loop: edge stride)
|
r12: -3 */
|
||||||
r13: [temporary]
|
|
||||||
r14: [temporary]
|
|
||||||
|
|
||||||
Spilled to stack:
|
|
||||||
@(-12,r15): Right edge value
|
|
||||||
@(-8,r15): Left edge value
|
|
||||||
@(-4,r15): Edge stride */
|
|
||||||
|
|
||||||
.macro GEN_CLEARBG_LOOP HFLIP, OUT_DIR, TMP1, TMP2, OFF1, OFF2
|
|
||||||
shlr r2
|
|
||||||
nop
|
|
||||||
|
|
||||||
add r10, r10
|
|
||||||
nop
|
|
||||||
|
|
||||||
|
.macro GEN_CLEARBG_LOOP HFLIP, OUT_DIR
|
||||||
|
/* Cancel the last operation to keep r4 = bytes between rows */
|
||||||
mov.l @r8+, r9 /* cmd.palette */
|
mov.l @r8+, r9 /* cmd.palette */
|
||||||
mov r2, r0
|
mov r2, r7
|
||||||
|
shlr r7
|
||||||
|
addc r7, r4
|
||||||
|
|
||||||
mov.w @r8+, r7 /* cmd.edge_2 */
|
mov.w @r8+, r7 /* cmd.edge_2 */
|
||||||
shll2 r0
|
nop
|
||||||
|
|
||||||
mov.l r12, @-r15
|
|
||||||
shll r7
|
|
||||||
|
|
||||||
mov.l r11, @-r15
|
|
||||||
add r5, r7
|
|
||||||
|
|
||||||
mov r0, r12
|
|
||||||
add r6, r12
|
|
||||||
|
|
||||||
mov.l r13, @-r15
|
|
||||||
add r5, r10
|
|
||||||
|
|
||||||
mov.l r14, @-r15
|
|
||||||
add #-4, r5
|
|
||||||
|
|
||||||
mov.w @r8, r8 /* cmd.color_1 */
|
mov.w @r8, r8 /* cmd.color_1 */
|
||||||
add #-1, r4 /* Input stride compensation for pipelining */
|
|
||||||
|
|
||||||
.if \HFLIP
|
|
||||||
add r0, r5
|
|
||||||
nop
|
nop
|
||||||
|
|
||||||
shll r0
|
mov.l r11, @-r15
|
||||||
nop
|
|
||||||
|
|
||||||
add r0, r6
|
|
||||||
nop
|
|
||||||
.endif
|
|
||||||
|
|
||||||
shll r8 /* alpha*2 compares against palette offsets */
|
shll r8 /* alpha*2 compares against palette offsets */
|
||||||
nop
|
|
||||||
|
|
||||||
START
|
|
||||||
|
|
||||||
mov.b @r3+, \TMP1
|
|
||||||
nop
|
|
||||||
|
|
||||||
mov.w @r7, r0 /* Save right edge */
|
|
||||||
nop
|
|
||||||
|
|
||||||
mov.l r0, @-r15
|
|
||||||
shll \TMP1
|
|
||||||
|
|
||||||
mov.w @r10, r0 /* Save left edge */
|
|
||||||
nop
|
|
||||||
|
|
||||||
mov.l r0, @-r15
|
|
||||||
nop
|
|
||||||
|
|
||||||
mov.l r12, @-r15
|
mov.l r12, @-r15
|
||||||
mov #0, r12
|
mov #-3, r12
|
||||||
|
|
||||||
2: mov \TMP1, r0
|
.if \HFLIP
|
||||||
|
add #-2, r5
|
||||||
|
mov r2, r0
|
||||||
|
shll r0
|
||||||
|
add r0, r5
|
||||||
|
shll r0
|
||||||
|
add r0, r6
|
||||||
|
.endif
|
||||||
|
|
||||||
|
1: mov r2, r11
|
||||||
|
mov r10, r7
|
||||||
|
|
||||||
|
/* Load 4 bits from offset r7 (in pixels) within input */
|
||||||
|
2: mov r7, r0
|
||||||
|
shlr r0
|
||||||
|
|
||||||
|
mov.b @(r0, r3), r0
|
||||||
|
nop
|
||||||
|
|
||||||
|
bt.s 3f
|
||||||
|
add #1, r7
|
||||||
|
|
||||||
|
/* Aligned */
|
||||||
|
shld r12, r0
|
||||||
and #0x1e, r0
|
and #0x1e, r0
|
||||||
|
|
||||||
cmp/eq r0, r8
|
cmp/eq r0, r8
|
||||||
mov #-1, r11
|
bt 4f
|
||||||
|
|
||||||
addc r12, r11
|
|
||||||
mov #-4, \TMP2
|
|
||||||
|
|
||||||
and r5, r11
|
|
||||||
mov.w @(r0, r9), r0
|
mov.w @(r0, r9), r0
|
||||||
|
bra 4f
|
||||||
|
mov.w r0, @r5
|
||||||
|
|
||||||
shld \TMP2, \TMP1
|
/* Unaligned */
|
||||||
mov #0x1e, \TMP2
|
3: shll r0
|
||||||
|
and #0x1e, r0
|
||||||
|
|
||||||
and \TMP2, \TMP1
|
cmp/eq r0, r8
|
||||||
mov.w r0, @(\OFF1,r11)
|
bt 4f
|
||||||
|
|
||||||
cmp/eq \TMP1, r8
|
mov.w @(r0, r9), r0
|
||||||
mov #-1, r11
|
mov.w r0, @r5
|
||||||
|
|
||||||
addc r12, r11
|
|
||||||
mov \TMP1, r0
|
|
||||||
|
|
||||||
and r5, r11
|
|
||||||
mov.b @r3+, \TMP1
|
|
||||||
|
|
||||||
|
/* End */
|
||||||
|
4: dt r11
|
||||||
|
bf.s 2b
|
||||||
add #\OUT_DIR, r5
|
add #\OUT_DIR, r5
|
||||||
mov.w @(r0,r9), r0
|
|
||||||
|
|
||||||
mov.w r0, @(\OFF2,r11)
|
|
||||||
3: shll \TMP1
|
|
||||||
|
|
||||||
mov.l @r15+, r12
|
|
||||||
nop
|
|
||||||
|
|
||||||
mov.l @r15+, r0
|
|
||||||
nop
|
|
||||||
|
|
||||||
mov.w r0, @r10 /* Restore left edge */
|
|
||||||
add r12, r10
|
|
||||||
|
|
||||||
mov.l @r15+, r0
|
|
||||||
nop
|
|
||||||
|
|
||||||
mov.w r0, @r7 /* Restore right edge */
|
|
||||||
add r12, r7
|
|
||||||
|
|
||||||
END
|
END
|
||||||
|
|
||||||
mov.l @r15+, r14
|
|
||||||
mov.l @r15+, r13
|
|
||||||
mov.l @r15+, r11
|
|
||||||
mov.l @r15+, r12
|
mov.l @r15+, r12
|
||||||
|
mov.l @r15+, r11
|
||||||
mov.l @r15+, r10
|
mov.l @r15+, r10
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
.endm
|
.endm
|
||||||
|
@ -149,5 +93,5 @@ _gint_image_p4_clearbg:
|
||||||
tst #1, r0
|
tst #1, r0
|
||||||
bf 9f
|
bf 9f
|
||||||
|
|
||||||
GEN_CLEARBG_LOOP 0, 4, r13, r14, 6, 4
|
GEN_CLEARBG_LOOP 0, 2
|
||||||
9: GEN_CLEARBG_LOOP 1, -4, r13, r14, 0, 2
|
9: GEN_CLEARBG_LOOP 1, -2
|
||||||
|
|
153
src/render-cg/image/image_p4_clearbg_alt.S
Normal file
153
src/render-cg/image/image_p4_clearbg_alt.S
Normal file
|
@ -0,0 +1,153 @@
|
||||||
|
.global _gint_image_p4_clearbg_alt
|
||||||
|
#include "image_macros.S"
|
||||||
|
|
||||||
|
/* P4 CLEARBG, alternative version: by NULL canceling.
|
||||||
|
|
||||||
|
This function is similar to P8 CLEARBG. Transparent pixels are not limited
|
||||||
|
by RAM writing speed, so a tight CPU loop is used. See P8 CLEARBG for an
|
||||||
|
explanation of NULL canceling.
|
||||||
|
|
||||||
|
r0: [temporary]
|
||||||
|
r7: Right edge pointer
|
||||||
|
r8: Alpha value
|
||||||
|
r9: Palette
|
||||||
|
r10: Left edge pointer
|
||||||
|
r11: Nullable output pointer
|
||||||
|
r12: 0 (in outer loop: edge stride)
|
||||||
|
r13: [temporary]
|
||||||
|
r14: [temporary]
|
||||||
|
|
||||||
|
Spilled to stack:
|
||||||
|
@(-12,r15): Right edge value
|
||||||
|
@(-8,r15): Left edge value
|
||||||
|
@(-4,r15): Edge stride */
|
||||||
|
|
||||||
|
.macro GEN_CLEARBG_LOOP HFLIP, OUT_DIR, TMP1, TMP2, OFF1, OFF2
|
||||||
|
shlr r2
|
||||||
|
nop
|
||||||
|
|
||||||
|
add r10, r10
|
||||||
|
nop
|
||||||
|
|
||||||
|
mov.l @r8+, r9 /* cmd.palette */
|
||||||
|
mov r2, r0
|
||||||
|
|
||||||
|
mov.w @r8+, r7 /* cmd.edge_2 */
|
||||||
|
shll2 r0
|
||||||
|
|
||||||
|
mov.l r12, @-r15
|
||||||
|
shll r7
|
||||||
|
|
||||||
|
mov.l r11, @-r15
|
||||||
|
add r5, r7
|
||||||
|
|
||||||
|
mov r0, r12
|
||||||
|
add r6, r12
|
||||||
|
|
||||||
|
mov.l r13, @-r15
|
||||||
|
add r5, r10
|
||||||
|
|
||||||
|
mov.l r14, @-r15
|
||||||
|
add #-4, r5
|
||||||
|
|
||||||
|
mov.w @r8, r8 /* cmd.color_1 */
|
||||||
|
add #-1, r4 /* Input stride compensation for pipelining */
|
||||||
|
|
||||||
|
.if \HFLIP
|
||||||
|
add r0, r5
|
||||||
|
nop
|
||||||
|
|
||||||
|
shll r0
|
||||||
|
nop
|
||||||
|
|
||||||
|
add r0, r6
|
||||||
|
nop
|
||||||
|
.endif
|
||||||
|
|
||||||
|
shll r8 /* alpha*2 compares against palette offsets */
|
||||||
|
nop
|
||||||
|
|
||||||
|
START
|
||||||
|
|
||||||
|
mov.b @r3+, \TMP1
|
||||||
|
nop
|
||||||
|
|
||||||
|
mov.w @r7, r0 /* Save right edge */
|
||||||
|
nop
|
||||||
|
|
||||||
|
mov.l r0, @-r15
|
||||||
|
shll \TMP1
|
||||||
|
|
||||||
|
mov.w @r10, r0 /* Save left edge */
|
||||||
|
nop
|
||||||
|
|
||||||
|
mov.l r0, @-r15
|
||||||
|
nop
|
||||||
|
|
||||||
|
mov.l r12, @-r15
|
||||||
|
mov #0, r12
|
||||||
|
|
||||||
|
2: mov \TMP1, r0
|
||||||
|
and #0x1e, r0
|
||||||
|
|
||||||
|
cmp/eq r0, r8
|
||||||
|
mov #-1, r11
|
||||||
|
|
||||||
|
addc r12, r11
|
||||||
|
mov #-4, \TMP2
|
||||||
|
|
||||||
|
and r5, r11
|
||||||
|
mov.w @(r0,r9), r0
|
||||||
|
|
||||||
|
shld \TMP2, \TMP1
|
||||||
|
mov #0x1e, \TMP2
|
||||||
|
|
||||||
|
and \TMP2, \TMP1
|
||||||
|
mov.w r0, @(\OFF1,r11)
|
||||||
|
|
||||||
|
cmp/eq \TMP1, r8
|
||||||
|
mov #-1, r11
|
||||||
|
|
||||||
|
addc r12, r11
|
||||||
|
mov \TMP1, r0
|
||||||
|
|
||||||
|
and r5, r11
|
||||||
|
mov.b @r3+, \TMP1
|
||||||
|
|
||||||
|
add #\OUT_DIR, r5
|
||||||
|
mov.w @(r0,r9), r0
|
||||||
|
|
||||||
|
mov.w r0, @(\OFF2,r11)
|
||||||
|
3: shll \TMP1
|
||||||
|
|
||||||
|
mov.l @r15+, r12
|
||||||
|
nop
|
||||||
|
|
||||||
|
mov.l @r15+, r0
|
||||||
|
nop
|
||||||
|
|
||||||
|
mov.w r0, @r10 /* Restore left edge */
|
||||||
|
add r12, r10
|
||||||
|
|
||||||
|
mov.l @r15+, r0
|
||||||
|
nop
|
||||||
|
|
||||||
|
mov.w r0, @r7 /* Restore right edge */
|
||||||
|
add r12, r7
|
||||||
|
|
||||||
|
END
|
||||||
|
|
||||||
|
mov.l @r15+, r14
|
||||||
|
mov.l @r15+, r13
|
||||||
|
mov.l @r15+, r11
|
||||||
|
mov.l @r15+, r12
|
||||||
|
mov.l @r15+, r10
|
||||||
|
EPILOGUE
|
||||||
|
.endm
|
||||||
|
|
||||||
|
_gint_image_p4_clearbg_alt:
|
||||||
|
tst #1, r0
|
||||||
|
bf 9f
|
||||||
|
|
||||||
|
GEN_CLEARBG_LOOP 0, 4, r13, r14, 6, 4
|
||||||
|
9: GEN_CLEARBG_LOOP 1, -4, r13, r14, 0, 2
|
22
src/render-cg/image/image_p4_clearbg_alt.c
Normal file
22
src/render-cg/image/image_p4_clearbg_alt.c
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
#include <gint/image.h>
|
||||||
|
#include <gint/display.h>
|
||||||
|
|
||||||
|
/* dimage_p4_clearbg_alt(): whole-image variant of dsubimage_p4_clearbg_alt().
   Forwards to dsubimage_p4_clearbg_alt() with the sub-rectangle starting at
   (0, 0) and spanning img->width by img->height, passing x, y, eff and bg
   through unchanged. img is dereferenced here without a NULL check. */
void dimage_p4_clearbg_alt(int x, int y, image_t const *img, int eff, int bg)
|
||||||
|
{
|
||||||
|
dsubimage_p4_clearbg_alt(x, y, img, 0, 0, img->width, img->height, eff,
|
||||||
|
bg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* dsubimage_p4_clearbg_alt(): render a sub-rectangle of an image with the
   gint_image_p4_clearbg_alt loop.

   Packs (x, y, w, h, left, top) into a gint_image_box, asks
   gint_image_mkcmd() to fill in a gint_image_cmd for a DWIDTH x DHEIGHT
   target, and returns without drawing when that call returns false.
   Otherwise it adjusts cmd.effect, stores bg_color in cmd.color_1, selects
   gint_image_p4_clearbg_alt as cmd.loop and hands the command to
   gint_image_p4_loop().

   NOTE(review): the header comment names the last parameter bg_index while
   this definition calls it bg_color; the assembly loop doubles cmd.color_1
   to compare it against palette offsets, so it is presumably a palette
   index rather than an RGB color - confirm. */
void dsubimage_p4_clearbg_alt(int x, int y, image_t const *img,
|
||||||
|
int left, int top, int w, int h, int eff, int bg_color)
|
||||||
|
{
|
||||||
|
struct gint_image_box box = { x, y, w, h, left, top };
|
||||||
|
struct gint_image_cmd cmd;
|
||||||
|
|
||||||
|
/* NOTE(review): the meaning of the two `true` flags is not visible here;
   the regular dsubimage_p4_clearbg() passes false, false - confirm against
   the gint_image_mkcmd() declaration. */
if(!gint_image_mkcmd(&box, img, eff, true, true, &cmd, DWIDTH,
|
||||||
|
DHEIGHT)) return;
|
||||||
|
/* NOTE(review): the regular dsubimage_p4_clearbg() adds 4 at this point;
   what an offset of 16 selects is not shown in this file - TODO confirm. */
cmd.effect += 16;
|
||||||
|
cmd.color_1 = bg_color;
|
||||||
|
cmd.loop = gint_image_p4_clearbg_alt;
|
||||||
|
gint_image_p4_loop(DWIDTH, &cmd);
|
||||||
|
}
|
Loading…
Reference in a new issue