From 6942d10dda04e1f4fbb0e53dedefa5c030d69981 Mon Sep 17 00:00:00 2001 From: attilavs2 Date: Sat, 10 Aug 2024 16:11:54 +0200 Subject: [PATCH 1/2] Essai de boucle de dessin en asm (echec) --- CMakeLists.txt | 3 +- src/main.c | 8 ++-- src/moteur.c | 8 +++- src/moteur.h | 2 + src/opti.S | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+), 8 deletions(-) create mode 100644 src/opti.S diff --git a/CMakeLists.txt b/CMakeLists.txt index 20a9446..1ed3d4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ # toolchain file and module path of the fxSDK cmake_minimum_required(VERSION 3.15) -project(Copy3DEngine) +project(Copy3DEngine LANGUAGES C ASM) include(GenerateG3A) include(Fxconv) @@ -18,6 +18,7 @@ set(SOURCES src/main.c src/moteur.c src/map.c + src/opti.S ) set(ASSETS diff --git a/src/main.c b/src/main.c index 8aad888..a347065 100644 --- a/src/main.c +++ b/src/main.c @@ -27,9 +27,7 @@ #error Ce code est pour FXCG50/G90+E uniquement, enlevez ce message a vos riques et périls #endif -//#define debug //pour afficher les infos de debug - -//extern uint16_t *gint_vram; +#define debug 0 //pour afficher les infos de debug image_t *tex_index[TINDEX_S]; @@ -73,7 +71,7 @@ void keys_get(){ if (keydown(KEY_EXIT)) exit_game = 1; #ifdef debug - if (keydown(KEY_TAN)) end_screen(); + //if (keydown(KEY_TAN)) end_screen(); #endif } @@ -129,7 +127,7 @@ int main(){ if (disp_frame_time == 1) dprint( 1, 10, C_BLACK, "Frame time : %d ms", frame_time); - #ifdef debug + #if debug dprint( 1, 20, C_BLACK, "planeX : %d", planeX); dprint( 1, 30, C_BLACK, "planeY : %d", planeY); dprint( 1, 40, C_BLACK, "dirX : %d", dirX); diff --git a/src/moteur.c b/src/moteur.c index 17a778d..384e38e 100644 --- a/src/moteur.c +++ b/src/moteur.c @@ -167,7 +167,10 @@ void load_map(){ spawn_gen(); } -void draw_stripe(image_t *tex, int texSampleY, int texSample, int linePos, fixed_t texSize, int texX, int x){ +#if asm_opti +void draw_stripe(image_t *tex, int texSampleY, int linePos, fixed_t texSize, int texX, int x); +#else +void inline draw_stripe(image_t *tex, int texSampleY, int linePos, fixed_t texSize, int texX, int x){ fixed_t screenPos = fix(linePos); uint32_t texDat = (uint32_t)tex->data + 2*texX; for(int texPos = texSampleY; texPos < 64; texPos++){ @@ -180,6 +183,7 @@ void draw_stripe(image_t *tex, int texSampleY, int texSample, int linePos, fixed texDat += tex->stride; } } +#endif void draw_walls(){ extern fixed_t posX; @@ -336,6 +340,6 @@ void draw_walls(){ image_t *tex = tex_index[map_test[mapX][mapY]]; - draw_stripe(tex, texSampleY, texSample, linePos, texSize, texX, x); + draw_stripe(tex, texSampleY, linePos, texSize, texX, x); } } diff --git a/src/moteur.h b/src/moteur.h index eaee3fc..e9e847e 100644 --- a/src/moteur.h +++ b/src/moteur.h @@ -14,6 +14,8 @@ #define TINDEX_S 256 +#define asm_opti 1 + void load_map(); void end_screen(); void draw_walls(); diff --git a/src/opti.S b/src/opti.S new file mode 100644 index 0000000..c2c18bb --- /dev/null +++ b/src/opti.S @@ -0,0 +1,99 @@ +##void draw_stripe(image_t *tex, int texSampleY, int linePos, fixed_t texSize, int texX, int x){ +## fixed_t screenPos = fix(linePos); +## uint32_t texDat = (uint32_t)tex->data + 2*texX; +## for(int texPos = texSampleY; texPos < 64; texPos++){ +## if(screenPos < -texSize) goto noDraw; +## for(fixed_t oldPos = screenPos; oldPos < screenPos+texSize; oldPos+=0xFFFF){ +## gint_vram[ffloor(oldPos)*396+x] = *(uint16_t*)texDat; +## } +## noDraw: +## screenPos += texSize; +## texDat += tex->stride; +## } +##} + +# r0 : vram +# r1 : tex->stride +# r2 : texDat +# r3 : 0xFFFF / swapped with 64 +# r4 : tex -> x -> texX -> oldPos +# r5 : texSampleY -> texPos +# r6 : linePos -> screenPos +# r7 : texSize +# r8 : 396*2 +# r9 : -texSize +# r10 : px save +# @-4 : texX -> r8 (save) +# @-8 : x -> r9 (save) +# @-12 : r10 (save) + +.global _draw_stripe +.align 4 +_draw_stripe: + mov.l .gint_vram, r0 + + add #4, r4 + mov.l @r4, r1 + + add #4, r4 + mov.l @r4, r2 + mov.l @r15+, r4 + shll r4 + add r4, r2 + + mov.l @r15+, r4 + shll r4 + add r4, r0 + + shll16 r6 + + mov.l r8, @-r15 + mov #99, r8 + shll2 r8 + shll r8 + + mov.l r9, @-r15 + mov r7, r9 + neg r9, r9 + + mov.l r10, @-r15 + + ds_lpstart: + cmp/gt r6, r9 + bt noDraw + nop + nop + ds_tex_lpstart: + mov #-1, r3 + mov r6, r4 + #tex read + mov.w @r2, r10 + extu.w r3, r3 + add r7, r6 + mov.w r10, @r0 + add r3, r4 + cmp/gt r4, r6 + add r8, r0 + bf ds_tex_lpstart + bt ds_tex_exit + nop + noDraw: + add r7, r6 + add r8, r0 + ds_tex_exit: + add r1, r2 + mov #64, r3 + add #1, r5 + cmp/ge r5, r3 + bf ds_lpstart + nop + + mov.l @r15+, r10 + mov.l @r15+, r9 + mov.l @r15+, r8 + rts + add #-8, r15 + +.align 4 +.gint_vram: + .long _gint_vram From 84b823f285a2027c254fc49f3c4645fa7e33cf84 Mon Sep 17 00:00:00 2001 From: attilavs2 Date: Sun, 1 Sep 2024 16:38:23 +0200 Subject: [PATCH 2/2] draw_stripe : Opti max (Pour du C) --- CMakeLists.txt | 2 +- src/main.c | 20 +++++++--- src/moteur.c | 47 +++++++++++++++++++----- src/moteur.h | 64 ++++++++++++++++++++------------ src/opti.S | 99 -------------------------------------------------- 5 files changed, 93 insertions(+), 139 deletions(-) delete mode 100644 src/opti.S diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ed3d4e..9284dfc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ set(SOURCES src/main.c src/moteur.c src/map.c - src/opti.S +# src/opti.S ) set(ASSETS diff --git a/src/main.c b/src/main.c index a347065..6783498 100644 --- a/src/main.c +++ b/src/main.c @@ -27,8 +27,6 @@ #error Ce code est pour FXCG50/G90+E uniquement, enlevez ce message a vos riques et périls #endif -#define debug 0 //pour afficher les infos de debug - image_t *tex_index[TINDEX_S]; extern char map_test[map_w][map_h]; @@ -91,8 +89,6 @@ int main(){ load_map(); - image_t *frame_buffer = image_create_vram(); - extern image_t *tex_index[TINDEX_S]; tex_index[1] = &buisson0; @@ -108,6 +104,10 @@ int main(){ prof_init(); + #if debug + EngineTimers timers; + #endif + while (!exit_game) { prof_t frame = prof_make(); prof_enter(frame); @@ -117,14 +117,18 @@ int main(){ dma_memset((void*)((uint32_t)gint_vram + viewport_w*viewport_h), 0xc4c9c4c9, viewport_w*viewport_h); - draw_walls(); - if(first_frame){ main_menu(); } keys_get(); + draw_walls( + #if debug + &timers + #endif + ); + if (disp_frame_time == 1) dprint( 1, 10, C_BLACK, "Frame time : %d ms", frame_time); #if debug @@ -134,6 +138,10 @@ int main(){ dprint( 1, 50, C_BLACK, "dirY : %d", dirY); dprint( 1, 60, C_BLACK, "posX : %d", posX); dprint( 1, 70, C_BLACK, "posY : %d", posY); + dprint( 1, 80, C_BLACK, "Raycast time : %d", prof_time(timers.raycast_time)); + dprint( 1, 90, C_BLACK, "Draw time : %d", prof_time(timers.draw_time)); + timers.raycast_time = prof_make(); + timers.draw_time = prof_make(); #endif dupdate(); diff --git a/src/moteur.c b/src/moteur.c index 384e38e..dce8a77 100644 --- a/src/moteur.c +++ b/src/moteur.c @@ -168,24 +168,40 @@ void load_map(){ } #if asm_opti -void draw_stripe(image_t *tex, int texSampleY, int linePos, fixed_t texSize, int texX, int x); #else -void inline draw_stripe(image_t *tex, int texSampleY, int linePos, fixed_t texSize, int texX, int x){ +inline void __attribute__((always_inline)) draw_stripe(image_t *tex, int texSampleY, + int linePos, fixed_t texSize, int texX, int x){ fixed_t screenPos = fix(linePos); uint32_t texDat = (uint32_t)tex->data + 2*texX; - for(int texPos = texSampleY; texPos < 64; texPos++){ - if(screenPos < -texSize) goto noDraw; - for(fixed_t oldPos = screenPos; oldPos < screenPos+texSize; oldPos+=0xFFFF){ - gint_vram[ffloor(oldPos)*396+x] = *(uint16_t*)texDat; + register int vramSize asm("r2") = 396; + register int oneConst asm("r12"); + asm("mov #-1, r12\n" + "extu.w r12, r12" + : "=r" (oneConst) + : "r" (oneConst) + ); + for(int texPos = texSampleY; texPos < 64; ++texPos){ + if(screenPos >= -texSize){ + int vrampos = ffloor(screenPos)*vramSize+x; + fixed_t oldPos = screenPos; + int16_t pix = *(int16_t*)texDat; + do{ + gint_vram[vrampos] = pix; + vrampos += vramSize; + oldPos += oneConst; + }while(oldPos < screenPos+texSize); } - noDraw: screenPos += texSize; texDat += tex->stride; } } #endif -void draw_walls(){ +void draw_walls( +#if debug + EngineTimers *timers +#endif +){ extern fixed_t posX; extern fixed_t posY; extern fixed_t dirX; @@ -222,6 +238,9 @@ void draw_walls(){ struct image_linear_map temp; for(x = 0; x < viewport_w; x++) { + #if debug + prof_enter(timers->raycast_time); + #endif //calculate ray position and direction cameraX = fdiv(fix(x*2), fix(viewport_w)) - 0xFFFF + h_offset; //x-coordinate in camera space @@ -245,6 +264,7 @@ void draw_walls(){ // stepping further below works. So the values can be computed as below. // Division through zero is prevented, even though technically that's not // needed in C++ with IEEE 754 floating point values. + //Fcalva : It is with fp32s ! rayDirX = rayDirX == 0 ? 1 : rayDirX; rayDirY = rayDirY == 0 ? 1 : rayDirY; @@ -281,7 +301,7 @@ void draw_walls(){ break; } //Otherwise check if ray has hit a wall - else if (map_test[mapX][mapY] > 0) { + if (map_test[mapX][mapY] > 0) { break; } //jump to next map square, either in x-direction, or in y-direction @@ -306,6 +326,11 @@ void draw_walls(){ if (side == 0) perpWallDist = (sideDistX - deltaDistX); else perpWallDist = (sideDistY - deltaDistY); + #if debug + prof_leave(timers->raycast_time); + prof_enter(timers->draw_time); + #endif + //texturing calculations //calculate value of wallX @@ -341,5 +366,9 @@ void draw_walls(){ image_t *tex = tex_index[map_test[mapX][mapY]]; draw_stripe(tex, texSampleY, linePos, texSize, texX, x); + + #if debug + prof_leave(timers->draw_time); + #endif } } diff --git a/src/moteur.h b/src/moteur.h index e9e847e..11b8303 100644 --- a/src/moteur.h +++ b/src/moteur.h @@ -1,24 +1,40 @@ -// Voir README.md pour license précise, par Fcalva 2023-2024 et est sous GPLv3 - -#ifndef moteur_h -#define moteur_h - -#include - -//param. graphiques -#define screen_w 396 -#define screen_h 224 -#define viewport_w 396 -#define viewport_h 224 -#define max_dist fix(32) //en tuiles << 16, actuellement 32 - -#define TINDEX_S 256 - -#define asm_opti 1 - -void load_map(); -void end_screen(); -void draw_walls(); -void move(); - -#endif /* moteur */ +// Voir README.md pour license précise, par Fcalva 2023-2024 et est sous GPLv3 + +#ifndef moteur_h +#define moteur_h + +#include "libprof.h" +#include + +//param. graphiques +#define screen_w 396 +#define screen_h 224 +#define viewport_w 396 +#define viewport_h 224 +#define max_dist fix(32) //en tuiles << 16, actuellement 32 + +#define TINDEX_S 256 + +#define debug 0 //pour afficher les infos de debug + +#define asm_opti 0 + +typedef struct { + + prof_t raycast_time; + prof_t draw_time; + +} EngineTimers; + +void draw_stripe(image_t *tex, int texSampleY, int linePos, fixed_t texSize, int texX, int x); + +void load_map(); +void end_screen(); +void draw_walls( +#if debug + EngineTimers *timers +#endif +); +void move(); + +#endif /* moteur */ diff --git a/src/opti.S b/src/opti.S deleted file mode 100644 index c2c18bb..0000000 --- a/src/opti.S +++ /dev/null @@ -1,99 +0,0 @@ -##void draw_stripe(image_t *tex, int texSampleY, int linePos, fixed_t texSize, int texX, int x){ -## fixed_t screenPos = fix(linePos); -## uint32_t texDat = (uint32_t)tex->data + 2*texX; -## for(int texPos = texSampleY; texPos < 64; texPos++){ -## if(screenPos < -texSize) goto noDraw; -## for(fixed_t oldPos = screenPos; oldPos < screenPos+texSize; oldPos+=0xFFFF){ -## gint_vram[ffloor(oldPos)*396+x] = *(uint16_t*)texDat; -## } -## noDraw: -## screenPos += texSize; -## texDat += tex->stride; -## } -##} - -# r0 : vram -# r1 : tex->stride -# r2 : texDat -# r3 : 0xFFFF / swapped with 64 -# r4 : tex -> x -> texX -> oldPos -# r5 : texSampleY -> texPos -# r6 : linePos -> screenPos -# r7 : texSize -# r8 : 396*2 -# r9 : -texSize -# r10 : px save -# @-4 : texX -> r8 (save) -# @-8 : x -> r9 (save) -# @-12 : r10 (save) - -.global _draw_stripe -.align 4 -_draw_stripe: - mov.l .gint_vram, r0 - - add #4, r4 - mov.l @r4, r1 - - add #4, r4 - mov.l @r4, r2 - mov.l @r15+, r4 - shll r4 - add r4, r2 - - mov.l @r15+, r4 - shll r4 - add r4, r0 - - shll16 r6 - - mov.l r8, @-r15 - mov #99, r8 - shll2 r8 - shll r8 - - mov.l r9, @-r15 - mov r7, r9 - neg r9, r9 - - mov.l r10, @-r15 - - ds_lpstart: - cmp/gt r6, r9 - bt noDraw - nop - nop - ds_tex_lpstart: - mov #-1, r3 - mov r6, r4 - #tex read - mov.w @r2, r10 - extu.w r3, r3 - add r7, r6 - mov.w r10, @r0 - add r3, r4 - cmp/gt r4, r6 - add r8, r0 - bf ds_tex_lpstart - bt ds_tex_exit - nop - noDraw: - add r7, r6 - add r8, r0 - ds_tex_exit: - add r1, r2 - mov #64, r3 - add #1, r5 - cmp/ge r5, r3 - bf ds_lpstart - nop - - mov.l @r15+, r10 - mov.l @r15+, r9 - mov.l @r15+, r8 - rts - add #-8, r15 - -.align 4 -.gint_vram: - .long _gint_vram