mirror of
https://git.planet-casio.com/Lephenixnoir/gint.git
synced 2024-12-28 20:43:36 +01:00
bopti: more performance improvement for SCSP cases
* Turn on GCC's -O3 for bopti files
* Remove the bopti_render_noclip() step
* Use rbox as early as possible to avoid moving memory around
* A lot of local grinding
This commit is contained in:
parent
11dd04243f
commit
d887423bbb
5 changed files with 147 additions and 98 deletions
|
@ -2,21 +2,38 @@
|
|||
#include "../render-fx/render-fx.h"
|
||||
#include "../render-fx/bopti-asm.h"
|
||||
|
||||
#pragma GCC optimize("O3")
|
||||
|
||||
/* gsubimage(): Render a section of an image */
|
||||
void gsubimage(int x, int y, bopti_image_t const *img, int left, int top,
|
||||
int width, int height, int flags)
|
||||
void gsubimage(bopti_image_t const *img, struct rbox *r, GUNUSED int flags)
|
||||
{
|
||||
uint32_t *light, *dark;
|
||||
dgray_getvram(&light, &dark);
|
||||
|
||||
if(flags & DIMAGE_NOCLIP)
|
||||
/* Intersect the bounding box with both the source image and the VRAM,
|
||||
except if DIMAGE_NOCLIP is provided */
|
||||
if(!(flags & DIMAGE_NOCLIP))
|
||||
{
|
||||
bopti_render_noclip(x, y, img, left, top, width, height,
|
||||
light, dark);
|
||||
/* Early finish for empty intersections */
|
||||
if(bopti_clip(img, r)) return;
|
||||
}
|
||||
|
||||
int left = r->left;
|
||||
int width = r->width;
|
||||
int visual_x = r->visual_x;
|
||||
|
||||
r->left = left >> 5;
|
||||
r->columns = ((left + width - 1) >> 5) - r->left + 1;
|
||||
|
||||
if(r->columns == 1 && (visual_x & 31) + width <= 32)
|
||||
{
|
||||
r->x = (left & 31) - (visual_x & 31);
|
||||
bopti_render_scsp(img, r, light, dark);
|
||||
}
|
||||
else
|
||||
{
|
||||
bopti_render_clip(x, y, img, left, top, width, height,
|
||||
light, dark);
|
||||
/* x-coordinate of the first pixel of the first column */
|
||||
r->x = visual_x - (left & 31);
|
||||
bopti_render(img, r, light, dark);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ typedef void asm_gray_scsp_t(uint32_t *v1, uint32_t const *layer,
|
|||
|
||||
/* Type of any rendering function */
|
||||
typedef union {
|
||||
void *gen;
|
||||
asm_mono_t *asm_mono;
|
||||
asm_gray_t *asm_gray;
|
||||
asm_mono_scsp_t *asm_mono_scsp;
|
||||
|
@ -84,4 +85,34 @@ extern asm_gray_scsp_t bopti_gasm_gray_scsp;
|
|||
/* bopti_gasm_gray_alpha_scsp(): SCSP "gray_alpha" profile */
|
||||
extern asm_gray_scsp_t bopti_gasm_gray_alpha_scsp;
|
||||
|
||||
//---
|
||||
// Renderer's data structures
|
||||
//---
|
||||
|
||||
/* struct rbox: A rendering box (target coordinates and source rectangle)
|
||||
The meaning of the fields varies during the rendering process! */
|
||||
struct rbox
|
||||
{
|
||||
/* General renderer:
|
||||
On-screen location of the leftmost pixel of the leftmost rendered
|
||||
column (this particular pixel might not be drawn but is of
|
||||
importance in the positioning process)
|
||||
SCSP renderer:
|
||||
Shift value used to align columns with positions */
|
||||
int x;
|
||||
/* On-screen location of top-left corner; the (x,y) of dsubimage() */
|
||||
int visual_x, y;
|
||||
/* Width of rendered sub-image */
|
||||
int width;
|
||||
/* Before bopti_render{_scsp}():
|
||||
Left-coordinate of the source box (included, in pixels)
|
||||
In bopti_render{_scsp}():
|
||||
Left-coordinate of the source box (included, in columns) */
|
||||
int left;
|
||||
/* Number of columns used in the source box */
|
||||
int columns;
|
||||
/* Vertical bounds of the box in the image (inc-excluded, in pixels) */
|
||||
int top, height;
|
||||
};
|
||||
|
||||
#endif /* GINT_RENDERFX_BOPTIASM */
|
||||
|
|
|
@ -3,22 +3,7 @@
|
|||
#include "render-fx.h"
|
||||
#include "bopti-asm.h"
|
||||
|
||||
/* struct rbox: A rendering box (target coordinates and source rectangle)
|
||||
Some of the data here is redundant, but makes things easier. */
|
||||
struct rbox
|
||||
{
|
||||
/* Left pixel of the first column to be drawn, even if this column is
|
||||
not drawn entirely */
|
||||
int x;
|
||||
/* On-screen location of top-left corner */
|
||||
int visual_x, y;
|
||||
/* Width of rendered sub-image */
|
||||
int width;
|
||||
/* Horizontal bounds of the box in the image (included, in columns) */
|
||||
int left, right;
|
||||
/* Vertical bounds of the box in the image (inc-excluded, in pixels) */
|
||||
int top, height;
|
||||
};
|
||||
#pragma GCC optimize("O3")
|
||||
|
||||
/* struct command: A rendering command
|
||||
Includes many computed parameters and handy information. Read-only. */
|
||||
|
@ -176,7 +161,7 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1,
|
|||
masks(rbox->visual_x, rbox->visual_x + rbox->width - 1, vm);
|
||||
|
||||
/* Number of layers per profile */
|
||||
int layer_count[] = { 1, 2, 2, 3 };
|
||||
static const int layer_count[] = { 1, 2, 2, 3 };
|
||||
int layers = layer_count[img->profile];
|
||||
|
||||
/* For each pair of consecutive VRAM elements involved, create a mask
|
||||
|
@ -207,20 +192,17 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1,
|
|||
const uint32_t *layer = (void *)img->data;
|
||||
layer += (rbox->top * img_columns + rbox->left) * layers;
|
||||
|
||||
/* Number of grid columns */
|
||||
int columns = rbox->right - rbox->left + 1;
|
||||
|
||||
/* Compute and execute the command for these parameters */
|
||||
struct command c = {
|
||||
.x = rbox->x & 31,
|
||||
.v1 = v1,
|
||||
.v2 = v2 ? v2 : v1,
|
||||
.offset = (rbox->y << 2) + (rbox->x >> 5),
|
||||
.columns = columns,
|
||||
.columns = rbox->columns,
|
||||
.masks = masks + 2 * left_origin,
|
||||
.real_start = (left_origin > 0),
|
||||
.vram_stride = 4 - columns,
|
||||
.data_stride = ((img_columns - columns) << 2) * layers,
|
||||
.vram_stride = 4 - rbox->columns,
|
||||
.data_stride = ((img_columns - rbox->columns) << 2) * layers,
|
||||
.gray = (v2 != NULL),
|
||||
.f = f,
|
||||
};
|
||||
|
@ -231,61 +213,62 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1,
|
|||
void bopti_render_scsp(bopti_image_t const *img, struct rbox *rbox,
|
||||
uint32_t *v1, uint32_t *v2)
|
||||
{
|
||||
/* Rendering function */
|
||||
bopti_asm_t f;
|
||||
if(v2) f.asm_gray_scsp = asm_gray_scsp[img->profile];
|
||||
else f.asm_mono_scsp = asm_mono_scsp[img->profile];
|
||||
|
||||
/* Compute the only rendering mask. Avoid UB if width = 32 */
|
||||
uint32_t mask = 0xffffffff;
|
||||
if(rbox->width < 32)
|
||||
{
|
||||
int right = 32 - ((rbox->visual_x & 31) + rbox->width);
|
||||
mask = ((1 << rbox->width) - 1) << right;
|
||||
}
|
||||
/* Compute the only rendering mask */
|
||||
uint32_t mask =
|
||||
(0xffffffff << (32 - rbox->width)) >> (rbox->visual_x & 31);
|
||||
|
||||
/* Number of layers */
|
||||
int layer_count[] = { 1, 2, 2, 3 };
|
||||
int layers = layer_count[img->profile];
|
||||
int layers = img->profile - (img->profile >> 1) + 1;
|
||||
|
||||
/* Number of columns in [img] */
|
||||
int img_columns = (img->width + 31) >> 5;
|
||||
/* Number of longwords to skip between rows of [img] */
|
||||
int img_stride = ((img->width + 31) >> 5) * layers;
|
||||
|
||||
/* Interwoven layer data. Skip left columns that are not rendered */
|
||||
const uint32_t *layer = (void *)img->data;
|
||||
layer += (rbox->top * img_columns + rbox->left) * layers;
|
||||
layer += (rbox->top * img_stride) + (rbox->left * layers);
|
||||
|
||||
/* Starting value of VRAM pointers */
|
||||
int offset = (rbox->y << 2) + (rbox->visual_x >> 5);
|
||||
v1 += offset;
|
||||
if(v2) v2 += offset;
|
||||
|
||||
/* Number of rows */
|
||||
int rows = rbox->height;
|
||||
/* Mask shift */
|
||||
int shift = -(rbox->x & 31);
|
||||
if(rbox->x < 0) shift += 32;
|
||||
|
||||
/* Render the grid immediately; mono version */
|
||||
if(!v2) while(rows--)
|
||||
if(!v2)
|
||||
{
|
||||
f.asm_mono_scsp(v1, layer, mask, shift);
|
||||
layer += img_columns * layers;
|
||||
v1 += 4;
|
||||
asm_mono_scsp_t *f = asm_mono_scsp[img->profile];
|
||||
while(rows--)
|
||||
{
|
||||
f(v1, layer, mask, rbox->x);
|
||||
layer += img_stride;
|
||||
v1 += 4;
|
||||
}
|
||||
}
|
||||
/* Gray version */
|
||||
else while(rows--)
|
||||
else
|
||||
{
|
||||
f.asm_gray_scsp(v1, layer, mask, v2, shift);
|
||||
layer += img_columns * layers;
|
||||
v1 += 4;
|
||||
v2 += 4;
|
||||
asm_gray_scsp_t *f = asm_gray_scsp[img->profile];
|
||||
v2 += offset;
|
||||
|
||||
while(rows--)
|
||||
{
|
||||
f(v1, layer, mask, v2, rbox->x);
|
||||
layer += img_stride;
|
||||
v1 += 4;
|
||||
v2 += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void bopti_render_clip(int x, int y, bopti_image_t const *img, int left,
|
||||
int top, int width, int height, uint32_t *v1, uint32_t *v2)
|
||||
int bopti_clip(bopti_image_t const *img, struct rbox *r)
|
||||
{
|
||||
/* This load/save is not elegant but it makes GCC use register-only
|
||||
operations, which is what we need for efficiency */
|
||||
int x = r->visual_x, y = r->y;
|
||||
int left = r->left, top = r->top;
|
||||
int width = r->width, height = r->height;
|
||||
|
||||
/* Adjust the bounding box of the input image */
|
||||
if(left < 0) width += left, x -= left, left = 0;
|
||||
if(top < 0) height += top, y -= top, top = 0;
|
||||
|
@ -298,32 +281,34 @@ void bopti_render_clip(int x, int y, bopti_image_t const *img, int left,
|
|||
if(x + width > DWIDTH) width = DWIDTH - x;
|
||||
if(y + height > DHEIGHT) height = DHEIGHT - y;
|
||||
|
||||
/* Early finish for empty intersections */
|
||||
if(width <= 0 || height <= 0) return;
|
||||
r->visual_x = x;
|
||||
r->y = y;
|
||||
r->left = left;
|
||||
r->top = top;
|
||||
r->width = width;
|
||||
r->height = height;
|
||||
|
||||
/* Finish with the noclip variant */
|
||||
bopti_render_noclip(x, y, img, left, top, width, height, v1, v2);
|
||||
/* Return non-zero if the result is empty */
|
||||
return (width <= 0 || height <= 0);
|
||||
}
|
||||
|
||||
void bopti_render_noclip(int visual_x, int y, bopti_image_t const *img,
|
||||
int left, int top, int width, int height, uint32_t *v1, uint32_t *v2)
|
||||
void bopti_render_noclip(bopti_image_t const *img, struct rbox *r,
|
||||
uint32_t *v1, uint32_t *v2)
|
||||
{
|
||||
int left = r->left;
|
||||
|
||||
/* Start column and end column (both included) */
|
||||
int cl = (left) >> 5;
|
||||
int cr = (left + width - 1) >> 5;
|
||||
r->left >>= 5;
|
||||
|
||||
/* Finish with the standard bopti renderer */
|
||||
struct rbox rbox = { 0, visual_x, y, width, cl, cr, top, height };
|
||||
|
||||
if(cl == cr && (visual_x & 31) + width <= 32)
|
||||
if(r->columns == 1 && (r->visual_x & 31) + r->width <= 32)
|
||||
{
|
||||
rbox.x = (visual_x & 31) - (left & 31);
|
||||
bopti_render_scsp(img, &rbox, v1, v2);
|
||||
r->x = (left & 31) - (r->visual_x & 31);
|
||||
bopti_render_scsp(img, r, v1, v2);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* x-coordinate of the first pixel of the first column */
|
||||
rbox.x = visual_x - (left & 31);
|
||||
bopti_render(img, &rbox, v1, v2);
|
||||
r->x = r->visual_x - (left & 31);
|
||||
bopti_render(img, r, v1, v2);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,22 +2,43 @@
|
|||
#include "render-fx.h"
|
||||
#include "bopti-asm.h"
|
||||
|
||||
#pragma GCC optimize("O3")
|
||||
|
||||
/* dsubimage(): Render a section of an image */
|
||||
void dsubimage(int x, int y, bopti_image_t const *img, int left, int top,
|
||||
int width, int height, int flags)
|
||||
{
|
||||
DMODE_OVERRIDE(dsubimage, x, y, img, left, top, width, height, flags);
|
||||
struct rbox r = {
|
||||
0, x, y, width, left, 0, top, height
|
||||
};
|
||||
|
||||
DMODE_OVERRIDE(dsubimage, img, &r, flags);
|
||||
if(img->gray) return;
|
||||
|
||||
if(flags & DIMAGE_NOCLIP)
|
||||
/* Intersect the bounding box with both the source image and the VRAM,
|
||||
except if DIMAGE_NOCLIP is provided */
|
||||
if(!(flags & DIMAGE_NOCLIP))
|
||||
{
|
||||
bopti_render_noclip(x, y, img, left, top, width, height,
|
||||
gint_vram, NULL);
|
||||
/* Early finish for empty intersections */
|
||||
if(bopti_clip(img, &r)) return;
|
||||
}
|
||||
|
||||
left = r.left;
|
||||
width = r.width;
|
||||
int visual_x = r.visual_x;
|
||||
|
||||
r.left = left >> 5;
|
||||
r.columns = ((left + width - 1) >> 5) - r.left + 1;
|
||||
|
||||
if(r.columns == 1 && (visual_x & 31) + width <= 32)
|
||||
{
|
||||
r.x = (left & 31) - (visual_x & 31);
|
||||
bopti_render_scsp(img, &r, gint_vram, NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
bopti_render_clip(x, y, img, left, top, width, height,
|
||||
gint_vram, NULL);
|
||||
/* x-coordinate of the first pixel of the first column */
|
||||
r.x = visual_x - (left & 31);
|
||||
bopti_render(img, &r, gint_vram, NULL);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include <gint/defs/types.h>
|
||||
#include <gint/display.h>
|
||||
#include "bopti-asm.h"
|
||||
|
||||
/* masks(): Compute the vram masks for a given rectangle
|
||||
|
||||
|
@ -24,13 +25,10 @@
|
|||
@masks Stores the result of the function (four uint32_t values) */
|
||||
void masks(int x1, int x2, uint32_t *masks);
|
||||
|
||||
/* bopti_render_clip(): Render a bopti image with clipping
|
||||
@x @y Location of the top-left corner
|
||||
@img Image encoded by [fxconv]
|
||||
@left @top @w @h Bounding box to render
|
||||
@v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */
|
||||
void bopti_render_clip(int x, int y, bopti_image_t const *img, int left,
|
||||
int top, int w, int h, uint32_t *v1, uint32_t *v2);
|
||||
/* bopti_clip(): Clip a bounding box to image and VRAM
|
||||
@img Image encoded by [fxconv]
|
||||
@rbox Rendering box */
|
||||
int bopti_clip(bopti_image_t const *img, struct rbox *rbox);
|
||||
|
||||
/* bopti_render_noclip(): Render a bopti image without clipping
|
||||
This function is only ever slightly faster than bopti_render_clip(),
|
||||
|
@ -42,8 +40,8 @@ void bopti_render_clip(int x, int y, bopti_image_t const *img, int left,
|
|||
@img Image encoded by [fxconv]
|
||||
@left @top @w @h Bounding box to render
|
||||
@v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */
|
||||
void bopti_render_noclip(int x, int y, bopti_image_t const *img, int left,
|
||||
int top, int w, int h, uint32_t *v1, uint32_t *v2);
|
||||
void bopti_render_noclip(bopti_image_t const *img, struct rbox *rbox,
|
||||
uint32_t *v1, uint32_t *v2);
|
||||
|
||||
//---
|
||||
// Alternate rendering modes
|
||||
|
@ -72,8 +70,7 @@ struct rendering_mode
|
|||
(int x, int y, int fg, int bg, int halign, int valign,
|
||||
char const *str);
|
||||
void (*dsubimage)
|
||||
(int x, int y, bopti_image_t const *image, int left, int top,
|
||||
int width, int height, int flags);
|
||||
(bopti_image_t const *image, struct rbox *r, int flags);
|
||||
};
|
||||
|
||||
/* The alternate rendering mode pointer (initially NULL) */
|
||||
|
@ -89,9 +86,7 @@ void gint_gvline(int y1, int y2, int x, int color);
|
|||
void gtext_opt
|
||||
(int x, int y, int fg, int bg, int halign, int valign,
|
||||
char const *str);
|
||||
void gsubimage
|
||||
(int x, int y, bopti_image_t const *image, int left, int top,
|
||||
int width, int height, int flags);
|
||||
void gsubimage(bopti_image_t const *image, struct rbox *r, int flags);
|
||||
|
||||
/* Short macro to call the alternate rendering function when available */
|
||||
#define DMODE_OVERRIDE(func, ...) \
|
||||
|
|
Loading…
Reference in a new issue