mirror of
https://git.planet-casio.com/Lephenixnoir/gint.git
synced 2025-01-01 06:23:35 +01:00
bopti: more performance improvement for SCSP cases
* Turn on GCC's -O3 for bopti files
* Remove the bopti_render_noclip() step
* Use rbox as early as possible to avoid moving memory around
* A lot of local grinding
This commit is contained in:
parent
11dd04243f
commit
d887423bbb
5 changed files with 147 additions and 98 deletions
|
@ -2,21 +2,38 @@
|
||||||
#include "../render-fx/render-fx.h"
|
#include "../render-fx/render-fx.h"
|
||||||
#include "../render-fx/bopti-asm.h"
|
#include "../render-fx/bopti-asm.h"
|
||||||
|
|
||||||
|
#pragma GCC optimize("O3")
|
||||||
|
|
||||||
/* gsubimage(): Render a section of an image */
|
/* gsubimage(): Render a section of an image */
|
||||||
void gsubimage(int x, int y, bopti_image_t const *img, int left, int top,
|
void gsubimage(bopti_image_t const *img, struct rbox *r, GUNUSED int flags)
|
||||||
int width, int height, int flags)
|
|
||||||
{
|
{
|
||||||
uint32_t *light, *dark;
|
uint32_t *light, *dark;
|
||||||
dgray_getvram(&light, &dark);
|
dgray_getvram(&light, &dark);
|
||||||
|
|
||||||
if(flags & DIMAGE_NOCLIP)
|
/* Intersect the bounding box with both the source image and the VRAM,
|
||||||
|
except if DIMAGE_NOCLIP is provided */
|
||||||
|
if(!(flags & DIMAGE_NOCLIP))
|
||||||
{
|
{
|
||||||
bopti_render_noclip(x, y, img, left, top, width, height,
|
/* Early finish for empty intersections */
|
||||||
light, dark);
|
if(bopti_clip(img, r)) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int left = r->left;
|
||||||
|
int width = r->width;
|
||||||
|
int visual_x = r->visual_x;
|
||||||
|
|
||||||
|
r->left = left >> 5;
|
||||||
|
r->columns = ((left + width - 1) >> 5) - r->left + 1;
|
||||||
|
|
||||||
|
if(r->columns == 1 && (visual_x & 31) + width <= 32)
|
||||||
|
{
|
||||||
|
r->x = (left & 31) - (visual_x & 31);
|
||||||
|
bopti_render_scsp(img, r, light, dark);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bopti_render_clip(x, y, img, left, top, width, height,
|
/* x-coordinate of the first pixel of the first column */
|
||||||
light, dark);
|
r->x = visual_x - (left & 31);
|
||||||
|
bopti_render(img, r, light, dark);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,6 +36,7 @@ typedef void asm_gray_scsp_t(uint32_t *v1, uint32_t const *layer,
|
||||||
|
|
||||||
/* Type of any rendering function */
|
/* Type of any rendering function */
|
||||||
typedef union {
|
typedef union {
|
||||||
|
void *gen;
|
||||||
asm_mono_t *asm_mono;
|
asm_mono_t *asm_mono;
|
||||||
asm_gray_t *asm_gray;
|
asm_gray_t *asm_gray;
|
||||||
asm_mono_scsp_t *asm_mono_scsp;
|
asm_mono_scsp_t *asm_mono_scsp;
|
||||||
|
@ -84,4 +85,34 @@ extern asm_gray_scsp_t bopti_gasm_gray_scsp;
|
||||||
/* bopti_gasm_gray_alpha_scsp(): SCSP "gray_alpha" profile */
|
/* bopti_gasm_gray_alpha_scsp(): SCSP "gray_alpha" profile */
|
||||||
extern asm_gray_scsp_t bopti_gasm_gray_alpha_scsp;
|
extern asm_gray_scsp_t bopti_gasm_gray_alpha_scsp;
|
||||||
|
|
||||||
|
//---
|
||||||
|
// Renderer's data structures
|
||||||
|
//---
|
||||||
|
|
||||||
|
/* struct rbox: A rendering box (target coordinates and source rectangle)
|
||||||
|
The meaning of the fields varies during the rendering process! */
|
||||||
|
struct rbox
|
||||||
|
{
|
||||||
|
/* General renderer:
|
||||||
|
On-screen location of the leftmost pixel of the leftmost rendered
|
||||||
|
column (this particular pixel might not be drawn but is of
|
||||||
|
importance in the positioning process)
|
||||||
|
SCSP renderer:
|
||||||
|
Shift value used to align columns with positions */
|
||||||
|
int x;
|
||||||
|
/* On-screen location of top-left corner; the (x,y) of dsubimage() */
|
||||||
|
int visual_x, y;
|
||||||
|
/* Width of rendered sub-image */
|
||||||
|
int width;
|
||||||
|
/* Before bopti_render{_scsp}():
|
||||||
|
Left-coordinate of the source box (included, in pixels)
|
||||||
|
In bopti_render{_scsp}():
|
||||||
|
Left-coordinate of the source box (included, in columns) */
|
||||||
|
int left;
|
||||||
|
/* Number of columns used in the source box */
|
||||||
|
int columns;
|
||||||
|
/* Vertical bounds of the box in the image (inc-excluded, in pixels) */
|
||||||
|
int top, height;
|
||||||
|
};
|
||||||
|
|
||||||
#endif /* GINT_RENDERFX_BOPTIASM */
|
#endif /* GINT_RENDERFX_BOPTIASM */
|
||||||
|
|
|
@ -3,22 +3,7 @@
|
||||||
#include "render-fx.h"
|
#include "render-fx.h"
|
||||||
#include "bopti-asm.h"
|
#include "bopti-asm.h"
|
||||||
|
|
||||||
/* struct rbox: A rendering box (target coordinates and source rectangle)
|
#pragma GCC optimize("O3")
|
||||||
Some of the data here is redundant, but makes things easier. */
|
|
||||||
struct rbox
|
|
||||||
{
|
|
||||||
/* Left pixel of the first column to be drawn, even if this column is
|
|
||||||
not drawn entirely */
|
|
||||||
int x;
|
|
||||||
/* On-screen location of top-left corner */
|
|
||||||
int visual_x, y;
|
|
||||||
/* Width of rendered sub-image */
|
|
||||||
int width;
|
|
||||||
/* Horizontal bounds of the box in the image (included, in columns) */
|
|
||||||
int left, right;
|
|
||||||
/* Vertical bounds of the box in the image (inc-excluded, in pixels) */
|
|
||||||
int top, height;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* struct command: A rendering command
|
/* struct command: A rendering command
|
||||||
Includes many computed parameters and handy information. Read-only. */
|
Includes many computed parameters and handy information. Read-only. */
|
||||||
|
@ -176,7 +161,7 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1,
|
||||||
masks(rbox->visual_x, rbox->visual_x + rbox->width - 1, vm);
|
masks(rbox->visual_x, rbox->visual_x + rbox->width - 1, vm);
|
||||||
|
|
||||||
/* Number of layers per profile */
|
/* Number of layers per profile */
|
||||||
int layer_count[] = { 1, 2, 2, 3 };
|
static const int layer_count[] = { 1, 2, 2, 3 };
|
||||||
int layers = layer_count[img->profile];
|
int layers = layer_count[img->profile];
|
||||||
|
|
||||||
/* For each pair of consecutive VRAM elements involved, create a mask
|
/* For each pair of consecutive VRAM elements involved, create a mask
|
||||||
|
@ -207,20 +192,17 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1,
|
||||||
const uint32_t *layer = (void *)img->data;
|
const uint32_t *layer = (void *)img->data;
|
||||||
layer += (rbox->top * img_columns + rbox->left) * layers;
|
layer += (rbox->top * img_columns + rbox->left) * layers;
|
||||||
|
|
||||||
/* Number of grid columns */
|
|
||||||
int columns = rbox->right - rbox->left + 1;
|
|
||||||
|
|
||||||
/* Compute and execute the command for these parameters */
|
/* Compute and execute the command for these parameters */
|
||||||
struct command c = {
|
struct command c = {
|
||||||
.x = rbox->x & 31,
|
.x = rbox->x & 31,
|
||||||
.v1 = v1,
|
.v1 = v1,
|
||||||
.v2 = v2 ? v2 : v1,
|
.v2 = v2 ? v2 : v1,
|
||||||
.offset = (rbox->y << 2) + (rbox->x >> 5),
|
.offset = (rbox->y << 2) + (rbox->x >> 5),
|
||||||
.columns = columns,
|
.columns = rbox->columns,
|
||||||
.masks = masks + 2 * left_origin,
|
.masks = masks + 2 * left_origin,
|
||||||
.real_start = (left_origin > 0),
|
.real_start = (left_origin > 0),
|
||||||
.vram_stride = 4 - columns,
|
.vram_stride = 4 - rbox->columns,
|
||||||
.data_stride = ((img_columns - columns) << 2) * layers,
|
.data_stride = ((img_columns - rbox->columns) << 2) * layers,
|
||||||
.gray = (v2 != NULL),
|
.gray = (v2 != NULL),
|
||||||
.f = f,
|
.f = f,
|
||||||
};
|
};
|
||||||
|
@ -231,61 +213,62 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1,
|
||||||
void bopti_render_scsp(bopti_image_t const *img, struct rbox *rbox,
|
void bopti_render_scsp(bopti_image_t const *img, struct rbox *rbox,
|
||||||
uint32_t *v1, uint32_t *v2)
|
uint32_t *v1, uint32_t *v2)
|
||||||
{
|
{
|
||||||
/* Rendering function */
|
/* Compute the only rendering mask */
|
||||||
bopti_asm_t f;
|
uint32_t mask =
|
||||||
if(v2) f.asm_gray_scsp = asm_gray_scsp[img->profile];
|
(0xffffffff << (32 - rbox->width)) >> (rbox->visual_x & 31);
|
||||||
else f.asm_mono_scsp = asm_mono_scsp[img->profile];
|
|
||||||
|
|
||||||
/* Compute the only rendering mask. Avoid UB if width = 32 */
|
|
||||||
uint32_t mask = 0xffffffff;
|
|
||||||
if(rbox->width < 32)
|
|
||||||
{
|
|
||||||
int right = 32 - ((rbox->visual_x & 31) + rbox->width);
|
|
||||||
mask = ((1 << rbox->width) - 1) << right;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Number of layers */
|
/* Number of layers */
|
||||||
int layer_count[] = { 1, 2, 2, 3 };
|
int layers = img->profile - (img->profile >> 1) + 1;
|
||||||
int layers = layer_count[img->profile];
|
|
||||||
|
|
||||||
/* Number of columns in [img] */
|
/* Number of longwords to skip between rows of [img] */
|
||||||
int img_columns = (img->width + 31) >> 5;
|
int img_stride = ((img->width + 31) >> 5) * layers;
|
||||||
|
|
||||||
/* Interwoven layer data. Skip left columns that are not rendered */
|
/* Interwoven layer data. Skip left columns that are not rendered */
|
||||||
const uint32_t *layer = (void *)img->data;
|
const uint32_t *layer = (void *)img->data;
|
||||||
layer += (rbox->top * img_columns + rbox->left) * layers;
|
layer += (rbox->top * img_stride) + (rbox->left * layers);
|
||||||
|
|
||||||
/* Starting value of VRAM pointers */
|
/* Starting value of VRAM pointers */
|
||||||
int offset = (rbox->y << 2) + (rbox->visual_x >> 5);
|
int offset = (rbox->y << 2) + (rbox->visual_x >> 5);
|
||||||
v1 += offset;
|
v1 += offset;
|
||||||
if(v2) v2 += offset;
|
|
||||||
|
|
||||||
/* Number of rows */
|
/* Number of rows */
|
||||||
int rows = rbox->height;
|
int rows = rbox->height;
|
||||||
/* Mask shift */
|
|
||||||
int shift = -(rbox->x & 31);
|
|
||||||
if(rbox->x < 0) shift += 32;
|
|
||||||
|
|
||||||
/* Render the grid immediately; mono version */
|
/* Render the grid immediately; mono version */
|
||||||
if(!v2) while(rows--)
|
if(!v2)
|
||||||
{
|
{
|
||||||
f.asm_mono_scsp(v1, layer, mask, shift);
|
asm_mono_scsp_t *f = asm_mono_scsp[img->profile];
|
||||||
layer += img_columns * layers;
|
while(rows--)
|
||||||
|
{
|
||||||
|
f(v1, layer, mask, rbox->x);
|
||||||
|
layer += img_stride;
|
||||||
v1 += 4;
|
v1 += 4;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
/* Gray version */
|
/* Gray version */
|
||||||
else while(rows--)
|
else
|
||||||
{
|
{
|
||||||
f.asm_gray_scsp(v1, layer, mask, v2, shift);
|
asm_gray_scsp_t *f = asm_gray_scsp[img->profile];
|
||||||
layer += img_columns * layers;
|
v2 += offset;
|
||||||
|
|
||||||
|
while(rows--)
|
||||||
|
{
|
||||||
|
f(v1, layer, mask, v2, rbox->x);
|
||||||
|
layer += img_stride;
|
||||||
v1 += 4;
|
v1 += 4;
|
||||||
v2 += 4;
|
v2 += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void bopti_render_clip(int x, int y, bopti_image_t const *img, int left,
|
int bopti_clip(bopti_image_t const *img, struct rbox *r)
|
||||||
int top, int width, int height, uint32_t *v1, uint32_t *v2)
|
|
||||||
{
|
{
|
||||||
|
/* This load/save is not elegant but it makes GCC use register-only
|
||||||
|
operations, which is what we need for efficiency */
|
||||||
|
int x = r->visual_x, y = r->y;
|
||||||
|
int left = r->left, top = r->top;
|
||||||
|
int width = r->width, height = r->height;
|
||||||
|
|
||||||
/* Adjust the bounding box of the input image */
|
/* Adjust the bounding box of the input image */
|
||||||
if(left < 0) width += left, x -= left, left = 0;
|
if(left < 0) width += left, x -= left, left = 0;
|
||||||
if(top < 0) height += top, y -= top, top = 0;
|
if(top < 0) height += top, y -= top, top = 0;
|
||||||
|
@ -298,32 +281,34 @@ void bopti_render_clip(int x, int y, bopti_image_t const *img, int left,
|
||||||
if(x + width > DWIDTH) width = DWIDTH - x;
|
if(x + width > DWIDTH) width = DWIDTH - x;
|
||||||
if(y + height > DHEIGHT) height = DHEIGHT - y;
|
if(y + height > DHEIGHT) height = DHEIGHT - y;
|
||||||
|
|
||||||
/* Early finish for empty intersections */
|
r->visual_x = x;
|
||||||
if(width <= 0 || height <= 0) return;
|
r->y = y;
|
||||||
|
r->left = left;
|
||||||
|
r->top = top;
|
||||||
|
r->width = width;
|
||||||
|
r->height = height;
|
||||||
|
|
||||||
/* Finish with the noclip variant */
|
/* Return non-zero if the result is empty */
|
||||||
bopti_render_noclip(x, y, img, left, top, width, height, v1, v2);
|
return (width <= 0 || height <= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bopti_render_noclip(int visual_x, int y, bopti_image_t const *img,
|
void bopti_render_noclip(bopti_image_t const *img, struct rbox *r,
|
||||||
int left, int top, int width, int height, uint32_t *v1, uint32_t *v2)
|
uint32_t *v1, uint32_t *v2)
|
||||||
{
|
{
|
||||||
|
int left = r->left;
|
||||||
|
|
||||||
/* Start column and end column (both included) */
|
/* Start column and end column (both included) */
|
||||||
int cl = (left) >> 5;
|
r->left >>= 5;
|
||||||
int cr = (left + width - 1) >> 5;
|
|
||||||
|
|
||||||
/* Finish with the standard bopti renderer */
|
if(r->columns == 1 && (r->visual_x & 31) + r->width <= 32)
|
||||||
struct rbox rbox = { 0, visual_x, y, width, cl, cr, top, height };
|
|
||||||
|
|
||||||
if(cl == cr && (visual_x & 31) + width <= 32)
|
|
||||||
{
|
{
|
||||||
rbox.x = (visual_x & 31) - (left & 31);
|
r->x = (left & 31) - (r->visual_x & 31);
|
||||||
bopti_render_scsp(img, &rbox, v1, v2);
|
bopti_render_scsp(img, r, v1, v2);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* x-coordinate of the first pixel of the first column */
|
/* x-coordinate of the first pixel of the first column */
|
||||||
rbox.x = visual_x - (left & 31);
|
r->x = r->visual_x - (left & 31);
|
||||||
bopti_render(img, &rbox, v1, v2);
|
bopti_render(img, r, v1, v2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,22 +2,43 @@
|
||||||
#include "render-fx.h"
|
#include "render-fx.h"
|
||||||
#include "bopti-asm.h"
|
#include "bopti-asm.h"
|
||||||
|
|
||||||
|
#pragma GCC optimize("O3")
|
||||||
|
|
||||||
/* dsubimage(): Render a section of an image */
|
/* dsubimage(): Render a section of an image */
|
||||||
void dsubimage(int x, int y, bopti_image_t const *img, int left, int top,
|
void dsubimage(int x, int y, bopti_image_t const *img, int left, int top,
|
||||||
int width, int height, int flags)
|
int width, int height, int flags)
|
||||||
{
|
{
|
||||||
DMODE_OVERRIDE(dsubimage, x, y, img, left, top, width, height, flags);
|
struct rbox r = {
|
||||||
|
0, x, y, width, left, 0, top, height
|
||||||
|
};
|
||||||
|
|
||||||
|
DMODE_OVERRIDE(dsubimage, img, &r, flags);
|
||||||
if(img->gray) return;
|
if(img->gray) return;
|
||||||
|
|
||||||
if(flags & DIMAGE_NOCLIP)
|
/* Intersect the bounding box with both the source image and the VRAM,
|
||||||
|
except if DIMAGE_NOCLIP is provided */
|
||||||
|
if(!(flags & DIMAGE_NOCLIP))
|
||||||
{
|
{
|
||||||
bopti_render_noclip(x, y, img, left, top, width, height,
|
/* Early finish for empty intersections */
|
||||||
gint_vram, NULL);
|
if(bopti_clip(img, &r)) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
left = r.left;
|
||||||
|
width = r.width;
|
||||||
|
int visual_x = r.visual_x;
|
||||||
|
|
||||||
|
r.left = left >> 5;
|
||||||
|
r.columns = ((left + width - 1) >> 5) - r.left + 1;
|
||||||
|
|
||||||
|
if(r.columns == 1 && (visual_x & 31) + width <= 32)
|
||||||
|
{
|
||||||
|
r.x = (left & 31) - (visual_x & 31);
|
||||||
|
bopti_render_scsp(img, &r, gint_vram, NULL);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bopti_render_clip(x, y, img, left, top, width, height,
|
/* x-coordinate of the first pixel of the first column */
|
||||||
gint_vram, NULL);
|
r.x = visual_x - (left & 31);
|
||||||
|
bopti_render(img, &r, gint_vram, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
#include <gint/defs/types.h>
|
#include <gint/defs/types.h>
|
||||||
#include <gint/display.h>
|
#include <gint/display.h>
|
||||||
|
#include "bopti-asm.h"
|
||||||
|
|
||||||
/* masks(): Compute the vram masks for a given rectangle
|
/* masks(): Compute the vram masks for a given rectangle
|
||||||
|
|
||||||
|
@ -24,13 +25,10 @@
|
||||||
@masks Stores the result of the function (four uint32_t values) */
|
@masks Stores the result of the function (four uint32_t values) */
|
||||||
void masks(int x1, int x2, uint32_t *masks);
|
void masks(int x1, int x2, uint32_t *masks);
|
||||||
|
|
||||||
/* bopti_render_clip(): Render a bopti image with clipping
|
/* bopti_clip(): Clip a bounding box to image and VRAM
|
||||||
@x @y Location of the top-left corner
|
|
||||||
@img Image encoded by [fxconv]
|
@img Image encoded by [fxconv]
|
||||||
@left @top @w @h Bounding box to render
|
@rbox Rendering box */
|
||||||
@v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */
|
int bopti_clip(bopti_image_t const *img, struct rbox *rbox);
|
||||||
void bopti_render_clip(int x, int y, bopti_image_t const *img, int left,
|
|
||||||
int top, int w, int h, uint32_t *v1, uint32_t *v2);
|
|
||||||
|
|
||||||
/* bopti_render_noclip(): Render a bopti image without clipping
|
/* bopti_render_noclip(): Render a bopti image without clipping
|
||||||
This function is only ever slightly faster than bopti_render_clip(),
|
This function is only ever slightly faster than bopti_render_clip(),
|
||||||
|
@ -42,8 +40,8 @@ void bopti_render_clip(int x, int y, bopti_image_t const *img, int left,
|
||||||
@img Image encoded by [fxconv]
|
@img Image encoded by [fxconv]
|
||||||
@left @top @w @h Bounding box to render
|
@left @top @w @h Bounding box to render
|
||||||
@v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */
|
@v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */
|
||||||
void bopti_render_noclip(int x, int y, bopti_image_t const *img, int left,
|
void bopti_render_noclip(bopti_image_t const *img, struct rbox *rbox,
|
||||||
int top, int w, int h, uint32_t *v1, uint32_t *v2);
|
uint32_t *v1, uint32_t *v2);
|
||||||
|
|
||||||
//---
|
//---
|
||||||
// Alternate rendering modes
|
// Alternate rendering modes
|
||||||
|
@ -72,8 +70,7 @@ struct rendering_mode
|
||||||
(int x, int y, int fg, int bg, int halign, int valign,
|
(int x, int y, int fg, int bg, int halign, int valign,
|
||||||
char const *str);
|
char const *str);
|
||||||
void (*dsubimage)
|
void (*dsubimage)
|
||||||
(int x, int y, bopti_image_t const *image, int left, int top,
|
(bopti_image_t const *image, struct rbox *r, int flags);
|
||||||
int width, int height, int flags);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The alternate rendering mode pointer (initially NULL)*/
|
/* The alternate rendering mode pointer (initially NULL)*/
|
||||||
|
@ -89,9 +86,7 @@ void gint_gvline(int y1, int y2, int x, int color);
|
||||||
void gtext_opt
|
void gtext_opt
|
||||||
(int x, int y, int fg, int bg, int halign, int valign,
|
(int x, int y, int fg, int bg, int halign, int valign,
|
||||||
char const *str);
|
char const *str);
|
||||||
void gsubimage
|
void gsubimage(bopti_image_t const *image, struct rbox *r, int flags);
|
||||||
(int x, int y, bopti_image_t const *image, int left, int top,
|
|
||||||
int width, int height, int flags);
|
|
||||||
|
|
||||||
/* Short macro to call the alternate rendering function when available */
|
/* Short macro to call the alternate rendering function when available */
|
||||||
#define DMODE_OVERRIDE(func, ...) \
|
#define DMODE_OVERRIDE(func, ...) \
|
||||||
|
|
Loading…
Reference in a new issue