mirror of
https://github.com/Ninjdai1/pokeemerald.git
synced 2025-01-14 07:33:44 +01:00
Optimize sprite.c (#3175)
This commit is contained in:
commit
7d8bcce9a5
218
gflib/sprite.c
218
gflib/sprite.c
@ -48,11 +48,7 @@ struct OamDimensions
|
|||||||
s8 height;
|
s8 height;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void UpdateOamCoords(void);
|
static void SortSprites(u32 *spritePriorities, s32 n);
|
||||||
static void BuildSpritePriorities(void);
|
|
||||||
static void SortSprites(void);
|
|
||||||
static void CopyMatricesToOamBuffer(void);
|
|
||||||
static void AddSpritesToOamBuffer(void);
|
|
||||||
static u8 CreateSpriteAt(u8 index, const struct SpriteTemplate *template, s16 x, s16 y, u8 subpriority);
|
static u8 CreateSpriteAt(u8 index, const struct SpriteTemplate *template, s16 x, s16 y, u8 subpriority);
|
||||||
static void ResetOamMatrices(void);
|
static void ResetOamMatrices(void);
|
||||||
static void ResetSprite(struct Sprite *sprite);
|
static void ResetSprite(struct Sprite *sprite);
|
||||||
@ -280,12 +276,12 @@ u32 gOamMatrixAllocBitmap;
|
|||||||
u8 gReservedSpritePaletteCount;
|
u8 gReservedSpritePaletteCount;
|
||||||
|
|
||||||
EWRAM_DATA struct Sprite gSprites[MAX_SPRITES + 1] = {0};
|
EWRAM_DATA struct Sprite gSprites[MAX_SPRITES + 1] = {0};
|
||||||
EWRAM_DATA static u16 sSpritePriorities[MAX_SPRITES] = {0};
|
|
||||||
EWRAM_DATA static u8 sSpriteOrder[MAX_SPRITES] = {0};
|
EWRAM_DATA static u8 sSpriteOrder[MAX_SPRITES] = {0};
|
||||||
EWRAM_DATA static bool8 sShouldProcessSpriteCopyRequests = 0;
|
EWRAM_DATA static bool8 sShouldProcessSpriteCopyRequests = 0;
|
||||||
EWRAM_DATA static u8 sSpriteCopyRequestCount = 0;
|
EWRAM_DATA static u8 sSpriteCopyRequestCount = 0;
|
||||||
EWRAM_DATA static struct SpriteCopyRequest sSpriteCopyRequests[MAX_SPRITES] = {0};
|
EWRAM_DATA static struct SpriteCopyRequest sSpriteCopyRequests[MAX_SPRITES] = {0};
|
||||||
EWRAM_DATA u8 gOamLimit = 0;
|
EWRAM_DATA u8 gOamLimit = 0;
|
||||||
|
static EWRAM_DATA u8 gOamDummyIndex = 0;
|
||||||
EWRAM_DATA u16 gReservedSpriteTileCount = 0;
|
EWRAM_DATA u16 gReservedSpriteTileCount = 0;
|
||||||
EWRAM_DATA static u8 sSpriteTileAllocBitmap[128] = {0};
|
EWRAM_DATA static u8 sSpriteTileAllocBitmap[128] = {0};
|
||||||
EWRAM_DATA s16 gSpriteCoordOffsetX = 0;
|
EWRAM_DATA s16 gSpriteCoordOffsetX = 0;
|
||||||
@ -296,6 +292,7 @@ EWRAM_DATA bool8 gAffineAnimsDisabled = FALSE;
|
|||||||
void ResetSpriteData(void)
|
void ResetSpriteData(void)
|
||||||
{
|
{
|
||||||
ResetOamRange(0, 128);
|
ResetOamRange(0, 128);
|
||||||
|
gOamDummyIndex = 0;
|
||||||
ResetAllSprites();
|
ResetAllSprites();
|
||||||
ClearSpriteCopyRequests();
|
ClearSpriteCopyRequests();
|
||||||
ResetAffineAnimData();
|
ResetAffineAnimData();
|
||||||
@ -326,26 +323,37 @@ void AnimateSprites(void)
|
|||||||
|
|
||||||
void BuildOamBuffer(void)
|
void BuildOamBuffer(void)
|
||||||
{
|
{
|
||||||
u8 temp;
|
bool32 oamLoadDisabled;
|
||||||
UpdateOamCoords();
|
u32 i, stride;
|
||||||
BuildSpritePriorities();
|
u8 oamIndex;
|
||||||
SortSprites();
|
|
||||||
temp = gMain.oamLoadDisabled;
|
// All attributes which affect sorting packed into a single u32:
|
||||||
gMain.oamLoadDisabled = TRUE;
|
// { priority:2, subpriority:8, y:9, :5, index:8 }.
|
||||||
AddSpritesToOamBuffer();
|
// Index has its own byte even though it only needs 6 bits so that
|
||||||
CopyMatricesToOamBuffer();
|
// we can load it with a ldrb instead of having to mask out the
|
||||||
gMain.oamLoadDisabled = temp;
|
// bottom 6 bits.
|
||||||
sShouldProcessSpriteCopyRequests = TRUE;
|
u32 spritePriorities[MAX_SPRITES];
|
||||||
}
|
s32 toSort = 0;
|
||||||
|
u8 skippedSprites[MAX_SPRITES];
|
||||||
|
u32 skippedSpritesN = 0;
|
||||||
|
u32 matrices = 0;
|
||||||
|
|
||||||
void UpdateOamCoords(void)
|
|
||||||
{
|
|
||||||
u8 i;
|
|
||||||
for (i = 0; i < MAX_SPRITES; i++)
|
for (i = 0; i < MAX_SPRITES; i++)
|
||||||
{
|
{
|
||||||
struct Sprite *sprite = &gSprites[i];
|
// Reuse existing sSpriteOrder because we expect the order to be
|
||||||
if (sprite->inUse && !sprite->invisible)
|
// relatively stable between frames.
|
||||||
|
u32 index = sSpriteOrder[i];
|
||||||
|
struct Sprite *sprite = &gSprites[index];
|
||||||
|
s32 y;
|
||||||
|
if (!sprite->inUse || sprite->invisible)
|
||||||
{
|
{
|
||||||
|
skippedSprites[skippedSpritesN++] = index;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sprite->oam.affineMode & ST_OAM_AFFINE_ON_MASK)
|
||||||
|
matrices |= 1 << sprite->oam.matrixNum;
|
||||||
|
|
||||||
if (sprite->coordOffsetEnabled)
|
if (sprite->coordOffsetEnabled)
|
||||||
{
|
{
|
||||||
sprite->oam.x = sprite->x + sprite->x2 + sprite->centerToCornerVecX + gSpriteCoordOffsetX;
|
sprite->oam.x = sprite->x + sprite->x2 + sprite->centerToCornerVecX + gSpriteCoordOffsetX;
|
||||||
@ -356,122 +364,54 @@ void UpdateOamCoords(void)
|
|||||||
sprite->oam.x = sprite->x + sprite->x2 + sprite->centerToCornerVecX;
|
sprite->oam.x = sprite->x + sprite->x2 + sprite->centerToCornerVecX;
|
||||||
sprite->oam.y = sprite->y + sprite->y2 + sprite->centerToCornerVecY;
|
sprite->oam.y = sprite->y + sprite->y2 + sprite->centerToCornerVecY;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void BuildSpritePriorities(void)
|
y = sprite->oam.y;
|
||||||
{
|
if (y >= DISPLAY_HEIGHT)
|
||||||
u16 i;
|
|
||||||
for (i = 0; i < MAX_SPRITES; i++)
|
|
||||||
{
|
{
|
||||||
struct Sprite *sprite = &gSprites[i];
|
y -= 256;
|
||||||
u16 priority = sprite->subpriority | (sprite->oam.priority << 8);
|
|
||||||
sSpritePriorities[i] = priority;
|
|
||||||
}
|
}
|
||||||
}
|
else if (sprite->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
||||||
|
&& sprite->oam.size == ST_OAM_SIZE_3)
|
||||||
void SortSprites(void)
|
|
||||||
{
|
|
||||||
u8 i;
|
|
||||||
for (i = 1; i < MAX_SPRITES; i++)
|
|
||||||
{
|
{
|
||||||
u8 j = i;
|
u32 shape = sprite->oam.shape;
|
||||||
struct Sprite *sprite1 = &gSprites[sSpriteOrder[i - 1]];
|
|
||||||
struct Sprite *sprite2 = &gSprites[sSpriteOrder[i]];
|
|
||||||
u16 sprite1Priority = sSpritePriorities[sSpriteOrder[i - 1]];
|
|
||||||
u16 sprite2Priority = sSpritePriorities[sSpriteOrder[i]];
|
|
||||||
s16 sprite1Y = sprite1->oam.y;
|
|
||||||
s16 sprite2Y = sprite2->oam.y;
|
|
||||||
|
|
||||||
if (sprite1Y >= DISPLAY_HEIGHT)
|
|
||||||
sprite1Y = sprite1Y - 256;
|
|
||||||
|
|
||||||
if (sprite2Y >= DISPLAY_HEIGHT)
|
|
||||||
sprite2Y = sprite2Y - 256;
|
|
||||||
|
|
||||||
if (sprite1->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
|
||||||
&& sprite1->oam.size == ST_OAM_SIZE_3)
|
|
||||||
{
|
|
||||||
u32 shape = sprite1->oam.shape;
|
|
||||||
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
||||||
{
|
{
|
||||||
if (sprite1Y > 128)
|
if (y > 128)
|
||||||
sprite1Y = sprite1Y - 256;
|
y -= 256;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sprite2->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
// y in [-128...159], so (159 - y) in [0..287].
|
||||||
&& sprite2->oam.size == ST_OAM_SIZE_3)
|
spritePriorities[toSort++]
|
||||||
{
|
= (sprite->oam.priority << 30)
|
||||||
u32 shape = sprite2->oam.shape;
|
| (sprite->subpriority << 22)
|
||||||
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
| (((159 - y) & 0x1FF) << 13)
|
||||||
{
|
| (index << 0);
|
||||||
if (sprite2Y > 128)
|
|
||||||
sprite2Y = sprite2Y - 256;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while (j > 0
|
SortSprites(spritePriorities, toSort);
|
||||||
&& ((sprite1Priority > sprite2Priority)
|
|
||||||
|| (sprite1Priority == sprite2Priority && sprite1Y < sprite2Y)))
|
|
||||||
{
|
|
||||||
u8 temp = sSpriteOrder[j];
|
|
||||||
sSpriteOrder[j] = sSpriteOrder[j - 1];
|
|
||||||
sSpriteOrder[j - 1] = temp;
|
|
||||||
|
|
||||||
// UB: If j equals 1, then j-- makes j equal 0.
|
for (i = 0; i < toSort; i++)
|
||||||
// Then, sSpriteOrder[-1] gets accessed below.
|
sSpriteOrder[i] = spritePriorities[i] & 0xFF;
|
||||||
// Although this doesn't result in a bug in the ROM,
|
for (i = 0; i < skippedSpritesN; i++)
|
||||||
// the behavior is undefined.
|
sSpriteOrder[toSort + i] = skippedSprites[i];
|
||||||
j--;
|
|
||||||
#ifdef UBFIX
|
oamLoadDisabled = gMain.oamLoadDisabled;
|
||||||
if (j == 0)
|
gMain.oamLoadDisabled = TRUE;
|
||||||
|
|
||||||
|
for (i = 0, oamIndex = 0; i < toSort; i++)
|
||||||
|
{
|
||||||
|
if (AddSpriteToOamBuffer(&gSprites[spritePriorities[i] & 0xFF], &oamIndex))
|
||||||
break;
|
break;
|
||||||
#endif
|
}
|
||||||
|
|
||||||
sprite1 = &gSprites[sSpriteOrder[j - 1]];
|
for (i = oamIndex; i < gOamDummyIndex; i++)
|
||||||
sprite2 = &gSprites[sSpriteOrder[j]];
|
gMain.oamBuffer[i] = gDummyOamData;
|
||||||
sprite1Priority = sSpritePriorities[sSpriteOrder[j - 1]];
|
gOamDummyIndex = oamIndex;
|
||||||
sprite2Priority = sSpritePriorities[sSpriteOrder[j]];
|
|
||||||
sprite1Y = sprite1->oam.y;
|
|
||||||
sprite2Y = sprite2->oam.y;
|
|
||||||
|
|
||||||
if (sprite1Y >= DISPLAY_HEIGHT)
|
for (i = 0; matrices != 0; i++, matrices >>= 1)
|
||||||
sprite1Y = sprite1Y - 256;
|
|
||||||
|
|
||||||
if (sprite2Y >= DISPLAY_HEIGHT)
|
|
||||||
sprite2Y = sprite2Y - 256;
|
|
||||||
|
|
||||||
if (sprite1->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
|
||||||
&& sprite1->oam.size == ST_OAM_SIZE_3)
|
|
||||||
{
|
{
|
||||||
u32 shape = sprite1->oam.shape;
|
if (matrices & 1)
|
||||||
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
|
||||||
{
|
|
||||||
if (sprite1Y > 128)
|
|
||||||
sprite1Y = sprite1Y - 256;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sprite2->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
|
||||||
&& sprite2->oam.size == ST_OAM_SIZE_3)
|
|
||||||
{
|
|
||||||
u32 shape = sprite2->oam.shape;
|
|
||||||
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
|
||||||
{
|
|
||||||
if (sprite2Y > 128)
|
|
||||||
sprite2Y = sprite2Y - 256;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void CopyMatricesToOamBuffer(void)
|
|
||||||
{
|
|
||||||
u8 i;
|
|
||||||
for (i = 0; i < OAM_MATRIX_COUNT; i++)
|
|
||||||
{
|
{
|
||||||
u32 base = 4 * i;
|
u32 base = 4 * i;
|
||||||
gMain.oamBuffer[base + 0].affineParam = gOamMatrices[i].a;
|
gMain.oamBuffer[base + 0].affineParam = gOamMatrices[i].a;
|
||||||
@ -479,26 +419,32 @@ void CopyMatricesToOamBuffer(void)
|
|||||||
gMain.oamBuffer[base + 2].affineParam = gOamMatrices[i].c;
|
gMain.oamBuffer[base + 2].affineParam = gOamMatrices[i].c;
|
||||||
gMain.oamBuffer[base + 3].affineParam = gOamMatrices[i].d;
|
gMain.oamBuffer[base + 3].affineParam = gOamMatrices[i].d;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gMain.oamLoadDisabled = oamLoadDisabled;
|
||||||
|
sShouldProcessSpriteCopyRequests = TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AddSpritesToOamBuffer(void)
|
static inline void InsertionSort(u32 *spritePriorities, s32 n)
|
||||||
{
|
{
|
||||||
u8 i = 0;
|
s32 i = 1;
|
||||||
u8 oamIndex = 0;
|
while (i < n)
|
||||||
|
|
||||||
while (i < MAX_SPRITES)
|
|
||||||
{
|
{
|
||||||
struct Sprite *sprite = &gSprites[sSpriteOrder[i]];
|
u32 x = spritePriorities[i];
|
||||||
if (sprite->inUse && !sprite->invisible && AddSpriteToOamBuffer(sprite, &oamIndex))
|
s32 j = i - 1;
|
||||||
return;
|
while (j >= 0 && spritePriorities[j] > x)
|
||||||
|
{
|
||||||
|
spritePriorities[j + 1] = spritePriorities[j];
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
spritePriorities[j + 1] = x;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while (oamIndex < gOamLimit)
|
static void SortSprites(u32 *spritePriorities, s32 n)
|
||||||
{
|
{
|
||||||
gMain.oamBuffer[oamIndex] = gDummyOamData;
|
InsertionSort(spritePriorities, n);
|
||||||
oamIndex++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 CreateSprite(const struct SpriteTemplate *template, s16 x, s16 y, u8 subpriority)
|
u8 CreateSprite(const struct SpriteTemplate *template, s16 x, s16 y, u8 subpriority)
|
||||||
@ -849,7 +795,7 @@ void CopyToSprites(u8 *src)
|
|||||||
|
|
||||||
void ResetAllSprites(void)
|
void ResetAllSprites(void)
|
||||||
{
|
{
|
||||||
u8 i;
|
u32 i;
|
||||||
|
|
||||||
for (i = 0; i < MAX_SPRITES; i++)
|
for (i = 0; i < MAX_SPRITES; i++)
|
||||||
{
|
{
|
||||||
|
@ -664,6 +664,7 @@
|
|||||||
#define TIMER_64CLK 0x01
|
#define TIMER_64CLK 0x01
|
||||||
#define TIMER_256CLK 0x02
|
#define TIMER_256CLK 0x02
|
||||||
#define TIMER_1024CLK 0x03
|
#define TIMER_1024CLK 0x03
|
||||||
|
#define TIMER_COUNTUP 0x04
|
||||||
#define TIMER_INTR_ENABLE 0x40
|
#define TIMER_INTR_ENABLE 0x40
|
||||||
#define TIMER_ENABLE 0x80
|
#define TIMER_ENABLE 0x80
|
||||||
|
|
||||||
|
@ -193,3 +193,60 @@ TEST("RandomElement generates a uniform distribution")
|
|||||||
|
|
||||||
EXPECT_LT(error, UQ_4_12(0.025));
|
EXPECT_LT(error, UQ_4_12(0.025));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Benchmarks RandomUniformDefault against Random() % n when every range
// bound is a literal (the "compile-time" case named in the test title).
TEST("RandomUniform mul-based faster than mod-based (compile-time)")
{
    struct Benchmark mulBenchmark, modBenchmark;
    u32 mulSum = 0, modSum = 0;

    BENCHMARK(&mulBenchmark)
    {
        mulSum += RandomUniformDefault(RNG_NONE, 0, 1);
        mulSum += RandomUniformDefault(RNG_NONE, 0, 2);
        mulSum += RandomUniformDefault(RNG_NONE, 0, 3);
        mulSum += RandomUniformDefault(RNG_NONE, 0, 4);
    }

    BENCHMARK(&modBenchmark)
    {
        modSum += Random() % 2;
        modSum += Random() % 3;
        modSum += Random() % 4;
        modSum += Random() % 5;
    }

    EXPECT_FASTER(mulBenchmark, modBenchmark);

    // Reference mulSum/modSum to prevent optimization.
    // These numbers are different because multiplication and modulus
    // have subtly different biases (so subtle that it's irrelevant for
    // our purposes).
    EXPECT_EQ(mulSum, 3);
    EXPECT_EQ(modSum, 4);
}
|
||||||
|
|
||||||
|
// Benchmarks RandomUniformDefault against Random() % n when the range bound
// is a loop variable, i.e. only known at run time.
TEST("RandomUniform mul-based faster than mod-based (run-time)")
{
    u32 hi;
    struct Benchmark mulBenchmark, modBenchmark;
    u32 mulSum = 0, modSum = 0;

    BENCHMARK(&mulBenchmark)
    {
        // Inclusive upper bounds 0..31.
        for (hi = 0; hi < 32; hi++)
        {
            mulSum += RandomUniformDefault(RNG_NONE, 0, hi);
        }
    }

    BENCHMARK(&modBenchmark)
    {
        // Same ranges, expressed as a modulus of hi + 1.
        for (hi = 0; hi < 32; hi++)
        {
            modSum += Random() % (hi + 1);
        }
    }

    EXPECT_FASTER(mulBenchmark, modBenchmark);

    // Reference mulSum/modSum to prevent optimization.
    EXPECT_EQ(mulSum, 232);
    EXPECT_EQ(modSum, 249);
}
|
||||||
|
303
test/sprite.c
Normal file
303
test/sprite.c
Normal file
@ -0,0 +1,303 @@
|
|||||||
|
#include "global.h"
|
||||||
|
#include "test.h"
|
||||||
|
#include "main.h"
|
||||||
|
#include "malloc.h"
|
||||||
|
#include "random.h"
|
||||||
|
#include "sprite.h"
|
||||||
|
|
||||||
|
#define OAM_MATRIX_COUNT 32
|
||||||
|
|
||||||
|
EWRAM_DATA static u16 sSpritePriorities[MAX_SPRITES] = {0};
|
||||||
|
EWRAM_DATA static u8 sSpriteOrder[MAX_SPRITES] = {0};
|
||||||
|
|
||||||
|
static void Old_BuildOamBuffer(void);
|
||||||
|
|
||||||
|
static void ExpectEqOamBuffers(const struct OamData *oldOamBuffer, const struct OamData *newOamBuffer)
|
||||||
|
{
|
||||||
|
u32 i;
|
||||||
|
u32 matrices = 0;
|
||||||
|
|
||||||
|
// Compare the non-matrix data.
|
||||||
|
for (i = 0; i < gOamLimit; i++)
|
||||||
|
{
|
||||||
|
EXPECT(memcmp(&oldOamBuffer[i], &newOamBuffer[i], 6) == 0);
|
||||||
|
if (newOamBuffer[i].affineMode & ST_OAM_AFFINE_ON_MASK)
|
||||||
|
matrices |= 1 << newOamBuffer[i].matrixNum;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare the matrix data.
|
||||||
|
for (i = 0; i < OAM_MATRIX_COUNT; i++)
|
||||||
|
{
|
||||||
|
if (matrices & (1 << i))
|
||||||
|
{
|
||||||
|
u32 base = 4 * i;
|
||||||
|
EXPECT_EQ(oldOamBuffer[base + 0].affineParam, newOamBuffer[base + 0].affineParam);
|
||||||
|
EXPECT_EQ(oldOamBuffer[base + 1].affineParam, newOamBuffer[base + 1].affineParam);
|
||||||
|
EXPECT_EQ(oldOamBuffer[base + 2].affineParam, newOamBuffer[base + 2].affineParam);
|
||||||
|
EXPECT_EQ(oldOamBuffer[base + 3].affineParam, newOamBuffer[base + 3].affineParam);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ResetSpriteData_(void)
|
||||||
|
{
|
||||||
|
u32 i;
|
||||||
|
ResetSpriteData();
|
||||||
|
for (i = 0; i < MAX_SPRITES; i++)
|
||||||
|
sSpriteOrder[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Times Old_BuildOamBuffer and BuildOamBuffer on the current sprite state,
// then expects identical OAM output and a faster new implementation.
//
// preSort: when TRUE, each implementation is run once before being timed,
//          so the timed run starts from the order left by that first run.
static void BenchmarkBuildOamBuffer(bool32 preSort)
{
    struct Benchmark oldBuildOamBuffer, newBuildOamBuffer;
    struct OamData *oldOamBuffer = Alloc(sizeof(gMain.oamBuffer));

    // Presumably Alloc returns NULL when out of memory; never memcpy into it.
    if (oldOamBuffer == NULL)
    {
        Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: Alloc failed", gTestRunnerState.test->filename, __LINE__);
        return;
    }

    if (preSort)
        Old_BuildOamBuffer();
    BENCHMARK(&oldBuildOamBuffer)
    {
        Old_BuildOamBuffer();
    }
    // Snapshot the old result before the new implementation overwrites it.
    memcpy(oldOamBuffer, gMain.oamBuffer, sizeof(gMain.oamBuffer));

    if (preSort)
        BuildOamBuffer();
    BENCHMARK(&newBuildOamBuffer)
    {
        BuildOamBuffer();
    }

    ExpectEqOamBuffers(oldOamBuffer, gMain.oamBuffer);
    EXPECT_FASTER(newBuildOamBuffer, oldBuildOamBuffer);
    Free(oldOamBuffer);
}
|
||||||
|
|
||||||
|
// No sprites are created after the reset.
TEST("BuildOamBuffer faster with no sprites")
{
    const bool32 preSort = FALSE;

    ResetSpriteData_();
    BenchmarkBuildOamBuffer(preSort);
}
|
||||||
|
|
||||||
|
// MAX_SPRITES sprites that all share x = 0, y = 0 and subpriority 0.
TEST("BuildOamBuffer faster with max sprites (equal y/subpriority)")
{
    u32 created = 0;

    ResetSpriteData_();
    while (created < MAX_SPRITES)
    {
        CreateSprite(&gDummySpriteTemplate, 0, 0, 0);
        created++;
    }
    BenchmarkBuildOamBuffer(FALSE);
}
|
||||||
|
|
||||||
|
// MAX_SPRITES sprites with pseudo-random y and subpriority in 0..255,
// generated from a fixed seed.
TEST("BuildOamBuffer faster with max sprites (random y/subpriority)")
{
    u32 i;

    ResetSpriteData_();
    SeedRng(0);
    for (i = 0; i < MAX_SPRITES; i++)
    {
        // Draw the two values in separate statements: the evaluation order
        // of function arguments is unspecified, so calling Random() twice
        // inside one argument list makes the seeded data compiler-dependent.
        u32 y = Random() % 256;
        u32 subpriority = Random() % 256;

        CreateSprite(&gDummySpriteTemplate, 0, y, subpriority);
    }
    BenchmarkBuildOamBuffer(FALSE);
}
|
||||||
|
|
||||||
|
// Same random sprites as the random y/subpriority test, but each
// implementation is run once before being timed (preSort = TRUE).
TEST("BuildOamBuffer faster on already-sorted max sprites")
{
    u32 i;

    ResetSpriteData_();
    SeedRng(0);
    for (i = 0; i < MAX_SPRITES; i++)
    {
        // Draw the two values in separate statements: the evaluation order
        // of function arguments is unspecified, so calling Random() twice
        // inside one argument list makes the seeded data compiler-dependent.
        u32 y = Random() % 256;
        u32 subpriority = Random() % 256;

        CreateSprite(&gDummySpriteTemplate, 0, y, subpriority);
    }
    BenchmarkBuildOamBuffer(TRUE);
}
|
||||||
|
|
||||||
|
// Half of the sprite slots are filled with random sprites; each created
// sprite is marked invisible when Random() % 4 == 0.
TEST("BuildOamBuffer faster with mix of sprites")
{
    u32 i;

    ResetSpriteData_();
    SeedRng(0);
    for (i = 0; i < MAX_SPRITES / 2; i++)
    {
        // Draw the two values in separate statements: the evaluation order
        // of function arguments is unspecified, so calling Random() twice
        // inside one argument list makes the seeded data compiler-dependent.
        u32 y = Random() % 256;
        u32 subpriority = Random() % 256;
        u32 spriteId = CreateSprite(&gDummySpriteTemplate, 0, y, subpriority);

        gSprites[spriteId].invisible = Random() % 4 == 0;
    }
    BenchmarkBuildOamBuffer(FALSE);
}
|
||||||
|
|
||||||
|
// Old implementation.
|
||||||
|
|
||||||
|
#define UBFIX
|
||||||
|
|
||||||
|
static void UpdateOamCoords(void)
|
||||||
|
{
|
||||||
|
u8 i;
|
||||||
|
for (i = 0; i < MAX_SPRITES; i++)
|
||||||
|
{
|
||||||
|
struct Sprite *sprite = &gSprites[i];
|
||||||
|
if (sprite->inUse && !sprite->invisible)
|
||||||
|
{
|
||||||
|
if (sprite->coordOffsetEnabled)
|
||||||
|
{
|
||||||
|
sprite->oam.x = sprite->x + sprite->x2 + sprite->centerToCornerVecX + gSpriteCoordOffsetX;
|
||||||
|
sprite->oam.y = sprite->y + sprite->y2 + sprite->centerToCornerVecY + gSpriteCoordOffsetY;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sprite->oam.x = sprite->x + sprite->x2 + sprite->centerToCornerVecX;
|
||||||
|
sprite->oam.y = sprite->y + sprite->y2 + sprite->centerToCornerVecY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void BuildSpritePriorities(void)
|
||||||
|
{
|
||||||
|
u16 i;
|
||||||
|
for (i = 0; i < MAX_SPRITES; i++)
|
||||||
|
{
|
||||||
|
struct Sprite *sprite = &gSprites[i];
|
||||||
|
u16 priority = sprite->subpriority | (sprite->oam.priority << 8);
|
||||||
|
sSpritePriorities[i] = priority;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SortSprites(void)
|
||||||
|
{
|
||||||
|
u8 i;
|
||||||
|
for (i = 1; i < MAX_SPRITES; i++)
|
||||||
|
{
|
||||||
|
u8 j = i;
|
||||||
|
struct Sprite *sprite1 = &gSprites[sSpriteOrder[i - 1]];
|
||||||
|
struct Sprite *sprite2 = &gSprites[sSpriteOrder[i]];
|
||||||
|
u16 sprite1Priority = sSpritePriorities[sSpriteOrder[i - 1]];
|
||||||
|
u16 sprite2Priority = sSpritePriorities[sSpriteOrder[i]];
|
||||||
|
s16 sprite1Y = sprite1->oam.y;
|
||||||
|
s16 sprite2Y = sprite2->oam.y;
|
||||||
|
|
||||||
|
if (sprite1Y >= DISPLAY_HEIGHT)
|
||||||
|
sprite1Y = sprite1Y - 256;
|
||||||
|
|
||||||
|
if (sprite2Y >= DISPLAY_HEIGHT)
|
||||||
|
sprite2Y = sprite2Y - 256;
|
||||||
|
|
||||||
|
if (sprite1->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
||||||
|
&& sprite1->oam.size == ST_OAM_SIZE_3)
|
||||||
|
{
|
||||||
|
u32 shape = sprite1->oam.shape;
|
||||||
|
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
||||||
|
{
|
||||||
|
if (sprite1Y > 128)
|
||||||
|
sprite1Y = sprite1Y - 256;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sprite2->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
||||||
|
&& sprite2->oam.size == ST_OAM_SIZE_3)
|
||||||
|
{
|
||||||
|
u32 shape = sprite2->oam.shape;
|
||||||
|
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
||||||
|
{
|
||||||
|
if (sprite2Y > 128)
|
||||||
|
sprite2Y = sprite2Y - 256;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (j > 0
|
||||||
|
&& ((sprite1Priority > sprite2Priority)
|
||||||
|
|| (sprite1Priority == sprite2Priority && sprite1Y < sprite2Y)))
|
||||||
|
{
|
||||||
|
u8 temp = sSpriteOrder[j];
|
||||||
|
sSpriteOrder[j] = sSpriteOrder[j - 1];
|
||||||
|
sSpriteOrder[j - 1] = temp;
|
||||||
|
|
||||||
|
// UB: If j equals 1, then j-- makes j equal 0.
|
||||||
|
// Then, sSpriteOrder[-1] gets accessed below.
|
||||||
|
// Although this doesn't result in a bug in the ROM,
|
||||||
|
// the behavior is undefined.
|
||||||
|
j--;
|
||||||
|
#ifdef UBFIX
|
||||||
|
if (j == 0)
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
sprite1 = &gSprites[sSpriteOrder[j - 1]];
|
||||||
|
sprite2 = &gSprites[sSpriteOrder[j]];
|
||||||
|
sprite1Priority = sSpritePriorities[sSpriteOrder[j - 1]];
|
||||||
|
sprite2Priority = sSpritePriorities[sSpriteOrder[j]];
|
||||||
|
sprite1Y = sprite1->oam.y;
|
||||||
|
sprite2Y = sprite2->oam.y;
|
||||||
|
|
||||||
|
if (sprite1Y >= DISPLAY_HEIGHT)
|
||||||
|
sprite1Y = sprite1Y - 256;
|
||||||
|
|
||||||
|
if (sprite2Y >= DISPLAY_HEIGHT)
|
||||||
|
sprite2Y = sprite2Y - 256;
|
||||||
|
|
||||||
|
if (sprite1->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
||||||
|
&& sprite1->oam.size == ST_OAM_SIZE_3)
|
||||||
|
{
|
||||||
|
u32 shape = sprite1->oam.shape;
|
||||||
|
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
||||||
|
{
|
||||||
|
if (sprite1Y > 128)
|
||||||
|
sprite1Y = sprite1Y - 256;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sprite2->oam.affineMode == ST_OAM_AFFINE_DOUBLE
|
||||||
|
&& sprite2->oam.size == ST_OAM_SIZE_3)
|
||||||
|
{
|
||||||
|
u32 shape = sprite2->oam.shape;
|
||||||
|
if (shape == ST_OAM_SQUARE || shape == ST_OAM_V_RECTANGLE)
|
||||||
|
{
|
||||||
|
if (sprite2Y > 128)
|
||||||
|
sprite2Y = sprite2Y - 256;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void CopyMatricesToOamBuffer(void)
|
||||||
|
{
|
||||||
|
u8 i;
|
||||||
|
for (i = 0; i < OAM_MATRIX_COUNT; i++)
|
||||||
|
{
|
||||||
|
u32 base = 4 * i;
|
||||||
|
gMain.oamBuffer[base + 0].affineParam = gOamMatrices[i].a;
|
||||||
|
gMain.oamBuffer[base + 1].affineParam = gOamMatrices[i].b;
|
||||||
|
gMain.oamBuffer[base + 2].affineParam = gOamMatrices[i].c;
|
||||||
|
gMain.oamBuffer[base + 3].affineParam = gOamMatrices[i].d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AddSpritesToOamBuffer(void)
|
||||||
|
{
|
||||||
|
u8 i = 0;
|
||||||
|
u8 oamIndex = 0;
|
||||||
|
|
||||||
|
while (i < MAX_SPRITES)
|
||||||
|
{
|
||||||
|
struct Sprite *sprite = &gSprites[sSpriteOrder[i]];
|
||||||
|
if (sprite->inUse && !sprite->invisible && AddSpriteToOamBuffer(sprite, &oamIndex))
|
||||||
|
return;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (oamIndex < gOamLimit)
|
||||||
|
{
|
||||||
|
gMain.oamBuffer[oamIndex] = gDummyOamData;
|
||||||
|
oamIndex++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Old_BuildOamBuffer(void)
|
||||||
|
{
|
||||||
|
u8 temp;
|
||||||
|
UpdateOamCoords();
|
||||||
|
BuildSpritePriorities();
|
||||||
|
SortSprites();
|
||||||
|
temp = gMain.oamLoadDisabled;
|
||||||
|
gMain.oamLoadDisabled = TRUE;
|
||||||
|
AddSpritesToOamBuffer();
|
||||||
|
CopyMatricesToOamBuffer();
|
||||||
|
gMain.oamLoadDisabled = temp;
|
||||||
|
//sShouldProcessSpriteCopyRequests = TRUE;
|
||||||
|
}
|
44
test/test.h
44
test/test.h
@ -46,6 +46,7 @@ struct TestRunnerState
|
|||||||
u8 result;
|
u8 result;
|
||||||
u8 expectedResult;
|
u8 expectedResult;
|
||||||
bool8 expectLeaks:1;
|
bool8 expectLeaks:1;
|
||||||
|
bool8 inBenchmark:1;
|
||||||
u32 timeoutSeconds;
|
u32 timeoutSeconds;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -158,6 +159,49 @@ s32 MgbaPrintf_(const char *fmt, ...);
|
|||||||
Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_GE(%d, %d) failed", gTestRunnerState.test->filename, __LINE__, _a, _b); \
|
Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_GE(%d, %d) failed", gTestRunnerState.test->filename, __LINE__, _a, _b); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
// Result of one BENCHMARK run.
struct Benchmark
{
    s32 ticks; // Value of REG_TM3CNT_L read when the benchmark stopped.
};
|
||||||
|
|
||||||
|
static inline void BenchmarkStart(void)
|
||||||
|
{
|
||||||
|
gTestRunnerState.inBenchmark = TRUE;
|
||||||
|
REG_TM3CNT = (TIMER_ENABLE | TIMER_64CLK) << 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct Benchmark BenchmarkStop(void)
|
||||||
|
{
|
||||||
|
REG_TM3CNT_H = 0;
|
||||||
|
gTestRunnerState.inBenchmark = FALSE;
|
||||||
|
return (struct Benchmark) { REG_TM3CNT_L };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs the statement that follows exactly once and stores its timing in *id.
// The for-loop's init starts the benchmark, its condition is the inBenchmark
// flag, and its step stops the benchmark (which clears that flag).
#define BENCHMARK(id) for (BenchmarkStart(); gTestRunnerState.inBenchmark; *(id) = BenchmarkStop())
|
||||||
|
|
||||||
|
// Approximate overhead, in timer ticks, that taking a benchmark adds.
|
||||||
|
#define BENCHMARK_ABS 2
|
||||||
|
|
||||||
|
// Relative tolerance, in percent, used when comparing two benchmarks:
// after BENCHMARK_ABS is subtracted, one side's ticks are scaled by
// BENCHMARK_REL and compared against the other side's ticks scaled by 100.
|
||||||
|
#define BENCHMARK_REL 95
|
||||||
|
|
||||||
|
// Fails the test unless benchmark a is faster than benchmark b, within the
// BENCHMARK_ABS / BENCHMARK_REL tolerances. Both tick counts are logged.
// The overhead subtraction saturates at 0: the ticks are unsigned, so a
// plain a_ - BENCHMARK_ABS would wrap around for a_ < BENCHMARK_ABS and
// report a spurious failure for very short benchmarks.
#define EXPECT_FASTER(a, b) \
    do \
    { \
        u32 a_ = (a).ticks; u32 b_ = (b).ticks; \
        u32 aAdjusted_ = (a_ > BENCHMARK_ABS) ? a_ - BENCHMARK_ABS : 0; \
        MgbaPrintf_(#a ": %d ticks, " #b ": %d ticks", a_, b_); \
        if ((aAdjusted_ * BENCHMARK_REL) >= (b_ * 100)) \
            Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_FASTER(" #a ", " #b ") failed", gTestRunnerState.test->filename, __LINE__); \
    } while (0)
|
||||||
|
|
||||||
|
// Fails the test unless benchmark a is slower than benchmark b, within the
// BENCHMARK_ABS / BENCHMARK_REL tolerances. Both tick counts are logged.
// The overhead subtraction saturates at 0: the ticks are unsigned, so a
// plain b_ - BENCHMARK_ABS would wrap around for b_ < BENCHMARK_ABS and
// report a spurious failure when b is a very short benchmark.
#define EXPECT_SLOWER(a, b) \
    do \
    { \
        u32 a_ = (a).ticks; u32 b_ = (b).ticks; \
        u32 bAdjusted_ = (b_ > BENCHMARK_ABS) ? b_ - BENCHMARK_ABS : 0; \
        MgbaPrintf_(#a ": %d ticks, " #b ": %d ticks", a_, b_); \
        if ((a_ * 100) <= (bAdjusted_ * BENCHMARK_REL)) \
            Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_SLOWER(" #a ", " #b ") failed", gTestRunnerState.test->filename, __LINE__); \
    } while (0)
|
||||||
|
|
||||||
#define KNOWN_FAILING \
|
#define KNOWN_FAILING \
|
||||||
Test_ExpectedResult(TEST_RESULT_FAIL)
|
Test_ExpectedResult(TEST_RESULT_FAIL)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user