Merge pull request #1087 from ipatix/fix_aggressive_loop_optimizations

fix aggressive loop optimizations
This commit is contained in:
PikalaxALT 2020-07-08 15:42:08 -04:00 committed by GitHub
commit 550875757b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 79 additions and 45 deletions

View File

@ -67,7 +67,7 @@ OBJ_DIR := build/emerald
LIBPATH := -L ../../tools/agbcc/lib
else
CC1 = $(shell $(CC) --print-prog-name=cc1) -quiet
override CFLAGS += -mthumb -mthumb-interwork -O2 -mabi=apcs-gnu -mtune=arm7tdmi -march=armv4t -fno-toplevel-reorder -fno-aggressive-loop-optimizations -Wno-pointer-to-int-cast
override CFLAGS += -mthumb -mthumb-interwork -O2 -mabi=apcs-gnu -mtune=arm7tdmi -march=armv4t -fno-toplevel-reorder -Wno-pointer-to-int-cast
ROM := pokeemerald_modern.gba
OBJ_DIR := build/modern
LIBPATH := -L "$(dir $(shell $(CC) -mthumb -print-file-name=libgcc.a))" -L "$(dir $(shell $(CC) -mthumb -print-file-name=libc.a))"
@ -232,7 +232,7 @@ $(C_BUILDDIR)/record_mixing.o: CFLAGS += -ffreestanding
$(C_BUILDDIR)/librfu_intr.o: CC1 := tools/agbcc/bin/agbcc_arm
$(C_BUILDDIR)/librfu_intr.o: CFLAGS := -O2 -mthumb-interwork -quiet
else
$(C_BUILDDIR)/librfu_intr.o: CFLAGS := -mthumb-interwork -O2 -mabi=apcs-gnu -mtune=arm7tdmi -march=armv4t -fno-toplevel-reorder -fno-aggressive-loop-optimizations -Wno-pointer-to-int-cast
$(C_BUILDDIR)/librfu_intr.o: CFLAGS := -mthumb-interwork -O2 -mabi=apcs-gnu -mtune=arm7tdmi -march=armv4t -fno-toplevel-reorder -Wno-pointer-to-int-cast
endif
ifeq ($(NODEP),1)

View File

@ -26,4 +26,12 @@
#define UNITS_METRIC
#endif
// Various undefined behavior bugs may or may not prevent compilation with
// newer compilers. So always fix them when using a modern compiler.
#if MODERN
#ifndef UBFIX
#define UBFIX
#endif
#endif
#endif // GUARD_CONFIG_H

View File

@ -18,6 +18,13 @@ typedef union // size = 0x24
/*0x23*/ u8 trainerIdHi;
} common;
// Common init (used for initialization loop)
struct {
/*0x00*/ u8 kind;
/*0x01*/ bool8 active;
/*0x02*/ u8 pad02[34];
} commonInit;
// Local shows
// TVSHOW_FAN_CLUB_LETTER
struct {

View File

@ -2202,6 +2202,11 @@ static s16 sub_8081BD4(void)
return sUnknown_03000E06;
}
#if MODERN
// TODO remove this as soon as the code below is understood
// add a UBFIX if required (code buggy?)
__attribute__((optimize("no-aggressive-loop-optimizations")))
#endif
static void Blender_CalculatePokeblock(struct BlenderBerry *berries, struct Pokeblock *pokeblock, u8 playersNo, u8 *flavors, u16 maxRPM)
{
s32 i, j;

View File

@ -3209,6 +3209,11 @@ static u32 sub_8024568(__attribute__((unused)) struct BerryCrushGame *r0, __attr
return 0;
}
#if MODERN
// TODO remove this as soon as the code below is understood
// add a UBFIX if required (code buggy?)
__attribute__((optimize("no-aggressive-loop-optimizations")))
#endif
void sub_8024578(struct BerryCrushGame *r4)
{
u8 r5 = 0;

View File

@ -5273,12 +5273,17 @@ void InitEasyChatPhrases(void)
gSaveBlock1Ptr->mail[i].words[j] = 0xFFFF;
}
#ifndef UBFIX
// BUG: This is supposed to clear 64 bits, but this loop is clearing 64 bytes.
// However, this bug has no resulting effect on gameplay because only the
// Mauville old man data is corrupted, which is initialized directly after
// this function is called when starting a new game.
for (i = 0; i < 64; i++)
gSaveBlock1Ptr->additionalPhrases[i] = 0;
#else
for (i = 0; i < ARRAY_COUNT(gSaveBlock1Ptr->additionalPhrases); i++)
gSaveBlock1Ptr->additionalPhrases[i] = 0;
#endif
}
static bool8 sub_811F28C(void)

View File

@ -533,9 +533,16 @@ static bool32 SavedMapViewIsEmpty(void)
u16 i;
u32 marker = 0;
#ifndef UBFIX
// BUG: This loop extends past the bounds of the mapView array. Its size is only 0x100.
for (i = 0; i < 0x200; i++)
marker |= gSaveBlock1Ptr->mapView[i];
#else
// UBFIX: Only iterate over 0x100
for (i = 0; i < ARRAY_COUNT(gSaveBlock1Ptr->mapView); i++)
marker |= gSaveBlock1Ptr->mapView[i];
#endif
if (marker == 0)
return TRUE;

View File

@ -5,7 +5,7 @@
// IWRAM common
u8 gCanvasColumnStart;
u16 (*gCanvasPixels)[][32];
u16 *gCanvasPixels;
u8 gCanvasRowEnd;
u8 gCanvasHeight;
u8 gCanvasColumnEnd;
@ -125,7 +125,7 @@ static void ApplyImageEffect_RedChannelGrayscale(u8 delta)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -150,7 +150,7 @@ static void ApplyImageEffect_RedChannelGrayscaleHighlight(u8 highlight)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -179,7 +179,7 @@ static void ApplyImageEffect_Grayscale(void)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -195,7 +195,7 @@ static void ApplyImageEffect_Blur(void)
for (i = 0; i < gCanvasColumnEnd; i++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][gCanvasRowStart * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[gCanvasRowStart * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart + i];
u16 prevPixel = *pixel;
@ -221,7 +221,7 @@ static void ApplyImageEffect_PersonalityColor(u8 personality)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -237,7 +237,7 @@ static void ApplyImageEffect_BlackAndWhite(void)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -255,7 +255,7 @@ static void ApplyImageEffect_BlackOutline(void)
// Handle top row of pixels first.
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
pixel = &pixelRow[gCanvasColumnStart];
*pixel = QuantizePixel_BlackOutline(pixel, pixel + 1);
for (i = 1, pixel++; i < gCanvasColumnEnd - 1; i++, pixel++)
@ -270,7 +270,7 @@ static void ApplyImageEffect_BlackOutline(void)
// Handle each column from left to right.
for (i = 0; i < gCanvasColumnEnd; i++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][gCanvasRowStart * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[gCanvasRowStart * gCanvasWidth];
pixel = &pixelRow[gCanvasColumnStart + i];
*pixel = QuantizePixel_BlackOutline(pixel, pixel + gCanvasWidth);
for (j = 1, pixel += gCanvasWidth; j < gCanvasRowEnd - 1; j++, pixel += gCanvasWidth)
@ -289,7 +289,7 @@ static void ApplyImageEffect_Invert(void)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -306,7 +306,7 @@ static void ApplyImageEffect_Shimmer(void)
u16 prevPixel;
// First, invert all of the colors.
pixel = (*gCanvasPixels)[0];
pixel = gCanvasPixels;
for (i = 0; i < 64; i++)
{
for (j = 0; j < 64; j++, pixel++)
@ -319,7 +319,7 @@ static void ApplyImageEffect_Shimmer(void)
// Blur the pixels twice.
for (j = 0; j < 64; j++)
{
pixel = &(*gCanvasPixels)[0][j];
pixel = &gCanvasPixels[j];
prevPixel = *pixel;
*pixel = 0x8000;
for (i = 1, pixel += 64; i < 63; i++, pixel += 64)
@ -332,7 +332,7 @@ static void ApplyImageEffect_Shimmer(void)
}
*pixel = 0x8000;
pixel = &(*gCanvasPixels)[0][j];
pixel = &gCanvasPixels[j];
prevPixel = *pixel;
*pixel = 0x8000;
for (i = 1, pixel += 64; i < 63; i++, pixel += 64)
@ -350,7 +350,7 @@ static void ApplyImageEffect_Shimmer(void)
// Finally, invert colors back to the original color space.
// The above blur causes the outline areas to darken, which makes
// this inversion give the effect of light outlines.
pixel = (*gCanvasPixels)[0];
pixel = gCanvasPixels;
for (i = 0; i < 64; i++)
{
for (j = 0; j < 64; j++, pixel++)
@ -367,7 +367,7 @@ static void ApplyImageEffect_BlurRight(void)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
u16 prevPixel = *pixel;
for (i = 1, pixel++; i < gCanvasColumnEnd - 1; i++, pixel++)
@ -387,7 +387,7 @@ static void ApplyImageEffect_BlurDown(void)
for (i = 0; i < gCanvasColumnEnd; i++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][gCanvasRowStart * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[gCanvasRowStart * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart + i];
u16 prevPixel = *pixel;
for (j = 1, pixel += gCanvasWidth; j < gCanvasRowEnd - 1; j++, pixel += gCanvasWidth)
@ -445,7 +445,7 @@ static void AddPointillismPoints(u16 arg0)
for (i = 0; i < points[0].delta; i++)
{
u16 *pixel = &(*gCanvasPixels)[points[i].row * 2][points[i].column];
u16 *pixel = &gCanvasPixels[points[i].row * 64] + points[i].column;
if (!(0x8000 & *pixel))
{
@ -910,7 +910,7 @@ static void QuantizePalette_Standard(bool8 useLimitedPalette)
gCanvasPalette[maxIndex] = RGB2(15, 15, 15);
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -978,7 +978,7 @@ static void QuantizePalette_BlackAndWhite(void)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -1009,7 +1009,7 @@ static void QuantizePalette_GrayscaleSmall(void)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -1027,7 +1027,7 @@ static void QuantizePalette_Grayscale(void)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{
@ -1045,7 +1045,7 @@ static void QuantizePalette_PrimaryColors(void)
for (j = 0; j < gCanvasRowEnd; j++)
{
u16 *pixelRow = &(*gCanvasPixels)[0][(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixelRow = &gCanvasPixels[(gCanvasRowStart + j) * gCanvasWidth];
u16 *pixel = &pixelRow[gCanvasColumnStart];
for (i = 0; i < gCanvasColumnEnd; i++, pixel++)
{

View File

@ -384,7 +384,7 @@ static const u16 gSlotPayouts[];
static const u8 *const gUnknown_083EDCE4;
static const u8 *const gUnknown_083EDCDC;
static const u32 gReelTimeGfx[];
static const struct SpriteSheet gSlotMachineSpriteSheets[];
static const struct SpriteSheet gSlotMachineSpriteSheets[22];
static const struct SpritePalette gSlotMachineSpritePalettes[];
static const u16 *const gUnknown_083EDE20;
static const s16 gInitialReelPositions[][2];
@ -4171,8 +4171,8 @@ static void sub_81063C0(void)
LZDecompressWram(gSlotMachineReelTime_Gfx, sUnknown_0203AAD4);
sUnknown_0203AAD8 = Alloc(0x3600);
LZDecompressWram(gReelTimeGfx, sUnknown_0203AAD8);
sUnknown_0203AB30 = AllocZeroed(sizeof(struct SpriteSheet) * 22);
for (i = 0; i < 22; i++)
sUnknown_0203AB30 = AllocZeroed(sizeof(struct SpriteSheet) * ARRAY_COUNT(gSlotMachineSpriteSheets));
for (i = 0; i < ARRAY_COUNT(gSlotMachineSpriteSheets); i++)
{
sUnknown_0203AB30[i].data = gSlotMachineSpriteSheets[i].data;
sUnknown_0203AB30[i].size = gSlotMachineSpriteSheets[i].size;
@ -6708,7 +6708,7 @@ static const struct SubspriteTable *const gUnknown_083EDBC4[] =
NULL
};
static const struct SpriteSheet gSlotMachineSpriteSheets[] =
static const struct SpriteSheet gSlotMachineSpriteSheets[22] =
{
{ .data = gSlotMachineReelSymbol1Tiles, .size = 0x200, .tag = 0 },
{ .data = gSlotMachineReelSymbol2Tiles, .size = 0x200, .tag = 1 },
@ -6727,15 +6727,12 @@ static const struct SpriteSheet gSlotMachineSpriteSheets[] =
{ .data = gSlotMachineNumber7Tiles, .size = 0x40, .tag = 14 },
{ .data = gSlotMachineNumber8Tiles, .size = 0x40, .tag = 15 },
{ .data = gSlotMachineNumber9Tiles, .size = 0x40, .tag = 16 },
};
static const u8 sUnused1[][8] =
{
{0, 0, 0, 0, 0, 2, 18},
{0, 0, 0, 0, 0, 2, 19},
{0, 0, 0, 0, 0, 3, 20},
{0, 0, 0, 0, 0, 3, 21},
{0, 0, 0, 0, 0, 0, 0}
// the data for these sheets is determined at runtime
{ .data = NULL, .size = 0x200, .tag = 18 },
{ .data = NULL, .size = 0x200, .tag = 19 },
{ .data = NULL, .size = 0x300, .tag = 20 },
{ .data = NULL, .size = 0x300, .tag = 21 },
{},
};
static const u8 *const gUnknown_083EDCDC = gUnknown_08DD19F8;

View File

@ -764,11 +764,11 @@ void ClearTVShowData(void)
for (i = 0; i < ARRAY_COUNT(gSaveBlock1Ptr->tvShows); i ++)
{
gSaveBlock1Ptr->tvShows[i].common.kind = 0;
gSaveBlock1Ptr->tvShows[i].common.active = 0;
for (j = 0; j < sizeof(TVShow) - 2; j ++)
gSaveBlock1Ptr->tvShows[i].commonInit.kind = 0;
gSaveBlock1Ptr->tvShows[i].commonInit.active = 0;
for (j = 0; j < ARRAY_COUNT(gSaveBlock1Ptr->tvShows[i].commonInit.pad02); j ++)
{
gSaveBlock1Ptr->tvShows[i].common.pad02[j] = 0;
gSaveBlock1Ptr->tvShows[i].commonInit.pad02[j] = 0;
}
}
ClearPokemonNews();
@ -3167,11 +3167,11 @@ void DeleteTVShowInArrayByIdx(TVShow *shows, u8 idx)
{
u8 i;
shows[idx].common.kind = TVSHOW_OFF_AIR;
shows[idx].common.active = FALSE;
for (i = 0; i < 34; i ++)
shows[idx].commonInit.kind = TVSHOW_OFF_AIR;
shows[idx].commonInit.active = FALSE;
for (i = 0; i < ARRAY_COUNT(shows[idx].commonInit.pad02); i++)
{
shows[idx].common.pad02[i] = 0;
shows[idx].commonInit.pad02[i] = 0;
}
}