diff --git a/include/gba/io_reg.h b/include/gba/io_reg.h
index 148ce31db..d05ec7120 100644
--- a/include/gba/io_reg.h
+++ b/include/gba/io_reg.h
@@ -664,6 +664,7 @@
 #define TIMER_64CLK       0x01
 #define TIMER_256CLK      0x02
 #define TIMER_1024CLK     0x03
+#define TIMER_COUNTUP     0x04
 #define TIMER_INTR_ENABLE 0x40
 #define TIMER_ENABLE      0x80
 
diff --git a/test/random.c b/test/random.c
index ef364de27..091b9323a 100644
--- a/test/random.c
+++ b/test/random.c
@@ -151,3 +151,61 @@ TEST("RandomElement generates a uniform distribution")
 
     EXPECT_LT(error, UQ_4_12(0.025));
 }
+
+TEST("RandomUniform mul-based faster than mod-based (compile-time)")
+{
+    struct Benchmark mulBenchmark, modBenchmark;
+    u32 mulSum = 0, modSum = 0;
+
+    // Every range bound below is a literal constant.
+    BENCHMARK(&mulBenchmark)
+    {
+        mulSum += RandomUniformDefault(RNG_NONE, 0, 1);
+        mulSum += RandomUniformDefault(RNG_NONE, 0, 2);
+        mulSum += RandomUniformDefault(RNG_NONE, 0, 3);
+        mulSum += RandomUniformDefault(RNG_NONE, 0, 4);
+    }
+
+    BENCHMARK(&modBenchmark)
+    {
+        modSum += Random() % 2;
+        modSum += Random() % 3;
+        modSum += Random() % 4;
+        modSum += Random() % 5;
+    }
+
+    EXPECT_FASTER(mulBenchmark, modBenchmark);
+
+    // Reference mulSum/modSum to prevent optimization.
+    // These numbers are different because multiplication and modulus
+    // have subtly different biases (so subtle that it's irrelevant for
+    // our purposes).
+    EXPECT_EQ(mulSum, 3);
+    EXPECT_EQ(modSum, 4);
+}
+
+TEST("RandomUniform mul-based faster than mod-based (run-time)")
+{
+    u32 i;
+    struct Benchmark mulBenchmark, modBenchmark;
+    u32 mulSum = 0, modSum = 0;
+
+    // Every range bound below comes from the loop index.
+    BENCHMARK(&mulBenchmark)
+    {
+        for (i = 0; i < 32; i++)
+            mulSum += RandomUniformDefault(RNG_NONE, 0, i);
+    }
+
+    BENCHMARK(&modBenchmark)
+    {
+        for (i = 0; i < 32; i++)
+            modSum += Random() % (i + 1);
+    }
+
+    EXPECT_FASTER(mulBenchmark, modBenchmark);
+
+    // Reference mulSum/modSum to prevent optimization.
+    EXPECT_EQ(mulSum, 232);
+    EXPECT_EQ(modSum, 249);
+}
diff --git a/test/test.h b/test/test.h
index 24abca60d..228f19565 100644
--- a/test/test.h
+++ b/test/test.h
@@ -46,6 +46,7 @@ struct TestRunnerState
     u8 result;
     u8 expectedResult;
     bool8 expectLeaks:1;
+    bool8 inBenchmark:1; // TRUE between BenchmarkStart and BenchmarkStop.
     u32 timeoutSeconds;
 };
 
@@ -158,6 +159,66 @@ s32 MgbaPrintf_(const char *fmt, ...);
             Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_GE(%d, %d) failed", gTestRunnerState.test->filename, __LINE__, _a, _b); \
     } while (0)
 
+// Result of one BENCHMARK block: the value read from REG_TM3CNT_L when
+// the block ended.
+struct Benchmark { u32 ticks; };
+
+// Flags a benchmark as running, then writes TIMER_ENABLE | TIMER_64CLK,
+// shifted left by 16 bits, to REG_TM3CNT.
+static inline void BenchmarkStart(void)
+{
+    gTestRunnerState.inBenchmark = TRUE;
+    REG_TM3CNT = (TIMER_ENABLE | TIMER_64CLK) << 16;
+}
+
+// Writes 0 to REG_TM3CNT_H, clears the running flag, and returns the
+// value read from REG_TM3CNT_L.
+// NOTE(review): nothing here detects the counter wrapping during a
+// long benchmark -- confirm the counter width against the hardware docs.
+static inline struct Benchmark BenchmarkStop(void)
+{
+    REG_TM3CNT_H = 0;
+    gTestRunnerState.inBenchmark = FALSE;
+    return (struct Benchmark) { REG_TM3CNT_L };
+}
+
+// Times the statement that follows and stores the result in *id.
+// The for loop runs its body once: BenchmarkStart sets inBenchmark and
+// BenchmarkStop, run as the loop's step expression, clears it again.
+#define BENCHMARK(id) \
+    for (BenchmarkStart(); gTestRunnerState.inBenchmark; *(id) = BenchmarkStop())
+
+// An approximation of how much overhead benchmarks introduce.
+#define BENCHMARK_ABS 2
+
+// An approximation for what percentage faster a benchmark has to be for
+// us to be confident that it's faster than another.
+#define BENCHMARK_REL 95
+
+// Fails the test when (a.ticks - BENCHMARK_ABS) * BENCHMARK_REL is at
+// least b.ticks * 100. BENCHMARK_ABS is added to the right-hand side
+// instead of being subtracted from a_, so the unsigned arithmetic cannot
+// wrap around when a_ is smaller than BENCHMARK_ABS.
+#define EXPECT_FASTER(a, b) \
+    do \
+    { \
+        u32 a_ = (a).ticks; u32 b_ = (b).ticks; \
+        if ((a_ * BENCHMARK_REL) >= ((b_ * 100) + (BENCHMARK_ABS * BENCHMARK_REL))) \
+            Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_FASTER(" #a ", " #b ") failed", gTestRunnerState.test->filename, __LINE__); \
+    } while (0)
+
+// Fails the test when a.ticks * 100 is at most
+// (b.ticks - BENCHMARK_ABS) * BENCHMARK_REL. BENCHMARK_ABS is added to
+// the left-hand side instead of being subtracted from b_, so the
+// unsigned arithmetic cannot wrap around when b_ is smaller than
+// BENCHMARK_ABS.
+#define EXPECT_SLOWER(a, b) \
+    do \
+    { \
+        u32 a_ = (a).ticks; u32 b_ = (b).ticks; \
+        if (((a_ * 100) + (BENCHMARK_ABS * BENCHMARK_REL)) <= (b_ * BENCHMARK_REL)) \
+            Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_SLOWER(" #a ", " #b ") failed", gTestRunnerState.test->filename, __LINE__); \
+    } while (0)
+
 #define KNOWN_FAILING \
     Test_ExpectedResult(TEST_RESULT_FAIL)
 