Add BENCHMARK to test runner

Counts the time spent in the block in arbitrary time units (currently
one unit per 64 cycles). If the block takes more than ~4 million cycles
(~0.25s), the benchmark counter will wrap around.

Note that the time can be affected by the timeout IRQs, and should only
be taken as a loose indication of relative performance.
This commit is contained in:
Martin Griffin 2023-07-24 12:51:43 +01:00
parent cd5f997ee3
commit 3f5daa8cf1
3 changed files with 100 additions and 0 deletions

View File

@ -664,6 +664,7 @@
// Bit values for a timer control register. Several may be OR-ed together
// (e.g. TIMER_ENABLE | TIMER_64CLK).
// NOTE(review): going by the names, the *CLK values select a clock
// prescaler (cycles per tick) and TIMER_COUNTUP selects count-up/cascade
// mode - confirm against the hardware documentation.
#define TIMER_64CLK 0x01
#define TIMER_256CLK 0x02
#define TIMER_1024CLK 0x03
#define TIMER_COUNTUP 0x04
#define TIMER_INTR_ENABLE 0x40
#define TIMER_ENABLE 0x80

View File

@ -151,3 +151,60 @@ TEST("RandomElement generates a uniform distribution")
EXPECT_LT(error, UQ_4_12(0.025));
}
// Times four RandomUniformDefault calls against four `Random() % n` calls
// where every bound is a literal constant, and expects the
// RandomUniformDefault version (the "mul-based" one, per the test name) to
// be the faster of the two.
TEST("RandomUniform mul-based faster than mod-based (compile-time)")
{
    struct Benchmark mulBenchmark, modBenchmark;
    u32 mulSum = 0, modSum = 0;

    BENCHMARK(&mulBenchmark)
    {
        mulSum += RandomUniformDefault(RNG_NONE, 0, 1);
        mulSum += RandomUniformDefault(RNG_NONE, 0, 2);
        mulSum += RandomUniformDefault(RNG_NONE, 0, 3);
        mulSum += RandomUniformDefault(RNG_NONE, 0, 4);
    }

    BENCHMARK(&modBenchmark)
    {
        modSum += Random() % 2;
        modSum += Random() % 3;
        modSum += Random() % 4;
        modSum += Random() % 5;
    }

    EXPECT_FASTER(mulBenchmark, modBenchmark);

    // Reference mulSum/modSum to prevent optimization.
    // These numbers are different because multiplication and modulus
    // have subtly different biases (so subtle that it's irrelevant for
    // our purposes).
    EXPECT_EQ(mulSum, 3);
    EXPECT_EQ(modSum, 4);
}
// Same comparison as the compile-time variant, but the upper bound comes
// from a loop counter, so it is only known at run time.
TEST("RandomUniform mul-based faster than mod-based (run-time)")
{
    struct Benchmark mulBenchmark;
    struct Benchmark modBenchmark;
    u32 mulSum = 0;
    u32 modSum = 0;
    u32 hi;

    BENCHMARK(&mulBenchmark)
    {
        // Inclusive range [0, hi] for hi = 0..31.
        for (hi = 0; hi < 32; hi++)
        {
            mulSum += RandomUniformDefault(RNG_NONE, 0, hi);
        }
    }

    BENCHMARK(&modBenchmark)
    {
        // Modulus hi + 1 yields the matching range [0, hi].
        for (hi = 0; hi < 32; hi++)
        {
            modSum += Random() % (hi + 1);
        }
    }

    EXPECT_FASTER(mulBenchmark, modBenchmark);

    // Consume both sums so the benchmarked work cannot be optimized away.
    EXPECT_EQ(mulSum, 232);
    EXPECT_EQ(modSum, 249);
}

View File

@ -46,6 +46,7 @@ struct TestRunnerState
u8 result;
u8 expectedResult;
bool8 expectLeaks:1;
bool8 inBenchmark:1;
u32 timeoutSeconds;
};
@ -158,6 +159,47 @@ s32 MgbaPrintf_(const char *fmt, ...);
Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_GE(%d, %d) failed", gTestRunnerState.test->filename, __LINE__, _a, _b); \
} while (0)
// Result of one BENCHMARK block.
struct Benchmark
{
    u32 ticks; // Timer counter value read back when the benchmark stopped.
};
static inline void BenchmarkStart(void)
{
gTestRunnerState.inBenchmark = TRUE;
REG_TM3CNT = (TIMER_ENABLE | TIMER_64CLK) << 16;
}
static inline struct Benchmark BenchmarkStop(void)
{
REG_TM3CNT_H = 0;
gTestRunnerState.inBenchmark = FALSE;
return (struct Benchmark) { REG_TM3CNT_L };
}
// Times the statement or block that follows and stores the result in *id
// (a `struct Benchmark *`). Built on a `for` loop: the init clause starts
// the timer, and the step clause stops it and stores the result. Since
// BenchmarkStop clears inBenchmark, the body runs exactly once.
// Leaving the body with `break`, `return` or `goto` skips the step clause,
// so the result is never stored.
#define BENCHMARK(id) \
    for (BenchmarkStart(); \
         gTestRunnerState.inBenchmark; \
         *(id) = BenchmarkStop())
// An approximation of how much overhead benchmarks introduce, in ticks.
// EXPECT_FASTER / EXPECT_SLOWER subtract it from one side's tick count
// before comparing.
#define BENCHMARK_ABS 2
// Tolerance, in percent, used by EXPECT_FASTER / EXPECT_SLOWER: the
// overhead-adjusted tick count is multiplied by this value and compared
// against 100 times the other tick count. With a value below 100 the
// expectation only fails when the adjusted count is off by more than the
// remaining margin (about 5% here), so small timing noise does not fail
// a test.
#define BENCHMARK_REL 95
// Fails the current test unless benchmark `a` is (within tolerance) faster
// than benchmark `b`. Both arguments are `struct Benchmark` values.
//
// BENCHMARK_ABS ticks of measurement overhead are removed from a's count,
// the remainder is scaled by BENCHMARK_REL percent, and the test fails if
// that is still >= b's count.
//
// The overhead subtraction saturates at zero. The counts are unsigned, so
// a plain `a_ - BENCHMARK_ABS` would wrap to a huge value whenever `a` took
// fewer than BENCHMARK_ABS ticks, wrongly reporting a very fast benchmark
// as not faster.
#define EXPECT_FASTER(a, b) \
    do \
    { \
        u32 a_ = (a).ticks; u32 b_ = (b).ticks; \
        u32 aNet_ = (a_ > BENCHMARK_ABS) ? (a_ - BENCHMARK_ABS) : 0; \
        if ((aNet_ * BENCHMARK_REL) >= (b_ * 100)) \
            Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_FASTER(" #a ", " #b ") failed", gTestRunnerState.test->filename, __LINE__); \
    } while (0)
// Fails the current test unless benchmark `a` is (within tolerance) slower
// than benchmark `b`. Both arguments are `struct Benchmark` values.
//
// BENCHMARK_ABS ticks of measurement overhead are removed from b's count,
// the remainder is scaled by BENCHMARK_REL percent, and the test fails if
// 100 times a's count is <= that value.
//
// The overhead subtraction saturates at zero. The counts are unsigned, so
// a plain `b_ - BENCHMARK_ABS` would wrap to a huge value whenever `b` took
// fewer than BENCHMARK_ABS ticks, causing a spurious failure.
#define EXPECT_SLOWER(a, b) \
    do \
    { \
        u32 a_ = (a).ticks; u32 b_ = (b).ticks; \
        u32 bNet_ = (b_ > BENCHMARK_ABS) ? (b_ - BENCHMARK_ABS) : 0; \
        if ((a_ * 100) <= (bNet_ * BENCHMARK_REL)) \
            Test_ExitWithResult(TEST_RESULT_FAIL, "%s:%d: EXPECT_SLOWER(" #a ", " #b ") failed", gTestRunnerState.test->filename, __LINE__); \
    } while (0)
// Declares that the enclosing test is expected to end with
// TEST_RESULT_FAIL, by passing that result to Test_ExpectedResult.
#define KNOWN_FAILING Test_ExpectedResult(TEST_RESULT_FAIL)