From 4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e Mon Sep 17 00:00:00 2001
From: Jannik Vogel <email@jannikvogel.de>
Date: Fri, 13 May 2016 08:46:14 +0200
Subject: [PATCH] Refactor access to state in shader-jit

---
 src/video_core/shader/shader.cpp         |  3 ++-
 src/video_core/shader/shader.h           | 25 +++++++++++++++---
 src/video_core/shader/shader_jit_x64.cpp | 32 ++++++++++++------------
 src/video_core/shader/shader_jit_x64.h   |  6 ++---
 4 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index e93a9d92a..d36d3d78e 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
 
 OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
     auto& config = g_state.regs.vs;
+    auto& setup = g_state.vs;
 
     MICROPROFILE_SCOPE(GPU_Shader);
 
@@ -81,7 +82,7 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input,
 
 #ifdef ARCHITECTURE_x86_64
     if (VideoCore::g_shader_jit_enabled)
-        jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
+        jit_shader->Run(setup, state, config.main_offset);
     else
         RunInterpreter(state);
 #else
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 983e4a967..84898f21c 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -283,10 +283,10 @@ struct UnitState {
     static size_t InputOffset(const SourceRegister& reg) {
         switch (reg.GetRegisterType()) {
         case RegisterType::Input:
-            return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+            return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 
         case RegisterType::Temporary:
-            return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+            return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 
         default:
             UNREACHABLE();
@@ -297,10 +297,10 @@ struct UnitState {
     static size_t OutputOffset(const DestRegister& reg) {
         switch (reg.GetRegisterType()) {
         case RegisterType::Output:
-            return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+            return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 
         case RegisterType::Temporary:
-            return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+            return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 
         default:
             UNREACHABLE();
@@ -323,6 +323,23 @@ struct ShaderSetup {
         std::array<Math::Vec4<u8>, 4> i;
     } uniforms;
 
+    static size_t UniformOffset(RegisterType type, unsigned index) {
+        switch (type) {
+        case RegisterType::FloatUniform:
+            return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
+
+        case RegisterType::BoolUniform:
+            return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
+
+        case RegisterType::IntUniform:
+            return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
+
+        default:
+            UNREACHABLE();
+            return 0;
+        }
+    }
+
     std::array<u32, 1024> program_code;
     std::array<u32, 1024> swizzle_data;
 
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 99f6c51eb..43e7e6b4c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -102,7 +102,7 @@ const JitFunction instr_table[64] = {
 // purposes, as documented below:
 
 /// Pointer to the uniform memory
-static const X64Reg UNIFORMS = R9;
+static const X64Reg SETUP = R9;
 /// The two 32-bit VS address offset registers set by the MOVA instruction
 static const X64Reg ADDROFFS_REG_0 = R10;
 static const X64Reg ADDROFFS_REG_1 = R11;
@@ -117,7 +117,7 @@ static const X64Reg COND0 = R13;
 /// Result of the previous CMP instruction for the Y-component comparison
 static const X64Reg COND1 = R14;
 /// Pointer to the UnitState instance for the current VS unit
-static const X64Reg REGISTERS = R15;
+static const X64Reg STATE = R15;
 /// SIMD scratch register
 static const X64Reg SCRATCH = XMM0;
 /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
 // State registers that must not be modified by external functions calls
 // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 static const BitSet32 persistent_regs = {
-    UNIFORMS, REGISTERS, // Pointers to register blocks
+    SETUP, STATE, // Pointers to register blocks
     ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
     ONE+16, NEGBIT+16, // Constants
 };
@@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
     size_t src_offset;
 
     if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
-        src_ptr = UNIFORMS;
-        src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
+        src_ptr = SETUP;
+        src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
     } else {
-        src_ptr = REGISTERS;
+        src_ptr = STATE;
         src_offset = UnitState<false>::InputOffset(src_reg);
     }
 
@@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
     // If all components are enabled, write the result to the destination register
     if (swiz.dest_mask == NO_DEST_REG_MASK) {
         // Store dest back to memory
-        MOVAPS(MDisp(REGISTERS, dest_offset_disp), src);
+        MOVAPS(MDisp(STATE, dest_offset_disp), src);
 
     } else {
         // Not all components are enabled, so mask the result when storing to the destination register...
-        MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp));
+        MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
 
         if (Common::GetCPUCaps().sse4_1) {
             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
         }
 
         // Store dest back to memory
-        MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH);
+        MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
     }
 }
 
@@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
 }
 
 void JitShader::Compile_UniformCondition(Instruction instr) {
-    int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
-    CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
+    int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
+    CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
 }
 
 BitSet32 JitShader::PersistentCallerSavedRegs() {
@@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
 
     looping = true;
 
-    int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>));
-    MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset));
+    int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
+    MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
     MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
     SHR(32, R(LOOPCOUNT_REG), Imm8(8));
     AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
@@ -826,8 +826,8 @@ void JitShader::Compile() {
     // The stack pointer is 8 modulo 16 at the entry of a procedure
     ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
 
-    MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
-    MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
+    MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
+    MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
 
     // Zero address/loop  registers
     XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
@@ -845,7 +845,7 @@ void JitShader::Compile() {
     MOVAPS(NEGBIT, MatR(RAX));
 
     // Jump to start of the shader program
-    JMPptr(R(ABI_PARAM2));
+    JMPptr(R(ABI_PARAM3));
 
     // Compile entire program
     Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 30aa7ff30..5468459d4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock {
 public:
     JitShader();
 
-    void Run(void* registers, unsigned offset) const {
-        program(registers, code_ptr[offset]);
+    void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const {
+        program(&setup, &state, code_ptr[offset]);
     }
 
     void Compile();
@@ -117,7 +117,7 @@ private:
     /// Branches that need to be fixed up once the entire shader program is compiled
     std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
 
-    using CompiledShader = void(void* registers, const u8* start_addr);
+    using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
     CompiledShader* program = nullptr;
 };