From 477d616f7df3b609afcb67d69d9570098cc00029 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 28 Jan 2019 18:11:23 -0300
Subject: [PATCH] shader_ir: Unify constant buffer offset values

Constant buffer values on the shader IR were using different offset
units depending on whether the access was direct or indirect. cbuf34
encodes a non-multiplied offset while cbuf36 encodes an already
multiplied one. On shader decoding, this commit multiplies the offset
by four on cbuf34 queries.
---
 src/video_core/engines/shader_bytecode.h                | 8 ++++++++
 src/video_core/renderer_opengl/gl_rasterizer.cpp        | 2 +-
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 5 +++--
 src/video_core/shader/decode/arithmetic.cpp             | 2 +-
 src/video_core/shader/decode/arithmetic_half.cpp        | 2 +-
 src/video_core/shader/decode/arithmetic_integer.cpp     | 2 +-
 src/video_core/shader/decode/conversion.cpp             | 6 +++---
 src/video_core/shader/decode/ffma.cpp                   | 4 ++--
 src/video_core/shader/decode/float_set.cpp              | 2 +-
 src/video_core/shader/decode/float_set_predicate.cpp    | 2 +-
 src/video_core/shader/decode/hfma2.cpp                  | 7 ++++---
 src/video_core/shader/decode/integer_set.cpp            | 2 +-
 src/video_core/shader/decode/integer_set_predicate.cpp  | 2 +-
 src/video_core/shader/decode/memory.cpp                 | 6 +++---
 src/video_core/shader/decode/shift.cpp                  | 2 +-
 src/video_core/shader/decode/xmad.cpp                   | 5 +++--
 src/video_core/shader/shader_ir.h                       | 2 +-
 17 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 9989825f8..713b01c9f 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1248,11 +1248,19 @@ union Instruction {
     union {
         BitField<20, 14, u64> offset;
         BitField<34, 5, u64> index;
+
+        u64 GetOffset() const {
+            return offset * 4;
+        }
     } cbuf34;
 
     union {
         BitField<20, 16, s64> offset;
         BitField<36, 5, u64> index;
+
+        s64 GetOffset() const {
+            return offset;
+        }
     } cbuf36;
 
     // Unsure about the size of this one.
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ee313cb2f..aed6843d4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -957,7 +957,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
             }
         } else {
             // Buffer is accessed directly, upload just what we use
-            size = used_buffer.GetSize() * sizeof(float);
+            size = used_buffer.GetSize();
         }
 
         // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 004245431..36035d0d2 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -543,8 +543,9 @@ private:
             if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
                 // Direct access
                 const u32 offset_imm = immediate->GetValue();
-                return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), offset_imm / 4,
-                                   offset_imm % 4);
+                ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
+                return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+                                   offset_imm / (4 * 4), (offset_imm / 4) % 4);
 
             } else if (std::holds_alternative<OperationNode>(*offset)) {
                 // Indirect access
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index e7847f614..51b8d55d4 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         } else if (instr.is_b_gpr) {
             return GetRegister(instr.gpr20);
         } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         }
     }();
 
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index a237dcb92..37eef2bf2 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -35,7 +35,7 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 p
         switch (opcode->get().GetId()) {
         case OpCode::Id::HADD2_C:
         case OpCode::Id::HMUL2_C:
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         case OpCode::Id::HADD2_R:
         case OpCode::Id::HMUL2_R:
             return GetRegister(instr.gpr20);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 4a8cc1a1c..cc9a76a19 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -26,7 +26,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
         } else if (instr.is_b_gpr) {
             return GetRegister(instr.gpr20);
         } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         }
     }();
 
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index ee18d3a99..728a393a1 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
             if (instr.is_b_gpr) {
                 return GetRegister(instr.gpr20);
             } else {
-                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
             }
         }();
         const bool input_signed = instr.conversion.is_input_signed;
@@ -72,7 +72,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
             if (instr.is_b_gpr) {
                 return GetRegister(instr.gpr20);
             } else {
-                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
             }
         }();
 
@@ -110,7 +110,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
             if (instr.is_b_gpr) {
                 return GetRegister(instr.gpr20);
             } else {
-                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
             }
         }();
 
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index be8dc2230..52f39d3ff 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -27,14 +27,14 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
         switch (opcode->get().GetId()) {
         case OpCode::Id::FFMA_CR: {
-            return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+            return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                     GetRegister(instr.gpr39)};
         }
         case OpCode::Id::FFMA_RR:
             return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
         case OpCode::Id::FFMA_RC: {
             return {GetRegister(instr.gpr39),
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
         }
         case OpCode::Id::FFMA_IMM:
             return {GetImmediate19(instr), GetRegister(instr.gpr39)};
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index ba846f1bd..9f9da2278 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         } else if (instr.is_b_gpr) {
             return GetRegister(instr.gpr20);
         } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         }
     }();
 
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index e88b04d18..dd3aef6f2 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u3
         } else if (instr.is_b_gpr) {
             return GetRegister(instr.gpr20);
         } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         }
     }();
     op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 4a6b945f9..43a0a9e10 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -39,13 +39,14 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
             neg_b = instr.hfma2.negate_b;
             neg_c = instr.hfma2.negate_c;
             return {instr.hfma2.saturate, instr.hfma2.type_b,
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
-                    GetRegister(instr.gpr39)};
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
         case OpCode::Id::HFMA2_RC:
             neg_b = instr.hfma2.negate_b;
             neg_c = instr.hfma2.negate_c;
             return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
-                    instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+                    instr.hfma2.type_b,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
         case OpCode::Id::HFMA2_RR:
             neg_b = instr.hfma2.rr.negate_b;
             neg_c = instr.hfma2.rr.negate_c;
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 85e67b03b..16eb3985f 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         } else if (instr.is_b_gpr) {
             return GetRegister(instr.gpr20);
         } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         }
     }();
 
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index c8b105a08..daf97174b 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code,
         } else if (instr.is_b_gpr) {
             return GetRegister(instr.gpr20);
         } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         }
     }();
 
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 04cb386b7..4d075f088 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -80,7 +80,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         Node index = GetRegister(instr.gpr8);
 
         const Node op_a =
-            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index);
+            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
 
         switch (instr.ld_c.type.Value()) {
         case Tegra::Shader::UniformType::Single:
@@ -89,7 +89,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
 
         case Tegra::Shader::UniformType::Double: {
             const Node op_b =
-                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);
+                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
 
             SetTemporal(bb, 0, op_a);
             SetTemporal(bb, 1, op_b);
@@ -142,7 +142,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         ASSERT(cbuf != nullptr);
         const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
         ASSERT(cbuf_offset_imm != nullptr);
-        const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
+        const auto cbuf_offset = cbuf_offset_imm->GetValue();
 
         bb.push_back(Comment(
             fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 85026bb37..6623f8ff9 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         } else if (instr.is_b_gpr) {
             return GetRegister(instr.gpr20);
         } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         }
     }();
 
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 0cd9cd1cc..9cb864500 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -32,13 +32,14 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
         switch (opcode->get().GetId()) {
         case OpCode::Id::XMAD_CR:
-            return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+            return {instr.xmad.merge_56,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                     GetRegister(instr.gpr39)};
         case OpCode::Id::XMAD_RR:
             return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
         case OpCode::Id::XMAD_RC:
             return {false, GetRegister(instr.gpr39),
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
         case OpCode::Id::XMAD_IMM:
             return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                     GetRegister(instr.gpr39)};
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index c4ecb2e3c..6e42e3dfb 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -249,7 +249,7 @@ public:
     }
 
     u32 GetSize() const {
-        return max_offset + 1;
+        return max_offset + sizeof(float);
     }
 
 private: