From 60523113a9301e16bae91af61063bd8833926e8c Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Sat, 27 Dec 2014 17:06:19 -0500
Subject: [PATCH] armemu: Implement UQADD8, UQADD16, UQSUB16, UQASX, and UQSAX

---
 src/core/arm/interpreter/armemu.cpp  | 65 ++++++++++++++++++++--------
 src/core/arm/interpreter/armsupp.cpp | 41 ++++++++++++++++++
 src/core/arm/skyeye_common/armemu.h  |  4 ++
 3 files changed, 92 insertions(+), 18 deletions(-)

diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp
index 9b680c1e2..5d26456c7 100644
--- a/src/core/arm/interpreter/armemu.cpp
+++ b/src/core/arm/interpreter/armemu.cpp
@@ -6117,26 +6117,55 @@ L_stm_s_takeabort:
         }
             printf("Unhandled v6 insn: uasx/usax\n");
             break;
-        case 0x66:
-			if ((instr & 0x0FF00FF0) == 0x06600FF0) { //uqsub8
-                u32 rd = (instr >> 12) & 0xF;
-                u32 rm = (instr >> 16) & 0xF;
-                u32 rn = (instr >> 0) & 0xF;
-                u32 subfrom = state->Reg[rm];
-                u32 tosub = state->Reg[rn];
+        case 0x66: // UQADD16, UQASX, UQSAX, UQSUB16, UQADD8, and UQSUB8
+            {
+                const u8 rd_idx = BITS(12, 15);
+                const u8 rm_idx = BITS(0, 3);
+                const u8 rn_idx = BITS(16, 19);
+                const u8 op2    = BITS(5, 7);
+                const u32 rm_val = state->Reg[rm_idx];
+                const u32 rn_val = state->Reg[rn_idx];
 
-                u8 b1 = (u8)((u8)(subfrom)-(u8)(tosub));
-                if (b1 > (u8)(subfrom)) b1 = 0;
-                u8 b2 = (u8)((u8)(subfrom >> 8) - (u8)(tosub >> 8));
-                if (b2 > (u8)(subfrom >> 8)) b2 = 0;
-                u8 b3 = (u8)((u8)(subfrom >> 16) - (u8)(tosub >> 16));
-                if (b3 > (u8)(subfrom >> 16)) b3 = 0;
-                u8 b4 = (u8)((u8)(subfrom >> 24) - (u8)(tosub >> 24));
-                if (b4 > (u8)(subfrom >> 24)) b4 = 0;
-                state->Reg[rd] = (u32)(b1 | b2 << 8 | b3 << 16 | b4 << 24);
+                u16 lo_val = 0; // result halfword stored in bits 0-15 of Rd
+                u16 hi_val = 0; // result halfword stored in bits 16-31 of Rd
+
+                // UQADD16: saturating add of matching halfwords (lo = Rn.lo + Rm.lo, hi = Rn.hi + Rm.hi)
+                if (op2 == 0x00) {
+                    lo_val = ARMul_UnsignedSaturatedAdd16(rn_val & 0xFFFF, rm_val & 0xFFFF);
+                    hi_val = ARMul_UnsignedSaturatedAdd16((rn_val >> 16) & 0xFFFF, (rm_val >> 16) & 0xFFFF);
+                }
+                // UQASX: halfwords of Rm are exchanged (lo = Rn.lo - Rm.hi, hi = Rn.hi + Rm.lo), each saturated
+                else if (op2 == 0x01) {
+                    lo_val = ARMul_UnsignedSaturatedSub16(rn_val & 0xFFFF, (rm_val >> 16) & 0xFFFF);
+                    hi_val = ARMul_UnsignedSaturatedAdd16((rn_val >> 16) & 0xFFFF, rm_val & 0xFFFF);
+                }
+                // UQSAX: halfwords of Rm are exchanged (lo = Rn.lo + Rm.hi, hi = Rn.hi - Rm.lo), each saturated
+                else if (op2 == 0x02) {
+                    lo_val = ARMul_UnsignedSaturatedAdd16(rn_val & 0xFFFF, (rm_val >> 16) & 0xFFFF);
+                    hi_val = ARMul_UnsignedSaturatedSub16((rn_val >> 16) & 0xFFFF, rm_val & 0xFFFF);
+                }
+                // UQSUB16: saturating subtract of matching halfwords (lo = Rn.lo - Rm.lo, hi = Rn.hi - Rm.hi)
+                else if (op2 == 0x03) {
+                    lo_val = ARMul_UnsignedSaturatedSub16(rn_val & 0xFFFF, rm_val & 0xFFFF);
+                    hi_val = ARMul_UnsignedSaturatedSub16((rn_val >> 16) & 0xFFFF, (rm_val >> 16) & 0xFFFF);
+                }
+                // UQADD8: saturating add per byte lane; the u8 parameters truncate each shifted operand to one byte
+                else if (op2 == 0x04) {
+                    lo_val = ARMul_UnsignedSaturatedAdd8(rn_val, rm_val) |
+                             ARMul_UnsignedSaturatedAdd8(rn_val >> 8,  rm_val >> 8) << 8;
+                    hi_val = ARMul_UnsignedSaturatedAdd8(rn_val >> 16, rm_val >> 16) |
+                             ARMul_UnsignedSaturatedAdd8(rn_val >> 24, rm_val >> 24) << 8;
+                }
+                // UQSUB8: saturating subtract per byte lane. NOTE(review): this else takes every op2 other than 0-4 -- confirm only 7 can reach here
+                else {
+                    lo_val = ARMul_UnsignedSaturatedSub8(rn_val, rm_val) |
+                             ARMul_UnsignedSaturatedSub8(rn_val >> 8,  rm_val >> 8) << 8;
+                    hi_val = ARMul_UnsignedSaturatedSub8(rn_val >> 16, rm_val >> 16) |
+                             ARMul_UnsignedSaturatedSub8(rn_val >> 24, rm_val >> 24) << 8;
+                }
+
+                state->Reg[rd_idx] = ((lo_val & 0xFFFF) | hi_val << 16); // pack both halfwords into Rd
                 return 1;
-            } else {
-                printf ("Unhandled v6 insn: uqsub16\n");
             }
             break;
         case 0x67: // UHADD16, UHASX, UHSAX, UHSUB16, UHADD8, and UHSUB8.
diff --git a/src/core/arm/interpreter/armsupp.cpp b/src/core/arm/interpreter/armsupp.cpp
index 6774f8a74..186b1bd73 100644
--- a/src/core/arm/interpreter/armsupp.cpp
+++ b/src/core/arm/interpreter/armsupp.cpp
@@ -469,6 +469,47 @@ ARMul_SubOverflow (ARMul_State * state, ARMword a, ARMword b, ARMword result)
     ASSIGNV (SubOverflow (a, b, result));
 }
 
+/* 8-bit unsigned saturating addition: a sum above 0xFF is clamped to
+   0xFF instead of wrapping around. */
+u8 ARMul_UnsignedSaturatedAdd8(u8 left, u8 right)
+{
+    /* Add in a wider type so the carry out of bit 7 stays visible. */
+    const unsigned int sum = (unsigned int)left + (unsigned int)right;
+    const unsigned int limit = 0xFF;
+
+    return (u8)(sum > limit ? limit : sum);
+}
+
+/* 16-bit unsigned saturating addition: a sum above 0xFFFF is clamped to
+   0xFFFF instead of wrapping around. */
+u16 ARMul_UnsignedSaturatedAdd16(u16 left, u16 right)
+{
+    /* Add in a wider type so the carry out of bit 15 stays visible. */
+    const unsigned long sum = (unsigned long)left + (unsigned long)right;
+    const unsigned long limit = 0xFFFF;
+
+    return (u16)(sum > limit ? limit : sum);
+}
+
+/* 8-bit unsigned saturating subtraction: a difference that would fall
+   below zero is clamped to 0 instead of wrapping around. */
+u8 ARMul_UnsignedSaturatedSub8(u8 left, u8 right)
+{
+    const int diff = (int)left - (int)right;
+
+    return (u8)(diff > 0 ? diff : 0);
+}
+
+/* 16-bit unsigned saturating subtraction: a difference that would fall
+   below zero is clamped to 0 instead of wrapping around. */
+u16 ARMul_UnsignedSaturatedSub16(u16 left, u16 right)
+{
+    const long diff = (long)left - (long)right;
+
+    return (u16)(diff > 0 ? diff : 0);
+}
+
+
 /* This function does the work of generating the addresses used in an
    LDC instruction.  The code here is always post-indexed, it's up to the
    caller to get the input address correct and to handle base register
diff --git a/src/core/arm/skyeye_common/armemu.h b/src/core/arm/skyeye_common/armemu.h
index 3ea14b5a3..0b87dd39c 100644
--- a/src/core/arm/skyeye_common/armemu.h
+++ b/src/core/arm/skyeye_common/armemu.h
@@ -603,6 +603,10 @@ extern void ARMul_MSRCpsr (ARMul_State *, ARMword, ARMword);
 extern void ARMul_SubOverflow (ARMul_State *, ARMword, ARMword, ARMword);
 extern void ARMul_AddOverflow (ARMul_State *, ARMword, ARMword, ARMword);
 extern void ARMul_AddOverflowQ(ARMul_State*, ARMword, ARMword);
+extern u8 ARMul_UnsignedSaturatedAdd8(u8, u8);     /* sum clamped to 0xFF on overflow */
+extern u16 ARMul_UnsignedSaturatedAdd16(u16, u16); /* sum clamped to 0xFFFF on overflow */
+extern u8 ARMul_UnsignedSaturatedSub8(u8, u8);     /* difference clamped to 0 on underflow */
+extern u16 ARMul_UnsignedSaturatedSub16(u16, u16); /* difference clamped to 0 on underflow */
 extern void ARMul_SubCarry (ARMul_State *, ARMword, ARMword, ARMword);
 extern void ARMul_AddCarry (ARMul_State *, ARMword, ARMword, ARMword);
 extern tdstate ARMul_ThumbDecode (ARMul_State *, ARMword, ARMword, ARMword *);