From e0d6e28b67f5fe15108a885280608ea9e51135f7 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 18 Jul 2016 21:04:39 +0100
Subject: [PATCH] Implement Thumb instructions: BX, BLX (reg), B (T1), B (T2)

---
 src/backend_x64/emit_x64.cpp                  | 280 +++++++++---------
 src/frontend/decoder/thumb16.h                |  10 +-
 .../disassembler/disassembler_thumb.cpp       |  17 ++
 src/frontend/ir/ir_emitter.cpp                |  10 +-
 src/frontend/ir/ir_emitter.h                  |   2 +
 src/frontend/translate/translate_thumb.cpp    |  40 +++
 6 files changed, 217 insertions(+), 142 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index 506c0847..30b2e181 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -722,6 +722,144 @@ void EmitX64::EmitAddCycles(size_t cycles) {
     code->SUB(64, MDisp(R15, offsetof(JitState, cycles_remaining)), Imm32(static_cast<u32>(cycles)));
 }
 
+static CCFlags EmitCond(Gen::XEmitter* code, Arm::Cond cond) {
+    // Emits x64 code that tests ARM condition `cond` against the CPSR operand from MJitStateCpsr(); returns the x64 condition code that is true when `cond` passes.
+    // TODO: This code is a quick copy-paste-and-quickly-modify job from a previous JIT. Clean this up. (It writes hard-coded scratch registers: RAX/RBX/RCX/RDX/RSI, depending on cond.)
+    auto NFlag = [code](X64Reg reg){ // reg := bit 31 of CPSR (N), as 0 or 1
+        code->MOV(32, R(reg), MJitStateCpsr());
+        code->SHR(32, R(reg), Imm8(31));
+        code->AND(32, R(reg), Imm32(1));
+    };
+
+    auto ZFlag = [code](X64Reg reg){ // reg := bit 30 of CPSR (Z), as 0 or 1
+        code->MOV(32, R(reg), MJitStateCpsr());
+        code->SHR(32, R(reg), Imm8(30));
+        code->AND(32, R(reg), Imm32(1));
+    };
+
+    auto CFlag = [code](X64Reg reg){ // reg := bit 29 of CPSR (C), as 0 or 1
+        code->MOV(32, R(reg), MJitStateCpsr());
+        code->SHR(32, R(reg), Imm8(29));
+        code->AND(32, R(reg), Imm32(1));
+    };
+
+    auto VFlag = [code](X64Reg reg){ // reg := bit 28 of CPSR (V), as 0 or 1
+        code->MOV(32, R(reg), MJitStateCpsr());
+        code->SHR(32, R(reg), Imm8(28));
+        code->AND(32, R(reg), Imm32(1));
+    };
+
+    CCFlags cc; // NOTE(review): stays uninitialised if the default case's ASSERT_MSG ever returns — confirm ASSERT_MSG does not return
+
+    switch (cond) {
+        case Arm::Cond::EQ: // Z set
+            ZFlag(RAX);
+            code->CMP(8, R(RAX), Imm8(0));
+            cc = CC_NE; // flag != 0, i.e. flag set
+            break;
+        case Arm::Cond::NE: // Z clear
+            ZFlag(RAX);
+            code->CMP(8, R(RAX), Imm8(0));
+            cc = CC_E; // flag == 0, i.e. flag clear
+            break;
+        case Arm::Cond::CS: // C set
+            CFlag(RBX);
+            code->CMP(8, R(RBX), Imm8(0));
+            cc = CC_NE;
+            break;
+        case Arm::Cond::CC: // C clear
+            CFlag(RBX);
+            code->CMP(8, R(RBX), Imm8(0));
+            cc = CC_E;
+            break;
+        case Arm::Cond::MI: // N set
+            NFlag(RCX);
+            code->CMP(8, R(RCX), Imm8(0));
+            cc = CC_NE;
+            break;
+        case Arm::Cond::PL: // N clear
+            NFlag(RCX);
+            code->CMP(8, R(RCX), Imm8(0));
+            cc = CC_E;
+            break;
+        case Arm::Cond::VS: // V set
+            VFlag(RDX);
+            code->CMP(8, R(RDX), Imm8(0));
+            cc = CC_NE;
+            break;
+        case Arm::Cond::VC: // V clear
+            VFlag(RDX);
+            code->CMP(8, R(RDX), Imm8(0));
+            cc = CC_E;
+            break;
+        case Arm::Cond::HI: { // C & !Z
+            const X64Reg tmp = RSI;
+            ZFlag(RAX);
+            code->MOVZX(64, 8, tmp, R(RAX)); // tmp = Z
+            CFlag(RBX);
+            code->CMP(8, R(RBX), R(tmp)); // compare C with Z (both 0 or 1)
+            cc = CC_A; // C > Z unsigned, which only happens for C == 1 and Z == 0
+            break;
+        }
+        case Arm::Cond::LS: { // !C | Z
+            const X64Reg tmp = RSI;
+            ZFlag(RAX);
+            code->MOVZX(64, 8, tmp, R(RAX)); // tmp = Z
+            CFlag(RBX);
+            code->CMP(8, R(RBX), R(tmp)); // compare C with Z (both 0 or 1)
+            cc = CC_BE; // C <= Z unsigned: the exact complement of the HI case
+            break;
+        }
+        case Arm::Cond::GE: { // N == V
+            const X64Reg tmp = RSI;
+            VFlag(RDX);
+            code->MOVZX(64, 8, tmp, R(RDX)); // tmp = V
+            NFlag(RCX);
+            code->CMP(8, R(RCX), R(tmp)); // compare N with V
+            cc = CC_E;
+            break;
+        }
+        case Arm::Cond::LT: { // N != V
+            const X64Reg tmp = RSI;
+            VFlag(RDX);
+            code->MOVZX(64, 8, tmp, R(RDX)); // tmp = V
+            NFlag(RCX);
+            code->CMP(8, R(RCX), R(tmp)); // compare N with V
+            cc = CC_NE;
+            break;
+        }
+        case Arm::Cond::GT: { // !Z & (N == V)
+            const X64Reg tmp = RSI;
+            NFlag(RCX);
+            code->MOVZX(64, 8, tmp, R(RCX)); // tmp = N
+            VFlag(RDX);
+            code->XOR(8, R(tmp), R(RDX)); // tmp = N ^ V (1 when N != V)
+            ZFlag(RAX);
+            code->OR(8, R(tmp), R(RAX)); // tmp = (N != V) | Z
+            code->TEST(8, R(tmp), R(tmp));
+            cc = CC_Z; // tmp == 0: Z clear and N == V
+            break;
+        }
+        case Arm::Cond::LE: { // Z | (N != V)
+            X64Reg tmp = RSI;
+            NFlag(RCX);
+            code->MOVZX(64, 8, tmp, R(RCX)); // tmp = N
+            VFlag(RDX);
+            code->XOR(8, R(tmp), R(RDX)); // tmp = N ^ V (1 when N != V)
+            ZFlag(RAX);
+            code->OR(8, R(tmp), R(RAX)); // tmp = (N != V) | Z
+            code->TEST(8, R(tmp), R(tmp));
+            cc = CC_NZ; // tmp != 0: Z set or N != V
+            break;
+        }
+        default: // any other cond value is not handled by this switch
+            ASSERT_MSG(0, "Unknown cond %zu", static_cast<size_t>(cond));
+            break;
+    }
+
+    return cc;
+}
+
 void EmitX64::EmitCondPrelude(Arm::Cond cond,
                               boost::optional<Arm::LocationDescriptor> cond_failed,
                               Arm::LocationDescriptor initial_location) {
@@ -732,142 +870,10 @@ void EmitX64::EmitCondPrelude(Arm::Cond cond,
 
     ASSERT(cond_failed.is_initialized());
 
-    // TODO: This code is a quick copy-paste-and-quickly-modify job from a previous JIT. Clean this up.
-
-    auto NFlag = [this](X64Reg reg){
-        this->code->MOV(32, R(reg), MJitStateCpsr());
-        this->code->SHR(32, R(reg), Imm8(31));
-        this->code->AND(32, R(reg), Imm32(1));
-    };
-
-    auto ZFlag = [this](X64Reg reg){
-        this->code->MOV(32, R(reg), MJitStateCpsr());
-        this->code->SHR(32, R(reg), Imm8(30));
-        this->code->AND(32, R(reg), Imm32(1));
-    };
-
-    auto CFlag = [this](X64Reg reg){
-        this->code->MOV(32, R(reg), MJitStateCpsr());
-        this->code->SHR(32, R(reg), Imm8(29));
-        this->code->AND(32, R(reg), Imm32(1));
-    };
-
-    auto VFlag = [this](X64Reg reg){
-        this->code->MOV(32, R(reg), MJitStateCpsr());
-        this->code->SHR(32, R(reg), Imm8(28));
-        this->code->AND(32, R(reg), Imm32(1));
-    };
-
-    CCFlags cc;
-
-    switch (cond) {
-    case Arm::Cond::EQ: //z
-        ZFlag(RAX);
-        code->CMP(8, R(RAX), Imm8(0));
-        cc = CC_NE;
-        break;
-    case Arm::Cond::NE: //!z
-        ZFlag(RAX);
-        code->CMP(8, R(RAX), Imm8(0));
-        cc = CC_E;
-        break;
-    case Arm::Cond::CS: //c
-        CFlag(RBX);
-        code->CMP(8, R(RBX), Imm8(0));
-        cc = CC_NE;
-        break;
-    case Arm::Cond::CC: //!c
-        CFlag(RBX);
-        code->CMP(8, R(RBX), Imm8(0));
-        cc = CC_E;
-        break;
-    case Arm::Cond::MI: //n
-        NFlag(RCX);
-        code->CMP(8, R(RCX), Imm8(0));
-        cc = CC_NE;
-        break;
-    case Arm::Cond::PL: //!n
-        NFlag(RCX);
-        code->CMP(8, R(RCX), Imm8(0));
-        cc = CC_E;
-        break;
-    case Arm::Cond::VS: //v
-        VFlag(RDX);
-        code->CMP(8, R(RDX), Imm8(0));
-        cc = CC_NE;
-        break;
-    case Arm::Cond::VC: //!v
-        VFlag(RDX);
-        code->CMP(8, R(RDX), Imm8(0));
-        cc = CC_E;
-        break;
-    case Arm::Cond::HI: { //c & !z
-        const X64Reg tmp = RSI;
-        ZFlag(RAX);
-        code->MOVZX(64, 8, tmp, R(RAX));
-        CFlag(RBX);
-        code->CMP(8, R(RBX), R(tmp));
-        cc = CC_A;
-        break;
-    }
-    case Arm::Cond::LS: { //!c | z
-        const X64Reg tmp = RSI;
-        ZFlag(RAX);
-        code->MOVZX(64, 8, tmp, R(RAX));
-        CFlag(RBX);
-        code->CMP(8, R(RBX), R(tmp));
-        cc = CC_BE;
-        break;
-    }
-    case Arm::Cond::GE: { // n == v
-        const X64Reg tmp = RSI;
-        VFlag(RDX);
-        code->MOVZX(64, 8, tmp, R(RDX));
-        NFlag(RCX);
-        code->CMP(8, R(RCX), R(tmp));
-        cc = CC_E;
-        break;
-    }
-    case Arm::Cond::LT: { // n != v
-        const X64Reg tmp = RSI;
-        VFlag(RDX);
-        code->MOVZX(64, 8, tmp, R(RDX));
-        NFlag(RCX);
-        code->CMP(8, R(RCX), R(tmp));
-        cc = CC_NE;
-        break;
-    }
-    case Arm::Cond::GT: { // !z & (n == v)
-        const X64Reg tmp = RSI;
-        NFlag(RCX);
-        code->MOVZX(64, 8, tmp, R(RCX));
-        VFlag(RDX);
-        code->XOR(8, R(tmp), R(RDX));
-        ZFlag(RAX);
-        code->OR(8, R(tmp), R(RAX));
-        code->TEST(8, R(tmp), R(tmp));
-        cc = CC_Z;
-        break;
-    }
-    case Arm::Cond::LE: { // z | (n != v)
-        X64Reg tmp = RSI;
-        NFlag(RCX);
-        code->MOVZX(64, 8, tmp, R(RCX));
-        VFlag(RDX);
-        code->XOR(8, R(tmp), R(RDX));
-        ZFlag(RAX);
-        code->OR(8, R(tmp), R(RAX));
-        code->TEST(8, R(tmp), R(tmp));
-        cc = CC_NZ;
-        break;
-    }
-    default:
-        ASSERT_MSG(0, "Unknown cond %zu", static_cast<size_t>(cond));
-        break;
-    }
+    CCFlags cc = EmitCond(code, cond);
 
     // TODO: Improve, maybe.
-    auto fixup = code->J_CC(cc, true);
+    auto fixup = code->J_CC(cc);
     EmitAddCycles(1); // TODO: Proper cycle count
     EmitTerminalLinkBlock(IR::Term::LinkBlock{cond_failed.get()}, initial_location);
     code->SetJumpTarget(fixup);
@@ -932,7 +938,11 @@ void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, Arm::LocationDescript
 }
 
 void EmitX64::EmitTerminalIf(IR::Term::If terminal, Arm::LocationDescriptor initial_location) {
-    ASSERT_MSG(0, "Unimplemented");
+    CCFlags cc = EmitCond(code, terminal.if_); // cc is the host condition that holds when terminal.if_ passes
+    auto fixup = code->J_CC(cc); // condition passed: jump over the else_ code. NOTE(review): relies on J_CC's default for its optional second argument — confirm the resulting jump can span the else_ terminal emitted below
+    EmitTerminal(terminal.else_, initial_location); // emitted inline right after the jump: reached when the condition fails
+    code->SetJumpTarget(fixup); // the conditional jump lands here
+    EmitTerminal(terminal.then_, initial_location); // condition-passed path
 }
 
 void EmitX64::ClearCache() {
diff --git a/src/frontend/decoder/thumb16.h b/src/frontend/decoder/thumb16.h
index 76d0385d..a0f1dbe9 100644
--- a/src/frontend/decoder/thumb16.h
+++ b/src/frontend/decoder/thumb16.h
@@ -141,12 +141,12 @@ boost::optional<const Thumb16Matcher<V>&> DecodeThumb16(u16 instruction) {
         INST(&V::thumb16_LDMIA,          "LDMIA",                    "11001nnnxxxxxxxx"),
 
         // Branch instructions
-        //INST(&V::thumb16_BX,             "BX (reg)",                 "010001110mmmm000"), // v4T
-        //INST(&V::thumb16_BLX,            "BLX (reg)",                "010001111mmmm000"), // v5T
+        INST(&V::thumb16_BX,             "BX",                       "010001110mmmm000"), // v4T
+        INST(&V::thumb16_BLX_reg,        "BLX (reg)",                "010001111mmmm000"), // v5T
         INST(&V::thumb16_UDF,            "UDF",                      "11011110--------"),
         INST(&V::thumb16_SVC,            "SVC",                      "11011111xxxxxxxx"),
-        //INST(&V::thumb16_B_cond,         "B (cond)",                 "1101ccccxxxxxxxx"),
-        //INST(&V::thumb16_B_imm,          "B (imm)",                  "11100xxxxxxxxxxx"),
+        INST(&V::thumb16_B_t1,           "B (T1)",                   "1101ccccvvvvvvvv"),
+        INST(&V::thumb16_B_t2,           "B (T2)",                   "11100vvvvvvvvvvv"),
         //INST(&V::thumb16_BLX_suffix,     "BLX (imm, suffix)",        "11101xxxxxxxxxx0"),
         //INST(&V::thumb16_BLX_prefix,     "BL/BLX (imm, prefix)",     "11110xxxxxxxxxxx"),
         //INST(&V::thumb16_BL_suffix,      "BL (imm, suffix)",         "11111xxxxxxxxxxx"),
@@ -157,8 +157,6 @@ boost::optional<const Thumb16Matcher<V>&> DecodeThumb16(u16 instruction) {
 
     const auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); };
 
-    assert(std::count_if(table.begin(), table.end(), matches_instruction) <= 1);
-
     auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
     return iter != table.end() ? boost::make_optional<const Thumb16Matcher<V>&>(*iter) : boost::none;
 }
diff --git a/src/frontend/disassembler/disassembler_thumb.cpp b/src/frontend/disassembler/disassembler_thumb.cpp
index 8a55a722..fad47611 100644
--- a/src/frontend/disassembler/disassembler_thumb.cpp
+++ b/src/frontend/disassembler/disassembler_thumb.cpp
@@ -380,6 +380,13 @@ public:
         return Common::StringFromFormat("ldm %s%s, %s", RegStr(n), write_back ? "!" : "", RegListStr(reg_list).c_str());
     }
 
+    std::string thumb16_BX(Reg m) { // Formats the Thumb BX instruction as "bx <Rm>".
+        return Common::StringFromFormat("bx %s", RegStr(m));
+    }
+
+    std::string thumb16_BLX_reg(Reg m) { // Formats the Thumb BLX (register) instruction as "blx <Rm>".
+        return Common::StringFromFormat("blx %s", RegStr(m));
+    }
 
     std::string thumb16_UDF() {
         return Common::StringFromFormat("udf");
@@ -388,6 +395,16 @@ public:
     std::string thumb16_SVC(Imm8 imm8) {
         return Common::StringFromFormat("svc #%u", imm8);
     }
+
+    std::string thumb16_B_t1(Cond cond, Imm8 imm8) { // Formats the conditional branch as "b<cond> <sign>#<magnitude>".
+        s32 imm32 = Common::SignExtend<9, s32>(imm8 << 1) + 4; // imm8:'0' sign-extended from 9 bits; the +4 is presumably the Thumb PC read-ahead, so the printed offset is relative to this instruction's own address — confirm
+        return Common::StringFromFormat("b%s %s#%u", CondStr(cond), SignStr(imm32), abs(imm32)); // sign and magnitude are printed separately
+    }
+
+    std::string thumb16_B_t2(Imm11 imm11) { // Formats the unconditional branch as "b <sign>#<magnitude>".
+        s32 imm32 = Common::SignExtend<12, s32>(imm11 << 1) + 4; // imm11:'0' sign-extended from 12 bits; the +4 is presumably the Thumb PC read-ahead, so the printed offset is relative to this instruction's own address — confirm
+        return Common::StringFromFormat("b %s#%u", SignStr(imm32), abs(imm32)); // sign and magnitude are printed separately
+    }
 };
 
 std::string DisassembleThumb16(u16 instruction) {
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 7bf8edad..0ef1c5cc 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -57,6 +57,10 @@ void IREmitter::SetRegister(const Reg reg, IR::ValuePtr value) {
 void IREmitter::ALUWritePC(IR::ValuePtr value) {
     // This behaviour is ARM version-dependent.
     // The below implementation is for ARMv6k
+    BranchWritePC(value); // delegates to BranchWritePC (not BXWritePC) for PC writes coming from ALU instructions
+}
+
+void IREmitter::BranchWritePC(IR::ValuePtr value) {
     if (!current_location.TFlag) {
         auto new_pc = And(value, Imm32(0xFFFFFFFC));
         Inst(IR::Opcode::SetRegister, { RegRef(Reg::PC), new_pc });
@@ -66,10 +70,14 @@ void IREmitter::ALUWritePC(IR::ValuePtr value) {
     }
 }
 
+void IREmitter::BXWritePC(IR::ValuePtr value) { // Emits a single BXWritePC IR instruction taking `value` as its only argument.
+    Inst(IR::Opcode::BXWritePC, {value});
+}
+
 void IREmitter::LoadWritePC(IR::ValuePtr value) {
     // This behaviour is ARM version-dependent.
     // The below implementation is for ARMv6k
-    Inst(IR::Opcode::BXWritePC, {value});
+    BXWritePC(value); // emits the BXWritePC IR opcode through the shared helper
 }
 
 void IREmitter::CallSupervisor(IR::ValuePtr value) {
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 7e3874c6..16942c6c 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -43,6 +43,8 @@ public:
     void SetRegister(const Reg dest_reg, IR::ValuePtr value);
 
     void ALUWritePC(IR::ValuePtr value);
+    void BranchWritePC(IR::ValuePtr value); // Writes `value` to the PC as a plain branch target; when the current location is not Thumb, the low two bits are cleared first.
+    void BXWritePC(IR::ValuePtr value); // Emits the BXWritePC IR opcode with `value` as its argument.
     void LoadWritePC(IR::ValuePtr value);
     void CallSupervisor(IR::ValuePtr value);
 
diff --git a/src/frontend/translate/translate_thumb.cpp b/src/frontend/translate/translate_thumb.cpp
index 98cd1ef2..91f4a0e5 100644
--- a/src/frontend/translate/translate_thumb.cpp
+++ b/src/frontend/translate/translate_thumb.cpp
@@ -740,10 +740,50 @@ struct ThumbTranslatorVisitor final {
         return InterpretThisInstruction();
     }
 
+    bool thumb16_BX(Reg m) {
+        // BX <Rm>
+        ir.BXWritePC(ir.GetRegister(m)); // the branch target is the current value of Rm, written through the BXWritePC IR opcode
+        ir.SetTerm(IR::Term::ReturnToDispatch{}); // target is only known at run time, so the block ends by returning to the dispatcher
+        return false; // presumably tells the translate loop to stop after this instruction — confirm against the caller
+    }
+
+    bool thumb16_BLX_reg(Reg m) {
+        // BLX <Rm>: branch to the address held in Rm and leave the return address (next instruction, Thumb bit set) in LR.
+        ir.BXWritePC(ir.GetRegister(m)); // Rm must be read before LR is overwritten below, otherwise "blx lr" would branch to the new return address
+        ir.SetRegister(Reg::LR, ir.Imm32((ir.current_location.arm_pc + 2) | 1)); // +2: address after this 16-bit instruction; |1: marks the return address as Thumb
+        // TODO(optimization): Possible push RSB location
+        ir.SetTerm(IR::Term::ReturnToDispatch{}); // target is only known at run time, so the block ends by returning to the dispatcher
+        return false; // presumably tells the translate loop to stop after this instruction — confirm against the caller
+    }
+
     bool thumb16_SVC(Imm8 imm8) {
         u32 imm32 = imm8;
         // SVC #<imm8>
         ir.CallSupervisor(ir.Imm32(imm32));
+        ir.SetTerm(IR::Term::ReturnToDispatch{}); // the block ends at the supervisor call and returns to the dispatcher
+        return false; // presumably tells the translate loop to stop after this instruction — confirm against the caller
+    }
+
+    bool thumb16_B_t1(Cond cond, Imm8 imm8) {
+        s32 imm32 = Common::SignExtend<9, s32>(imm8 << 1) + 4; // imm8:'0' sign-extended from 9 bits, plus 4; added to this instruction's arm_pc below
+        if (cond == Cond::AL) { // the AL condition is not translated as a branch here
+            return thumb16_UDF(); // handled exactly like UDF
+        }
+        // B<cond> <label>
+        auto then_location = ir.current_location; // destination when the condition passes
+        then_location.arm_pc += imm32;
+        auto else_location = ir.current_location; // destination when the condition fails: the following 16-bit instruction
+        else_location.arm_pc += 2;
+        ir.SetTerm(IR::Term::If{cond, IR::Term::LinkBlock{then_location}, IR::Term::LinkBlock{else_location}}); // both outcomes are static locations, so each side is a LinkBlock terminal
+        return false; // presumably tells the translate loop to stop after this instruction — confirm against the caller
+    }
+
+    bool thumb16_B_t2(Imm11 imm11) {
+        s32 imm32 = Common::SignExtend<12, s32>(imm11 << 1) + 4; // imm11:'0' sign-extended from 12 bits, plus 4; added to this instruction's arm_pc below
+        // B <label>
+        auto next_location = ir.current_location; // start from this instruction's location and move only the PC
+        next_location.arm_pc += imm32;
+        ir.SetTerm(IR::Term::LinkBlock{next_location}); // unconditional branch to a static location: a LinkBlock terminal
         return false;
     }
 };