From 5711e6241926561648ba32494557c1af075d48b0 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Thu, 7 Jul 2016 17:53:09 +0800
Subject: [PATCH] Implement terminal instructions

---
 src/backend_x64/emit_x64.cpp          | 100 +++++++++++++++++++++-----
 src/backend_x64/emit_x64.h            |  14 +++-
 src/backend_x64/interface_x64.cpp     |   4 +-
 src/frontend/decoder/thumb1.h         |   2 +-
 src/frontend/ir/ir.h                  |  79 ++++++++++++++++++++
 src/frontend/ir_emitter.cpp           |   8 ++-
 src/frontend/ir_emitter.h             |   2 +
 src/frontend/translate_thumb.cpp      |  14 ++--
 tests/CMakeLists.txt                  |   2 +-
 tests/arm/fuzz_thumb.cpp              |  44 +++++++++++-
 tests/arm/test_thumb_instructions.cpp |  36 ++++++++--
 11 files changed, 269 insertions(+), 36 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index 4b997e7c..369c8d3b 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -8,6 +8,7 @@
 #include <unordered_map>
 
 #include "backend_x64/emit_x64.h"
+#include "common/x64/abi.h"
 #include "common/x64/emitter.h"
 #include "frontend/arm_types.h"
 
@@ -20,6 +21,14 @@ using namespace Gen;
 namespace Dynarmic {
 namespace BackendX64 {
 
+static OpArg MJitStateReg(Arm::Reg reg) {
+    return MDisp(R15, offsetof(JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg));
+}
+
+static OpArg MJitStateCpsr() {
+    return MDisp(R15, offsetof(JitState, Cpsr));
+}
+
 // Mapping from opcode to Emit* member function.
 const static std::map<IR::Opcode, void (EmitX64::*)(IR::Value*)> emit_fns {
 #define OPCODE(name, type, ...) { IR::Opcode::name, &EmitX64::Emit##name },
@@ -51,7 +60,7 @@ CodePtr EmitX64::Emit(Arm::LocationDescriptor descriptor, Dynarmic::IR::Block bl
     }
 
     EmitAddCycles(block.cycle_count);
-    EmitReturnToDispatch();
+    EmitTerminal(block.terminal, block.location);
 
     return code_ptr;
 }
@@ -86,7 +95,7 @@ void EmitX64::EmitGetRegister(IR::Value* value_) {
 
     X64Reg result = reg_alloc.DefRegister(value);
 
-    code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Reg) + static_cast<size_t>(regref->value) * sizeof(u32)));
+    code->MOV(32, R(result), MJitStateReg(regref->value));
 }
 
 void EmitX64::EmitSetRegister(IR::Value* value_) {
@@ -95,7 +104,7 @@ void EmitX64::EmitSetRegister(IR::Value* value_) {
 
     X64Reg to_store = reg_alloc.UseRegister(value->GetArg(1).get());
 
-    code->MOV(32, MDisp(R15, offsetof(JitState, Reg) + static_cast<size_t>(regref->value) * sizeof(u32)), R(to_store));
+    code->MOV(32, MJitStateReg(regref->value), R(to_store));
 }
 
 void EmitX64::EmitGetNFlag(IR::Value* value_) {
@@ -105,7 +114,7 @@ void EmitX64::EmitGetNFlag(IR::Value* value_) {
 
     // TODO: Flag optimization
 
-    code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Cpsr)));
+    code->MOV(32, R(result), MJitStateCpsr());
     code->SHR(32, R(result), Imm8(31));
 }
 
@@ -117,8 +126,8 @@ void EmitX64::EmitSetNFlag(IR::Value* value_) {
     // TODO: Flag optimization
 
     code->SHL(32, R(to_store), Imm8(31));
-    code->AND(32, MDisp(R15, offsetof(JitState, Cpsr)), Imm32(~static_cast<u32>(1 << 31)));
-    code->OR(32, MDisp(R15, offsetof(JitState, Cpsr)), R(to_store));
+    code->AND(32, MJitStateCpsr(), Imm32(~static_cast<u32>(1 << 31)));
+    code->OR(32, MJitStateCpsr(), R(to_store));
 }
 
 void EmitX64::EmitGetZFlag(IR::Value* value_) {
@@ -128,7 +137,7 @@ void EmitX64::EmitGetZFlag(IR::Value* value_) {
 
     // TODO: Flag optimization
 
-    code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Cpsr)));
+    code->MOV(32, R(result), MJitStateCpsr());
     code->SHR(32, R(result), Imm8(30));
     code->AND(32, R(result), Imm32(1));
 }
@@ -141,8 +150,8 @@ void EmitX64::EmitSetZFlag(IR::Value* value_) {
     // TODO: Flag optimization
 
     code->SHL(32, R(to_store), Imm8(30));
-    code->AND(32, MDisp(R15, offsetof(JitState, Cpsr)), Imm32(~static_cast<u32>(1 << 30)));
-    code->OR(32, MDisp(R15, offsetof(JitState, Cpsr)), R(to_store));
+    code->AND(32, MJitStateCpsr(), Imm32(~static_cast<u32>(1 << 30)));
+    code->OR(32, MJitStateCpsr(), R(to_store));
 }
 
 void EmitX64::EmitGetCFlag(IR::Value* value_) {
@@ -152,7 +161,7 @@ void EmitX64::EmitGetCFlag(IR::Value* value_) {
 
     // TODO: Flag optimization
 
-    code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Cpsr)));
+    code->MOV(32, R(result), MJitStateCpsr());
     code->SHR(32, R(result), Imm8(29));
     code->AND(32, R(result), Imm32(1));
 }
@@ -165,8 +174,8 @@ void EmitX64::EmitSetCFlag(IR::Value* value_) {
     // TODO: Flag optimization
 
     code->SHL(32, R(to_store), Imm8(29));
-    code->AND(32, MDisp(R15, offsetof(JitState, Cpsr)), Imm32(~static_cast<u32>(1 << 29)));
-    code->OR(32, MDisp(R15, offsetof(JitState, Cpsr)), R(to_store));
+    code->AND(32, MJitStateCpsr(), Imm32(~static_cast<u32>(1 << 29)));
+    code->OR(32, MJitStateCpsr(), R(to_store));
 }
 
 void EmitX64::EmitGetVFlag(IR::Value* value_) {
@@ -176,7 +185,7 @@ void EmitX64::EmitGetVFlag(IR::Value* value_) {
 
     // TODO: Flag optimization
 
-    code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Cpsr)));
+    code->MOV(32, R(result), MJitStateCpsr());
     code->SHR(32, R(result), Imm8(28));
     code->AND(32, R(result), Imm32(1));
 }
@@ -189,8 +198,8 @@ void EmitX64::EmitSetVFlag(IR::Value* value_) {
     // TODO: Flag optimization
 
     code->SHL(32, R(to_store), Imm8(28));
-    code->AND(32, MDisp(R15, offsetof(JitState, Cpsr)), Imm32(~static_cast<u32>(1 << 28)));
-    code->OR(32, MDisp(R15, offsetof(JitState, Cpsr)), R(to_store));
+    code->AND(32, MJitStateCpsr(), Imm32(~static_cast<u32>(1 << 28)));
+    code->OR(32, MJitStateCpsr(), R(to_store));
 }
 
 void EmitX64::EmitGetCarryFromOp(IR::Value*) {
@@ -385,14 +394,69 @@ void EmitX64::EmitArithmeticShiftRight(IR::Value* value_) {
 
 void EmitX64::EmitAddCycles(size_t cycles) {
     ASSERT(cycles < std::numeric_limits<u32>::max());
-    code->SUB(64, MDisp(R15, offsetof(JitState, cycles_remaining)), Imm32(cycles));
+    code->SUB(64, MDisp(R15, offsetof(JitState, cycles_remaining)), Imm32(static_cast<u32>(cycles)));
 }
 
-void EmitX64::EmitReturnToDispatch() {
-    // TODO: Update cycle counts
+/**
+ * Emits host code for the terminal instruction that ends a basic block.
+ * Dispatch is keyed on the alternative type held by the variant rather than on its
+ * numeric index, so it does not depend on the declaration order inside IR::Terminal.
+ * Term::Invalid (no terminal was ever set) matches nothing and reaches the final assert.
+ * @param terminal          The terminal to emit.
+ * @param initial_location  Location descriptor of the block being emitted.
+ */
+void EmitX64::EmitTerminal(IR::Terminal terminal, Arm::LocationDescriptor initial_location) {
+    if (auto* interpret = boost::get<IR::Term::Interpret>(&terminal)) {
+        EmitTerminalInterpret(*interpret, initial_location);
+    } else if (auto* return_to_dispatch = boost::get<IR::Term::ReturnToDispatch>(&terminal)) {
+        EmitTerminalReturnToDispatch(*return_to_dispatch, initial_location);
+    } else if (auto* link_block = boost::get<IR::Term::LinkBlock>(&terminal)) {
+        EmitTerminalLinkBlock(*link_block, initial_location);
+    } else if (auto* link_block_fast = boost::get<IR::Term::LinkBlockFast>(&terminal)) {
+        EmitTerminalLinkBlockFast(*link_block_fast, initial_location);
+    } else if (auto* pop_rsb_hint = boost::get<IR::Term::PopRSBHint>(&terminal)) {
+        EmitTerminalPopRSBHint(*pop_rsb_hint, initial_location);
+    } else if (auto* if_ = boost::get<IR::Term::If>(&terminal)) {
+        EmitTerminalIf(*if_, initial_location);
+    } else {
+        ASSERT_MSG(0, "Invalid Terminal. Bad programmer.");
+    }
+}
 
+void EmitX64::EmitTerminalInterpret(IR::Term::Interpret terminal, Arm::LocationDescriptor initial_location) {
+    ASSERT_MSG(terminal.next.TFlag == initial_location.TFlag, "Unimplemented"); // changing TFlag across the terminal is not handled yet
+    ASSERT_MSG(terminal.next.EFlag == initial_location.EFlag, "Unimplemented"); // changing EFlag across the terminal is not handled yet
+    // Emit: store next.arm_pc into the JitState PC slot, call cb.InterpreterFallback, then jump to the RunCode return address.
+    code->MOV(64, R(ABI_PARAM1), Imm64(terminal.next.arm_pc)); // presumably the callback's first (pc) argument - confirm against abi.h
+    code->MOV(64, R(ABI_PARAM2), Imm64(reinterpret_cast<u64>(jit_interface))); // presumably the callback's second (Jit*) argument
+    code->MOV(32, MJitStateReg(Arm::Reg::PC), R(ABI_PARAM1)); // low 32 bits of ABI_PARAM1 hold arm_pc
+    code->CALL(reinterpret_cast<void*>(cb.InterpreterFallback));
+    code->JMP(routines->RunCodeReturnAddress(), true); // TODO: Check cycles
+}
+
+void EmitX64::EmitTerminalReturnToDispatch(IR::Term::ReturnToDispatch, Arm::LocationDescriptor initial_location) {
     code->JMP(routines->RunCodeReturnAddress(), true);
 }
 
+void EmitX64::EmitTerminalLinkBlock(IR::Term::LinkBlock terminal, Arm::LocationDescriptor initial_location) {
+    ASSERT_MSG(terminal.next.TFlag == initial_location.TFlag, "Unimplemented"); // changing TFlag across the terminal is not handled yet
+    ASSERT_MSG(terminal.next.EFlag == initial_location.EFlag, "Unimplemented"); // changing EFlag across the terminal is not handled yet
+    // Emit: store the target PC into the JitState PC slot and jump to the RunCode return address (no direct block-to-block link yet).
+    code->MOV(32, MJitStateReg(Arm::Reg::PC), Imm32(terminal.next.arm_pc));
+    code->JMP(routines->RunCodeReturnAddress(), true); // TODO: Check cycles, Properly do a link
+}
+
+void EmitX64::EmitTerminalLinkBlockFast(IR::Term::LinkBlockFast terminal, Arm::LocationDescriptor initial_location) {
+    EmitTerminalLinkBlock(IR::Term::LinkBlock{terminal.next}, initial_location); // TODO: Implement
+}
+
+void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, Arm::LocationDescriptor initial_location) {
+    EmitTerminalReturnToDispatch({}, initial_location);  // TODO: Implement RSB
+}
+
+void EmitX64::EmitTerminalIf(IR::Term::If terminal, Arm::LocationDescriptor initial_location) {
+    ASSERT_MSG(0, "Unimplemented");
+}
+
 } // namespace BackendX64
 } // namespace Dynarmic
diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h
index 25d8143b..f4d3a81b 100644
--- a/src/backend_x64/emit_x64.h
+++ b/src/backend_x64/emit_x64.h
@@ -20,8 +20,8 @@ namespace BackendX64 {
 
 class EmitX64 final {
 public:
-    EmitX64(Gen::XEmitter* code, Routines* routines, UserCallbacks cb)
-            : reg_alloc(code), code(code), routines(routines), cb(cb) {}
+    EmitX64(Gen::XEmitter* code, Routines* routines, UserCallbacks cb, Jit* jit_interface)
+            : reg_alloc(code), code(code), routines(routines), cb(cb), jit_interface(jit_interface) {}
 
     CodePtr Emit(Arm::LocationDescriptor descriptor, IR::Block ir);
 
@@ -53,7 +53,14 @@ public:
     void EmitArithmeticShiftRight(IR::Value* value);
 
     void EmitAddCycles(size_t cycles);
-    void EmitReturnToDispatch();
+
+    void EmitTerminal(IR::Terminal terminal, Arm::LocationDescriptor initial_location);
+    void EmitTerminalInterpret(IR::Term::Interpret terminal, Arm::LocationDescriptor initial_location);
+    void EmitTerminalReturnToDispatch(IR::Term::ReturnToDispatch terminal, Arm::LocationDescriptor initial_location);
+    void EmitTerminalLinkBlock(IR::Term::LinkBlock terminal, Arm::LocationDescriptor initial_location);
+    void EmitTerminalLinkBlockFast(IR::Term::LinkBlockFast terminal, Arm::LocationDescriptor initial_location);
+    void EmitTerminalPopRSBHint(IR::Term::PopRSBHint terminal, Arm::LocationDescriptor initial_location);
+    void EmitTerminalIf(IR::Term::If terminal, Arm::LocationDescriptor initial_location);
 
 private:
     std::set<IR::Value*> inhibit_emission;
@@ -62,6 +69,7 @@ private:
     Gen::XEmitter* code;
     Routines* routines;
     UserCallbacks cb;
+    Jit* jit_interface;
     std::unordered_map<Arm::LocationDescriptor, CodePtr, Arm::LocationDescriptorHash> basic_blocks;
 };
 
diff --git a/src/backend_x64/interface_x64.cpp b/src/backend_x64/interface_x64.cpp
index fcec3c90..7e678e16 100644
--- a/src/backend_x64/interface_x64.cpp
+++ b/src/backend_x64/interface_x64.cpp
@@ -28,7 +28,7 @@ struct BlockOfCode : Gen::XCodeBlock {
 };
 
 struct Jit::Impl {
-    Impl(UserCallbacks callbacks) : emitter(&block_of_code, &routines, callbacks), callbacks(callbacks) {}
+    Impl(Jit* jit, UserCallbacks callbacks) : emitter(&block_of_code, &routines, callbacks, jit), callbacks(callbacks) {}
 
     JitState jit_state{};
     Routines routines{};
@@ -57,7 +57,7 @@ private:
     }
 };
 
-Jit::Jit(UserCallbacks callbacks) : callbacks(callbacks), impl(std::make_unique<Impl>(callbacks)) {}
+Jit::Jit(UserCallbacks callbacks) : callbacks(callbacks), impl(std::make_unique<Impl>(this, callbacks)) {}
 
 Jit::~Jit() {}
 
diff --git a/src/frontend/decoder/thumb1.h b/src/frontend/decoder/thumb1.h
index 9bf0d2f1..a5049629 100644
--- a/src/frontend/decoder/thumb1.h
+++ b/src/frontend/decoder/thumb1.h
@@ -151,7 +151,7 @@ static const std::array<Thumb1Matcher<V>, 7> g_thumb1_instruction_table {{
 }};
 
 template<typename Visitor>
-boost::optional<const Thumb1Matcher<Visitor>&> DecodeThumb1(u16 instruction) {
+boost::optional<const Thumb1Matcher<Visitor>&> DecodeThumb16(u16 instruction) {
     const auto& table = g_thumb1_instruction_table<Visitor>;
     auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); };
 
diff --git a/src/frontend/ir/ir.h b/src/frontend/ir/ir.h
index a8ffecdf..b1e7227e 100644
--- a/src/frontend/ir/ir.h
+++ b/src/frontend/ir/ir.h
@@ -149,6 +149,84 @@ private:
     std::vector<ValueWeakPtr> args;
 };
 
+namespace Term {
+
+struct Invalid {}; ///< First alternative of Terminal (index 0): the initial value of a block's `terminal`, meaning none has been set.
+
+/**
+ * This terminal instruction calls the interpreter, starting at `next`.
+ * The interpreter must interpret at least 1 instruction but may choose to interpret more.
+ */
+struct Interpret {
+    explicit Interpret(const Arm::LocationDescriptor& next_) : next(next_) {}
+    Arm::LocationDescriptor next; ///< Location at which interpretation starts.
+};
+
+/**
+ * This terminal instruction returns control to the dispatcher.
+ * The dispatcher will use the value in R15 to determine what comes next.
+ */
+struct ReturnToDispatch {};
+
+/**
+ * This terminal instruction jumps to the basic block described by `next` if we have enough
+ * cycles remaining. If we do not have enough cycles remaining, we return to the
+ * dispatcher, which will return control to the host.
+ */
+struct LinkBlock {
+    explicit LinkBlock(const Arm::LocationDescriptor& next_) : next(next_) {}
+    Arm::LocationDescriptor next; ///< Location descriptor for next block.
+};
+
+/**
+ * This terminal instruction jumps to the basic block described by `next` unconditionally.
+ * This is an optimization and MUST only be emitted when this is guaranteed not to result
+ * in hanging, even in the face of other optimizations. (In practice, this means that only
+ * forward jumps to short-ish blocks would use this instruction.)
+ * A backend that doesn't support this optimization may choose to implement this exactly
+ * as LinkBlock.
+ */
+struct LinkBlockFast {
+    explicit LinkBlockFast(const Arm::LocationDescriptor& next_) : next(next_) {}
+    Arm::LocationDescriptor next; ///< Location descriptor for next block.
+};
+
+/**
+ * This terminal instruction checks the top of the Return Stack Buffer against R15.
+ * If RSB lookup fails, control is returned to the dispatcher.
+ * This is an optimization for faster function calls. A backend that doesn't support
+ * this optimization or doesn't have a RSB may choose to implement this exactly as
+ * ReturnToDispatch.
+ */
+struct PopRSBHint {};
+
+struct If;
+/// A Terminal is the terminal instruction in a MicroBlock.
+using Terminal = boost::variant<
+        Invalid,
+        Interpret,
+        ReturnToDispatch,
+        LinkBlock,
+        LinkBlockFast,
+        PopRSBHint,
+        boost::recursive_wrapper<If>
+>;
+
+/**
+ * This terminal instruction conditionally executes one terminal or another depending
+ * on the run-time state of the ARM flags.
+ */
+struct If {
+    If(Arm::Cond if_, Terminal then_, Terminal else_) : if_(if_), then_(then_), else_(else_) {}
+    Arm::Cond if_;
+    Terminal then_;
+    Terminal else_;
+};
+
+} // namespace Term
+
+using Term::Terminal;
+
 /**
  * A basic block. It consists of zero or more instructions followed by exactly one terminal.
  * Note that this is a linear IR and not a pure tree-based IR: i.e.: there is an ordering to
@@ -161,6 +239,7 @@ public:
 
     Arm::LocationDescriptor location;
     std::list<ValuePtr> instructions;
+    Terminal terminal = Term::Invalid{};
     size_t cycle_count = 0;
 };
 
diff --git a/src/frontend/ir_emitter.cpp b/src/frontend/ir_emitter.cpp
index ecff2566..b50001fe 100644
--- a/src/frontend/ir_emitter.cpp
+++ b/src/frontend/ir_emitter.cpp
@@ -4,7 +4,8 @@
  * General Public License version 2 or any later version.
  */
 
-#include "ir_emitter.h"
+#include "common/assert.h"
+#include "frontend/ir_emitter.h"
 
 namespace Dynarmic {
 namespace Arm {
@@ -73,6 +74,11 @@ IREmitter::ResultAndCarry IREmitter::ArithmeticShiftRight(IR::ValuePtr value_in,
     return {result, carry_out};
 }
 
+void IREmitter::SetTerm(const IR::Terminal& terminal) {
+    ASSERT_MSG(block.terminal.which() == 0, "Terminal has already been set.");
+    block.terminal = terminal;
+}
+
 IR::ValuePtr IREmitter::Inst(IR::Opcode op, std::initializer_list<IR::ValuePtr> args) {
     auto inst = std::make_shared<IR::Inst>(op);
     assert(args.size() == inst->NumArgs());
diff --git a/src/frontend/ir_emitter.h b/src/frontend/ir_emitter.h
index 7965fd14..c8c073d5 100644
--- a/src/frontend/ir_emitter.h
+++ b/src/frontend/ir_emitter.h
@@ -45,6 +45,8 @@ public:
     ResultAndCarry LogicalShiftRight(IR::ValuePtr value_in, IR::ValuePtr shift_amount, IR::ValuePtr carry_in);
     ResultAndCarry ArithmeticShiftRight(IR::ValuePtr value_in, IR::ValuePtr shift_amount, IR::ValuePtr carry_in);
 
+    void SetTerm(const IR::Terminal& terminal);
+
 private:
     IR::ValuePtr Inst(IR::Opcode op, std::initializer_list<IR::ValuePtr> args);
     IR::ValuePtr RegRef(Reg reg);
diff --git a/src/frontend/translate_thumb.cpp b/src/frontend/translate_thumb.cpp
index cea644b1..d3122223 100644
--- a/src/frontend/translate_thumb.cpp
+++ b/src/frontend/translate_thumb.cpp
@@ -22,6 +22,11 @@ struct TranslatorVisitor final {
 
     IREmitter ir;
 
+    bool TranslateThisInstruction() {
+        ir.SetTerm(IR::Term::Interpret(ir.current_location));
+        return false;
+    }
+
     bool thumb1_LSL_imm(Imm5 imm5, Reg m, Reg d) {
         u8 shift_n = imm5;
         // LSLS <Rd>, <Rm>, #<imm5>
@@ -94,7 +99,7 @@ struct TranslatorVisitor final {
     }
 
     bool thumb1_UDF() {
-        return false;
+        return TranslateThisInstruction();
     }
 };
 
@@ -108,12 +113,13 @@ static std::tuple<u32, ThumbInstSize> ReadThumbInstruction(u32 arm_pc, MemoryRea
         first_part >>= 16;
     first_part &= 0xFFFF;
 
-    if ((first_part & 0xF800) != 0xE800 && (first_part & 0xF000) != 0xF000) {
+    if ((first_part & 0xF800) < 0xE800) { // top five bits 0b11101 (0xE800) and above mark a 32-bit encoding
         // 16-bit thumb instruction
         return std::make_tuple(first_part, ThumbInstSize::Thumb16);
     }
 
     // 32-bit thumb instruction
+    // These always start with 0b11101, 0b11110 or 0b11111.
 
     u32 second_part = (*memory_read_32)((arm_pc+2) & 0xFFFFFFFC);
     if (((arm_pc+2) & 0x2) != 0)
@@ -135,7 +141,7 @@ IR::Block TranslateThumb(LocationDescriptor descriptor, MemoryRead32FuncType mem
         std::tie(thumb_instruction, inst_size) = ReadThumbInstruction(arm_pc, memory_read_32);
 
         if (inst_size == ThumbInstSize::Thumb16) {
-            auto decoder = DecodeThumb1<TranslatorVisitor>(static_cast<u16>(thumb_instruction));
+            auto decoder = DecodeThumb16<TranslatorVisitor>(static_cast<u16>(thumb_instruction));
             if (decoder) {
                 should_continue = decoder->call(visitor, static_cast<u16>(thumb_instruction));
             } else {
@@ -151,7 +157,7 @@ IR::Block TranslateThumb(LocationDescriptor descriptor, MemoryRead32FuncType mem
             ASSERT_MSG(0, "Unimplemented");
         }
 
-        visitor.ir.current_location.arm_pc += inst_size == ThumbInstSize::Thumb16 ? 2 : 4;
+        visitor.ir.current_location.arm_pc += (inst_size == ThumbInstSize::Thumb16) ? 2 : 4;
         visitor.ir.block.cycle_count++;
     }
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 33042a3d..4fa857af 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -21,9 +21,9 @@ set(HEADERS
     skyeye_interpreter/dyncom/arm_dyncom_interpreter.h
     skyeye_interpreter/dyncom/arm_dyncom_run.h
     skyeye_interpreter/dyncom/arm_dyncom_thumb.h
+    skyeye_interpreter/skyeye_common/arm_regformat.h
     skyeye_interpreter/skyeye_common/armstate.h
     skyeye_interpreter/skyeye_common/armsupp.h
-    skyeye_interpreter/skyeye_common/arm_regformat.h
     skyeye_interpreter/skyeye_common/vfp/asm_vfp.h
     skyeye_interpreter/skyeye_common/vfp/vfp.h
     skyeye_interpreter/skyeye_common/vfp/vfp_helper.h
diff --git a/tests/arm/fuzz_thumb.cpp b/tests/arm/fuzz_thumb.cpp
index d4252f9e..b3b7ccde 100644
--- a/tests/arm/fuzz_thumb.cpp
+++ b/tests/arm/fuzz_thumb.cpp
@@ -4,5 +4,47 @@
  * General Public License version 2 or any later version.
  */
 
-#include "interface/interface.h"
+#include <catch.hpp>
+
+#include "common/common_types.h"
+#include "interface/interface.h"
+#include "skyeye_interpreter/dyncom/arm_dyncom_interpreter.h"
+#include "skyeye_interpreter/skyeye_common/armstate.h"
+
+std::array<u16, 1024> code_mem{};
+
+u32 MemoryRead32(u32 vaddr);
+void InterpreterFallback(u32 pc, Dynarmic::Jit* jit);
+Dynarmic::UserCallbacks GetUserCallbacks();
+
+// Test memory callback: reads a 32-bit word from code_mem (low halfword first); any other address reads back as vaddr.
+u32 MemoryRead32(u32 vaddr) {
+    const size_t index = vaddr / sizeof(u16);
+    if (index + 1 < code_mem.size()) // both halfwords of the word must lie inside code_mem
+        return code_mem[index] | (static_cast<u32>(code_mem[index+1]) << 16); // widen first: a u16 promotes to int before the shift
+    return vaddr;
+}
+
+void InterpreterFallback(u32 pc, Dynarmic::Jit* jit) {
+    ARMul_State interp_state{USER32MODE};
+    interp_state.user_callbacks = GetUserCallbacks();
+    interp_state.NumInstrsToExecute = 1;
+
+    interp_state.Reg = jit->Regs();
+    interp_state.Cpsr = jit->Cpsr();
+    interp_state.Reg[15] = pc;
+
+    InterpreterClearCache();
+    InterpreterMainLoop(&interp_state);
+
+    jit->Regs() = interp_state.Reg;
+    jit->Cpsr() = interp_state.Cpsr;
+}
+
+Dynarmic::UserCallbacks GetUserCallbacks() {
+    Dynarmic::UserCallbacks user_callbacks{};
+    user_callbacks.MemoryRead32 = &MemoryRead32;
+    user_callbacks.InterpreterFallback = &InterpreterFallback;
+    return user_callbacks;
+}
 
diff --git a/tests/arm/test_thumb_instructions.cpp b/tests/arm/test_thumb_instructions.cpp
index feece5f5..cd3c7244 100644
--- a/tests/arm/test_thumb_instructions.cpp
+++ b/tests/arm/test_thumb_instructions.cpp
@@ -8,19 +8,43 @@
 
 #include "common/common_types.h"
 #include "interface/interface.h"
+#include "skyeye_interpreter/dyncom/arm_dyncom_interpreter.h"
+#include "skyeye_interpreter/skyeye_common/armstate.h"
 
-std::array<u32, 1024> code_mem{};
+std::array<u16, 1024> code_mem{};
+
+u32 MemoryRead32(u32 vaddr);
+void InterpreterFallback(u32 pc, Dynarmic::Jit* jit);
+Dynarmic::UserCallbacks GetUserCallbacks();
 
 u32 MemoryRead32(u32 vaddr) {
-    if (vaddr < code_mem.size() * sizeof(u32)) {
-        return code_mem[vaddr / sizeof(u32)];
+    if (vaddr / sizeof(u16) + 1 < code_mem.size()) { // both halfwords of the word must lie inside code_mem
+        size_t index = vaddr / sizeof(u16);
+        return code_mem[index] | (static_cast<u32>(code_mem[index+1]) << 16); // widen first: a u16 promotes to int before the shift
     }
     return vaddr;
 }
 
+void InterpreterFallback(u32 pc, Dynarmic::Jit* jit) {
+    ARMul_State interp_state{USER32MODE};
+    interp_state.user_callbacks = GetUserCallbacks();
+    interp_state.NumInstrsToExecute = 1;
+
+    interp_state.Reg = jit->Regs();
+    interp_state.Cpsr = jit->Cpsr();
+    interp_state.Reg[15] = pc;
+
+    InterpreterClearCache();
+    InterpreterMainLoop(&interp_state);
+
+    jit->Regs() = interp_state.Reg;
+    jit->Cpsr() = interp_state.Cpsr;
+}
+
 Dynarmic::UserCallbacks GetUserCallbacks() {
     Dynarmic::UserCallbacks user_callbacks{};
     user_callbacks.MemoryRead32 = &MemoryRead32;
+    user_callbacks.InterpreterFallback = &InterpreterFallback;
     return user_callbacks;
 }
 
@@ -28,7 +52,7 @@ TEST_CASE( "thumb: lsls r0, r1, #2", "[thumb]" ) {
     Dynarmic::Jit jit{GetUserCallbacks()};
     code_mem.fill({});
     code_mem[0] = 0x0088; // lsls r0, r1, #2
-    code_mem[1] = 0xDE00; // udf #0
+    code_mem[1] = 0xE7FE; // b +#0
 
     jit.Regs()[0] = 1;
     jit.Regs()[1] = 2;
@@ -39,6 +63,7 @@ TEST_CASE( "thumb: lsls r0, r1, #2", "[thumb]" ) {
 
     REQUIRE( jit.Regs()[0] == 8 );
     REQUIRE( jit.Regs()[1] == 2 );
+    REQUIRE( jit.Regs()[15] == 2 );
     REQUIRE( jit.Cpsr() == 0x00000030 );
 }
 
@@ -46,7 +71,7 @@ TEST_CASE( "thumb: lsls r0, r1, #31", "[thumb]" ) {
     Dynarmic::Jit jit{GetUserCallbacks()};
     code_mem.fill({});
     code_mem[0] = 0x07C8; // lsls r0, r1, #31
-    code_mem[1] = 0xDE00; // udf #0
+    code_mem[1] = 0xE7FE; // b +#0
 
     jit.Regs()[0] = 1;
     jit.Regs()[1] = 0xFFFFFFFF;
@@ -57,5 +82,6 @@ TEST_CASE( "thumb: lsls r0, r1, #31", "[thumb]" ) {
 
     REQUIRE( jit.Regs()[0] == 0x80000000 );
     REQUIRE( jit.Regs()[1] == 0xffffffff );
+    REQUIRE( jit.Regs()[15] == 2 );
     REQUIRE( jit.Cpsr() == 0x20000030 ); // C flag, Thumb, User-mode
 }