From df39308e034f7d25e54824fadb94b30d0a3f4768 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Tue, 9 Aug 2016 22:48:20 +0100
Subject: [PATCH] TranslateArm: Implement CLREX, LDREX, LDREXB, LDREXD, LDREXH,
 STREX, STREXB, STREXD, STREXH, SWP, SWPB

---
 src/CMakeLists.txt                            |   1 +
 src/backend_x64/emit_x64.cpp                  |  66 +++++++
 src/backend_x64/jitstate.h                    |   5 +
 src/frontend/decoder/arm.h                    |   4 +-
 .../disassembler/disassembler_arm.cpp         |  44 +++--
 src/frontend/ir/ir_emitter.cpp                |  41 +++++
 src/frontend/ir/ir_emitter.h                  |   6 +
 src/frontend/ir/opcodes.inc                   |   6 +
 .../translate_arm/synchronization.cpp         | 162 ++++++++++++++++++
 .../translate/translate_arm/translate_arm.h   |  22 +--
 src/ir_opt/dead_code_elimination_pass.cpp     |  42 ++++-
 tests/arm/fuzz_arm.cpp                        | 145 ++++++++--------
 12 files changed, 446 insertions(+), 98 deletions(-)
 create mode 100644 src/frontend/translate/translate_arm/synchronization.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5e4cd871..56a4cc98 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -27,6 +27,7 @@ set(SRCS
     frontend/translate/translate_arm/multiply.cpp
     frontend/translate/translate_arm/parallel.cpp
     frontend/translate/translate_arm/reversal.cpp
+    frontend/translate/translate_arm/synchronization.cpp
     frontend/translate/translate_arm/vfp2.cpp
     frontend/translate/translate_thumb.cpp
     ir_opt/dead_code_elimination_pass.cpp
diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index 1cd25246..90ef937a 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -1293,6 +1293,18 @@ void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) {
     FPThreeOp64(code, reg_alloc, block, inst, &XEmitter::SUBSD);
 }
 
+void EmitX64::EmitClearExclusive(IR::Block&, IR::Inst*) {
+    code->MOV(8, MDisp(R15, offsetof(JitState, exclusive_state)), Imm8(0)); // byte store of 0: no reservation is held afterwards
+}
+
+void EmitX64::EmitSetExclusive(IR::Block&, IR::Inst* inst) {
+    ASSERT(inst->GetArg(1).IsImmediate()); // arg 1 (the access size) is only checked here; no code is emitted for it
+    X64Reg address = reg_alloc.UseRegister(inst->GetArg(0), any_gpr);
+
+    code->MOV(8, MDisp(R15, offsetof(JitState, exclusive_state)), Imm8(1)); // byte store of 1: a reservation is now held
+    code->MOV(32, MDisp(R15, offsetof(JitState, exclusive_address)), R(address)); // record the reserved guest address
+}
+
 void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
     reg_alloc.HostCall(inst, inst->GetArg(0));
 
@@ -1341,6 +1353,60 @@ void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) {
     code->ABI_CallFunction(reinterpret_cast<void*>(cb.MemoryWrite64));
 }
 
+static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void* fn) {
+    reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); // arg 0 = address (read back from ABI_PARAM1 below), arg 1 = value
+    X64Reg passed = reg_alloc.DefRegister(inst, any_gpr); // holds the result of this IR instruction
+    X64Reg tmp = ABI_RETURN; // Use one of the unused HostCall registers.
+
+    code->MOV(32, R(passed), Imm32(1)); // result stays 1 on either failure path below
+    code->CMP(8, MDisp(R15, offsetof(JitState, exclusive_state)), Imm8(0));
+    auto fail1_fixup = code->J_CC(CC_E); // fail: exclusive_state is 0, i.e. no reservation is held
+    code->MOV(32, R(tmp), R(ABI_PARAM1));
+    code->XOR(32, R(tmp), MDisp(R15, offsetof(JitState, exclusive_address)));
+    code->TEST(32, R(tmp), Imm32(JitState::RESERVATION_GRANULE_MASK));
+    auto fail2_fixup = code->J_CC(CC_NE); // fail: address differs from the recorded one in a masked bit
+    code->MOV(8, MDisp(R15, offsetof(JitState, exclusive_state)), Imm8(0)); // the reservation is cleared before the store
+    code->ABI_CallFunction(fn);
+    code->XOR(32, R(passed), R(passed)); // result 0: the store callback was invoked
+    code->SetJumpTarget(fail1_fixup);
+    code->SetJumpTarget(fail2_fixup);
+}
+
+void EmitX64::EmitExclusiveWriteMemory8(IR::Block&, IR::Inst* inst) {
+    ExclusiveWrite(code, reg_alloc, inst, reinterpret_cast<void*>(cb.MemoryWrite8)); // shared sequence with the 8-bit write callback
+}
+
+void EmitX64::EmitExclusiveWriteMemory16(IR::Block&, IR::Inst* inst) {
+    ExclusiveWrite(code, reg_alloc, inst, reinterpret_cast<void*>(cb.MemoryWrite16)); // shared sequence with the 16-bit write callback
+}
+
+void EmitX64::EmitExclusiveWriteMemory32(IR::Block&, IR::Inst* inst) {
+    ExclusiveWrite(code, reg_alloc, inst, reinterpret_cast<void*>(cb.MemoryWrite32)); // shared sequence with the 32-bit write callback
+}
+
+void EmitX64::EmitExclusiveWriteMemory64(IR::Block&, IR::Inst* inst) {
+    reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); // arg 0 = address, arg 1 = low word of the value
+    X64Reg passed = reg_alloc.DefRegister(inst, any_gpr); // holds the result of this IR instruction
+    X64Reg value_hi = reg_alloc.UseScratchRegister(inst->GetArg(2), any_gpr); // arg 2 = high word; scratch because it is shifted in place
+    X64Reg value = ABI_PARAM2;
+    X64Reg tmp = ABI_RETURN; // Use one of the unused HostCall registers.
+
+    code->MOV(32, R(passed), Imm32(1)); // result stays 1 on either failure path below
+    code->CMP(8, MDisp(R15, offsetof(JitState, exclusive_state)), Imm8(0));
+    auto fail1_fixup = code->J_CC(CC_E); // fail: exclusive_state is 0, i.e. no reservation is held
+    code->MOV(32, R(tmp), R(ABI_PARAM1));
+    code->XOR(32, R(tmp), MDisp(R15, offsetof(JitState, exclusive_address)));
+    code->TEST(32, R(tmp), Imm32(JitState::RESERVATION_GRANULE_MASK));
+    auto fail2_fixup = code->J_CC(CC_NE); // fail: address differs from the recorded one in a masked bit
+    code->MOV(8, MDisp(R15, offsetof(JitState, exclusive_state)), Imm8(0)); // the reservation is cleared before the store
+    code->MOVZX(64, 32, value, R(value)); // zero-extend the low word to 64 bits
+    code->SHL(64, R(value_hi), Imm8(32)); // move the high word into bits 63..32
+    code->OR(64, R(value), R(value_hi)); // value = (hi << 32) | lo, passed as the second call argument
+    code->ABI_CallFunction(reinterpret_cast<void*>(cb.MemoryWrite64));
+    code->XOR(32, R(passed), R(passed)); // result 0: the store callback was invoked
+    code->SetJumpTarget(fail1_fixup);
+    code->SetJumpTarget(fail2_fixup);
+}
 
 void EmitX64::EmitAddCycles(size_t cycles) {
     ASSERT(cycles < std::numeric_limits<u32>::max());
diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h
index 0a11981c..e1e2618d 100644
--- a/src/backend_x64/jitstate.h
+++ b/src/backend_x64/jitstate.h
@@ -29,6 +29,11 @@ struct JitState {
     u32 save_host_MXCSR = 0;
     s64 cycles_remaining = 0;
 
+    // Exclusive state (written by the ClearExclusive/SetExclusive/ExclusiveWriteMemory* emitters)
+    static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8; // address bits compared by exclusive stores; the low 3 bits are ignored
+    u32 exclusive_state = 0; // 0 = no reservation held; emitted code reads and writes only the low byte
+    u32 exclusive_address = 0; // guest address recorded by SetExclusive
+
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 guest_FPSCR_flags = 0;
diff --git a/src/frontend/decoder/arm.h b/src/frontend/decoder/arm.h
index 4400eede..ad955408 100644
--- a/src/frontend/decoder/arm.h
+++ b/src/frontend/decoder/arm.h
@@ -181,8 +181,8 @@ std::vector<ArmMatcher<V>> GetArmDecodeTable() {
         INST(&V::arm_STREXB,      "STREXB",              "cccc00011100nnnndddd11111001mmmm"), // v6K
         INST(&V::arm_STREXD,      "STREXD",              "cccc00011010nnnndddd11111001mmmm"), // v6K
         INST(&V::arm_STREXH,      "STREXH",              "cccc00011110nnnndddd11111001mmmm"), // v6K
-        INST(&V::arm_SWP,         "SWP",                 "cccc00010000nnnndddd00001001mmmm"), // v2S (v6: Deprecated)
-        INST(&V::arm_SWPB,        "SWPB",                "cccc00010100nnnndddd00001001mmmm"), // v2S (v6: Deprecated)
+        INST(&V::arm_SWP,         "SWP",                 "cccc00010000nnnntttt00001001uuuu"), // v2S (v6: Deprecated)
+        INST(&V::arm_SWPB,        "SWPB",                "cccc00010100nnnntttt00001001uuuu"), // v2S (v6: Deprecated)
 
         // Load/Store instructions
         INST(&V::arm_LDR_imm,     "LDR (imm)",           "cccc010pu0w1nnnnddddvvvvvvvvvvvv"),
diff --git a/src/frontend/disassembler/disassembler_arm.cpp b/src/frontend/disassembler/disassembler_arm.cpp
index e05859ca..2d94f063 100644
--- a/src/frontend/disassembler/disassembler_arm.cpp
+++ b/src/frontend/disassembler/disassembler_arm.cpp
@@ -697,17 +697,39 @@ public:
     std::string arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) { return "ice"; }
 
     // Synchronization Primitive instructions
-    std::string arm_CLREX() { return "ice"; }
-    std::string arm_LDREX(Cond cond, Reg n, Reg d) { return "ice"; }
-    std::string arm_LDREXB(Cond cond, Reg n, Reg d) { return "ice"; }
-    std::string arm_LDREXD(Cond cond, Reg n, Reg d) { return "ice"; }
-    std::string arm_LDREXH(Cond cond, Reg n, Reg d) { return "ice"; }
-    std::string arm_STREX(Cond cond, Reg n, Reg d, Reg m) { return "ice"; }
-    std::string arm_STREXB(Cond cond, Reg n, Reg d, Reg m) { return "ice"; }
-    std::string arm_STREXD(Cond cond, Reg n, Reg d, Reg m) { return "ice"; }
-    std::string arm_STREXH(Cond cond, Reg n, Reg d, Reg m) { return "ice"; }
-    std::string arm_SWP(Cond cond, Reg n, Reg d, Reg m) { return "ice"; }
-    std::string arm_SWPB(Cond cond, Reg n, Reg d, Reg m) { return "ice"; }
+    std::string arm_CLREX() {
+        return "clrex"; // fixed text: this handler receives no condition or register operands
+    }
+    std::string arm_LDREX(Cond cond, Reg n, Reg d) {
+        return Common::StringFromFormat("ldrex%s %s, [%s]", CondToString(cond), RegToString(d), RegToString(n)); // ldrex<c> Rd, [Rn]
+    }
+    std::string arm_LDREXB(Cond cond, Reg n, Reg d) {
+        return Common::StringFromFormat("ldrexb%s %s, [%s]", CondToString(cond), RegToString(d), RegToString(n)); // ldrexb<c> Rd, [Rn]
+    }
+    std::string arm_LDREXD(Cond cond, Reg n, Reg d) {
+        return Common::StringFromFormat("ldrexd%s %s, %s, [%s]", CondToString(cond), RegToString(d), RegToString(d+1), RegToString(n)); // second destination is printed as Rd+1
+    }
+    std::string arm_LDREXH(Cond cond, Reg n, Reg d) {
+        return Common::StringFromFormat("ldrexh%s %s, [%s]", CondToString(cond), RegToString(d), RegToString(n)); // ldrexh<c> Rd, [Rn]
+    }
+    std::string arm_STREX(Cond cond, Reg n, Reg d, Reg m) {
+        return Common::StringFromFormat("strex%s %s, %s, [%s]", CondToString(cond), RegToString(d), RegToString(m), RegToString(n)); // strex<c> Rd, Rm, [Rn]
+    }
+    std::string arm_STREXB(Cond cond, Reg n, Reg d, Reg m) {
+        return Common::StringFromFormat("strexb%s %s, %s, [%s]", CondToString(cond), RegToString(d), RegToString(m), RegToString(n)); // strexb<c> Rd, Rm, [Rn]
+    }
+    std::string arm_STREXD(Cond cond, Reg n, Reg d, Reg m) {
+        return Common::StringFromFormat("strexd%s %s, %s, %s, [%s]", CondToString(cond), RegToString(d), RegToString(m), RegToString(m+1), RegToString(n)); // second source is printed as Rm+1
+    }
+    std::string arm_STREXH(Cond cond, Reg n, Reg d, Reg m) {
+        return Common::StringFromFormat("strexh%s %s, %s, [%s]", CondToString(cond), RegToString(d), RegToString(m), RegToString(n)); // strexh<c> Rd, Rm, [Rn]
+    }
+    std::string arm_SWP(Cond cond, Reg n, Reg t, Reg t2) {
+        return Common::StringFromFormat("swp%s %s, %s, [%s]", CondToString(cond), RegToString(t), RegToString(t2), RegToString(n)); // swp<c> Rt, Rt2, [Rn]
+    }
+    std::string arm_SWPB(Cond cond, Reg n, Reg t, Reg t2) {
+        return Common::StringFromFormat("swpb%s %s, %s, [%s]", CondToString(cond), RegToString(t), RegToString(t2), RegToString(n)); // swpb<c> Rt, Rt2, [Rn]
+    }
 
     // Status register access instructions
     std::string arm_CPS() { return "ice"; }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 7d5f9ffe..26bb7703 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -362,6 +362,15 @@ IR::Value IREmitter::FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_
     return Inst(IR::Opcode::FPSub64, {a, b});
 }
 
+void IREmitter::ClearExlcusive() { // NOTE(review): "Exlcusive" is a misspelling; a rename must also update the declaration and the caller
+    Inst(IR::Opcode::ClearExclusive, {}); // emits the ClearExclusive opcode, which takes no arguments
+}
+
+void IREmitter::SetExclusive(const IR::Value& vaddr, size_t byte_size) {
+    ASSERT(byte_size == 1 || byte_size == 2 || byte_size == 4 || byte_size == 8 || byte_size == 16); // only these access sizes are accepted
+    Inst(IR::Opcode::SetExclusive, {vaddr, Imm8(u8(byte_size))}); // the size travels as a U8 immediate operand
+}
+
 IR::Value IREmitter::ReadMemory8(const IR::Value& vaddr) {
     return Inst(IR::Opcode::ReadMemory8, {vaddr});
 }
@@ -412,6 +421,38 @@ void IREmitter::WriteMemory64(const IR::Value& vaddr, const IR::Value& value) {
     }
 }
 
+IR::Value IREmitter::ExclusiveWriteMemory8(const IR::Value& vaddr, const IR::Value& value) {
+    return Inst(IR::Opcode::ExclusiveWriteMemory8, {vaddr, value}); // unlike the wider variants, no E-flag byte reversal is applied
+}
+
+IR::Value IREmitter::ExclusiveWriteMemory16(const IR::Value& vaddr, const IR::Value& value) {
+    // When the current location has its E flag set, the halfword is byte-reversed before the store.
+    if (!current_location.EFlag()) {
+        return Inst(IR::Opcode::ExclusiveWriteMemory16, {vaddr, value});
+    }
+    const auto swapped = ByteReverseHalf(value);
+    return Inst(IR::Opcode::ExclusiveWriteMemory16, {vaddr, swapped});
+}
+
+IR::Value IREmitter::ExclusiveWriteMemory32(const IR::Value& vaddr, const IR::Value& value) {
+    // When the current location has its E flag set, the word is byte-reversed before the store.
+    if (!current_location.EFlag()) {
+        return Inst(IR::Opcode::ExclusiveWriteMemory32, {vaddr, value});
+    }
+    const auto swapped = ByteReverseWord(value);
+    return Inst(IR::Opcode::ExclusiveWriteMemory32, {vaddr, swapped});
+}
+
+IR::Value IREmitter::ExclusiveWriteMemory64(const IR::Value& vaddr, const IR::Value& value_lo, const IR::Value& value_hi) {
+    // With the E flag set each 32-bit half is byte-reversed on its own; the halves keep their positions.
+    if (!current_location.EFlag()) {
+        return Inst(IR::Opcode::ExclusiveWriteMemory64, {vaddr, value_lo, value_hi});
+    }
+    const auto lo_swapped = ByteReverseWord(value_lo);
+    const auto hi_swapped = ByteReverseWord(value_hi);
+    return Inst(IR::Opcode::ExclusiveWriteMemory64, {vaddr, lo_swapped, hi_swapped});
+}
+
 void IREmitter::Breakpoint() {
     Inst(IR::Opcode::Breakpoint, {});
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 0b7761cf..d5fbae5e 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -113,6 +113,8 @@ public:
     IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
     IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
 
+    void ClearExlcusive(); // NOTE(review): misspelling of "ClearExclusive"; the definition and the CLREX translator use this exact name
+    void SetExclusive(const IR::Value& vaddr, size_t byte_size); // byte_size must be 1, 2, 4, 8 or 16 (asserted in the definition)
     IR::Value ReadMemory8(const IR::Value& vaddr);
     IR::Value ReadMemory16(const IR::Value& vaddr);
     IR::Value ReadMemory32(const IR::Value& vaddr);
@@ -121,6 +123,10 @@ public:
     void WriteMemory16(const IR::Value& vaddr, const IR::Value& value);
     void WriteMemory32(const IR::Value& vaddr, const IR::Value& value);
     void WriteMemory64(const IR::Value& vaddr, const IR::Value& value);
+    IR::Value ExclusiveWriteMemory8(const IR::Value& vaddr, const IR::Value& value); // result is a U32; the x64 backend produces 0 if the store was performed and 1 if not
+    IR::Value ExclusiveWriteMemory16(const IR::Value& vaddr, const IR::Value& value); // value is byte-reversed first when the E flag is set
+    IR::Value ExclusiveWriteMemory32(const IR::Value& vaddr, const IR::Value& value); // value is byte-reversed first when the E flag is set
+    IR::Value ExclusiveWriteMemory64(const IR::Value& vaddr, const IR::Value& value_lo, const IR::Value& value_hi); // takes the two U32 halves separately
 
     void Breakpoint();
 
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 95c410a3..c4072883 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -82,6 +82,8 @@ OPCODE(FPSub32,                 T::F32,         T::F32,         T::F32
 OPCODE(FPSub64,                 T::F64,         T::F64,         T::F64                          )
 
 // Memory access
+OPCODE(ClearExclusive,          T::Void,                                                        )
+OPCODE(SetExclusive,            T::Void,        T::U32,         T::U8                           )
 OPCODE(ReadMemory8,             T::U8,          T::U32                                          )
 OPCODE(ReadMemory16,            T::U16,         T::U32                                          )
 OPCODE(ReadMemory32,            T::U32,         T::U32                                          )
@@ -90,3 +92,7 @@ OPCODE(WriteMemory8,            T::Void,        T::U32,         T::U8
 OPCODE(WriteMemory16,           T::Void,        T::U32,         T::U16                          )
 OPCODE(WriteMemory32,           T::Void,        T::U32,         T::U32                          )
 OPCODE(WriteMemory64,           T::Void,        T::U32,         T::U64                          )
+OPCODE(ExclusiveWriteMemory8,   T::U32,         T::U32,         T::U8                           )
+OPCODE(ExclusiveWriteMemory16,  T::U32,         T::U32,         T::U16                          )
+OPCODE(ExclusiveWriteMemory32,  T::U32,         T::U32,         T::U32                          )
+OPCODE(ExclusiveWriteMemory64,  T::U32,         T::U32,         T::U32,         T::U32          )
diff --git a/src/frontend/translate/translate_arm/synchronization.cpp b/src/frontend/translate/translate_arm/synchronization.cpp
new file mode 100644
index 00000000..31d01a7b
--- /dev/null
+++ b/src/frontend/translate/translate_arm/synchronization.cpp
@@ -0,0 +1,162 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "translate_arm.h"
+
+namespace Dynarmic {
+namespace Arm {
+
+bool ArmTranslatorVisitor::arm_CLREX() {
+    // CLREX (takes no condition, so the IR is emitted unconditionally)
+    ir.ClearExlcusive(); // (sic) IREmitter declares the method with this spelling
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_LDREX(Cond cond, Reg n, Reg d) {
+    // LDREX <Rd>, [<Rn>] -- PC is rejected as either operand.
+    if (n == Reg::PC || d == Reg::PC)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto base = ir.GetRegister(n);
+    ir.SetExclusive(base, 4);
+    ir.SetRegister(d, ir.ReadMemory32(base));
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_LDREXB(Cond cond, Reg n, Reg d) {
+    // LDREXB <Rd>, [<Rn>] -- PC is rejected as either operand.
+    if (n == Reg::PC || d == Reg::PC)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto base = ir.GetRegister(n);
+    ir.SetExclusive(base, 1);
+    ir.SetRegister(d, ir.ZeroExtendByteToWord(ir.ReadMemory8(base)));
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_LDREXD(Cond cond, Reg n, Reg d) {
+    // LDREXD <Rd>, <Rd1>, [<Rn>]
+    if (n == Reg::PC || d == Reg::PC || d == Reg::LR)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto base = ir.GetRegister(n);
+    ir.SetExclusive(base, 8);
+    // The word at the lower address always goes to Rd and the following word to Rd+1.
+    // This is correct in big-endian mode as well: the two halves must NOT be swapped.
+    ir.SetRegister(d, ir.ReadMemory32(base));
+    const auto next_word = ir.Add(base, ir.Imm32(4));
+    ir.SetRegister(d+1, ir.ReadMemory32(next_word));
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_LDREXH(Cond cond, Reg n, Reg d) {
+    // LDREXH <Rd>, [<Rn>] -- PC is rejected as either operand.
+    if (n == Reg::PC || d == Reg::PC)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto base = ir.GetRegister(n);
+    ir.SetExclusive(base, 2);
+    ir.SetRegister(d, ir.ZeroExtendHalfToWord(ir.ReadMemory16(base)));
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_STREX(Cond cond, Reg n, Reg d, Reg m) {
+    // STREX <Rd>, <Rm>, [<Rn>]
+    const bool any_pc = n == Reg::PC || d == Reg::PC || m == Reg::PC;
+    const bool status_overlaps = d == n || d == m;
+    if (any_pc || status_overlaps)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto base = ir.GetRegister(n);
+    const auto data = ir.GetRegister(m);
+    // Rd receives the result of the exclusive-store opcode.
+    ir.SetRegister(d, ir.ExclusiveWriteMemory32(base, data));
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_STREXB(Cond cond, Reg n, Reg d, Reg m) {
+    // STREXB <Rd>, <Rm>, [<Rn>]
+    const bool any_pc = n == Reg::PC || d == Reg::PC || m == Reg::PC;
+    const bool status_overlaps = d == n || d == m;
+    if (any_pc || status_overlaps)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto base = ir.GetRegister(n);
+    const auto low_byte = ir.LeastSignificantByte(ir.GetRegister(m));
+    // Rd receives the result of the exclusive-store opcode.
+    ir.SetRegister(d, ir.ExclusiveWriteMemory8(base, low_byte));
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_STREXD(Cond cond, Reg n, Reg d, Reg m) {
+    // STREXD <Rd>, <Rm>, <Rm2>, [<Rn>]
+    const bool odd_m = static_cast<size_t>(m) % 2 == 1;
+    if (n == Reg::PC || d == Reg::PC || m == Reg::LR || odd_m)
+        return UnpredictableInstruction();
+    const Reg m2 = m + 1; // formed only after Rm is known to be even and not LR
+    if (d == n || d == m || d == m2)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto base = ir.GetRegister(n);
+    const auto lo = ir.GetRegister(m);
+    const auto hi = ir.GetRegister(m2);
+    ir.SetRegister(d, ir.ExclusiveWriteMemory64(base, lo, hi));
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_STREXH(Cond cond, Reg n, Reg d, Reg m) {
+    // STREXH <Rd>, <Rm>, [<Rn>]
+    const bool any_pc = n == Reg::PC || d == Reg::PC || m == Reg::PC;
+    const bool status_overlaps = d == n || d == m;
+    if (any_pc || status_overlaps)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto base = ir.GetRegister(n);
+    const auto low_half = ir.LeastSignificantHalf(ir.GetRegister(m));
+    // Rd receives the result of the exclusive-store opcode.
+    ir.SetRegister(d, ir.ExclusiveWriteMemory16(base, low_half));
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_SWP(Cond cond, Reg n, Reg t, Reg t2) {
+    // SWP <Rt>, <Rt2>, [<Rn>]
+    // TODO: UNDEFINED if current mode is Hypervisor
+    const bool any_pc = t == Reg::PC || t2 == Reg::PC || n == Reg::PC;
+    if (any_pc || n == t || n == t2)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto loaded = ir.ReadMemory32(ir.GetRegister(n));
+    ir.WriteMemory32(ir.GetRegister(n), ir.GetRegister(t2));
+    ir.SetRegister(t, loaded); // TODO: Alignment check
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_SWPB(Cond cond, Reg n, Reg t, Reg t2) {
+    // SWPB <Rt>, <Rt2>, [<Rn>]
+    // TODO: UNDEFINED if current mode is Hypervisor
+    const bool any_pc = t == Reg::PC || t2 == Reg::PC || n == Reg::PC;
+    if (any_pc || n == t || n == t2)
+        return UnpredictableInstruction();
+    if (!ConditionPassed(cond))
+        return true;
+    const auto loaded = ir.ReadMemory8(ir.GetRegister(n));
+    ir.WriteMemory8(ir.GetRegister(n), ir.LeastSignificantByte(ir.GetRegister(t2)));
+    ir.SetRegister(t, ir.ZeroExtendByteToWord(loaded)); // TODO: Alignment check
+    return true;
+}
+
+
+} // namespace Arm
+} // namespace Dynarmic
diff --git a/src/frontend/translate/translate_arm/translate_arm.h b/src/frontend/translate/translate_arm/translate_arm.h
index 7d3374f4..601c1c71 100644
--- a/src/frontend/translate/translate_arm/translate_arm.h
+++ b/src/frontend/translate/translate_arm/translate_arm.h
@@ -304,17 +304,17 @@ struct ArmTranslatorVisitor final {
     bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) { return InterpretThisInstruction(); }
 
     // Synchronization Primitive instructions
-    bool arm_CLREX() { return InterpretThisInstruction(); }
-    bool arm_LDREX(Cond cond, Reg n, Reg d) { return InterpretThisInstruction(); }
-    bool arm_LDREXB(Cond cond, Reg n, Reg d) { return InterpretThisInstruction(); }
-    bool arm_LDREXD(Cond cond, Reg n, Reg d) { return InterpretThisInstruction(); }
-    bool arm_LDREXH(Cond cond, Reg n, Reg d) { return InterpretThisInstruction(); }
-    bool arm_STREX(Cond cond, Reg n, Reg d, Reg m) { return InterpretThisInstruction(); }
-    bool arm_STREXB(Cond cond, Reg n, Reg d, Reg m) { return InterpretThisInstruction(); }
-    bool arm_STREXD(Cond cond, Reg n, Reg d, Reg m) { return InterpretThisInstruction(); }
-    bool arm_STREXH(Cond cond, Reg n, Reg d, Reg m) { return InterpretThisInstruction(); }
-    bool arm_SWP(Cond cond, Reg n, Reg d, Reg m) { return InterpretThisInstruction(); }
-    bool arm_SWPB(Cond cond, Reg n, Reg d, Reg m) { return InterpretThisInstruction(); }
+    bool arm_CLREX();                                  // CLREX
+    bool arm_LDREX(Cond cond, Reg n, Reg d);           // LDREX <Rd>, [<Rn>]
+    bool arm_LDREXB(Cond cond, Reg n, Reg d);          // LDREXB <Rd>, [<Rn>]
+    bool arm_LDREXD(Cond cond, Reg n, Reg d);          // LDREXD <Rd>, <Rd1>, [<Rn>]
+    bool arm_LDREXH(Cond cond, Reg n, Reg d);          // LDREXH <Rd>, [<Rn>]
+    bool arm_STREX(Cond cond, Reg n, Reg d, Reg m);    // STREX <Rd>, <Rm>, [<Rn>]
+    bool arm_STREXB(Cond cond, Reg n, Reg d, Reg m);   // STREXB <Rd>, <Rm>, [<Rn>]
+    bool arm_STREXD(Cond cond, Reg n, Reg d, Reg m);   // STREXD <Rd>, <Rm>, <Rm2>, [<Rn>]
+    bool arm_STREXH(Cond cond, Reg n, Reg d, Reg m);   // STREXH <Rd>, <Rm>, [<Rn>]
+    bool arm_SWP(Cond cond, Reg n, Reg t, Reg t2);     // SWP <Rt>, <Rt2>, [<Rn>]
+    bool arm_SWPB(Cond cond, Reg n, Reg t, Reg t2);    // SWPB <Rt>, <Rt2>, [<Rn>]
 
     // Status register access instructions
     bool arm_CPS() { return InterpretThisInstruction(); }
diff --git a/src/ir_opt/dead_code_elimination_pass.cpp b/src/ir_opt/dead_code_elimination_pass.cpp
index ee35d7b7..f73ced4d 100644
--- a/src/ir_opt/dead_code_elimination_pass.cpp
+++ b/src/ir_opt/dead_code_elimination_pass.cpp
@@ -13,7 +13,47 @@ namespace Optimization {
 
 void DeadCodeElimination(IR::Block& block) {
     const auto is_side_effect_free = [](IR::Opcode op) -> bool {
-        return IR::GetTypeOf(op) != IR::Type::Void;
+        switch (op) { // every opcode listed below is reported as NOT side-effect free
+            case IR::Opcode::Breakpoint:
+            case IR::Opcode::SetRegister:
+            case IR::Opcode::SetExtendedRegister32:
+            case IR::Opcode::SetExtendedRegister64:
+            case IR::Opcode::SetNFlag:
+            case IR::Opcode::SetZFlag:
+            case IR::Opcode::SetCFlag:
+            case IR::Opcode::SetVFlag:
+            case IR::Opcode::OrQFlag:
+            case IR::Opcode::BXWritePC:
+            case IR::Opcode::CallSupervisor:
+            case IR::Opcode::FPAbs32:
+            case IR::Opcode::FPAbs64:
+            case IR::Opcode::FPAdd32:
+            case IR::Opcode::FPAdd64:
+            case IR::Opcode::FPDiv32:
+            case IR::Opcode::FPDiv64:
+            case IR::Opcode::FPMul32:
+            case IR::Opcode::FPMul64:
+            case IR::Opcode::FPNeg32:
+            case IR::Opcode::FPNeg64:
+            case IR::Opcode::FPSqrt32:
+            case IR::Opcode::FPSqrt64:
+            case IR::Opcode::FPSub32:
+            case IR::Opcode::FPSub64:
+            case IR::Opcode::ClearExclusive:
+            case IR::Opcode::SetExclusive:
+            case IR::Opcode::WriteMemory8:
+            case IR::Opcode::WriteMemory16:
+            case IR::Opcode::WriteMemory32:
+            case IR::Opcode::WriteMemory64:
+            case IR::Opcode::ExclusiveWriteMemory8: // listed explicitly: these return a U32, so a Void-type check alone would miss them
+            case IR::Opcode::ExclusiveWriteMemory16:
+            case IR::Opcode::ExclusiveWriteMemory32:
+            case IR::Opcode::ExclusiveWriteMemory64:
+                return false;
+            default:
+                ASSERT(IR::GetTypeOf(op) != IR::Type::Void); // a Void-typed opcode missing from the list above trips this assertion
+                return true;
+        }
     };
 
     // We iterate over the instructions in reverse order.
diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp
index 2477f9ab..0add1adf 100644
--- a/tests/arm/fuzz_arm.cpp
+++ b/tests/arm/fuzz_arm.cpp
@@ -70,7 +70,7 @@ static u32 MemoryRead32(u32 vaddr) {
     return vaddr;
 }
 static u64 MemoryRead64(u32 vaddr) {
-    return vaddr;
+    return MemoryRead32(vaddr) | (u64(MemoryRead32(vaddr+4)) << 32);
 }
 
 static void MemoryWrite8(u32 vaddr, u8 value){
@@ -133,7 +133,7 @@ public:
         REQUIRE(strlen(format) == 32);
 
         for (int i = 0; i < 32; i++) {
-            const u32 bit = 1 << (31 - i);
+            const u32 bit = 1u << (31 - i);
             switch (format[i]) {
                 case '0':
                     mask |= bit;
@@ -169,6 +169,7 @@ public:
     }
     u32 Bits() const { return bits; }
     u32 Mask() const { return mask; }
+    bool IsValid(u32 inst) const { return is_valid(inst); }
 private:
     u32 bits = 0;
     u32 mask = 0;
@@ -210,10 +211,12 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe
 
         u32 initial_fpscr = RandInt<u32>(0x0, 0x1) << 24;
 
+        interp.UnsetExclusiveMemoryAddress();
         interp.Cpsr = initial_cpsr;
         interp.Reg = initial_regs;
         interp.ExtReg = initial_extregs;
         interp.VFP[VFP_FPSCR] = initial_fpscr;
+        jit.Reset();
         jit.Cpsr() = initial_cpsr;
         jit.Regs() = initial_regs;
         jit.ExtRegs() = initial_extregs;
@@ -242,7 +245,7 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe
 
             printf("\nInstruction Listing: \n");
             for (size_t i = 0; i < instruction_count; i++) {
-                 printf("%s\n", Dynarmic::Arm::DisassembleArm(code_mem[i]).c_str());
+                 printf("%x: %s\n", code_mem[i], Dynarmic::Arm::DisassembleArm(code_mem[i]).c_str());
             }
 
             printf("\nInitial Register Listing: \n");
@@ -555,15 +558,42 @@ TEST_CASE("Fuzz ARM data processing instructions", "[JitX64]") {
 }
 
 TEST_CASE("Fuzz ARM load/store instructions (byte, half-word, word)", "[JitX64]") {
-    const std::array<InstructionGenerator, 17> instructions = {{
+    auto EXD_valid = [](u32 inst) -> bool { // STREXD: Rm (bits 0-3) is even and not 14, and Rd (bits 12-15) != Rm + 1
+        return Bits<0, 3>(inst) % 2 == 0 && Bits<0, 3>(inst) != 14 && Bits<12, 15>(inst) != (Bits<0, 3>(inst) + 1);
+    };
+
+    auto STREX_valid = [](u32 inst) -> bool { // status register Rd (bits 12-15) differs from Rn (bits 16-19) and Rm (bits 0-3)
+        return Bits<12, 15>(inst) != Bits<16, 19>(inst) && Bits<12, 15>(inst) != Bits<0, 3>(inst);
+    };
+
+    auto SWP_valid = [](u32 inst) -> bool { // base Rn (bits 16-19) differs from Rt (bits 12-15) and Rt2 (bits 0-3)
+        return Bits<12, 15>(inst) != Bits<16, 19>(inst) && Bits<16, 19>(inst) != Bits<0, 3>(inst);
+    };
+
+    auto LDREXD_valid = [](u32 inst) -> bool { // Rd (bits 12-15) is not 14, matching the d == LR rejection in arm_LDREXD
+        return Bits<12, 15>(inst) != 14;
+    };
+
+    auto D_valid = [](u32 inst) -> bool { // LDRD/STRD: Rn and Rd even, Rd != 14, and Rm is neither Rd nor Rd + 1
+        u32 Rn = Bits<16, 19>(inst);
+        u32 Rd = Bits<12, 15>(inst);
+        u32 Rm = Bits<0, 3>(inst);
+        return Rn % 2 == 0 && Rd % 2 == 0 && Rm != Rd && Rm != Rd + 1 && Rd != 14;
+    };
+
+    const std::array<InstructionGenerator, 32> instructions = {{
         InstructionGenerator("cccc010pu0w1nnnnddddvvvvvvvvvvvv"), // LDR_imm
         InstructionGenerator("cccc011pu0w1nnnnddddvvvvvrr0mmmm"), // LDR_reg
         InstructionGenerator("cccc010pu1w1nnnnddddvvvvvvvvvvvv"), // LDRB_imm
         InstructionGenerator("cccc011pu1w1nnnnddddvvvvvrr0mmmm"), // LDRB_reg
+        InstructionGenerator("cccc000pu1w0nnnnddddvvvv1101vvvv", D_valid), // LDRD_imm
+        InstructionGenerator("cccc000pu0w0nnnndddd00001101mmmm", D_valid), // LDRD_reg
         InstructionGenerator("cccc010pu0w0nnnnddddvvvvvvvvvvvv"), // STR_imm
         InstructionGenerator("cccc011pu0w0nnnnddddvvvvvrr0mmmm"), // STR_reg
         InstructionGenerator("cccc010pu1w0nnnnddddvvvvvvvvvvvv"), // STRB_imm
         InstructionGenerator("cccc011pu1w0nnnnddddvvvvvrr0mmmm"), // STRB_reg
+        InstructionGenerator("cccc000pu1w0nnnnddddvvvv1111vvvv", D_valid), // STRD_imm
+        InstructionGenerator("cccc000pu0w0nnnndddd00001111mmmm", D_valid), // STRD_reg
         InstructionGenerator("cccc000pu1w1nnnnddddvvvv1011vvvv"), // LDRH_imm
         InstructionGenerator("cccc000pu0w1nnnndddd00001011mmmm"), // LDRH_reg
         InstructionGenerator("cccc000pu1w1nnnnddddvvvv1101vvvv"), // LDRSB_imm
@@ -573,87 +603,56 @@ TEST_CASE("Fuzz ARM load/store instructions (byte, half-word, word)", "[JitX64]"
         InstructionGenerator("cccc000pu1w0nnnnddddvvvv1011vvvv"), // STRH_imm
         InstructionGenerator("cccc000pu0w0nnnndddd00001011mmmm"), // STRH_reg
         InstructionGenerator("1111000100000001000000e000000000"), // SETEND
+        InstructionGenerator("11110101011111111111000000011111"), // CLREX
+        InstructionGenerator("cccc00011001nnnndddd111110011111"), // LDREX
+        InstructionGenerator("cccc00011101nnnndddd111110011111"), // LDREXB
+        InstructionGenerator("cccc00011011nnnndddd111110011111", LDREXD_valid), // LDREXD
+        InstructionGenerator("cccc00011111nnnndddd111110011111"), // LDREXH
+        InstructionGenerator("cccc00011000nnnndddd11111001mmmm", STREX_valid), // STREX
+        InstructionGenerator("cccc00011100nnnndddd11111001mmmm", STREX_valid), // STREXB
+        InstructionGenerator("cccc00011010nnnndddd11111001mmmm",
+                             [=](u32 inst) { return EXD_valid(inst) && STREX_valid(inst); }), // STREXD
+        InstructionGenerator("cccc00011110nnnndddd11111001mmmm", STREX_valid), // STREXH
+        InstructionGenerator("cccc00010000nnnntttt00001001uuuu", SWP_valid), // SWP
+        InstructionGenerator("cccc00010100nnnntttt00001001uuuu", SWP_valid), // SWPB
     }};
 
     auto instruction_select = [&]() -> u32 {
         size_t inst_index = RandInt<size_t>(0, instructions.size() - 1);
 
-        u32 cond = 0xE;
-        // Have a one-in-twenty-five chance of actually having a cond.
-        if (RandInt(1, 25) == 1) {
-            cond = RandInt<u32>(0x0, 0xD);
-        }
+        while (true) {
+            u32 cond = 0xE;
+            // Have a one-in-twenty-five chance of actually having a cond.
+            if (RandInt(1, 25) == 1) {
+                cond = RandInt<u32>(0x0, 0xD);
+            }
 
-        u32 Rn = RandInt<u32>(0, 14);
-        u32 Rd = RandInt<u32>(0, 14);
-        u32 W = 0;
-        u32 P = RandInt<u32>(0, 1);
-        if (P) W = RandInt<u32>(0, 1);
-        u32 U = RandInt<u32>(0, 1);
-        u32 rand = RandInt<u32>(0, 0xFF);
-        u32 Rm = RandInt<u32>(0, 14);
+            u32 Rn = RandInt<u32>(0, 14);
+            u32 Rd = RandInt<u32>(0, 14);
+            u32 W = 0;
+            u32 P = RandInt<u32>(0, 1);
+            if (P) W = RandInt<u32>(0, 1);
+            u32 U = RandInt<u32>(0, 1);
+            u32 rand = RandInt<u32>(0, 0xFF);
+            u32 Rm = RandInt<u32>(0, 14);
 
-        if (W) {
-            while (Rn == Rd) {
-                Rn = RandInt<u32>(0, 14);
-                Rd = RandInt<u32>(0, 14);
+            if (W) {
+                while (Rn == Rd) {
+                    Rn = RandInt<u32>(0, 14);
+                    Rd = RandInt<u32>(0, 14);
+                }
+            }
+
+            u32 assemble_randoms = (Rm << 0) | (rand << 4) | (Rd << 12) | (Rn << 16) | (W << 21) | (U << 23) | (P << 24) | (cond << 28);
+            u32 inst = instructions[inst_index].Bits() | (assemble_randoms & (~instructions[inst_index].Mask()));
+            if (instructions[inst_index].IsValid(inst)) {
+                return inst;
             }
         }
-
-        u32 assemble_randoms = (Rm << 0) | (rand << 4) | (Rd << 12) | (Rn << 16) | (W << 21) | (U << 23) | (P << 24) | (cond << 28);
-
-        return instructions[inst_index].Bits() | (assemble_randoms & (~instructions[inst_index].Mask()));
     };
 
     SECTION("short blocks") {
-        FuzzJitArm(5, 6, 10000, instruction_select);
-    }
-}
-
-TEST_CASE("Fuzz ARM load/store instructions (double-word)", "[JitX64]") {
-    const std::array<InstructionGenerator, 4> instructions = {{
-        InstructionGenerator("cccc000pu1w0nnnnddddvvvv1101vvvv"), // LDRD_imm
-        InstructionGenerator("cccc000pu0w0nnnndddd00001101mmmm"), // LDRD_reg
-        InstructionGenerator("cccc000pu1w0nnnnddddvvvv1111vvvv"), // STRD_imm
-        InstructionGenerator("cccc000pu0w0nnnndddd00001111mmmm"), // STRD_reg
-    }};
-
-    auto instruction_select = [&]() -> u32 {
-        size_t inst_index = RandInt<size_t>(0, instructions.size() - 1);
-
-        u32 cond = 0xE;
-        // Have a one-in-twenty-five chance of actually having a cond.
-        if (RandInt(1, 25) == 1) {
-            cond = RandInt<u32>(0x0, 0xD);
-        }
-
-        u32 Rn = RandInt<u32>(0, 6) * 2;
-        u32 Rd = RandInt<u32>(0, 6) * 2;
-        u32 W = 0;
-        u32 P = RandInt<u32>(0, 1);
-        if (P) W = RandInt<u32>(0, 1);
-        u32 U = RandInt<u32>(0, 1);
-        u32 rand = RandInt<u32>(0, 0xF);
-        u32 Rm = RandInt<u32>(0, 14);
-
-        if (W) {
-            while (Rn == Rd) {
-                Rn = RandInt<u32>(0, 6) * 2;
-                Rd = RandInt<u32>(0, 6) * 2;
-            }
-        }
-
-        while (Rm == Rd || Rm == Rd + 1) {
-            Rm = RandInt<u32>(0, 14);
-        }
-
-        u32 assemble_randoms = (Rm << 0) | (rand << 4) | (Rd << 12) | (Rn << 16) | (W << 21) | (U << 23) | (P << 24) | (cond << 28);
-
-        return instructions[inst_index].Bits() | (assemble_randoms & (~instructions[inst_index].Mask()));
-    };
-
-    SECTION("short blocks") {
-        FuzzJitArm(5, 6, 10000, instruction_select);
+        FuzzJitArm(5, 6, 30000, instruction_select);
     }
 }