Debt: backport A64 backend

enable W^X on Apple silicon
SachinVin 2022-05-22 23:26:14 +05:30
parent 97edb626c7
commit df9d373a84
40 changed files with 13387 additions and 53 deletions

View File

@@ -8,9 +8,25 @@ if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(MASTER_PROJECT ON)
endif()
# Add the module directory to the list of paths
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
# Arch detection
include(DetectArchitecture)
if (NOT DEFINED ARCHITECTURE)
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
message(STATUS "Target architecture: ${ARCHITECTURE}")
set(REQUIRES_NO_EXECUTE_SUPPORT OFF)
# Apple Silicon chips require W^X
if(APPLE AND ARCHITECTURE STREQUAL "arm64")
set(REQUIRES_NO_EXECUTE_SUPPORT ON)
endif()
# Dynarmic project options
option(DYNARMIC_ENABLE_CPU_FEATURE_DETECTION "Turning this off causes dynarmic to assume the host CPU doesn't support anything later than SSE3" ON)
option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" OFF)
option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" ${REQUIRES_NO_EXECUTE_SUPPORT})
option(DYNARMIC_FATAL_ERRORS "Errors are fatal" OFF)
option(DYNARMIC_IGNORE_ASSERTS "Ignore asserts" OFF)
option(DYNARMIC_TESTS "Build tests" ${MASTER_PROJECT})
@@ -39,9 +55,6 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
message(SEND_ERROR "In-source builds are not allowed.")
endif()
# Add the module directory to the list of paths
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
# Compiler flags
if (MSVC)
set(DYNARMIC_CXX_FLAGS
@@ -105,13 +118,6 @@ else()
endif()
endif()
# Arch detection
include(DetectArchitecture)
if (NOT DEFINED ARCHITECTURE)
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
message(STATUS "Target architecture: ${ARCHITECTURE}")
# Include Boost
if (NOT TARGET boost)
if (NOT Boost_INCLUDE_DIRS)
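The DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT option added above exists because Apple Silicon enforces W^X on JIT memory: a MAP_JIT mapping is writable or executable for a given thread, never both at once, and the thread toggles between the two states. A standalone sketch of the underlying macOS mechanism the backend builds on (not dynarmic code; mmap with MAP_JIT, pthread_jit_write_protect_np and sys_icache_invalidate are Apple APIs, and hardened builds may additionally need the JIT entitlement):

#include <cstdint>
#include <cstring>
#include <libkern/OSCacheControl.h>
#include <pthread.h>
#include <sys/mman.h>

int main() {
    // Reserve JIT memory; with MAP_JIT, writability and executability are toggled per thread.
    void* mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0);
    if (mem == MAP_FAILED)
        return 1;
    pthread_jit_write_protect_np(0);               // region is now writable, not executable
    const std::uint32_t ret_insn = 0xD65F03C0;     // AArch64 RET
    std::memcpy(mem, &ret_insn, sizeof(ret_insn));
    pthread_jit_write_protect_np(1);               // region is now executable, not writable
    sys_icache_invalidate(mem, sizeof(ret_insn));  // flush the instruction cache before running
    reinterpret_cast<void (*)()>(mem)();           // execute the generated RET
}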

View File

@@ -365,55 +365,66 @@ if (ARCHITECTURE STREQUAL "x86_64")
else()
target_sources(dynarmic PRIVATE backend/x64/exception_handler_generic.cpp)
endif()
elseif(ARCHITECTURE STREQUAL "arm64")
target_link_libraries(dynarmic PRIVATE $<BUILD_INTERFACE:merry::oaknut>)
target_sources(dynarmic PRIVATE
backend/arm64/a32_jitstate.cpp
backend/arm64/a32_jitstate.h
backend/arm64/abi.cpp
backend/arm64/abi.h
backend/arm64/devirtualize.h
backend/arm64/emit_arm64.cpp
backend/arm64/emit_arm64.h
backend/arm64/emit_arm64_a32.cpp
backend/arm64/emit_arm64_a32_coprocessor.cpp
backend/arm64/emit_arm64_a32_memory.cpp
backend/arm64/emit_arm64_a64.cpp
backend/arm64/emit_arm64_a64_memory.cpp
backend/arm64/emit_arm64_cryptography.cpp
backend/arm64/emit_arm64_data_processing.cpp
backend/arm64/emit_arm64_floating_point.cpp
backend/arm64/emit_arm64_packed.cpp
backend/arm64/emit_arm64_saturation.cpp
backend/arm64/emit_arm64_vector.cpp
backend/arm64/emit_arm64_vector_floating_point.cpp
backend/arm64/emit_arm64_vector_saturation.cpp
backend/arm64/emit_context.h
backend/arm64/exclusive_monitor.cpp
backend/arm64/fpsr_manager.cpp
backend/arm64/fpsr_manager.h
backend/arm64/reg_alloc.cpp
backend/arm64/reg_alloc.h
backend/arm64/stack_layout.h
common/spin_lock_arm64.cpp
common/spin_lock_arm64.h
backend/A64/emitter/a64_emitter.cpp
backend/A64/emitter/a64_emitter.h
backend/A64/emitter/arm_common.h
backend/A64/emitter/code_block.h
# backend/A64/a64_emit_a64.cpp
# backend/A64/a64_emit_a64.h
# backend/A64/a64_exclusive_monitor.cpp
# backend/A64/a64_interface.cpp
# backend/A64/a64_jitstate.cpp
# backend/A64/a64_jitstate.h
backend/A64/abi.cpp
backend/A64/abi.h
backend/A64/block_of_code.cpp
backend/A64/block_of_code.h
backend/A64/block_range_information.cpp
backend/A64/block_range_information.h
backend/A64/callback.cpp
backend/A64/callback.h
backend/A64/constant_pool.cpp
backend/A64/constant_pool.h
backend/A64/devirtualize.h
backend/A64/emit_a64.cpp
backend/A64/emit_a64.h
# backend/A64/emit_a64_aes.cpp
# backend/A64/emit_a64_crc32.cpp
backend/A64/emit_a64_data_processing.cpp
backend/A64/emit_a64_floating_point.cpp
backend/A64/emit_a64_packed.cpp
backend/A64/emit_a64_saturation.cpp
# backend/A64/emit_a64_sm4.cpp
# backend/A64/emit_a64_vector.cpp
# backend/A64/emit_a64_vector_floating_point.cpp
backend/A64/exception_handler.h
backend/A64/hostloc.cpp
backend/A64/hostloc.h
backend/A64/jitstate_info.h
backend/A64/opcodes.inc
backend/A64/perf_map.cpp
backend/A64/perf_map.h
backend/A64/reg_alloc.cpp
backend/A64/reg_alloc.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE
backend/arm64/a32_address_space.cpp
backend/arm64/a32_address_space.h
backend/arm64/a32_core.h
backend/arm64/a32_interface.cpp
# Move this to the list below when implemented
backend/arm64/a64_interface.cpp
backend/A64/a32_emit_a64.cpp
backend/A64/a32_emit_a64.h
backend/A64/a32_interface.cpp
backend/A64/a32_jitstate.cpp
backend/A64/a32_jitstate.h
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
if (UNIX)
target_sources(dynarmic PRIVATE backend/A64/exception_handler_posix.cpp)
else()
target_sources(dynarmic PRIVATE backend/A64/exception_handler_generic.cpp)
endif()
else()
message(FATAL_ERROR "Unsupported architecture")

File diff suppressed because it is too large

View File

@@ -0,0 +1,138 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <set>
#include <tuple>
#include <unordered_map>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_range_information.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/exception_handler.h"
#include "dynarmic/A32/a32.h"
#include "dynarmic/A32/config.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::BackendA64 {
struct A64State;
class RegAlloc;
struct A32EmitContext final : public EmitContext {
A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
A32::LocationDescriptor Location() const;
bool IsSingleStep() const;
FP::RoundingMode FPSCR_RMode() const override;
u32 FPCR() const override;
bool FPSCR_FTZ() const override;
bool FPSCR_DN() const override;
std::ptrdiff_t GetInstOffset(IR::Inst* inst) const;
};
class A32EmitA64 final : public EmitA64 {
public:
A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface);
~A32EmitA64() override;
/**
* Emit host machine code for a basic block with intermediate representation `ir`.
* @note ir is modified.
*/
BlockDescriptor Emit(IR::Block& ir);
void ClearCache() override;
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
void FastmemCallback(CodePtr PC);
protected:
const A32::UserConfig config;
A32::Jit* jit_interface;
BlockRangeInformation<u32> block_ranges;
ExceptionHandler exception_handler;
void EmitCondPrelude(const A32EmitContext& ctx);
struct FastDispatchEntry {
u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
const void* code_ptr = nullptr;
};
static_assert(sizeof(FastDispatchEntry) == 0x10);
static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
static constexpr size_t fast_dispatch_table_size = 0x10000;
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
void ClearFastDispatchTable();
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
std::set<DoNotFastmemMarker> do_not_fastmem;
DoNotFastmemMarker GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst);
void DoNotFastmem(const DoNotFastmemMarker& marker);
bool ShouldFastmem(const DoNotFastmemMarker& marker) const;
const void* read_memory_8;
const void* read_memory_16;
const void* read_memory_32;
const void* read_memory_64;
const void* write_memory_8;
const void* write_memory_16;
const void* write_memory_32;
const void* write_memory_64;
void GenMemoryAccessors();
template<typename T>
void ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
template<typename T>
void WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_fast_dispatch_hint = nullptr;
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
void GenTerminalHandlers();
// Microinstruction emitters
#define OPCODE(...)
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
#define A64OPC(...)
#include "frontend/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
// Helpers
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Fastmem
struct FastmemPatchInfo {
std::function<void()> callback;
};
std::unordered_map<CodePtr, FastmemPatchInfo> fastmem_patch_info;
// Terminal instruction emitters
void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
// Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) override;
};
} // namespace Dynarmic::BackendA64
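For context on the fast-dispatch constants declared above: each FastDispatchEntry is 0x10 bytes and the table holds 0x10000 entries, so masking a location-descriptor hash with 0xFFFF0 yields a 16-byte-aligned byte offset straight into the table. The emitted lookup itself is generated in GenTerminalHandlers and is not part of this diff; the following is only a standalone sketch of that indexing arithmetic:

#include <array>
#include <cstddef>
#include <cstdint>

struct FastDispatchEntry {
    std::uint64_t location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
    const void* code_ptr = nullptr;
};
static_assert(sizeof(FastDispatchEntry) == 0x10);

// hash & 0xFFFF0 == ((hash >> 4) mod 0x10000) * sizeof(FastDispatchEntry)
FastDispatchEntry& Lookup(std::array<FastDispatchEntry, 0x10000>& table, std::uint64_t hash) {
    const std::size_t byte_offset = static_cast<std::size_t>(hash & 0xFFFF0);
    auto* base = reinterpret_cast<unsigned char*>(table.data());
    return *reinterpret_cast<FastDispatchEntry*>(base + byte_offset);
}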

View File

@@ -0,0 +1,323 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <memory>
#include <boost/icl/interval_set.hpp>
#include <fmt/format.h>
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/context.h>
#include "backend/A64/a32_emit_a64.h"
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/callback.h"
#include "backend/A64/devirtualize.h"
#include "backend/A64/jitstate_info.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/llvm_disassemble.h"
#include "common/scope_exit.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/location_descriptor.h"
#include "ir_opt/passes.h"
namespace Dynarmic::A32 {
using namespace BackendA64;
static RunCodeCallbacks GenRunCodeCallbacks(const A32::UserConfig& config, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks)),
reinterpret_cast<u64>(config.fastmem_pointer),
};
}
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig config)
: block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
, emitter(block_of_code, config, jit)
, config(std::move(config))
, jit_interface(jit)
{}
A32JitState jit_state;
BlockOfCode block_of_code;
A32EmitA64 emitter;
const A32::UserConfig config;
// Requests made during execution to invalidate the cache are queued up here.
size_t invalid_cache_generation = 0;
boost::icl::interval_set<u32> invalid_cache_ranges;
bool invalidate_entire_cache = false;
void Execute() {
const CodePtr current_codeptr = [this]{
// RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
jit_state.rsb_ptr = new_rsb_ptr;
return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
}
return GetCurrentBlock();
}();
block_of_code.RunCode(&jit_state, current_codeptr);
}
void Step() {
block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
}
std::string Disassemble(const IR::LocationDescriptor& descriptor) {
auto block = GetBasicBlock(descriptor);
std::string result = fmt::format("address: {}\nsize: {} bytes\n", block.entrypoint, block.size);
#ifdef DYNARMIC_USE_LLVM
for (const u32* pos = reinterpret_cast<const u32*>(block.entrypoint);
reinterpret_cast<const u8*>(pos) < reinterpret_cast<const u8*>(block.entrypoint) + block.size; pos += 1) {
fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast<u64>(pos), *pos);
fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos)));
result += Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos));
}
#endif
return result;
}
void PerformCacheInvalidation() {
if (invalidate_entire_cache) {
jit_state.ResetRSB();
block_of_code.ClearCache();
emitter.ClearCache();
invalid_cache_ranges.clear();
invalidate_entire_cache = false;
invalid_cache_generation++;
return;
}
if (invalid_cache_ranges.empty()) {
return;
}
jit_state.ResetRSB();
emitter.InvalidateCacheRanges(invalid_cache_ranges);
invalid_cache_ranges.clear();
invalid_cache_generation++;
}
void RequestCacheInvalidation() {
if (jit_interface->is_executing) {
jit_state.halt_requested = true;
return;
}
PerformCacheInvalidation();
}
private:
Jit* jit_interface;
static CodePtr GetCurrentBlockThunk(void* this_voidptr) {
Jit::Impl& this_ = *static_cast<Jit::Impl*>(this_voidptr);
return this_.GetCurrentBlock();
}
IR::LocationDescriptor GetCurrentLocation() const {
return IR::LocationDescriptor{jit_state.GetUniqueHash()};
}
CodePtr GetCurrentBlock() {
return GetBasicBlock(GetCurrentLocation()).entrypoint;
}
CodePtr GetCurrentSingleStep() {
return GetBasicBlock(A32::LocationDescriptor{GetCurrentLocation()}.SetSingleStepping(true)).entrypoint;
}
A32EmitA64::BlockDescriptor GetBasicBlock(IR::LocationDescriptor descriptor) {
auto block = emitter.GetBasicBlock(descriptor);
if (block)
return *block;
constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
invalidate_entire_cache = true;
PerformCacheInvalidation();
}
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); }, {config.define_unpredictable_behaviour, config.hook_hint_instructions});
if (config.enable_optimizations) {
Optimization::A32GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32ConstantMemoryReads(ir_block, config.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32MergeInterpretBlocksPass(ir_block, config.callbacks);
}
Optimization::VerificationPass(ir_block);
return emitter.Emit(ir_block);
}
};
Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, std::move(config))) {}
Jit::~Jit() = default;
void Jit::Run() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = false;
impl->Execute();
impl->PerformCacheInvalidation();
}
void Jit::Step() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = true;
impl->Step();
impl->PerformCacheInvalidation();
}
void Jit::ClearCache() {
impl->invalidate_entire_cache = true;
impl->RequestCacheInvalidation();
}
void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
impl->invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
impl->RequestCacheInvalidation();
}
void Jit::Reset() {
ASSERT(!is_executing);
impl->jit_state = {};
}
void Jit::HaltExecution() {
impl->jit_state.halt_requested = true;
}
std::array<u32, 16>& Jit::Regs() {
return impl->jit_state.Reg;
}
const std::array<u32, 16>& Jit::Regs() const {
return impl->jit_state.Reg;
}
std::array<u32, 64>& Jit::ExtRegs() {
return impl->jit_state.ExtReg;
}
const std::array<u32, 64>& Jit::ExtRegs() const {
return impl->jit_state.ExtReg;
}
u32 Jit::Cpsr() const {
return impl->jit_state.Cpsr();
}
void Jit::SetCpsr(u32 value) {
return impl->jit_state.SetCpsr(value);
}
u32 Jit::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Jit::SetFpscr(u32 value) {
return impl->jit_state.SetFpscr(value);
}
Context Jit::SaveContext() const {
Context ctx;
SaveContext(ctx);
return ctx;
}
struct Context::Impl {
A32JitState jit_state;
size_t invalid_cache_generation;
};
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
Context::~Context() = default;
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) {
*impl = *ctx.impl;
return *this;
}
Context& Context::operator=(Context&& ctx) noexcept {
impl = std::move(ctx.impl);
return *this;
}
std::array<std::uint32_t, 16>& Context::Regs() {
return impl->jit_state.Reg;
}
const std::array<std::uint32_t, 16>& Context::Regs() const {
return impl->jit_state.Reg;
}
std::array<std::uint32_t, 64>& Context::ExtRegs() {
return impl->jit_state.ExtReg;
}
const std::array<std::uint32_t, 64>& Context::ExtRegs() const {
return impl->jit_state.ExtReg;
}
std::uint32_t Context::Cpsr() const {
return impl->jit_state.Cpsr();
}
void Context::SetCpsr(std::uint32_t value) {
impl->jit_state.SetCpsr(value);
}
std::uint32_t Context::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Context::SetFpscr(std::uint32_t value) {
return impl->jit_state.SetFpscr(value);
}
void Jit::SaveContext(Context& ctx) const {
ctx.impl->jit_state.TransferJitState(impl->jit_state, false);
ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
}
void Jit::LoadContext(const Context& ctx) {
bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
impl->jit_state.TransferJitState(ctx.impl->jit_state, reset_rsb);
}
std::string Jit::Disassemble() const {
std::string result;
#ifdef DYNARMIC_USE_LLVM
for (const u32* pos = reinterpret_cast<const u32*>(impl->block_of_code.GetCodeBegin());
reinterpret_cast<const u8*>(pos) < reinterpret_cast<const u8*>(impl->block_of_code.GetCodePtr()); pos += 1) {
fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast<u64>(pos), *pos);
fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos)));
result += Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos));
}
#endif
return result;
}
} // namespace Dynarmic::A32
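The "RSB optimization" at the top of Impl::Execute is the pop half of a small return stack buffer: emitted call-like branches push (unique hash, host code pointer) pairs via PushRSBHelper (its beginning is visible at the end of this diff), and Execute checks the newest entry to skip a full block lookup when returning to the predicted location. A standalone sketch of that arithmetic, assuming the emitted push stores into the current slot and then advances rsb_ptr:

#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>

struct Rsb {
    static constexpr std::size_t size = 8;  // must be a power of two
    static constexpr std::uint32_t mask = size - 1;
    std::uint32_t ptr = 0;
    std::array<std::uint64_t, size> location_descriptors{};
    std::array<std::uint64_t, size> code_ptrs{};

    // Call-like branch: remember where execution will resume and its host code.
    void Push(std::uint64_t descriptor, std::uint64_t host_code_ptr) {
        location_descriptors[ptr] = descriptor;
        code_ptrs[ptr] = host_code_ptr;
        ptr = (ptr + 1) & mask;
    }

    // Return: pop only when the newest entry matches the current guest state,
    // exactly like the check at the top of Impl::Execute.
    std::optional<std::uint64_t> Predict(std::uint64_t current_descriptor) {
        const std::uint32_t newest = (ptr - 1) & mask;
        if (location_descriptors[newest] != current_descriptor)
            return std::nullopt;  // mispredict: fall back to the full block lookup
        ptr = newest;
        return code_ptrs[newest];
    }
};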

View File

@@ -0,0 +1,172 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/A32/location_descriptor.h"
namespace Dynarmic::BackendA64 {
/**
* CPSR Bits
* =========
*
* ARM CPSR flags
* --------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
* Q bit 27 Saturation flag
* IT[1:0] bits 25-26 If-Then execution state (lower 2 bits)
* J bit 24 Jazelle instruction set flag
* GE bits 16-19 Greater than or Equal flags
* IT[7:2] bits 10-15 If-Then execution state (upper 6 bits)
* E bit 9 Data Endianness flag
* A bit 8 Disable imprecise Aborts
* I bit 7 Disable IRQ interrupts
* F bit 6 Disable FIQ interrupts
* T bit 5 Thumb instruction set flag
* M bits 0-4 Processor Mode bits
*
* A64 flags
* -------------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
*/
u32 A32JitState::Cpsr() const {
DEBUG_ASSERT((cpsr_nzcv & ~0xF0000000) == 0);
DEBUG_ASSERT((cpsr_q & ~1) == 0);
DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);
u32 cpsr = 0;
// NZCV flags
cpsr |= cpsr_nzcv;
// Q flag
cpsr |= cpsr_q ? 1 << 27 : 0;
// GE flags
cpsr |= Common::Bit<31>(cpsr_ge) ? 1 << 19 : 0;
cpsr |= Common::Bit<23>(cpsr_ge) ? 1 << 18 : 0;
cpsr |= Common::Bit<15>(cpsr_ge) ? 1 << 17 : 0;
cpsr |= Common::Bit<7>(cpsr_ge) ? 1 << 16 : 0;
// E flag, T flag
cpsr |= Common::Bit<1>(upper_location_descriptor) ? 1 << 9 : 0;
cpsr |= Common::Bit<0>(upper_location_descriptor) ? 1 << 5 : 0;
// IT state
cpsr |= static_cast<u32>(upper_location_descriptor & 0b11111100'00000000);
cpsr |= static_cast<u32>(upper_location_descriptor & 0b00000011'00000000) << 17;
// Other flags
cpsr |= cpsr_jaifm;
return cpsr;
}
void A32JitState::SetCpsr(u32 cpsr) {
// NZCV flags
cpsr_nzcv = cpsr & 0xF0000000;
// Q flag
cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;
// GE flags
cpsr_ge = 0;
cpsr_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
cpsr_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
cpsr_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
cpsr_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
upper_location_descriptor &= 0xFFFF0000;
// E flag, T flag
upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0;
upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0;
// IT state
upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;
// Other flags
cpsr_jaifm = cpsr & 0x010001DF;
}
void A32JitState::ResetRSB() {
rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
rsb_codeptrs.fill(0);
}
/**
* FPSCR
* =========================
*
* VFP FPSCR cumulative exception bits
* -----------------------------------
* IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPSCR.FTZ = 1
* IXC bit 4 Inexact cumulative exception bit
* UFC bit 3 Underflow cumulative exception bit
* OFC bit 2 Overflow cumulative exception bit
* DZC bit 1 Division by Zero cumulative exception bit
* IOC bit 0 Invalid Operation cumulative exception bit
*
* VFP FPSCR exception trap enables
* --------------------------------
* IDE bit 15 Input Denormal exception trap enable
* IXE bit 12 Inexact exception trap enable
* UFE bit 11 Underflow exception trap enable
* OFE bit 10 Overflow exception trap enable
* DZE bit 9 Division by Zero exception trap enable
* IOE bit 8 Invalid Operation exception trap enable
*
* VFP FPSCR mode bits
* -------------------
* AHP bit 26 Alternate half-precision
* DN bit 25 Default NaN
* FZ bit 24 Flush to Zero
* RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
* Stride bits 20-21 Vector stride
* Len bits 16-18 Vector length
*/
// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
constexpr u32 FPSCR_MODE_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
u32 A32JitState::Fpscr() const {
DEBUG_ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);
const u32 fpcr_mode = static_cast<u32>(upper_location_descriptor) & FPSCR_MODE_MASK;
u32 FPSCR = fpcr_mode | fpsr_nzcv;
FPSCR |= (guest_fpsr & 0x1F);
FPSCR |= fpsr_exc;
return FPSCR;
}
void A32JitState::SetFpscr(u32 FPSCR) {
// Ensure that only upper half of upper_location_descriptor is used for FPSCR bits.
static_assert((FPSCR_MODE_MASK & 0xFFFF0000) == FPSCR_MODE_MASK);
upper_location_descriptor &= 0x0000FFFF;
upper_location_descriptor |= FPSCR & FPSCR_MODE_MASK;
fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK;
guest_fpcr = 0;
guest_fpsr = 0;
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
fpsr_exc = FPSCR & 0x9F;
// Mode Bits
guest_fpcr |= FPSCR & 0x07C09F00;
// Exceptions
guest_fpsr |= FPSCR & 0x9F;
}
} // namespace Dynarmic::BackendA64
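The GE handling above is the reason cpsr_ge exists as a separate field: each of the four CPSR.GE flags is widened to a whole byte (0x00 or 0xFF) so emitted parallel add/subtract and select code can use them directly as byte masks, and Cpsr() only has to read back the top bit of each byte. A standalone sketch of the same packing and unpacking:

#include <cassert>
#include <cstdint>

// CPSR.GE occupies bits 19-16; cpsr_ge stores one byte mask per flag.
std::uint32_t PackGe(std::uint32_t cpsr) {
    std::uint32_t ge = 0;
    ge |= (cpsr & (1u << 19)) ? 0xFF000000 : 0;
    ge |= (cpsr & (1u << 18)) ? 0x00FF0000 : 0;
    ge |= (cpsr & (1u << 17)) ? 0x0000FF00 : 0;
    ge |= (cpsr & (1u << 16)) ? 0x000000FF : 0;
    return ge;
}

// Cpsr() only needs the top bit of each byte mask (bits 31, 23, 15, 7).
std::uint32_t UnpackGe(std::uint32_t ge) {
    std::uint32_t cpsr = 0;
    cpsr |= (ge & (1u << 31)) ? (1u << 19) : 0;
    cpsr |= (ge & (1u << 23)) ? (1u << 18) : 0;
    cpsr |= (ge & (1u << 15)) ? (1u << 17) : 0;
    cpsr |= (ge & (1u << 7)) ? (1u << 16) : 0;
    return cpsr;
}

int main() {
    const std::uint32_t ge_bits = 0b1101u << 16;   // GE = 0b1101
    assert(PackGe(ge_bits) == 0xFFFF00FF);         // widened to byte masks
    assert(UnpackGe(PackGe(ge_bits)) == ge_bits);  // round-trips losslessly
}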

View File

@@ -0,0 +1,111 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
#endif
struct A32JitState {
using ProgramCounterType = u32;
A32JitState() { ResetRSB(); }
std::array<u32, 16> Reg{}; // Current register file.
// TODO: Mode-specific register sets unimplemented.
u32 upper_location_descriptor = 0;
u32 cpsr_ge = 0;
u32 cpsr_q = 0;
u32 cpsr_nzcv = 0;
u32 cpsr_jaifm = 0;
u32 Cpsr() const;
void SetCpsr(u32 cpsr);
alignas(u64) std::array<u32, 64> ExtReg{}; // Extension registers.
static constexpr size_t SpillCount = 64;
std::array<u64, SpillCount> Spill{}; // Spill.
static size_t GetSpillLocationOffsetFromIndex(size_t i) {
return static_cast<u64>(offsetof(A32JitState, Spill) + i * sizeof(u64));
}
// For internal use (See: BlockOfCode::RunCode)
u64 guest_fpcr = 0;
u64 guest_fpsr = 0;
u64 save_host_FPCR = 0;
s64 cycles_to_run = 0;
s64 cycles_remaining = 0;
bool halt_requested = false;
bool check_bit = false;
// Exclusive state
static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
u32 exclusive_state = 0;
u32 exclusive_address = 0;
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
static constexpr size_t RSBPtrMask = RSBSize - 1;
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB();
u32 fpsr_exc = 0;
u32 fpsr_qc = 0; // Dummy value
u32 fpsr_nzcv = 0;
u32 Fpscr() const;
void SetFpscr(u32 FPSCR);
u64 GetUniqueHash() const noexcept {
return (static_cast<u64>(upper_location_descriptor) << 32) | (static_cast<u64>(Reg[15]));
}
void TransferJitState(const A32JitState& src, bool reset_rsb) {
Reg = src.Reg;
upper_location_descriptor = src.upper_location_descriptor;
cpsr_ge = src.cpsr_ge;
cpsr_q = src.cpsr_q;
cpsr_nzcv = src.cpsr_nzcv;
cpsr_jaifm = src.cpsr_jaifm;
ExtReg = src.ExtReg;
guest_fpcr = src.guest_fpcr;
guest_fpsr = src.guest_fpsr;
fpsr_exc = src.fpsr_exc;
fpsr_qc = src.fpsr_qc;
fpsr_nzcv = src.fpsr_nzcv;
exclusive_state = 0;
exclusive_address = 0;
if (reset_rsb) {
ResetRSB();
} else {
rsb_ptr = src.rsb_ptr;
rsb_location_descriptors = src.rsb_location_descriptors;
rsb_codeptrs = src.rsb_codeptrs;
}
}
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
using CodePtr = const void*;
} // namespace Dynarmic::BackendA64
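For reference, GetUniqueHash above doubles as the A32 location descriptor used as the key for block lookup, the RSB and the fast dispatch table: the low 32 bits are the PC (Reg[15]) and the high 32 bits are upper_location_descriptor, i.e. the T/E/IT bits and FPSCR mode bits folded in by SetCpsr/SetFpscr. A tiny illustrative sketch (values are hypothetical):

#include <cstdint>

std::uint64_t MakeUniqueHash(std::uint32_t upper_location_descriptor, std::uint32_t pc) {
    return (static_cast<std::uint64_t>(upper_location_descriptor) << 32) | pc;
}

// Thumb mode (bit 0 of the upper half) at PC 0x1000:
//   MakeUniqueHash(0x00000001, 0x1000) == 0x0000000100001000
// Two blocks at the same PC but with different Thumb/endian/FPSCR state hash
// to different keys and therefore get separate host code.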

View File

@@ -0,0 +1,87 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// 20th Sep 2018: This code was modified for Dynarmic.
#include <algorithm>
#include <vector>
#include "backend/A64/abi.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/iterator_util.h"
namespace Dynarmic::BackendA64 {
template<typename RegisterArrayT>
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
u32 gprs = 0 , fprs = 0;
for (HostLoc reg : regs) {
if (HostLocIsGPR(reg)) {
gprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
} else if (HostLocIsFPR(reg)) {
fprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
}
}
code.fp_emitter.ABI_PushRegisters(fprs);
code.ABI_PushRegisters(gprs);
}
template<typename RegisterArrayT>
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
u32 gprs = 0, fprs = 0;
for (HostLoc reg : regs) {
if (HostLocIsGPR(reg)) {
gprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
} else if (HostLocIsFPR(reg)) {
fprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
}
}
code.ABI_PopRegisters(gprs);
code.fp_emitter.ABI_PopRegisters(fprs);
}
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
std::vector<HostLoc> regs;
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
ABI_PushRegistersAndAdjustStack(code, regs);
}
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
std::vector<HostLoc> regs;
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
ABI_PopRegistersAndAdjustStack(code, regs);
}
} // namespace Dynarmic::BackendA64
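ABI_PushRegistersAndAdjustStack and ABI_PopRegistersAndAdjustStack above reduce the register list to two bitmasks, one bit per GPR index and one per FPR index, before handing them to the emitter's push/pop helpers. A standalone sketch of that mask building, assuming DecodeReg returns the plain 0-31 register index:

#include <cstdint>
#include <initializer_list>

std::uint32_t BuildRegisterMask(std::initializer_list<unsigned> register_indices) {
    std::uint32_t mask = 0;
    for (const unsigned index : register_indices)
        mask |= 1u << index;  // one bit per register index
    return mask;
}

// Saving X19, X20 and Q8 would produce:
//   gprs == BuildRegisterMask({19, 20}) == 0x00180000
//   fprs == BuildRegisterMask({8})      == 0x00000100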

View File

@@ -0,0 +1,110 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
namespace Dynarmic::BackendA64 {
constexpr HostLoc ABI_RETURN = HostLoc::X0;
constexpr HostLoc ABI_PARAM1 = HostLoc::X0;
constexpr HostLoc ABI_PARAM2 = HostLoc::X1;
constexpr HostLoc ABI_PARAM3 = HostLoc::X2;
constexpr HostLoc ABI_PARAM4 = HostLoc::X3;
constexpr HostLoc ABI_PARAM5 = HostLoc::X4;
constexpr HostLoc ABI_PARAM6 = HostLoc::X5;
constexpr HostLoc ABI_PARAM7 = HostLoc::X6;
constexpr HostLoc ABI_PARAM8 = HostLoc::X7;
constexpr std::array<HostLoc, 43> ABI_ALL_CALLER_SAVE = {
HostLoc::X0,
HostLoc::X1,
HostLoc::X2,
HostLoc::X3,
HostLoc::X4,
HostLoc::X5,
HostLoc::X6,
HostLoc::X7,
HostLoc::X8,
HostLoc::X9,
HostLoc::X10,
HostLoc::X11,
HostLoc::X12,
HostLoc::X13,
HostLoc::X14,
HostLoc::X15,
HostLoc::X16,
HostLoc::X17,
HostLoc::X18,
HostLoc::Q0,
HostLoc::Q1,
HostLoc::Q2,
HostLoc::Q3,
HostLoc::Q4,
HostLoc::Q5,
HostLoc::Q6,
HostLoc::Q7,
HostLoc::Q16,
HostLoc::Q17,
HostLoc::Q18,
HostLoc::Q19,
HostLoc::Q20,
HostLoc::Q21,
HostLoc::Q22,
HostLoc::Q23,
HostLoc::Q24,
HostLoc::Q25,
HostLoc::Q26,
HostLoc::Q27,
HostLoc::Q28,
HostLoc::Q29,
HostLoc::Q30,
HostLoc::Q31,
};
constexpr std::array<HostLoc, 20> ABI_ALL_CALLEE_SAVE = {
HostLoc::X19,
HostLoc::X20,
HostLoc::X21,
HostLoc::X22,
HostLoc::X23,
HostLoc::X24,
HostLoc::X25,
HostLoc::X26,
HostLoc::X27,
HostLoc::X28,
HostLoc::X29,
HostLoc::X30,
HostLoc::Q8,
HostLoc::Q9,
HostLoc::Q10,
HostLoc::Q11,
HostLoc::Q12,
HostLoc::Q13,
HostLoc::Q14,
HostLoc::Q15,
};
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 63, "Invalid total number of registers");
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,336 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include <cstring>
#include <limits>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>  // for sysconf
#endif
#ifdef __APPLE__
#include <pthread.h>
#endif
namespace Dynarmic::BackendA64 {
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM3 = Arm64Gen::ARM64Reg::X2;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM4 = Arm64Gen::ARM64Reg::X3;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM5 = Arm64Gen::ARM64Reg::X4;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM6 = Arm64Gen::ARM64Reg::X5;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM7 = Arm64Gen::ARM64Reg::X6;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30;
const std::array<Arm64Gen::ARM64Reg, 8> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2,
BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4,
BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6,
BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8};
namespace {
constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory([[maybe_unused]] const void* base, [[maybe_unused]] size_t size, bool is_executable) {
#if defined(_WIN32)
DWORD oldProtect = 0;
VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#elif defined(__APPLE__)
pthread_jit_write_protect_np(is_executable);
#else
static const size_t pageSize = sysconf(_SC_PAGESIZE);
const size_t iaddr = reinterpret_cast<size_t>(base);
const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif
}
#endif
} // anonymous namespace
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
: fp_emitter(this)
, cb(std::move(cb))
, jsi(jsi)
, constant_pool(*this) {
AllocCodeSpace(TOTAL_CODE_SIZE);
EnableWriting();
GenRunCode();
}
void BlockOfCode::PreludeComplete() {
prelude_complete = true;
near_code_begin = GetCodePtr();
far_code_begin = GetCodePtr() + FAR_CODE_OFFSET;
FlushIcache();
ClearCache();
DisableWriting();
}
void BlockOfCode::EnableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, false);
#endif
}
void BlockOfCode::DisableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, true);
#endif
}
void BlockOfCode::ClearCache() {
ASSERT(prelude_complete);
in_far_code = false;
near_code_ptr = near_code_begin;
far_code_ptr = far_code_begin;
SetCodePtr(near_code_begin);
constant_pool.Clear();
}
size_t BlockOfCode::SpaceRemaining() const {
ASSERT(prelude_complete);
// This function provides an underestimate of near-code-size but that's okay.
// (Why? The maximum size of near code should be measured from near_code_begin, not from the start of the region.)
// These offsets are measured from the start of the code region.
std::size_t far_code_offset, near_code_offset;
if (in_far_code) {
near_code_offset = static_cast<const u8*>(near_code_ptr) - static_cast<const u8*>(region);
far_code_offset = GetCodePtr() - static_cast<const u8*>(region);
} else {
near_code_offset = GetCodePtr() - static_cast<const u8*>(region);
far_code_offset = static_cast<const u8*>(far_code_ptr) - static_cast<const u8*>(region);
}
if (far_code_offset > TOTAL_CODE_SIZE)
return 0;
if (near_code_offset > FAR_CODE_OFFSET)
return 0;
return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
}
void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
run_code(jit_state, code_ptr);
}
void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
step_code(jit_state, code_ptr);
}
void BlockOfCode::ReturnFromRunCode(bool fpscr_already_exited) {
size_t index = 0;
if (fpscr_already_exited)
index |= FPSCR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
void BlockOfCode::ForceReturnFromRunCode(bool fpscr_already_exited) {
size_t index = FORCE_RETURN;
if (fpscr_already_exited)
index |= FPSCR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
void BlockOfCode::GenRunCode() {
const u8* loop, *enter_fpscr_then_loop;
AlignCode16();
run_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
// This serves two purposes:
// 1. It saves all the registers we as a callee need to save.
// 2. It aligns the stack so that the code the JIT emits can assume
// that the stack is appropriately aligned for CALLs.
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOVI2R(Arm64Gen::X27, cb.value_in_X27);
MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
MOV(Arm64Gen::X26, ABI_RETURN);
SwitchFpscrOnEntry();
BR(Arm64Gen::X25);
AlignCode16();
step_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOVI2R(Arm64Gen::X26, 1);
STR(Arm64Gen::INDEX_UNSIGNED, Arm64Gen::X26, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SwitchFpscrOnEntry();
BR(ABI_PARAM2);
enter_fpscr_then_loop = GetCodePtr();
SwitchFpscrOnEntry();
loop = GetCodePtr();
cb.LookupBlock->EmitCall(*this);
BR(ABI_RETURN);
// Return from run code variants
const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){
if (!force_return) {
CMP(Arm64Gen::X26, Arm64Gen::ZR);
B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop);
}
if (!fpscr_already_exited) {
SwitchFpscrOnExit();
}
cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SUB(param[0], param[0], Arm64Gen::X26);
});
ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
RET();
};
return_from_run_code[0] = AlignCode16();
emit_return_from_run_code(false, false);
return_from_run_code[FPSCR_ALREADY_EXITED] = AlignCode16();
emit_return_from_run_code(true, false);
return_from_run_code[FORCE_RETURN] = AlignCode16();
emit_return_from_run_code(false, true);
return_from_run_code[FPSCR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16();
emit_return_from_run_code(true, true);
PerfMapRegister(run_code, GetCodePtr(), "dynarmic_dispatcher");
}
void BlockOfCode::SwitchFpscrOnEntry() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
_MSR(Arm64Gen::FIELD_FPSR, ABI_SCRATCH1);
}
void BlockOfCode::SwitchFpscrOnExit() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPSR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}
void BlockOfCode::UpdateTicks() {
cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SUB(param[0], param[0], Arm64Gen::X26);
});
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
MOV(Arm64Gen::X26, ABI_RETURN);
}
void BlockOfCode::LookupBlock() {
cb.LookupBlock->EmitCall(*this);
}
void BlockOfCode::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
ASSERT_MSG(!in_far_code, "Can't patch when in far code, yet!");
constant_pool.EmitPatchLDR(Rt, lower, upper);
}
void BlockOfCode::PatchConstPool() {
constant_pool.PatchPool();
}
void BlockOfCode::SwitchToFarCode() {
ASSERT(prelude_complete);
ASSERT(!in_far_code);
in_far_code = true;
near_code_ptr = GetCodePtr();
SetCodePtr(far_code_ptr);
ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}
void BlockOfCode::SwitchToNearCode() {
ASSERT(prelude_complete);
ASSERT(in_far_code);
in_far_code = false;
far_code_ptr = GetCodePtr();
SetCodePtr(near_code_ptr);
}
CodePtr BlockOfCode::GetCodeBegin() const {
return near_code_begin;
}
u8* BlockOfCode::GetRegion() const {
return region;
}
std::size_t BlockOfCode::GetRegionSize() const {
return total_region_size;
}
void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");
void* ret = GetWritableCodePtr();
region_size += alloc_size;
SetCodePtr(GetCodePtr() + alloc_size);
memset(ret, 0, alloc_size);
return ret;
}
void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
u8* ptr = const_cast<u8*>(reinterpret_cast<const u8*>(code_ptr));
ARM64XEmitter::SetCodePtr(ptr);
}
void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
size_t current_size = GetCodePtr() - reinterpret_cast<const u8*>(begin);
ASSERT(current_size <= size);
for (u32 i = 0; i < (size - current_size) / 4; i++) {
HINT(Arm64Gen::HINT_NOP);
}
}
//bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const {
//#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
// return cpu_info.has(type);
//#else
// (void)type;
// return false;
//#endif
//}
} // namespace Dynarmic::BackendA64
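The near/far split above keeps rarely executed paths about 100 MiB away from the hot path: near code grows from the start of the region up to FAR_CODE_OFFSET, far code grows from FAR_CODE_OFFSET to the end of the region, and SwitchToFarCode/SwitchToNearCode swap between the two write cursors. A standalone sketch of the space accounting this layout implies, mirroring SpaceRemaining above:

#include <algorithm>
#include <cstddef>

constexpr std::size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr std::size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;

// near_offset/far_offset are byte offsets of the two write cursors from the
// start of the code region, as in BlockOfCode::SpaceRemaining above.
std::size_t SpaceRemaining(std::size_t near_offset, std::size_t far_offset) {
    if (far_offset > TOTAL_CODE_SIZE || near_offset > FAR_CODE_OFFSET)
        return 0;
    return std::min(TOTAL_CODE_SIZE - far_offset, FAR_CODE_OFFSET - near_offset);
}

// e.g. SpaceRemaining(0, FAR_CODE_OFFSET) == 28 MiB: a fresh region is limited
// by the 28 MiB reserved for far code, not by the 100 MiB of near space.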

View File

@@ -0,0 +1,147 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <type_traits>
#include "backend/A64/callback.h"
#include "backend/A64/constant_pool.h"
#include "backend/A64/jitstate_info.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using CodePtr = const void*;
struct RunCodeCallbacks {
std::unique_ptr<Callback> LookupBlock;
std::unique_ptr<Callback> AddTicks;
std::unique_ptr<Callback> GetTicksRemaining;
u64 value_in_X27;
};
class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
public:
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
BlockOfCode(const BlockOfCode&) = delete;
/// Call when external emitters have finished emitting their preludes.
void PreludeComplete();
/// Change permissions to RW. This is required to support systems with W^X enforced.
void EnableWriting();
/// Change permissions to RX. This is required to support systems with W^X enforced.
void DisableWriting();
/// Clears this block of code and resets code pointer to beginning.
void ClearCache();
/// Calculates how much space is remaining to use. This is the minimum of near code and far code.
size_t SpaceRemaining() const;
/// Runs emulated code from code_ptr.
void RunCode(void* jit_state, CodePtr code_ptr) const;
/// Runs emulated code from code_ptr for a single cycle.
void StepCode(void* jit_state, CodePtr code_ptr) const;
/// Code emitter: Returns to dispatcher
void ReturnFromRunCode(bool fpscr_already_exited = false);
/// Code emitter: Returns to dispatcher, forces return to host
void ForceReturnFromRunCode(bool fpscr_already_exited = false);
/// Code emitter: Makes guest FPSR and FPCR the current FPSR and FPCR
void SwitchFpscrOnEntry();
/// Code emitter: Makes saved host FPCR the current FPCR
void SwitchFpscrOnExit();
/// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining
/// @note this clobbers ABI caller-save registers
void UpdateTicks();
/// Code emitter: Performs a block lookup based on current state
/// @note this clobbers ABI caller-save registers
void LookupBlock();
u64 MConst(u64 lower, u64 upper = 0);
void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);
void PatchConstPool();
/// Far code sits far away from the near code. Execution remains primarily in near code.
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
void SwitchToFarCode();
void SwitchToNearCode();
CodePtr GetCodeBegin() const;
u8* GetRegion() const;
std::size_t GetRegionSize() const;
const void* GetReturnFromRunCodeAddress() const {
return return_from_run_code[0];
}
const void* GetForceReturnFromRunCodeAddress() const {
return return_from_run_code[FORCE_RETURN];
}
/// Allocate memory of `size` bytes from the same block of memory the code is in.
/// This is useful for objects that need to be placed close to or within code.
/// The lifetime of this memory is the same as the code around it.
void* AllocateFromCodeSpace(size_t size);
void SetCodePtr(CodePtr code_ptr);
void EnsurePatchLocationSize(CodePtr begin, size_t size);
Arm64Gen::ARM64FloatEmitter fp_emitter;
// ABI registers
static const Arm64Gen::ARM64Reg ABI_RETURN;
static const Arm64Gen::ARM64Reg ABI_RETURN2;
static const Arm64Gen::ARM64Reg ABI_PARAM1;
static const Arm64Gen::ARM64Reg ABI_PARAM2;
static const Arm64Gen::ARM64Reg ABI_PARAM3;
static const Arm64Gen::ARM64Reg ABI_PARAM4;
static const Arm64Gen::ARM64Reg ABI_PARAM5;
static const Arm64Gen::ARM64Reg ABI_PARAM6;
static const Arm64Gen::ARM64Reg ABI_PARAM7;
static const Arm64Gen::ARM64Reg ABI_PARAM8;
static const Arm64Gen::ARM64Reg ABI_SCRATCH1;
static const std::array<Arm64Gen::ARM64Reg, 8> ABI_PARAMS;
// bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;
JitStateInfo GetJitStateInfo() const { return jsi; }
private:
RunCodeCallbacks cb;
JitStateInfo jsi;
bool prelude_complete = false;
CodePtr near_code_begin;
CodePtr far_code_begin;
ConstantPool constant_pool;
bool in_far_code = false;
CodePtr near_code_ptr;
CodePtr far_code_ptr;
using RunCodeFuncType = void(*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr;
RunCodeFuncType step_code = nullptr;
static constexpr size_t FPSCR_ALREADY_EXITED = 1 << 0;
static constexpr size_t FORCE_RETURN = 1 << 1;
std::array<const void*, 4> return_from_run_code;
void GenRunCode();
//Xbyak::util::Cpu cpu_info;
};
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,45 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "backend/A64/block_range_information.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
}
template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
block_ranges.clear();
}
template <typename ProgramCounterType>
std::unordered_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
std::unordered_set<IR::LocationDescriptor> erase_locations;
for (auto invalidate_interval : ranges) {
auto pair = block_ranges.equal_range(invalidate_interval);
for (auto it = pair.first; it != pair.second; ++it) {
for (const auto &descriptor : it->second) {
erase_locations.insert(descriptor);
}
}
}
// TODO: EFFICIENCY: Remove ranges that are to be erased.
return erase_locations;
}
template class BlockRangeInformation<u32>;
template class BlockRangeInformation<u64>;
} // namespace Dynarmic::BackendA64
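BlockRangeInformation maps guest address intervals to the set of blocks that touch them: boost::icl's interval_map aggregates overlapping insertions by set union, and equal_range over an invalidation interval returns every overlapping segment, which is what InvalidateRanges walks above. A standalone sketch with plain integers standing in for IR::LocationDescriptor:

#include <cstdint>
#include <set>
#include <utility>
#include <boost/icl/interval_map.hpp>

int main() {
    boost::icl::interval_map<std::uint32_t, std::set<int>> block_ranges;

    // Block 1 covers [0x1000, 0x1007], block 2 covers [0x1004, 0x100B]; they overlap.
    block_ranges.add(std::make_pair(boost::icl::discrete_interval<std::uint32_t>::closed(0x1000, 0x1007), std::set<int>{1}));
    block_ranges.add(std::make_pair(boost::icl::discrete_interval<std::uint32_t>::closed(0x1004, 0x100B), std::set<int>{2}));

    // Invalidating the single address 0x1006 must erase both blocks.
    std::set<int> erase_locations;
    const auto pair = block_ranges.equal_range(boost::icl::discrete_interval<std::uint32_t>::closed(0x1006, 0x1006));
    for (auto it = pair.first; it != pair.second; ++it)
        for (const int descriptor : it->second)
            erase_locations.insert(descriptor);
    // erase_locations == {1, 2}
}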

View File

@@ -0,0 +1,29 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "frontend/ir/location_descriptor.h"
namespace Dynarmic::BackendA64 {
template <typename ProgramCounterType>
class BlockRangeInformation {
public:
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
void ClearCache();
std::unordered_set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);
private:
boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
};
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,41 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/callback.h"
#include "backend/A64/block_of_code.h"
namespace Dynarmic::BackendA64 {
Callback::~Callback() = default;
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
l({code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.QuickCallFunction(fn);
}
void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
l(code.ABI_PARAM1, {code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.QuickCallFunction(fn);
}
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
l({code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM1, arg);
code.QuickCallFunction(fn);
}
void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
#if defined(WIN32) && !defined(__MINGW64__)
l(code.ABI_PARAM2, {code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM1, arg);
#else
l(code.ABI_PARAM1, {code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM2, arg);
#endif
code.QuickCallFunction(fn);
}
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,54 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <functional>
#include <vector>
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using RegList = std::vector<Arm64Gen::ARM64Reg>;
class BlockOfCode;
class Callback {
public:
virtual ~Callback();
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const = 0;
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const = 0;
};
class SimpleCallback final : public Callback {
public:
template <typename Function>
SimpleCallback(Function fn) : fn(reinterpret_cast<void (*)()>(fn)) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;
private:
void (*fn)();
};
class ArgCallback final : public Callback {
public:
template <typename Function>
ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;
private:
void (*fn)();
u64 arg;
};
} // namespace Dynarmic::BackendA64
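A hedged sketch of how an emitter calls a host function through this interface: the lambda receives the parameter registers so the caller can load arguments just before the call is emitted. LogValue and EmitLogValue are hypothetical names; SimpleCallback, EmitCall, RegList, MOVI2R and ABI_PARAM1 are the pieces declared above and used throughout block_of_code.cpp:

#include <cstdint>
#include <cstdio>

#include "backend/A64/block_of_code.h"
#include "backend/A64/callback.h"

namespace Dynarmic::BackendA64 {

static void LogValue(std::uint64_t value) {
    std::printf("value = %llu\n", static_cast<unsigned long long>(value));
}

void EmitLogValue(BlockOfCode& code, std::uint64_t value) {
    SimpleCallback callback{&LogValue};
    callback.EmitCall(code, [&](RegList params) {
        code.MOVI2R(params[0], value);  // params[0] is ABI_PARAM1 (X0)
    });
}

}  // namespace Dynarmic::BackendA64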

View File

@@ -0,0 +1,65 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstring>
#include "backend/A64/block_of_code.h"
#include "backend/A64/constant_pool.h"
#include "common/assert.h"
namespace Dynarmic::BackendA64 {
ConstantPool::ConstantPool(BlockOfCode& code) : code(code) {}
void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
const auto constant = std::make_tuple(lower, upper);
auto iter = constant_info.find(constant);
if (iter == constant_info.end()) {
struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
patch_info.emplace_back(p);
code.BRK(0);
return;
}
const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());
if (!(offset >= -0x40000 && offset <= 0x3FFFF)) {
constant_info.erase(constant);
struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
patch_info.emplace_back(p);
code.BRK(0x42);
return;
}
DEBUG_ASSERT((offset & 3) == 0);
code.LDR(Rt, offset / 4);
}
void ConstantPool::PatchPool() {
u8* pool_ptr = code.GetWritableCodePtr();
for (PatchInfo patch : patch_info) {
auto iter = constant_info.find(patch.constant);
if (iter == constant_info.end()) {
std::memcpy(pool_ptr, &std::get<0>(patch.constant), sizeof(u64));
std::memcpy(pool_ptr + sizeof(u64), &std::get<1>(patch.constant), sizeof(u64));
iter = constant_info.emplace(patch.constant, pool_ptr).first;
pool_ptr += align_size;
}
code.SetCodePtr(patch.ptr);
const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());
DEBUG_ASSERT((offset & 3) == 0);
code.LDR(patch.Rt, offset / 4);
}
patch_info.clear();
code.SetCodePtr(pool_ptr);
}
void ConstantPool::Clear() {
constant_info.clear();
patch_info.clear();
}
} // namespace Dynarmic::BackendA64
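In terms of usage, EmitPatchLDR either emits a PC-relative LDR (literal) immediately, when the constant is already pooled and within range, or emits a BRK placeholder and records it; PatchPool then appends the 16-byte pool entries after the code and rewrites each placeholder into the real literal load. A hedged usage sketch from an emitter's point of view (where PatchPool is actually invoked is not part of this diff):

// Hypothetical helper inside the backend (assumes the repo's include paths):
#include "backend/A64/block_of_code.h"

namespace Dynarmic::BackendA64 {

void EmitLoadBigConstants(BlockOfCode& code) {
    code.EmitPatchLDR(Arm64Gen::X0, 0x0123456789ABCDEFull);  // may emit a BRK placeholder
    code.EmitPatchLDR(Arm64Gen::X1, 0x0123456789ABCDEFull);  // same constant, so it patches to the same pool slot
    // ... emit the rest of the block ...
    code.PatchConstPool();  // lay down the pool entries and turn placeholders into LDR (literal)
}

}  // namespace Dynarmic::BackendA64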

View File

@@ -0,0 +1,47 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <map>
#include <tuple>
#include <vector>
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
/// ConstantPool allocates a block of memory from BlockOfCode.
/// It places constants into this block of memory, returning the address
/// of the memory location where the constant is placed. If the constant
/// already exists, its memory location is reused.
class ConstantPool final {
public:
ConstantPool(BlockOfCode& code);
void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);
void PatchPool();
void Clear();
private:
static constexpr size_t align_size = 16; // bytes
std::map<std::tuple<u64, u64>, void*> constant_info;
BlockOfCode& code;
struct PatchInfo {
const void* ptr;
Arm64Gen::ARM64Reg Rt;
std::tuple<u64, u64> constant;
};
std::vector<PatchInfo> patch_info;
};
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,77 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstring>
#include <memory>
#include <mp/traits/function_info.h>
#include "backend/A64/callback.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace impl {
template <typename FunctionType, FunctionType mfp>
struct ThunkBuilder;
template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
struct ThunkBuilder<R(C::*)(Args...), mfp> {
static R Thunk(C* this_, Args... args) {
return (this_->*mfp)(std::forward<Args>(args)...);
}
};
} // namespace impl
template<auto mfp>
ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) {
return ArgCallback{&impl::ThunkBuilder<decltype(mfp), mfp>::Thunk, reinterpret_cast<u64>(this_)};
}
template<auto mfp>
ArgCallback DevirtualizeWindows(mp::class_type<decltype(mfp)>* this_) {
static_assert(sizeof(mfp) == 8);
return ArgCallback{Common::BitCast<u64>(mfp), reinterpret_cast<u64>(this_)};
}
template<auto mfp>
ArgCallback DevirtualizeAarch64(mp::class_type<decltype(mfp)>* this_) {
struct MemberFunctionPointer {
/// For a non-virtual function, this is a simple function pointer.
/// For a virtual function, it is virtual table offset in bytes.
u64 ptr;
/// Twice the required adjustment to `this`, plus 1 if the member function is virtual.
u64 adj;
} mfp_struct = Common::BitCast<MemberFunctionPointer>(mfp);
static_assert(sizeof(MemberFunctionPointer) == 16);
static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));
u64 fn_ptr = mfp_struct.ptr;
u64 this_ptr = reinterpret_cast<u64>(this_) + mfp_struct.adj / 2;
if (mfp_struct.adj & 1) {
u64 vtable = Common::BitCastPointee<u64>(this_ptr);
fn_ptr = Common::BitCastPointee<u64>(vtable + fn_ptr);
}
return ArgCallback{fn_ptr, this_ptr};
}
template<auto mfp>
ArgCallback Devirtualize(mp::class_type<decltype(mfp)>* this_) {
#if defined(linux) || defined(__linux) || defined(__linux__)
return DevirtualizeAarch64<mfp>(this_);
#else
return DevirtualizeGeneric<mfp>(this_);
#endif
}
} // namespace Dynarmic::BackendA64
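As a hedged illustration of the member-function-pointer layout documented above (the ExampleCallbacks type and MakeReadCallbackSketch helper are hypothetical, not part of this commit):

namespace Dynarmic::BackendA64 {
// Sketch only: shows what Devirtualize yields for a virtual member function.
struct ExampleCallbacks {
    virtual ~ExampleCallbacks() = default;
    virtual u32 MemoryRead32(u32 vaddr) = 0;
};

inline ArgCallback MakeReadCallbackSketch(ExampleCallbacks* callbacks) {
    // Under the AArch64 Itanium ABI path, &ExampleCallbacks::MemoryRead32
    // decodes to {vtable offset, adj | 1}; Devirtualize follows the vtable to
    // recover a plain code pointer plus the adjusted `this` for a direct call.
    return Devirtualize<&ExampleCallbacks::MemoryRead32>(callbacks);
}
} // namespace Dynarmic::BackendA64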

View File

@ -0,0 +1,286 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_map>
#include <unordered_set>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/hostloc.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "common/variant_util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
namespace Dynarmic::BackendA64 {
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
: reg_alloc(reg_alloc), block(block) {}
void EmitContext::EraseInstruction(IR::Inst* inst) {
block.Instructions().erase(inst);
inst->ClearArgs();
}
EmitA64::EmitA64(BlockOfCode& code)
: code(code) {}
EmitA64::~EmitA64() = default;
std::optional<typename EmitA64::BlockDescriptor> EmitA64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
auto iter = block_descriptors.find(descriptor);
if (iter == block_descriptors.end())
return std::nullopt;
return iter->second;
}
void EmitA64::EmitVoid(EmitContext&, IR::Inst*) {
}
void EmitA64::EmitBreakpoint(EmitContext&, IR::Inst*) {
code.BRK(0);
}
void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (!args[0].IsImmediate()) {
ctx.reg_alloc.DefineValue(inst, args[0]);
}
}
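// PushRSBHelper writes one entry into the return stack buffer (RSB) held in
// the JIT state: it records the target's location descriptor alongside its
// host code pointer (or the return-from-run-code thunk if the target has not
// been compiled yet), then advances the ring-buffer index modulo its mask.
// The MOV into X0 is recorded in patch_information so it can be retargeted
// when the target block is later compiled or invalidated.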
void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) {
auto iter = block_descriptors.find(target);
CodePtr target_code_ptr = iter != block_descriptors.end()
? iter->second.entrypoint
: code.GetReturnFromRunCodeAddress();
code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
code.MOVI2R(loc_desc_reg, target.Value());
patch_information[target].mov_x0.emplace_back(code.GetCodePtr());
EmitPatchMovX0(target_code_ptr);
code.ADD(code.ABI_SCRATCH1, X28, DecodeReg(index_reg), ArithOption{index_reg, ST_LSL, 3});
code.STR(INDEX_UNSIGNED, loc_desc_reg, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_location_descriptors);
code.STR(INDEX_UNSIGNED, X0, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_codeptrs);
code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1);
code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1);
code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
}
void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
u64 unique_hash_of_target = args[0].GetImmediateU64();
ctx.reg_alloc.ScratchGpr({HostLoc::X0});
Arm64Gen::ARM64Reg loc_desc_reg = ctx.reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg index_reg = ctx.reg_alloc.ScratchGpr();
PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}
void EmitA64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg value = ctx.reg_alloc.UseGpr(args[0]);
code.CMP(value, ZR);
code.MRS(nzcv, FIELD_NZCV);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
u32 value = 0;
value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
code.MOVI2R(nzcv, value);
ctx.reg_alloc.DefineValue(inst, nzcv);
} else {
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
Arm64Gen::ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
// TODO: Optimize
code.LSR(nzcv, nzcv, 28);
code.MOVI2R(scratch, 0b00010000'10000001);
code.MUL(nzcv, nzcv, scratch);
code.ANDI2R(nzcv, nzcv, 1, scratch);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
}
void EmitA64::EmitAddCycles(size_t cycles) {
ASSERT(cycles < std::numeric_limits<u32>::max());
code.SUBI2R(X26, X26, static_cast<u32>(cycles));
}
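// EmitCond reloads the guest NZCV from the JIT state into the host flags via
// MSR, then emits a conditional branch that is taken when `cond` holds; the
// caller resolves the returned FixupBranch once the destination is known.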
FixupBranch EmitA64::EmitCond(IR::Cond cond) {
FixupBranch label;
const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1;
code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_cpsr_nzcv);
code._MSR(FIELD_NZCV, cpsr);
switch (cond) {
case IR::Cond::EQ: //z
label = code.B(CC_EQ);
break;
case IR::Cond::NE: //!z
label = code.B(CC_NEQ);
break;
case IR::Cond::CS: //c
label = code.B(CC_CS);
break;
case IR::Cond::CC: //!c
label = code.B(CC_CC);
break;
case IR::Cond::MI: //n
label = code.B(CC_MI);
break;
case IR::Cond::PL: //!n
label = code.B(CC_PL);
break;
case IR::Cond::VS: //v
label = code.B(CC_VS);
break;
case IR::Cond::VC: //!v
label = code.B(CC_VC);
break;
case IR::Cond::HI: //c & !z
label = code.B(CC_HI);
break;
case IR::Cond::LS: //!c | z
label = code.B(CC_LS);
break;
case IR::Cond::GE: // n == v
label = code.B(CC_GE);
break;
case IR::Cond::LT: // n != v
label = code.B(CC_LT);
break;
case IR::Cond::GT: // !z & (n == v)
label = code.B(CC_GT);
break;
case IR::Cond::LE: // z | (n != v)
label = code.B(CC_LE);
break;
default:
ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
break;
}
return label;
}
EmitA64::BlockDescriptor EmitA64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
PerfMapRegister(entrypoint, code.GetCodePtr(), LocationDescriptorToFriendlyName(descriptor));
Patch(descriptor, entrypoint);
BlockDescriptor block_desc{entrypoint, size};
block_descriptors.emplace(descriptor.Value(), block_desc);
return block_desc;
}
void EmitA64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
using T = std::decay_t<decltype(x)>;
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
this->EmitTerminalImpl(x, initial_location, is_single_step);
} else {
ASSERT_MSG(false, "Invalid terminal");
}
});
}
void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
const CodePtr save_code_ptr = code.GetCodePtr();
const PatchInformation& patch_info = patch_information[desc];
for (CodePtr location : patch_info.jg) {
code.SetCodePtr(location);
EmitPatchJg(desc, bb);
code.FlushIcache();
}
for (CodePtr location : patch_info.jmp) {
code.SetCodePtr(location);
EmitPatchJmp(desc, bb);
code.FlushIcache();
}
for (CodePtr location : patch_info.mov_x0) {
code.SetCodePtr(location);
EmitPatchMovX0(bb);
code.FlushIcache();
}
code.SetCodePtr(save_code_ptr);
}
void EmitA64::Unpatch(const IR::LocationDescriptor& desc) {
Patch(desc, nullptr);
}
void EmitA64::ClearCache() {
block_descriptors.clear();
patch_information.clear();
PerfMapClear();
}
void EmitA64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); };
for (const auto &descriptor : locations) {
auto it = block_descriptors.find(descriptor);
if (it == block_descriptors.end()) {
continue;
}
if (patch_information.count(descriptor)) {
Unpatch(descriptor);
}
block_descriptors.erase(it);
}
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,124 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "backend/A64/reg_alloc.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/bit_util.h"
#include "common/fp/rounding_mode.h"
#include "frontend/ir/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::IR {
class Block;
class Inst;
} // namespace Dynarmic::IR
namespace Dynarmic::BackendA64 {
class BlockOfCode;
using namespace Arm64Gen;
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
struct EmitContext {
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
void EraseInstruction(IR::Inst* inst);
virtual FP::RoundingMode FPSCR_RMode() const = 0;
virtual u32 FPCR() const = 0;
virtual bool FPSCR_FTZ() const = 0;
virtual bool FPSCR_DN() const = 0;
virtual bool AccurateNaN() const { return true; }
RegAlloc& reg_alloc;
IR::Block& block;
};
class EmitA64 {
public:
struct BlockDescriptor {
CodePtr entrypoint; // Entrypoint of emitted code
size_t size; // Length in bytes of emitted code
};
EmitA64(BlockOfCode& code);
virtual ~EmitA64();
/// Looks up an emitted host block in the cache.
std::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;
/// Empties the entire cache.
virtual void ClearCache();
/// Invalidates a selection of basic blocks.
void InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations);
protected:
// Microinstruction emitters
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
#define A32OPC(...)
#define A64OPC(...)
#include "backend/A64/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
// Helpers
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
void EmitAddCycles(size_t cycles);
FixupBranch EmitCond(IR::Cond cond);
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
void PushRSBHelper(Arm64Gen::ARM64Reg loc_desc_reg, Arm64Gen::ARM64Reg index_reg, IR::LocationDescriptor target);
// Terminal instruction emitters
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
// Patching
struct PatchInformation {
std::vector<CodePtr> jg;
std::vector<CodePtr> jmp;
std::vector<CodePtr> mov_x0;
};
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
virtual void Unpatch(const IR::LocationDescriptor& target_desc);
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0;
// State
BlockOfCode& code;
std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
};
} // namespace Dynarmic::BackendA64

File diff suppressed because it is too large

View File

@ -0,0 +1,471 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <optional>
#include <type_traits>
#include <utility>
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op.h"
#include "common/fp/rounding_mode.h"
#include "common/fp/util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
namespace {
Arm64Gen::RoundingMode ConvertRoundingModeToA64RoundingMode(FP::RoundingMode rounding_mode) {
switch (rounding_mode) {
case FP::RoundingMode::ToNearest_TieEven:
return RoundingMode::ROUND_N;
case FP::RoundingMode::TowardsPlusInfinity:
return RoundingMode::ROUND_P;
case FP::RoundingMode::TowardsMinusInfinity:
return RoundingMode::ROUND_M;
case FP::RoundingMode::TowardsZero:
return RoundingMode::ROUND_Z;
case FP::RoundingMode::ToNearest_TieAwayFromZero:
return RoundingMode::ROUND_A;
default:
UNREACHABLE();
}
}
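// FPTwoOp/FPThreeOp factor out the common handler shape: fetch the arguments,
// narrow the register views to S or D according to fsize, invoke either an
// ARM64FloatEmitter member function or a plain callable, and define the first
// operand's register as the result.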
template <size_t fsize, typename Function>
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.fp_emitter.*fn)(result, result);
} else {
fn(result);
}
ctx.reg_alloc.DefineValue(inst, result);
}
template <size_t fsize, typename Function>
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(args[1]);
result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
operand = fsize == 32 ? EncodeRegToSingle(operand) : EncodeRegToDouble(operand);
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.fp_emitter.*fn)(result, result, operand);
}
else {
fn(result, result, operand);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // anonymous namespace
//void EmitA64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
//
// code.pand(result, code.MConst(xword, f16_non_sign_mask));
//
// ctx.reg_alloc.DefineValue(inst, result);
//}
void EmitA64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FABS(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FABS(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
//void EmitA64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
//
// code.pxor(result, code.MConst(xword, f16_negative_zero));
//
// ctx.reg_alloc.DefineValue(inst, result);
//}
void EmitA64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FNEG(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FNEG(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}
void EmitA64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}
void EmitA64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}
void EmitA64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}
void EmitA64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}
void EmitA64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}
void EmitA64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
FPTwoOp<32>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
}
void EmitA64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) {
FPTwoOp<64>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
}
void EmitA64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
void EmitA64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
static ARM64Reg SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
// FCMP/FCMPE write the comparison result directly to the integer NZCV flags; read them back with MRS.
code.MRS(nzcv, FIELD_NZCV);
return nzcv;
}
void EmitA64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg reg_a = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[0]));
ARM64Reg reg_b = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[1]));
bool exc_on_qnan = args[2].GetImmediateU1();
if (exc_on_qnan) {
code.fp_emitter.FCMPE(reg_a, reg_b);
} else {
code.fp_emitter.FCMP(reg_a, reg_b);
}
ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg reg_a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[0]));
const ARM64Reg reg_b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
bool exc_on_qnan = args[2].GetImmediateU1();
if (exc_on_qnan) {
code.fp_emitter.FCMPE(reg_a, reg_b);
} else {
code.fp_emitter.FCMP(reg_a, reg_b);
}
ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(64, 16, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(32, 16, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(64, 32, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(16, 32, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(16, 64, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(32, 64, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
template<size_t fsize, bool unsigned_, size_t isize>
static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t fbits = args[1].GetImmediateU8();
const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const auto round_imm = ConvertRoundingModeToA64RoundingMode(rounding_mode);
ASSERT_MSG(fbits == 0, "fixed point conversions are not supported yet");
ARM64Reg src = ctx.reg_alloc.UseScratchFpr(args[0]);
ARM64Reg result = ctx.reg_alloc.ScratchGpr();
src = fsize == 64 ? EncodeRegToDouble(src) : EncodeRegToSingle(src);
result = isize == 64 ? result : DecodeReg(result);
if constexpr (unsigned_) {
code.fp_emitter.FCVTU(result, src, round_imm);
}
else {
code.fp_emitter.FCVTS(result, src, round_imm);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, false, 32>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, false, 64>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, true, 32>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, true, 64>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, false, 32>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, false, 64>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, true, 32>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, true, 64>(code, ctx, inst);
}
void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,469 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.ADD(B, sum, sum, b);
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHI(B, ge, b, sum);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
ctx.reg_alloc.DefineValue(inst, sum);
}
void EmitA64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQADD(B, ge, a, b);
code.fp_emitter.CMGE_zero(B, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.ADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.ADD(H, sum, sum, b);
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHI(H, ge, b, sum);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
ctx.reg_alloc.DefineValue(inst, sum);
}
void EmitA64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQADD(H, ge, a, b);
code.fp_emitter.CMGE_zero(H, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.ADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHS(B, ge, a, b);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQSUB(B, ge, a, b);
code.fp_emitter.CMGE_zero(B, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHS(H, ge, a, b);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQSUB(H, ge, a, b);
code.fp_emitter.CMGE_zero(H, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
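// Shared helper for the packed add/sub-exchange (ASX/SAX) family: both
// operands are split into halfwords, one cross pair is added and the other
// subtracted. hi_is_sum selects which result goes to the high halfword,
// is_halving shifts both results right by one, and the optional GE pseudo-op
// receives per-halfword masks derived from the intermediate sum and difference.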
void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg reg_a_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
const ARM64Reg reg_b_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[1]));
const ARM64Reg reg_a_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_b_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
ARM64Reg reg_sum, reg_diff;
if (is_signed) {
code.SXTH(reg_a_lo, reg_a_hi);
code.SXTH(reg_b_lo, reg_b_hi);
code.ASR(reg_a_hi, reg_a_hi, 16);
code.ASR(reg_b_hi, reg_b_hi, 16);
} else {
code.UXTH(reg_a_lo, reg_a_hi);
code.UXTH(reg_b_lo, reg_b_hi);
code.LSR(reg_a_hi, reg_a_hi, 16);
code.LSR(reg_b_hi, reg_b_hi, 16);
}
if (hi_is_sum) {
code.SUB(reg_a_lo, reg_a_lo, reg_b_hi);
code.ADD(reg_a_hi, reg_a_hi, reg_b_lo);
reg_diff = reg_a_lo;
reg_sum = reg_a_hi;
} else {
code.ADD(reg_a_lo, reg_a_lo, reg_b_hi);
code.SUB(reg_a_hi, reg_a_hi, reg_b_lo);
reg_diff = reg_a_hi;
reg_sum = reg_a_lo;
}
if (ge_inst) {
// The reg_b registers are no longer required.
const ARM64Reg ge_sum = reg_b_hi;
const ARM64Reg ge_diff = reg_b_lo;
if (!is_signed) {
code.LSL(ge_sum, reg_sum, 15);
code.ASR(ge_sum, ge_sum, 31);
} else {
code.MVN(ge_sum, reg_sum);
code.ASR(ge_sum, ge_sum, 31);
}
code.MVN(ge_diff, reg_diff);
code.ASR(ge_diff, ge_diff, 31);
code.ANDI2R(ge_sum, ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
code.ANDI2R(ge_diff, ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
code.ORR(ge_sum, ge_sum, ge_diff);
ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
ctx.EraseInstruction(ge_inst);
}
if (is_halving) {
code.LSR(reg_a_hi, reg_a_hi, 1);
code.LSR(reg_a_lo, reg_a_lo, 1);
}
// reg_a_lo now contains the low word and reg_a_hi now contains the high word.
// Merge them.
code.BFM(reg_a_lo, reg_a_hi, 16, 15);
ctx.reg_alloc.DefineValue(inst, reg_a_lo);
}
void EmitA64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, false, false);
}
void EmitA64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, true, false);
}
void EmitA64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, false, false);
}
void EmitA64::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, true, false);
}
void EmitA64::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, false, true);
}
void EmitA64::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, true, true);
}
void EmitA64::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, false, true);
}
void EmitA64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, true, true);
}
void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UABD(B, a, a, b);
code.fp_emitter.UADDLV(B, a, a);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[2]));
code.fp_emitter.BSL(ge, b, a);
ctx.reg_alloc.DefineValue(inst, ge);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <limits>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
namespace {
enum class Op {
Add,
Sub,
};
template<Op op, size_t size>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
ARM64Reg addend = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if constexpr (op == Op::Add) {
code.fp_emitter.SQADD(size, result, result, addend);
}
else {
code.fp_emitter.SQSUB(size, result, result, addend);
}
if (overflow_inst) {
ARM64Reg overflow = ctx.reg_alloc.ScratchGpr();
code.MRS(overflow, FIELD_FPSR);
code.UBFX(overflow, overflow, 27, 1);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // anonymous namespace
void EmitA64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
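// Saturates the signed input to N bits (1 <= N <= 32). Adding 1 << (N - 1)
// maps every in-range value into [0, 2^N - 1], so a single unsigned compare
// against that mask decides between keeping the original value and selecting
// the precomputed positive/negative saturation bound; the optional overflow
// pseudo-op is set whenever clamping occurred.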
void EmitA64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N >= 1 && N <= 32);
if (N == 32) {
if (overflow_inst) {
const auto no_overflow = IR::Value(false);
overflow_inst->ReplaceUsesWith(no_overflow);
}
ctx.reg_alloc.DefineValue(inst, args[0]);
return;
}
const u32 mask = (1u << N) - 1;
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
const u32 negative_saturated_value = 1u << (N - 1);
const u32 sext_negative_saturated_value = Common::SignExtend(N, negative_saturated_value);
const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg tmp = DecodeReg(ctx.reg_alloc.ScratchGpr());
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
code.ADDI2R(overflow, reg_a, negative_saturated_value, overflow);
// Put the appropriate saturated value in result
code.MOVI2R(tmp, positive_saturated_value);
code.CMP(reg_a, tmp);
code.MOVI2R(result, sext_negative_saturated_value);
code.CSEL(result, tmp, result, CC_GT);
// Do the saturation
code.CMPI2R(overflow, mask, tmp);
code.CSEL(result, reg_a, result, CC_LS);
if (overflow_inst) {
code.CSET(overflow, CC_HI);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N <= 31);
const u32 saturated_value = (1u << N) - 1;
const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
code.MOVI2R(result, saturated_value);
code.CMP(reg_a, result);
code.CSEL(result, WZR, result, CC_LE);
code.CSEL(result, reg_a, result, CC_LS);
if (overflow_inst) {
code.CSET(overflow, CC_HI);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendA64

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,28 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
enum CCFlags {
CC_EQ = 0, // Equal
CC_NEQ, // Not equal
CC_CS, // Carry Set
CC_CC, // Carry Clear
CC_MI, // Minus (Negative)
CC_PL, // Plus
CC_VS, // Overflow
CC_VC, // No Overflow
CC_HI, // Unsigned higher
CC_LS, // Unsigned lower or same
CC_GE, // Signed greater than or equal
CC_LT, // Signed less than
CC_GT, // Signed greater than
CC_LE, // Signed less than or equal
CC_AL, // Always (unconditional) 14
CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same
CC_LO = CC_CC, // Alias of CC_CC Unsigned lower
};
const u32 NO_COND = 0xE0000000;
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,139 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions
// without having to prefix them with gen-> or something similar. Example
// implementation: class JIT : public CodeBlock<ARMXEmitter> {}
template <class T>
class CodeBlock : public T {
private:
// A privately used function to set the executable RAM space to something
// invalid. For debugging usefulness it should be used to set the RAM to a
// host specific breakpoint instruction
virtual void PoisonMemory() = 0;
protected:
u8* region = nullptr;
// Size of region we can use.
size_t region_size = 0;
// Original size of the region we allocated.
size_t total_region_size = 0;
bool m_is_child = false;
std::vector<CodeBlock*> m_children;
public:
CodeBlock() = default;
virtual ~CodeBlock() {
if (region)
FreeCodeSpace();
}
CodeBlock(const CodeBlock&) = delete;
CodeBlock& operator=(const CodeBlock&) = delete;
CodeBlock(CodeBlock&&) = delete;
CodeBlock& operator=(CodeBlock&&) = delete;
// Call this before you generate any code.
void AllocCodeSpace(size_t size) {
region_size = size;
total_region_size = size;
#if defined(_WIN32)
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
#if defined(__APPLE__)
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
#else
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif
if (ptr == MAP_FAILED)
ptr = nullptr;
#endif
ASSERT_MSG(ptr != nullptr, "Failed to allocate executable memory");
region = static_cast<u8*>(ptr);
T::SetCodePtr(region);
}
// Always clear code space with breakpoints, so that if someone accidentally
// executes uninitialized, it just breaks into the debugger.
void ClearCodeSpace() {
PoisonMemory();
ResetCodePtr();
}
// Call this when shutting down. Don't rely on the destructor, even though
// it'll do the job.
void FreeCodeSpace() {
ASSERT(!m_is_child);
#ifdef _WIN32
ASSERT(VirtualFree(region, 0, MEM_RELEASE));
#else
ASSERT(munmap(region, total_region_size) == 0);
#endif
region = nullptr;
region_size = 0;
total_region_size = 0;
for (CodeBlock* child : m_children) {
child->region = nullptr;
child->region_size = 0;
child->total_region_size = 0;
}
}
bool IsInSpace(const u8* ptr) const {
return ptr >= region && ptr < (region + region_size);
}
// Cannot currently be undone. Will write protect the entire code region.
// Start over if you need to change the code (call FreeCodeSpace(),
// AllocCodeSpace()).
void WriteProtect() {
ASSERT(mprotect(region, region_size, PROT_READ | PROT_EXEC) == 0);
}
void ResetCodePtr() {
T::SetCodePtr(region);
}
size_t GetSpaceLeft() const {
ASSERT(static_cast<size_t>(T::GetCodePtr() - region) < region_size);
return region_size - (T::GetCodePtr() - region);
}
bool IsAlmostFull() const {
// This should be bigger than the biggest block ever.
return GetSpaceLeft() < 0x10000;
}
bool HasChildren() const {
return region_size != total_region_size;
}
u8* AllocChildCodeSpace(size_t child_size) {
ASSERT_MSG(child_size < GetSpaceLeft(), "Insufficient space for child allocation.");
u8* child_region = region + region_size - child_size;
region_size -= child_size;
return child_region;
}
void AddChildCodeSpace(CodeBlock* child, size_t child_size) {
u8* child_region = AllocChildCodeSpace(child_size);
child->m_is_child = true;
child->region = child_region;
child->region_size = child_size;
child->total_region_size = child_size;
child->ResetCodePtr();
m_children.emplace_back(child);
}
};
} // namespace Dynarmic::BackendA64
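On Apple silicon the MAP_JIT mapping above comes with W^X enforcement: a thread may either write to or execute such pages, not both at once. The BlockOfCode changes that pair with this allocator are in a suppressed diff above, so the following is only a sketch of the usual pattern, on the assumption that its EnableWriting/DisableWriting calls (seen around block invalidation) wrap the per-thread toggle:

// Sketch only: illustrates the macOS W^X toggle, not code from this commit.
#include <pthread.h>

inline void EnableWritingSketch() {
#if defined(__APPLE__) && defined(__aarch64__)
    pthread_jit_write_protect_np(0);  // MAP_JIT pages become writable, not executable
#endif
}

inline void DisableWritingSketch() {
#if defined(__APPLE__) && defined(__aarch64__)
    pthread_jit_write_protect_np(1);  // restore execute permission before running JIT code
#endif
}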

View File

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <functional>
#include "backend/A64/a32_jitstate.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
struct A64State {
std::array<u64, 32> X;
std::array<std::array<u64, 2>, 16> Q;
};
static_assert(sizeof(A64State) == sizeof(A64State::X) + sizeof(A64State::Q));
class ExceptionHandler final {
public:
ExceptionHandler();
~ExceptionHandler();
void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);
bool SupportsFastmem() const;
private:
struct Impl;
std::unique_ptr<Impl> impl;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,25 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/exception_handler.h"
namespace Dynarmic::BackendA64 {
struct ExceptionHandler::Impl final {
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
void ExceptionHandler::Register(BlockOfCode&, std::function<void(CodePtr)>) {
// Do nothing
}
bool ExceptionHandler::SupportsFastmem() const {
return false;
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,166 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <mutex>
#include <vector>
#include <csignal>
#ifdef __APPLE__
#include <sys/ucontext.h>
#else
#include <ucontext.h>
#endif
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/exception_handler.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace {
struct CodeBlockInfo {
BlockOfCode* block;
std::function<void(CodePtr)> callback;
};
class SigHandler {
public:
SigHandler();
~SigHandler();
void AddCodeBlock(CodeBlockInfo info);
void RemoveCodeBlock(CodePtr PC);
private:
auto FindCodeBlockInfo(CodePtr PC) {
return std::find_if(code_block_infos.begin(), code_block_infos.end(),
[&](const CodeBlockInfo& x) { return x.block->GetRegion() <= PC && x.block->GetRegion() + x.block->GetRegionSize() > PC; });
}
std::vector<CodeBlockInfo> code_block_infos;
std::mutex code_block_infos_mutex;
struct sigaction old_sa_segv;
struct sigaction old_sa_bus;
static void SigAction(int sig, siginfo_t* info, void* raw_context);
};
SigHandler sig_handler;
SigHandler::SigHandler() {
// Method below is from Dolphin.
const size_t signal_stack_size = std::max<size_t>(SIGSTKSZ, 2 * 1024 * 1024);
stack_t signal_stack;
signal_stack.ss_sp = malloc(signal_stack_size);
signal_stack.ss_size = signal_stack_size;
signal_stack.ss_flags = 0;
ASSERT_MSG(sigaltstack(&signal_stack, nullptr) == 0,
"dynarmic: POSIX SigHandler: init failure at sigaltstack");
struct sigaction sa;
sa.sa_handler = nullptr;
sa.sa_sigaction = &SigHandler::SigAction;
sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, &old_sa_segv);
#ifdef __APPLE__
sigaction(SIGBUS, &sa, &old_sa_bus);
#endif
}
SigHandler::~SigHandler() {
// No cleanup required.
}
void SigHandler::AddCodeBlock(CodeBlockInfo cb) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
ASSERT(FindCodeBlockInfo(cb.block->GetRegion()) == code_block_infos.end());
code_block_infos.push_back(std::move(cb));
}
void SigHandler::RemoveCodeBlock(CodePtr PC) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
const auto iter = FindCodeBlockInfo(PC);
ASSERT(iter != code_block_infos.end());
code_block_infos.erase(iter);
}
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
ASSERT(sig == SIGSEGV || sig == SIGBUS);
std::lock_guard<std::mutex> guard(sig_handler.code_block_infos_mutex);
#ifdef __APPLE__
auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext->__ss.__pc);
#else
auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext.pc);
#endif
const auto iter = sig_handler.FindCodeBlockInfo(PC);
if (iter != sig_handler.code_block_infos.end()) {
iter->callback(PC);
return;
}
fmt::print(
stderr,
"dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
PC);
struct sigaction* retry_sa =
sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
if (retry_sa->sa_flags & SA_SIGINFO) {
retry_sa->sa_sigaction(sig, info, raw_context);
return;
}
if (retry_sa->sa_handler == SIG_DFL) {
signal(sig, SIG_DFL);
return;
}
if (retry_sa->sa_handler == SIG_IGN) {
return;
}
retry_sa->sa_handler(sig);
}
} // anonymous namespace
struct ExceptionHandler::Impl final {
Impl(BlockOfCode& code, std::function<void(CodePtr)> cb) {
code_begin = code.GetRegion();
sig_handler.AddCodeBlock({&code, std::move(cb)});
}
~Impl() {
sig_handler.RemoveCodeBlock(code_begin);
}
private:
CodePtr code_begin;
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
void ExceptionHandler::Register(BlockOfCode& code, std::function<void(CodePtr)> cb) {
if (cb)
impl = std::make_unique<Impl>(code, std::move(cb));
}
bool ExceptionHandler::SupportsFastmem() const {
return static_cast<bool>(impl);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,21 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/hostloc.h"
namespace Dynarmic::BackendA64 {
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc) {
ASSERT(HostLocIsGPR(loc));
return static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(Arm64Gen::X0) + static_cast<int>(loc));
}
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc) {
ASSERT(HostLocIsFPR(loc));
return EncodeRegToQuad(static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(loc) - static_cast<int>(HostLoc::Q0)));
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,176 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "backend/A64/emitter/a64_emitter.h"
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
enum class HostLoc {
// Ordering of the registers is intentional. See also: HostLocToA64.
// 64bit GPR registers
X0,
X1,
X2,
X3,
X4,
X5,
X6,
X7,
X8,
X9,
X10,
X11,
X12,
X13,
X14,
X15,
X16,
X17,
X18,
X19,
X20,
X21,
X22,
X23,
X24,
X25,
X26,
X27,
X28,
X29,
X30,
SP, // 64bit stack pointer
// Qword FPR registers
Q0,
Q1,
Q2,
Q3,
Q4,
Q5,
Q6,
Q7,
Q8,
Q9,
Q10,
Q11,
Q12,
Q13,
Q14,
Q15,
Q16,
Q17,
Q18,
Q19,
Q20,
Q21,
Q22,
Q23,
Q24,
Q25,
Q26,
Q27,
Q28,
Q29,
Q30,
Q31,
FirstSpill,
};
constexpr size_t NonSpillHostLocCount = static_cast<size_t>(HostLoc::FirstSpill);
inline bool HostLocIsGPR(HostLoc reg) {
return reg >= HostLoc::X0 && reg <= HostLoc::X30;
}
inline bool HostLocIsFPR(HostLoc reg) {
return reg >= HostLoc::Q0 && reg <= HostLoc::Q31;
}
inline bool HostLocIsRegister(HostLoc reg) {
return HostLocIsGPR(reg) || HostLocIsFPR(reg);
}
inline HostLoc HostLocRegIdx(int idx) {
ASSERT(idx >= 0 && idx <= 30);
return static_cast<HostLoc>(idx);
}
inline HostLoc HostLocFprIdx(int idx) {
ASSERT(idx >= 0 && idx <= 31);
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::Q0) + idx);
}
inline HostLoc HostLocSpill(size_t i) {
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
}
inline bool HostLocIsSpill(HostLoc reg) {
return reg >= HostLoc::FirstSpill;
}
inline size_t HostLocBitWidth(HostLoc loc) {
if (HostLocIsGPR(loc))
return 64;
if (HostLocIsFPR(loc))
return 128;
if (HostLocIsSpill(loc))
return 128;
UNREACHABLE();
}
using HostLocList = std::initializer_list<HostLoc>;
// X18 may be reserved (Windows and iOS).
// X26 holds the cycle counter.
// X27 holds a pointer related to emulated memory.
// X28 holds the JitState pointer.
// X30 is the link register.
// In order of desirability, based first on the ABI:
constexpr HostLocList any_gpr = {
HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
HostLoc::X24, HostLoc::X25,
HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12,
HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,
HostLoc::X7, HostLoc::X6, HostLoc::X5, HostLoc::X4, HostLoc::X3,
HostLoc::X2, HostLoc::X1, HostLoc::X0,
};
constexpr HostLocList any_fpr = {
HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13,
HostLoc::Q14, HostLoc::Q15,
HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21,
HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27,
HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
HostLoc::Q7, HostLoc::Q6, HostLoc::Q5, HostLoc::Q4, HostLoc::Q3, HostLoc::Q2,
HostLoc::Q1, HostLoc::Q0,
};
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc);
template <typename JitStateType>
size_t SpillToOpArg(HostLoc loc) {
ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
ASSERT_MSG(i < JitStateType::SpillCount,
"Spill index greater than number of available spill locations");
return JitStateType::GetSpillLocationOffsetFromIndex(i);
}
} // namespace Dynarmic::BackendA64
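SpillToOpArg<> above turns a spill HostLoc into a byte offset inside the jit-state block, which the emitter addresses through X28 (per the register comments above). A small self-contained sketch of that contract with a hypothetical jit-state layout (the name, the 256-byte base offset and the 16-byte slot size are illustrative, not dynarmic's real A32JitState):

#include <cstddef>

struct ExampleJitState {
    static constexpr std::size_t SpillCount = 64;           // number of spill slots
    static constexpr std::size_t spill_base_offset = 256;   // assumed start of the spill area

    // Mirrors the JitStateType interface that SpillToOpArg<> expects.
    static constexpr std::size_t GetSpillLocationOffsetFromIndex(std::size_t i) {
        return spill_base_offset + i * 16;  // each slot holds a 128-bit value
    }
};

// HostLoc::FirstSpill + 3 would resolve to byte offset 256 + 3 * 16 = 304
// from the jit-state pointer held in X28.
static_assert(ExampleJitState::GetSpillLocationOffsetFromIndex(3) == 304);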

View File

@ -0,0 +1,44 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
namespace Dynarmic::BackendA64 {
struct JitStateInfo {
template <typename JitStateType>
JitStateInfo(const JitStateType&)
: offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
, offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
, offsetof_save_host_FPCR(offsetof(JitStateType, save_host_FPCR))
, offsetof_guest_fpcr(offsetof(JitStateType, guest_fpcr))
, offsetof_guest_fpsr(offsetof(JitStateType, guest_fpsr))
, offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
, rsb_ptr_mask(JitStateType::RSBPtrMask)
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
{}
const size_t offsetof_cycles_remaining;
const size_t offsetof_cycles_to_run;
const size_t offsetof_save_host_FPCR;
const size_t offsetof_guest_fpcr;
const size_t offsetof_guest_fpsr;
const size_t offsetof_rsb_ptr;
const size_t rsb_ptr_mask;
const size_t offsetof_rsb_location_descriptors;
const size_t offsetof_rsb_codeptrs;
const size_t offsetof_cpsr_nzcv;
const size_t offsetof_fpsr_exc;
const size_t offsetof_fpsr_qc;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,651 @@
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
OPCODE(Void, Void, )
OPCODE(Identity, Opaque, Opaque )
OPCODE(Breakpoint, Void, )
// A32 Context getters/setters
A32OPC(SetCheckBit, Void, U1 )
A32OPC(GetRegister, U32, A32Reg )
A32OPC(GetExtendedRegister32, U32, A32ExtReg )
A32OPC(GetExtendedRegister64, U64, A32ExtReg )
A32OPC(SetRegister, Void, A32Reg, U32 )
A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 )
A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 )
A32OPC(GetCpsr, U32, )
A32OPC(SetCpsr, Void, U32 )
A32OPC(SetCpsrNZCVRaw, Void, U32 )
A32OPC(SetCpsrNZCV, Void, NZCV )
A32OPC(SetCpsrNZCVQ, Void, U32 )
A32OPC(GetNFlag, U1, )
A32OPC(SetNFlag, Void, U1 )
A32OPC(GetZFlag, U1, )
A32OPC(SetZFlag, Void, U1 )
A32OPC(GetCFlag, U1, )
A32OPC(SetCFlag, Void, U1 )
A32OPC(GetVFlag, U1, )
A32OPC(SetVFlag, Void, U1 )
A32OPC(OrQFlag, Void, U1 )
A32OPC(GetGEFlags, U32, )
A32OPC(SetGEFlags, Void, U32 )
A32OPC(SetGEFlagsCompressed, Void, U32 )
A32OPC(BXWritePC, Void, U32 )
A32OPC(CallSupervisor, Void, U32 )
A32OPC(ExceptionRaised, Void, U32, U64 )
A32OPC(GetFpscr, U32, )
A32OPC(SetFpscr, Void, U32, )
A32OPC(GetFpscrNZCV, U32, )
A32OPC(SetFpscrNZCV, Void, NZCV )
// A64 Context getters/setters
//A64OPC(SetCheckBit, Void, U1 )
//A64OPC(GetCFlag, U1, )
//A64OPC(GetNZCVRaw, U32, )
//A64OPC(SetNZCVRaw, Void, U32 )
//A64OPC(SetNZCV, Void, NZCV )
//A64OPC(GetW, U32, A64Reg )
//A64OPC(GetX, U64, A64Reg )
//A64OPC(GetS, U128, A64Vec )
//A64OPC(GetD, U128, A64Vec )
//A64OPC(GetQ, U128, A64Vec )
//A64OPC(GetSP, U64, )
//A64OPC(GetFPCR, U32, )
//A64OPC(GetFPSR, U32, )
//A64OPC(SetW, Void, A64Reg, U32 )
//A64OPC(SetX, Void, A64Reg, U64 )
//A64OPC(SetS, Void, A64Vec, U128 )
//A64OPC(SetD, Void, A64Vec, U128 )
//A64OPC(SetQ, Void, A64Vec, U128 )
//A64OPC(SetSP, Void, U64 )
//A64OPC(SetFPCR, Void, U32 )
//A64OPC(SetFPSR, Void, U32 )
//A64OPC(OrQC, Void, U1 )
//A64OPC(SetPC, Void, U64 )
//A64OPC(CallSupervisor, Void, U32 )
//A64OPC(ExceptionRaised, Void, U64, U64 )
//A64OPC(DataCacheOperationRaised, Void, U64, U64 )
//A64OPC(DataSynchronizationBarrier, Void, )
//A64OPC(DataMemoryBarrier, Void, )
//A64OPC(InstructionSynchronizationBarrier, Void, )
//A64OPC(GetCNTFRQ, U32, )
//A64OPC(GetCNTPCT, U64, )
//A64OPC(GetCTR, U32, )
//A64OPC(GetDCZID, U32, )
//A64OPC(GetTPIDR, U64, )
//A64OPC(GetTPIDRRO, U64, )
//A64OPC(SetTPIDR, Void, U64 )
// Hints
OPCODE(PushRSB, Void, U64 )
// Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, U1, Opaque )
OPCODE(GetOverflowFromOp, U1, Opaque )
OPCODE(GetGEFromOp, U32, Opaque )
OPCODE(GetNZCVFromOp, NZCV, Opaque )
OPCODE(GetUpperFromOp, U128, Opaque )
OPCODE(GetLowerFromOp, U128, Opaque )
OPCODE(NZCVFromPackedFlags, NZCV, U32 )
// Calculations
OPCODE(Pack2x32To1x64, U64, U32, U32 )
//OPCODE(Pack2x64To1x128, U128, U64, U64 )
OPCODE(LeastSignificantWord, U32, U64 )
OPCODE(MostSignificantWord, U32, U64 )
OPCODE(LeastSignificantHalf, U16, U32 )
OPCODE(LeastSignificantByte, U8, U32 )
OPCODE(MostSignificantBit, U1, U32 )
OPCODE(IsZero32, U1, U32 )
OPCODE(IsZero64, U1, U64 )
OPCODE(TestBit, U1, U64, U8 )
OPCODE(ConditionalSelect32, U32, Cond, U32, U32 )
OPCODE(ConditionalSelect64, U64, Cond, U64, U64 )
OPCODE(ConditionalSelectNZCV, NZCV, Cond, NZCV, NZCV )
OPCODE(LogicalShiftLeft32, U32, U32, U8, U1 )
OPCODE(LogicalShiftLeft64, U64, U64, U8 )
OPCODE(LogicalShiftRight32, U32, U32, U8, U1 )
OPCODE(LogicalShiftRight64, U64, U64, U8 )
OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 )
//OPCODE(ArithmeticShiftRight64, U64, U64, U8 )
OPCODE(RotateRight32, U32, U32, U8, U1 )
OPCODE(RotateRight64, U64, U64, U8 )
OPCODE(RotateRightExtended, U32, U32, U1 )
OPCODE(Add32, U32, U32, U32, U1 )
OPCODE(Add64, U64, U64, U64, U1 )
OPCODE(Sub32, U32, U32, U32, U1 )
OPCODE(Sub64, U64, U64, U64, U1 )
OPCODE(Mul32, U32, U32, U32 )
OPCODE(Mul64, U64, U64, U64 )
//OPCODE(SignedMultiplyHigh64, U64, U64, U64 )
//OPCODE(UnsignedMultiplyHigh64, U64, U64, U64 )
OPCODE(UnsignedDiv32, U32, U32, U32 )
OPCODE(UnsignedDiv64, U64, U64, U64 )
OPCODE(SignedDiv32, U32, U32, U32 )
OPCODE(SignedDiv64, U64, U64, U64 )
OPCODE(And32, U32, U32, U32 )
OPCODE(And64, U64, U64, U64 )
OPCODE(Eor32, U32, U32, U32 )
OPCODE(Eor64, U64, U64, U64 )
OPCODE(Or32, U32, U32, U32 )
OPCODE(Or64, U64, U64, U64 )
OPCODE(Not32, U32, U32 )
OPCODE(Not64, U64, U64 )
OPCODE(SignExtendByteToWord, U32, U8 )
OPCODE(SignExtendHalfToWord, U32, U16 )
OPCODE(SignExtendByteToLong, U64, U8 )
OPCODE(SignExtendHalfToLong, U64, U16 )
OPCODE(SignExtendWordToLong, U64, U32 )
OPCODE(ZeroExtendByteToWord, U32, U8 )
OPCODE(ZeroExtendHalfToWord, U32, U16 )
OPCODE(ZeroExtendByteToLong, U64, U8 )
OPCODE(ZeroExtendHalfToLong, U64, U16 )
OPCODE(ZeroExtendWordToLong, U64, U32 )
//OPCODE(ZeroExtendLongToQuad, U128, U64 )
//OPCODE(ByteReverseDual, U64, U64 )
OPCODE(ByteReverseWord, U32, U32 )
OPCODE(ByteReverseHalf, U16, U16 )
OPCODE(CountLeadingZeros32, U32, U32 )
OPCODE(CountLeadingZeros64, U64, U64 )
//OPCODE(ExtractRegister32, U32, U32, U32, U8 )
//OPCODE(ExtractRegister64, U64, U64, U64, U8 )
//OPCODE(MaxSigned32, U32, U32, U32 )
//OPCODE(MaxSigned64, U64, U64, U64 )
//OPCODE(MaxUnsigned32, U32, U32, U32 )
//OPCODE(MaxUnsigned64, U64, U64, U64 )
//OPCODE(MinSigned32, U32, U32, U32 )
//OPCODE(MinSigned64, U64, U64, U64 )
//OPCODE(MinUnsigned32, U32, U32, U32 )
//OPCODE(MinUnsigned64, U64, U64, U64 )
// Saturated instructions
OPCODE(SignedSaturatedAdd8, U8, U8, U8 )
OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
OPCODE(SignedSaturatedAdd64, U64, U64, U64 )
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 )
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 )
OPCODE(SignedSaturatedSub8, U8, U8, U8 )
OPCODE(SignedSaturatedSub16, U16, U16, U16 )
OPCODE(SignedSaturatedSub32, U32, U32, U32 )
OPCODE(SignedSaturatedSub64, U64, U64, U64 )
OPCODE(SignedSaturation, U32, U32, U8 )
//OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedAdd64, U64, U64, U64 )
//OPCODE(UnsignedSaturatedSub8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
OPCODE(UnsignedSaturation, U32, U32, U8 )
// Packed instructions
OPCODE(PackedAddU8, U32, U32, U32 )
OPCODE(PackedAddS8, U32, U32, U32 )
OPCODE(PackedSubU8, U32, U32, U32 )
OPCODE(PackedSubS8, U32, U32, U32 )
OPCODE(PackedAddU16, U32, U32, U32 )
OPCODE(PackedAddS16, U32, U32, U32 )
OPCODE(PackedSubU16, U32, U32, U32 )
OPCODE(PackedSubS16, U32, U32, U32 )
OPCODE(PackedAddSubU16, U32, U32, U32 )
OPCODE(PackedAddSubS16, U32, U32, U32 )
OPCODE(PackedSubAddU16, U32, U32, U32 )
OPCODE(PackedSubAddS16, U32, U32, U32 )
OPCODE(PackedHalvingAddU8, U32, U32, U32 )
OPCODE(PackedHalvingAddS8, U32, U32, U32 )
OPCODE(PackedHalvingSubU8, U32, U32, U32 )
OPCODE(PackedHalvingSubS8, U32, U32, U32 )
OPCODE(PackedHalvingAddU16, U32, U32, U32 )
OPCODE(PackedHalvingAddS16, U32, U32, U32 )
OPCODE(PackedHalvingSubU16, U32, U32, U32 )
OPCODE(PackedHalvingSubS16, U32, U32, U32 )
OPCODE(PackedHalvingAddSubU16, U32, U32, U32 )
OPCODE(PackedHalvingAddSubS16, U32, U32, U32 )
OPCODE(PackedHalvingSubAddU16, U32, U32, U32 )
OPCODE(PackedHalvingSubAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
OPCODE(PackedSaturatedSubU8, U32, U32, U32 )
OPCODE(PackedSaturatedSubS8, U32, U32, U32 )
OPCODE(PackedSaturatedAddU16, U32, U32, U32 )
OPCODE(PackedSaturatedAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedSubU16, U32, U32, U32 )
OPCODE(PackedSaturatedSubS16, U32, U32, U32 )
OPCODE(PackedAbsDiffSumS8, U32, U32, U32 )
OPCODE(PackedSelect, U32, U32, U32, U32 )
// CRC instructions
//OPCODE(CRC32Castagnoli8, U32, U32, U32 )
//OPCODE(CRC32Castagnoli16, U32, U32, U32 )
//OPCODE(CRC32Castagnoli32, U32, U32, U32 )
//OPCODE(CRC32Castagnoli64, U32, U32, U64 )
//OPCODE(CRC32ISO8, U32, U32, U32 )
//OPCODE(CRC32ISO16, U32, U32, U32 )
//OPCODE(CRC32ISO32, U32, U32, U32 )
//OPCODE(CRC32ISO64, U32, U32, U64 )
// AES instructions
//OPCODE(AESDecryptSingleRound, U128, U128 )
//OPCODE(AESEncryptSingleRound, U128, U128 )
//OPCODE(AESInverseMixColumns, U128, U128 )
//OPCODE(AESMixColumns, U128, U128 )
// SM4 instructions
//OPCODE(SM4AccessSubstitutionBox, U8, U8 )
// Vector instructions
//OPCODE(VectorGetElement8, U8, U128, U8 )
//OPCODE(VectorGetElement16, U16, U128, U8 )
//OPCODE(VectorGetElement32, U32, U128, U8 )
//OPCODE(VectorGetElement64, U64, U128, U8 )
//OPCODE(VectorSetElement8, U128, U128, U8, U8 )
//OPCODE(VectorSetElement16, U128, U128, U8, U16 )
//OPCODE(VectorSetElement32, U128, U128, U8, U32 )
//OPCODE(VectorSetElement64, U128, U128, U8, U64 )
//OPCODE(VectorAbs8, U128, U128 )
//OPCODE(VectorAbs16, U128, U128 )
//OPCODE(VectorAbs32, U128, U128 )
//OPCODE(VectorAbs64, U128, U128 )
//OPCODE(VectorAdd8, U128, U128, U128 )
//OPCODE(VectorAdd16, U128, U128, U128 )
//OPCODE(VectorAdd32, U128, U128, U128 )
//OPCODE(VectorAdd64, U128, U128, U128 )
//OPCODE(VectorAnd, U128, U128, U128 )
//OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 )
//OPCODE(VectorArithmeticVShift8, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift16, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift32, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift64, U128, U128, U128 )
//OPCODE(VectorBroadcastLower8, U128, U8 )
//OPCODE(VectorBroadcastLower16, U128, U16 )
//OPCODE(VectorBroadcastLower32, U128, U32 )
//OPCODE(VectorBroadcast8, U128, U8 )
//OPCODE(VectorBroadcast16, U128, U16 )
//OPCODE(VectorBroadcast32, U128, U32 )
//OPCODE(VectorBroadcast64, U128, U64 )
//OPCODE(VectorCountLeadingZeros8, U128, U128 )
//OPCODE(VectorCountLeadingZeros16, U128, U128 )
//OPCODE(VectorCountLeadingZeros32, U128, U128 )
//OPCODE(VectorDeinterleaveEven8, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven16, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven32, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven64, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd8, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd16, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd32, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd64, U128, U128, U128 )
//OPCODE(VectorEor, U128, U128, U128 )
//OPCODE(VectorEqual8, U128, U128, U128 )
//OPCODE(VectorEqual16, U128, U128, U128 )
//OPCODE(VectorEqual32, U128, U128, U128 )
//OPCODE(VectorEqual64, U128, U128, U128 )
//OPCODE(VectorEqual128, U128, U128, U128 )
//OPCODE(VectorExtract, U128, U128, U128, U8 )
//OPCODE(VectorExtractLower, U128, U128, U128, U8 )
//OPCODE(VectorGreaterS8, U128, U128, U128 )
//OPCODE(VectorGreaterS16, U128, U128, U128 )
//OPCODE(VectorGreaterS32, U128, U128, U128 )
//OPCODE(VectorGreaterS64, U128, U128, U128 )
//OPCODE(VectorHalvingAddS8, U128, U128, U128 )
//OPCODE(VectorHalvingAddS16, U128, U128, U128 )
//OPCODE(VectorHalvingAddS32, U128, U128, U128 )
//OPCODE(VectorHalvingAddU8, U128, U128, U128 )
//OPCODE(VectorHalvingAddU16, U128, U128, U128 )
//OPCODE(VectorHalvingAddU32, U128, U128, U128 )
//OPCODE(VectorHalvingSubS8, U128, U128, U128 )
//OPCODE(VectorHalvingSubS16, U128, U128, U128 )
//OPCODE(VectorHalvingSubS32, U128, U128, U128 )
//OPCODE(VectorHalvingSubU8, U128, U128, U128 )
//OPCODE(VectorHalvingSubU16, U128, U128, U128 )
//OPCODE(VectorHalvingSubU32, U128, U128, U128 )
//OPCODE(VectorInterleaveLower8, U128, U128, U128 )
//OPCODE(VectorInterleaveLower16, U128, U128, U128 )
//OPCODE(VectorInterleaveLower32, U128, U128, U128 )
//OPCODE(VectorInterleaveLower64, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper8, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper16, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper32, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper64, U128, U128, U128 )
//OPCODE(VectorLogicalShiftLeft8, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft16, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft32, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft64, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight8, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight16, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight32, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight64, U128, U128, U8 )
//OPCODE(VectorLogicalVShift8, U128, U128, U128 )
//OPCODE(VectorLogicalVShift16, U128, U128, U128 )
//OPCODE(VectorLogicalVShift32, U128, U128, U128 )
//OPCODE(VectorLogicalVShift64, U128, U128, U128 )
//OPCODE(VectorMaxS8, U128, U128, U128 )
//OPCODE(VectorMaxS16, U128, U128, U128 )
//OPCODE(VectorMaxS32, U128, U128, U128 )
//OPCODE(VectorMaxS64, U128, U128, U128 )
//OPCODE(VectorMaxU8, U128, U128, U128 )
//OPCODE(VectorMaxU16, U128, U128, U128 )
//OPCODE(VectorMaxU32, U128, U128, U128 )
//OPCODE(VectorMaxU64, U128, U128, U128 )
//OPCODE(VectorMinS8, U128, U128, U128 )
//OPCODE(VectorMinS16, U128, U128, U128 )
//OPCODE(VectorMinS32, U128, U128, U128 )
//OPCODE(VectorMinS64, U128, U128, U128 )
//OPCODE(VectorMinU8, U128, U128, U128 )
//OPCODE(VectorMinU16, U128, U128, U128 )
//OPCODE(VectorMinU32, U128, U128, U128 )
//OPCODE(VectorMinU64, U128, U128, U128 )
//OPCODE(VectorMultiply8, U128, U128, U128 )
//OPCODE(VectorMultiply16, U128, U128, U128 )
//OPCODE(VectorMultiply32, U128, U128, U128 )
//OPCODE(VectorMultiply64, U128, U128, U128 )
//OPCODE(VectorNarrow16, U128, U128 )
//OPCODE(VectorNarrow32, U128, U128 )
//OPCODE(VectorNarrow64, U128, U128 )
//OPCODE(VectorNot, U128, U128 )
//OPCODE(VectorOr, U128, U128, U128 )
//OPCODE(VectorPairedAddLower8, U128, U128, U128 )
//OPCODE(VectorPairedAddLower16, U128, U128, U128 )
//OPCODE(VectorPairedAddLower32, U128, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden8, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden16, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden32, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden8, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden16, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden32, U128, U128 )
//OPCODE(VectorPairedAdd8, U128, U128, U128 )
//OPCODE(VectorPairedAdd16, U128, U128, U128 )
//OPCODE(VectorPairedAdd32, U128, U128, U128 )
//OPCODE(VectorPairedAdd64, U128, U128, U128 )
//OPCODE(VectorPairedMaxS8, U128, U128, U128 )
//OPCODE(VectorPairedMaxS16, U128, U128, U128 )
//OPCODE(VectorPairedMaxS32, U128, U128, U128 )
//OPCODE(VectorPairedMaxU8, U128, U128, U128 )
//OPCODE(VectorPairedMaxU16, U128, U128, U128 )
//OPCODE(VectorPairedMaxU32, U128, U128, U128 )
//OPCODE(VectorPairedMinS8, U128, U128, U128 )
//OPCODE(VectorPairedMinS16, U128, U128, U128 )
//OPCODE(VectorPairedMinS32, U128, U128, U128 )
//OPCODE(VectorPairedMinU8, U128, U128, U128 )
//OPCODE(VectorPairedMinU16, U128, U128, U128 )
//OPCODE(VectorPairedMinU32, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiply8, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 )
//OPCODE(VectorPopulationCount, U128, U128 )
//OPCODE(VectorReverseBits, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU8, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU16, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS8, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS16, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS64, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU8, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
//OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 )
//OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 )
//OPCODE(VectorShuffleWords, U128, U128, U8 )
//OPCODE(VectorSignExtend8, U128, U128 )
//OPCODE(VectorSignExtend16, U128, U128 )
//OPCODE(VectorSignExtend32, U128, U128 )
//OPCODE(VectorSignExtend64, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference8, U128, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference16, U128, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference32, U128, U128, U128 )
//OPCODE(VectorSignedMultiply16, Void, U128, U128 )
//OPCODE(VectorSignedMultiply32, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs8, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs16, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs32, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs64, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg8, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg16, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg32, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg64, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft64, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned64, U128, U128, U128 )
//OPCODE(VectorSub8, U128, U128, U128 )
//OPCODE(VectorSub16, U128, U128, U128 )
//OPCODE(VectorSub32, U128, U128, U128 )
//OPCODE(VectorSub64, U128, U128, U128 )
//OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque )
//OPCODE(VectorTableLookup, U128, U128, Table, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
//OPCODE(VectorUnsignedMultiply16, Void, U128, U128 )
//OPCODE(VectorUnsignedMultiply32, Void, U128, U128 )
//OPCODE(VectorUnsignedRecipEstimate, U128, U128 )
//OPCODE(VectorUnsignedRecipSqrtEstimate, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned8, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned16, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned32, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned64, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft8, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft16, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft32, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft64, U128, U128, U128 )
//OPCODE(VectorZeroExtend8, U128, U128 )
//OPCODE(VectorZeroExtend16, U128, U128 )
//OPCODE(VectorZeroExtend32, U128, U128 )
//OPCODE(VectorZeroExtend64, U128, U128 )
//OPCODE(VectorZeroUpper, U128, U128 )
//OPCODE(ZeroVector, U128, )
// Floating-point operations
//OPCODE(FPAbs16, U16, U16 )
OPCODE(FPAbs32, U32, U32 )
OPCODE(FPAbs64, U64, U64 )
OPCODE(FPAdd32, U32, U32, U32 )
OPCODE(FPAdd64, U64, U64, U64 )
OPCODE(FPCompare32, NZCV, U32, U32, U1 )
OPCODE(FPCompare64, NZCV, U64, U64, U1 )
OPCODE(FPDiv32, U32, U32, U32 )
OPCODE(FPDiv64, U64, U64, U64 )
//OPCODE(FPMax32, U32, U32, U32 )
//OPCODE(FPMax64, U64, U64, U64 )
//OPCODE(FPMaxNumeric32, U32, U32, U32 )
//OPCODE(FPMaxNumeric64, U64, U64, U64 )
//OPCODE(FPMin32, U32, U32, U32 )
//OPCODE(FPMin64, U64, U64, U64 )
//OPCODE(FPMinNumeric32, U32, U32, U32 )
//OPCODE(FPMinNumeric64, U64, U64, U64 )
OPCODE(FPMul32, U32, U32, U32 )
OPCODE(FPMul64, U64, U64, U64 )
//OPCODE(FPMulAdd16, U16, U16, U16, U16 )
//OPCODE(FPMulAdd32, U32, U32, U32, U32 )
//OPCODE(FPMulAdd64, U64, U64, U64, U64 )
//OPCODE(FPMulX32, U32, U32, U32 )
//OPCODE(FPMulX64, U64, U64, U64 )
//OPCODE(FPNeg16, U16, U16 )
OPCODE(FPNeg32, U32, U32 )
OPCODE(FPNeg64, U64, U64 )
//OPCODE(FPRecipEstimate16, U16, U16 )
//OPCODE(FPRecipEstimate32, U32, U32 )
//OPCODE(FPRecipEstimate64, U64, U64 )
//OPCODE(FPRecipExponent16, U16, U16 )
//OPCODE(FPRecipExponent32, U32, U32 )
//OPCODE(FPRecipExponent64, U64, U64 )
//OPCODE(FPRecipStepFused16, U16, U16, U16 )
//OPCODE(FPRecipStepFused32, U32, U32, U32 )
//OPCODE(FPRecipStepFused64, U64, U64, U64 )
//OPCODE(FPRoundInt16, U16, U16, U8, U1 )
//OPCODE(FPRoundInt32, U32, U32, U8, U1 )
//OPCODE(FPRoundInt64, U64, U64, U8, U1 )
//OPCODE(FPRSqrtEstimate16, U16, U16 )
//OPCODE(FPRSqrtEstimate32, U32, U32 )
//OPCODE(FPRSqrtEstimate64, U64, U64 )
//OPCODE(FPRSqrtStepFused16, U16, U16, U16 )
//OPCODE(FPRSqrtStepFused32, U32, U32, U32 )
//OPCODE(FPRSqrtStepFused64, U64, U64, U64 )
OPCODE(FPSqrt32, U32, U32 )
OPCODE(FPSqrt64, U64, U64 )
OPCODE(FPSub32, U32, U32, U32 )
OPCODE(FPSub64, U64, U64, U64 )
// Floating-point conversions
OPCODE(FPHalfToDouble, U64, U16, U8 )
OPCODE(FPHalfToSingle, U32, U16, U8 )
OPCODE(FPSingleToDouble, U64, U32, U8 )
OPCODE(FPSingleToHalf, U16, U32, U8 )
OPCODE(FPDoubleToHalf, U16, U64, U8 )
OPCODE(FPDoubleToSingle, U32, U64, U8 )
OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
//OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 )
//OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 )
//OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 )
//OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 )
OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 )
OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 )
OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 )
OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 )
OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 )
OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 )
OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 )
OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 )
OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 )
// Floating-point vector instructions
//OPCODE(FPVectorAbs16, U128, U128 )
//OPCODE(FPVectorAbs32, U128, U128 )
//OPCODE(FPVectorAbs64, U128, U128 )
//OPCODE(FPVectorAdd32, U128, U128, U128 )
//OPCODE(FPVectorAdd64, U128, U128, U128 )
//OPCODE(FPVectorDiv32, U128, U128, U128 )
//OPCODE(FPVectorDiv64, U128, U128, U128 )
//OPCODE(FPVectorEqual32, U128, U128, U128 )
//OPCODE(FPVectorEqual64, U128, U128, U128 )
//OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorGreater32, U128, U128, U128 )
//OPCODE(FPVectorGreater64, U128, U128, U128 )
//OPCODE(FPVectorGreaterEqual32, U128, U128, U128 )
//OPCODE(FPVectorGreaterEqual64, U128, U128, U128 )
//OPCODE(FPVectorMax32, U128, U128, U128 )
//OPCODE(FPVectorMax64, U128, U128, U128 )
//OPCODE(FPVectorMin32, U128, U128, U128 )
//OPCODE(FPVectorMin64, U128, U128, U128 )
//OPCODE(FPVectorMul32, U128, U128, U128 )
//OPCODE(FPVectorMul64, U128, U128, U128 )
//OPCODE(FPVectorMulAdd16, U128, U128, U128, U128 )
//OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 )
//OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 )
//OPCODE(FPVectorMulX32, U128, U128, U128 )
//OPCODE(FPVectorMulX64, U128, U128, U128 )
//OPCODE(FPVectorNeg16, U128, U128 )
//OPCODE(FPVectorNeg32, U128, U128 )
//OPCODE(FPVectorNeg64, U128, U128 )
//OPCODE(FPVectorPairedAdd32, U128, U128, U128 )
//OPCODE(FPVectorPairedAdd64, U128, U128, U128 )
//OPCODE(FPVectorPairedAddLower32, U128, U128, U128 )
//OPCODE(FPVectorPairedAddLower64, U128, U128, U128 )
//OPCODE(FPVectorRecipEstimate16, U128, U128 )
//OPCODE(FPVectorRecipEstimate32, U128, U128 )
//OPCODE(FPVectorRecipEstimate64, U128, U128 )
//OPCODE(FPVectorRecipStepFused16, U128, U128, U128 )
//OPCODE(FPVectorRecipStepFused32, U128, U128, U128 )
//OPCODE(FPVectorRecipStepFused64, U128, U128, U128 )
//OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
//OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
//OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
//OPCODE(FPVectorRSqrtEstimate16, U128, U128 )
//OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
//OPCODE(FPVectorRSqrtEstimate64, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused16, U128, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 )
//OPCODE(FPVectorSqrt32, U128, U128 )
//OPCODE(FPVectorSqrt64, U128, U128 )
//OPCODE(FPVectorSub32, U128, U128, U128 )
//OPCODE(FPVectorSub64, U128, U128, U128 )
//OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8 )
//OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed16, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 )
// A32 Memory access
A32OPC(ClearExclusive, Void, )
A32OPC(SetExclusive, Void, U32, U8 )
A32OPC(ReadMemory8, U8, U32 )
A32OPC(ReadMemory16, U16, U32 )
A32OPC(ReadMemory32, U32, U32 )
A32OPC(ReadMemory64, U64, U32 )
A32OPC(WriteMemory8, Void, U32, U8 )
A32OPC(WriteMemory16, Void, U32, U16 )
A32OPC(WriteMemory32, Void, U32, U32 )
A32OPC(WriteMemory64, Void, U32, U64 )
A32OPC(ExclusiveWriteMemory8, U32, U32, U8 )
A32OPC(ExclusiveWriteMemory16, U32, U32, U16 )
A32OPC(ExclusiveWriteMemory32, U32, U32, U32 )
A32OPC(ExclusiveWriteMemory64, U32, U32, U64 )
// A64 Memory access
//A64OPC(ClearExclusive, Void, )
//A64OPC(SetExclusive, Void, U64, U8 )
//A64OPC(ReadMemory8, U8, U64 )
//A64OPC(ReadMemory16, U16, U64 )
//A64OPC(ReadMemory32, U32, U64 )
//A64OPC(ReadMemory64, U64, U64 )
//A64OPC(ReadMemory128, U128, U64 )
//A64OPC(WriteMemory8, Void, U64, U8 )
//A64OPC(WriteMemory16, Void, U64, U16 )
//A64OPC(WriteMemory32, Void, U64, U32 )
//A64OPC(WriteMemory64, Void, U64, U64 )
//A64OPC(WriteMemory128, Void, U64, U128 )
//A64OPC(ExclusiveWriteMemory8, U32, U64, U8 )
//A64OPC(ExclusiveWriteMemory16, U32, U64, U16 )
//A64OPC(ExclusiveWriteMemory32, U32, U64, U32 )
//A64OPC(ExclusiveWriteMemory64, U32, U64, U64 )
//A64OPC(ExclusiveWriteMemory128, U32, U64, U128 )
// Coprocessor
A32OPC(CoprocInternalOperation, Void, CoprocInfo )
A32OPC(CoprocSendOneWord, Void, CoprocInfo, U32 )
A32OPC(CoprocSendTwoWords, Void, CoprocInfo, U32, U32 )
A32OPC(CoprocGetOneWord, U32, CoprocInfo )
A32OPC(CoprocGetTwoWords, U64, CoprocInfo )
A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 )
A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )
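The list above is an X-macro table: each OPCODE/A32OPC entry names an IR opcode together with its return type and argument types, and the commented-out entries appear to be opcodes this backend does not implement yet. A minimal sketch of how such a table is typically consumed (a trimmed-down inline list stands in for including opcodes.inc, and the type tags are ignored here):

#include <cstddef>
#include <string_view>

// Tiny stand-in for opcodes.inc; the real consumer defines OPCODE/A32OPC
// and then includes the file to expand every entry.
#define OPCODE_LIST(X)            \
    X(Void, Void)                 \
    X(Add32, U32, U32, U32, U1)   \
    X(Sub32, U32, U32, U32, U1)

enum class Opcode {
#define OPCODE_ENUM(name, ...) name,
    OPCODE_LIST(OPCODE_ENUM)
#undef OPCODE_ENUM
};

constexpr std::string_view opcode_names[] = {
#define OPCODE_NAME(name, ...) #name,
    OPCODE_LIST(OPCODE_NAME)
#undef OPCODE_NAME
};

static_assert(opcode_names[static_cast<std::size_t>(Opcode::Add32)] == "Add32");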

View File

@ -0,0 +1,89 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstddef>
#include <string>
#ifdef __linux__
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <sys/types.h>
#include <unistd.h>
#include <fmt/format.h>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace {
std::mutex mutex;
std::FILE* file = nullptr;
void OpenFile() {
const char* perf_dir = std::getenv("PERF_BUILDID_DIR");
if (!perf_dir) {
file = nullptr;
return;
}
const pid_t pid = getpid();
const std::string filename = fmt::format("{:s}/perf-{:d}.map", perf_dir, pid);
file = std::fopen(filename.c_str(), "w");
if (!file) {
return;
}
std::setvbuf(file, nullptr, _IONBF, 0);
}
} // anonymous namespace
namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) {
std::lock_guard guard{mutex};
if (!file) {
OpenFile();
if (!file) {
return;
}
}
const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
}
} // namespace detail
void PerfMapClear() {
std::lock_guard guard{mutex};
if (!file) {
return;
}
std::fclose(file);
file = nullptr;
OpenFile();
}
} // namespace Dynarmic::BackendA64
#else
namespace Dynarmic::BackendA64 {
namespace detail {
void PerfMapRegister(const void*, const void*, const std::string&) {}
} // namespace detail
void PerfMapClear() {}
} // namespace Dynarmic::BackendA64
#endif
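For reference, the mapping file written above follows the simple perf JIT-map convention: one "<start> <size> <name>" line per symbol, hex-encoded, written to $PERF_BUILDID_DIR/perf-<pid>.map (perf conventionally also looks for /tmp/perf-<pid>.map). A standalone sketch producing one such line, with made-up values:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint64_t start = 0x7f0000001000;  // start address of the emitted block
    const std::uint64_t size = 0x40;             // size of the block in bytes
    std::printf("%016llx %016llx %s\n",
                static_cast<unsigned long long>(start),
                static_cast<unsigned long long>(size),
                "dynarmic_block_example");
}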

View File

@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
#include <string>
#include "common/cast_util.h"
namespace Dynarmic::BackendA64 {
namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name);
} // namespace detail
template<typename T>
void PerfMapRegister(T start, const void* end, const std::string& friendly_name) {
detail::PerfMapRegister(Common::BitCast<const void*>(start), end, friendly_name);
}
void PerfMapClear();
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,650 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <algorithm>
#include <numeric>
#include <utility>
#include <fmt/ostream.h>
#include "backend/A64/abi.h"
#include "backend/A64/reg_alloc.h"
#include "common/assert.h"
namespace Dynarmic::BackendA64 {
static u64 ImmediateToU64(const IR::Value& imm) {
switch (imm.GetType()) {
case IR::Type::U1:
return u64(imm.GetU1());
case IR::Type::U8:
return u64(imm.GetU8());
case IR::Type::U16:
return u64(imm.GetU16());
case IR::Type::U32:
return u64(imm.GetU32());
case IR::Type::U64:
return u64(imm.GetU64());
default:
ASSERT_FALSE("This should never happen.");
}
}
static bool CanExchange(HostLoc a, HostLoc b) {
return HostLocIsGPR(a) && HostLocIsGPR(b);
}
// Minimum number of bits required to represent a type
static size_t GetBitWidth(IR::Type type) {
switch (type) {
case IR::Type::A32Reg:
case IR::Type::A32ExtReg:
case IR::Type::A64Reg:
case IR::Type::A64Vec:
case IR::Type::CoprocInfo:
case IR::Type::Cond:
case IR::Type::Void:
case IR::Type::Table:
ASSERT_FALSE("Type {} cannot be represented at runtime", type);
return 0;
case IR::Type::Opaque:
ASSERT_FALSE("Not a concrete type");
return 0;
case IR::Type::U1:
return 8;
case IR::Type::U8:
return 8;
case IR::Type::U16:
return 16;
case IR::Type::U32:
return 32;
case IR::Type::U64:
return 64;
case IR::Type::U128:
return 128;
case IR::Type::NZCVFlags:
return 32; // TODO: Update to 16 when flags optimization is done
}
UNREACHABLE();
return 0;
}
static bool IsValuelessType(IR::Type type) {
switch (type) {
case IR::Type::Table:
return true;
default:
return false;
}
}
bool HostLocInfo::IsLocked() const {
return is_being_used_count > 0;
}
bool HostLocInfo::IsEmpty() const {
return is_being_used_count == 0 && values.empty();
}
bool HostLocInfo::IsLastUse() const {
return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
}
void HostLocInfo::ReadLock() {
ASSERT(!is_scratch);
is_being_used_count++;
}
void HostLocInfo::WriteLock() {
ASSERT(is_being_used_count == 0);
is_being_used_count++;
is_scratch = true;
}
void HostLocInfo::AddArgReference() {
current_references++;
ASSERT(accumulated_uses + current_references <= total_uses);
}
void HostLocInfo::ReleaseOne() {
is_being_used_count--;
is_scratch = false;
if (current_references == 0)
return;
accumulated_uses++;
current_references--;
if (current_references == 0)
ReleaseAll();
}
void HostLocInfo::ReleaseAll() {
accumulated_uses += current_references;
current_references = 0;
ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));
if (total_uses == accumulated_uses) {
values.clear();
accumulated_uses = 0;
total_uses = 0;
max_bit_width = 0;
}
is_being_used_count = 0;
is_scratch = false;
}
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
return std::find(values.begin(), values.end(), inst) != values.end();
}
size_t HostLocInfo::GetMaxBitWidth() const {
return max_bit_width;
}
void HostLocInfo::AddValue(IR::Inst* inst) {
values.push_back(inst);
total_uses += inst->UseCount();
max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
}
IR::Type Argument::GetType() const {
return value.GetType();
}
bool Argument::IsImmediate() const {
return value.IsImmediate();
}
bool Argument::IsVoid() const {
return GetType() == IR::Type::Void;
}
bool Argument::FitsInImmediateU32() const {
if (!IsImmediate())
return false;
u64 imm = ImmediateToU64(value);
return imm < 0x100000000;
}
bool Argument::FitsInImmediateS32() const {
if (!IsImmediate())
return false;
s64 imm = static_cast<s64>(ImmediateToU64(value));
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
}
bool Argument::GetImmediateU1() const {
return value.GetU1();
}
u8 Argument::GetImmediateU8() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x100);
return u8(imm);
}
u16 Argument::GetImmediateU16() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x10000);
return u16(imm);
}
u32 Argument::GetImmediateU32() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x100000000);
return u32(imm);
}
u64 Argument::GetImmediateS32() const {
ASSERT(FitsInImmediateS32());
u64 imm = ImmediateToU64(value);
return imm;
}
u64 Argument::GetImmediateU64() const {
return ImmediateToU64(value);
}
IR::Cond Argument::GetImmediateCond() const {
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
return value.GetCond();
}
bool Argument::IsInGpr() const {
if (IsImmediate())
return false;
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
}
bool Argument::IsInFpr() const {
if (IsImmediate())
return false;
return HostLocIsFPR(*reg_alloc.ValueLocation(value.GetInst()));
}
bool Argument::IsInMemory() const {
if (IsImmediate())
return false;
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value& arg = inst->GetArg(i);
ret[i].value = arg;
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already have been defined");
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
}
}
return ret;
}
Arm64Gen::ARM64Reg RegAlloc::UseGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseImpl(arg.value, any_gpr));
}
Arm64Gen::ARM64Reg RegAlloc::UseFpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToFpr(UseImpl(arg.value, any_fpr));
}
//OpArg RegAlloc::UseOpArg(Argument& arg) {
// return UseGpr(arg);
//}
void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
ASSERT(!arg.allocated);
arg.allocated = true;
UseImpl(arg.value, {host_loc});
}
Arm64Gen::ARM64Reg RegAlloc::UseScratchGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseScratchImpl(arg.value, any_gpr));
}
Arm64Gen::ARM64Reg RegAlloc::UseScratchFpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToFpr(UseScratchImpl(arg.value, any_fpr));
}
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
ASSERT(!arg.allocated);
arg.allocated = true;
UseScratchImpl(arg.value, {host_loc});
}
void RegAlloc::DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg) {
ASSERT(IsVector(reg) || IsGPR(reg));
HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
DefineValueImpl(inst, hostloc);
}
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
DefineValueImpl(inst, arg.value);
}
void RegAlloc::Release(const Arm64Gen::ARM64Reg& reg) {
ASSERT(IsVector(reg) || IsGPR(reg));
const HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
LocInfo(hostloc).ReleaseOne();
}
Arm64Gen::ARM64Reg RegAlloc::ScratchGpr(HostLocList desired_locations) {
return HostLocToReg64(ScratchImpl(desired_locations));
}
Arm64Gen::ARM64Reg RegAlloc::ScratchFpr(HostLocList desired_locations) {
return HostLocToFpr(ScratchImpl(desired_locations));
}
HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
const IR::Inst* use_inst = use_value.GetInst();
const HostLoc current_location = *ValueLocation(use_inst);
const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
if (can_use_current_location) {
LocInfo(current_location).ReadLock();
return current_location;
}
if (LocInfo(current_location).IsLocked()) {
return UseScratchImpl(use_value, desired_locations);
}
const HostLoc destination_location = SelectARegister(desired_locations);
if (max_bit_width > HostLocBitWidth(destination_location)) {
return UseScratchImpl(use_value, desired_locations);
} else if (CanExchange(destination_location, current_location)) {
Exchange(destination_location, current_location);
} else {
MoveOutOfTheWay(destination_location);
Move(destination_location, current_location);
}
LocInfo(destination_location).ReadLock();
return destination_location;
}
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
const IR::Inst* use_inst = use_value.GetInst();
const HostLoc current_location = *ValueLocation(use_inst);
const size_t bit_width = GetBitWidth(use_inst->GetType());
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
if (!LocInfo(current_location).IsLastUse()) {
MoveOutOfTheWay(current_location);
}
LocInfo(current_location).WriteLock();
return current_location;
}
const HostLoc destination_location = SelectARegister(desired_locations);
MoveOutOfTheWay(destination_location);
CopyToScratch(bit_width, destination_location, current_location);
LocInfo(destination_location).WriteLock();
return destination_location;
}
HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
HostLoc location = SelectARegister(desired_locations);
MoveOutOfTheWay(location);
LocInfo(location).WriteLock();
return location;
}
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0,
std::optional<Argument::copyable_reference> arg1,
std::optional<Argument::copyable_reference> arg2,
std::optional<Argument::copyable_reference> arg3,
std::optional<Argument::copyable_reference> arg4,
std::optional<Argument::copyable_reference> arg5,
std::optional<Argument::copyable_reference> arg6,
std::optional<Argument::copyable_reference> arg7) {
constexpr size_t args_count = 8;
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8 };
const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7};
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
for (auto hostloc : args_hostloc)
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
return ret;
}();
for (size_t i = 0; i < args_count; i++) {
if (args[i]) {
UseScratch(*args[i], args_hostloc[i]);
}
}
for (size_t i = 0; i < args_count; i++) {
if (!args[i]) {
// TODO: Force spill
ScratchGpr({args_hostloc[i]});
}
}
for (HostLoc caller_saved : other_caller_save) {
ScratchImpl({caller_saved});
}
if (result_def) {
DefineValueImpl(result_def, ABI_RETURN);
}
}
void RegAlloc::EndOfAllocScope() {
for (auto& iter : hostloc_info) {
iter.ReleaseAll();
}
}
void RegAlloc::AssertNoMoreUses() {
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
}
HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
std::vector<HostLoc> candidates = desired_locations;
// Find all locations that have not been allocated.
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
return !this->LocInfo(loc).IsLocked();
});
candidates.erase(allocated_locs, candidates.end());
ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
// Selects the best location out of the available locations.
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
std::partition(candidates.begin(), candidates.end(), [this](auto loc){
return this->LocInfo(loc).IsEmpty();
});
return candidates.front();
}
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
for (size_t i = 0; i < hostloc_info.size(); i++)
if (hostloc_info[i].ContainsValue(value))
return static_cast<HostLoc>(i);
return std::nullopt;
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
LocInfo(host_loc).AddValue(def_inst);
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
if (use_inst.IsImmediate()) {
HostLoc location = ScratchImpl(any_gpr);
DefineValueImpl(def_inst, location);
LoadImmediate(use_inst, location);
return;
}
ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
HostLoc location = *ValueLocation(use_inst.GetInst());
DefineValueImpl(def_inst, location);
}
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
if (HostLocIsGPR(host_loc)) {
Arm64Gen::ARM64Reg reg = HostLocToReg64(host_loc);
u64 imm_value = ImmediateToU64(imm);
code.MOVI2R(reg, imm_value);
return host_loc;
}
if (HostLocIsFPR(host_loc)) {
Arm64Gen::ARM64Reg reg = Arm64Gen::EncodeRegToDouble(HostLocToFpr(host_loc));
u64 imm_value = ImmediateToU64(imm);
if (imm_value == 0)
code.fp_emitter.FMOV(reg, 0);
else {
code.EmitPatchLDR(reg, imm_value);
}
return host_loc;
}
UNREACHABLE();
}
void RegAlloc::Move(HostLoc to, HostLoc from) {
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
ASSERT(bit_width <= HostLocBitWidth(to));
if (LocInfo(from).IsEmpty()) {
return;
}
EmitMove(bit_width, to, from);
LocInfo(to) = std::exchange(LocInfo(from), {});
}
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
EmitMove(bit_width, to, from);
}
void RegAlloc::Exchange(HostLoc a, HostLoc b) {
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
if (LocInfo(a).IsEmpty()) {
Move(a, b);
return;
}
if (LocInfo(b).IsEmpty()) {
Move(b, a);
return;
}
EmitExchange(a, b);
std::swap(LocInfo(a), LocInfo(b));
}
void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
ASSERT(!LocInfo(reg).IsLocked());
if (!LocInfo(reg).IsEmpty()) {
SpillRegister(reg);
}
}
void RegAlloc::SpillRegister(HostLoc loc) {
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
HostLoc new_loc = FindFreeSpill();
Move(new_loc, loc);
}
HostLoc RegAlloc::FindFreeSpill() const {
for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
HostLoc loc = static_cast<HostLoc>(i);
if (LocInfo(loc).IsEmpty())
return loc;
}
ASSERT_FALSE("All spill locations are full");
}
HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
return hostloc_info[static_cast<size_t>(loc)];
}
const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
return hostloc_info[static_cast<size_t>(loc)];
}
void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
if (HostLocIsFPR(to) && HostLocIsFPR(from)) {
// bit_width == 128
//mov(HostLocToFpr(to), HostLocToFpr(from));
ASSERT_FALSE("Unimplemented");
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.MOV(HostLocToReg64(to), HostLocToReg64(from));
} else {
code.MOV(DecodeReg(HostLocToReg64(to)), DecodeReg(HostLocToReg64(from)));
}
} else if (HostLocIsFPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.fp_emitter.FMOV(EncodeRegToDouble(HostLocToFpr(to)), HostLocToReg64(from));
} else {
code.fp_emitter.FMOV(EncodeRegToSingle(HostLocToFpr(to)), DecodeReg(HostLocToReg64(from)));
}
} else if (HostLocIsGPR(to) && HostLocIsFPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.fp_emitter.FMOV(HostLocToReg64(to), EncodeRegToDouble(HostLocToFpr(from)));
} else {
code.fp_emitter.FMOV(DecodeReg(HostLocToReg64(to)), EncodeRegToSingle(HostLocToFpr(from)));
}
} else if (HostLocIsFPR(to) && HostLocIsSpill(from)) {
s32 spill_addr = spill_to_addr(from);
// ASSERT(spill_addr.getBit() >= bit_width);
code.fp_emitter.LDR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(to), Arm64Gen::X28, spill_addr);
} else if (HostLocIsSpill(to) && HostLocIsFPR(from)) {
s32 spill_addr = spill_to_addr(to);
// ASSERT(spill_addr.getBit() >= bit_width);
code.fp_emitter.STR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(from), Arm64Gen::X28, spill_addr);
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.LDR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(to), Arm64Gen::X28, spill_to_addr(from));
} else {
code.LDR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(to)), Arm64Gen::X28, spill_to_addr(from));
}
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.STR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(from), Arm64Gen::X28, spill_to_addr(to));
} else {
code.STR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(from)), Arm64Gen::X28, spill_to_addr(to));
}
} else {
ASSERT_FALSE("Invalid RegAlloc::EmitMove");
}
}
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
// Is this the best way to do it?
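// Three EORs perform an XOR swap, exchanging the two GPRs without needing a scratch register.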
code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
code.EOR(HostLocToReg64(b), HostLocToReg64(a), HostLocToReg64(b));
code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
} else if (HostLocIsFPR(a) && HostLocIsFPR(b)) {
ASSERT_FALSE("Check your code: Exchanging XMM registers is unnecessary");
} else {
ASSERT_FALSE("Invalid RegAlloc::EmitExchange");
}
}
} // namespace Dynarmic::BackendA64

@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <utility>
#include <vector>
#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
//#include "backend/A64/oparg.h"
#include "common/common_types.h"
#include "frontend/ir/cond.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/value.h"
namespace Dynarmic::BackendA64 {
class RegAlloc;
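// Bookkeeping for a single host location: which IR values currently live there, how many locks and references are outstanding, and the widest value it holds.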
struct HostLocInfo {
public:
bool IsLocked() const;
bool IsEmpty() const;
bool IsLastUse() const;
void ReadLock();
void WriteLock();
void AddArgReference();
void ReleaseOne();
void ReleaseAll();
bool ContainsValue(const IR::Inst* inst) const;
size_t GetMaxBitWidth() const;
void AddValue(IR::Inst* inst);
private:
// Current instruction state
size_t is_being_used_count = 0;
bool is_scratch = false;
// Block state
size_t current_references = 0;
size_t accumulated_uses = 0;
size_t total_uses = 0;
// Value state
std::vector<IR::Inst*> values;
size_t max_bit_width = 0;
};
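// A view of one argument of an IR instruction, obtained via RegAlloc::GetArgumentInfo and passed back to the Use*/UseScratch*/DefineValue methods.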
struct Argument {
public:
using copyable_reference = std::reference_wrapper<Argument>;
IR::Type GetType() const;
bool IsImmediate() const;
bool IsVoid() const;
bool FitsInImmediateU32() const;
bool FitsInImmediateS32() const;
bool GetImmediateU1() const;
u8 GetImmediateU8() const;
u16 GetImmediateU16() const;
u32 GetImmediateU32() const;
u64 GetImmediateS32() const;
u64 GetImmediateU64() const;
IR::Cond GetImmediateCond() const;
/// Is this value currently in a GPR?
bool IsInGpr() const;
/// Is this value currently in a FPR?
bool IsInFpr() const;
/// Is this value currently in memory?
bool IsInMemory() const;
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
bool allocated = false;
RegAlloc& reg_alloc;
IR::Value value;
};
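// Register allocator for the A64 backend: tracks which host register or spill slot holds each IR value and emits the moves, spills and exchanges needed to satisfy each instruction's operand constraints.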
class RegAlloc final {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
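// num_spills is the number of stack spill slots available; spill_to_addr maps a spill HostLoc to its offset from the spill base register (X28 in EmitMove).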
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<u64(HostLoc)> spill_to_addr)
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
Arm64Gen::ARM64Reg UseGpr(Argument& arg);
Arm64Gen::ARM64Reg UseFpr(Argument& arg);
//OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
Arm64Gen::ARM64Reg UseScratchGpr(Argument& arg);
Arm64Gen::ARM64Reg UseScratchFpr(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
void DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg);
void DefineValue(IR::Inst* inst, Argument& arg);
void Release(const Arm64Gen::ARM64Reg& reg);
Arm64Gen::ARM64Reg ScratchGpr(HostLocList desired_locations = any_gpr);
Arm64Gen::ARM64Reg ScratchFpr(HostLocList desired_locations = any_fpr);
void HostCall(IR::Inst* result_def = nullptr, std::optional<Argument::copyable_reference> arg0 = {},
std::optional<Argument::copyable_reference> arg1 = {},
std::optional<Argument::copyable_reference> arg2 = {},
std::optional<Argument::copyable_reference> arg3 = {},
std::optional<Argument::copyable_reference> arg4 = {},
std::optional<Argument::copyable_reference> arg5 = {},
std::optional<Argument::copyable_reference> arg6 = {},
std::optional<Argument::copyable_reference> arg7 = {});
// TODO: Values in host flags
void EndOfAllocScope();
void AssertNoMoreUses();
private:
friend struct Argument;
HostLoc SelectARegister(HostLocList desired_locations) const;
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
HostLoc ScratchImpl(HostLocList desired_locations);
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);
HostLoc LoadImmediate(IR::Value imm, HostLoc reg);
void Move(HostLoc to, HostLoc from);
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
void Exchange(HostLoc a, HostLoc b);
void MoveOutOfTheWay(HostLoc reg);
void SpillRegister(HostLoc loc);
HostLoc FindFreeSpill() const;
std::vector<HostLocInfo> hostloc_info;
HostLocInfo& LocInfo(HostLoc loc);
const HostLocInfo& LocInfo(HostLoc loc) const;
BlockOfCode& code;
std::function<u64(HostLoc)> spill_to_addr;
void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
void EmitExchange(HostLoc a, HostLoc b);
};
} // namespace Dynarmic::BackendA64

@ -44,4 +44,9 @@ u8 RecipEstimate(u64 a);
*/
u8 RecipSqrtEstimate(u64 a);
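/// True iff imm is a positive power of two, e.g. IsPow2(8) == true, IsPow2(12) == false, IsPow2(0) == false.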
template <typename T>
constexpr bool IsPow2(T imm) {
return imm > 0 && (imm & (imm - 1)) == 0;
}
} // namespace Dynarmic::Common