diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fc0220ca..0ce448e9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -376,7 +376,9 @@ elseif(ARCHITECTURE_Aarch64)
          backend/A64/reg_alloc.cpp
          backend/A64/reg_alloc.h
 
-         backend/A64/exception_handler_generic.cpp
+         backend/A64/exception_handler.h
+         #backend/A64/exception_handler_generic.cpp
+         backend/A64/exception_handler_posix.cpp
     )
 else()
     message(FATAL_ERROR "Unsupported architecture")
diff --git a/src/backend/A64/a32_emit_a64.cpp b/src/backend/A64/a32_emit_a64.cpp
index d5d1c440..8e0d35d7 100644
--- a/src/backend/A64/a32_emit_a64.cpp
+++ b/src/backend/A64/a32_emit_a64.cpp
@@ -4,6 +4,7 @@
  * General Public License version 2 or any later version.
  */
 
+#include <iterator>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
@@ -76,12 +77,18 @@ bool A32EmitContext::FPSCR_DN() const {
     return Location().FPSCR().DN();
 }
 
+std::ptrdiff_t A32EmitContext::GetInstOffset(IR::Inst* inst) const {
+    return std::distance(block.begin(), IR::Block::iterator(inst));
+}
+
 A32EmitA64::A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface)
     : EmitA64(code), config(std::move(config)), jit_interface(jit_interface) {
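+    // Register with the exception handler so faults in fastmem accesses are redirected to FastmemCallback.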
+    exception_handler.Register(code, [this](CodePtr PC){FastmemCallback(PC);});
     GenMemoryAccessors();
     GenTerminalHandlers();
     code.PreludeComplete();
     ClearFastDispatchTable();
+    fastmem_patch_info.clear();
 }
 
 A32EmitA64::~A32EmitA64() = default;
@@ -152,6 +159,7 @@ void A32EmitA64::ClearCache() {
     EmitA64::ClearCache();
     block_ranges.ClearCache();
     ClearFastDispatchTable();
+    fastmem_patch_info.clear();
 }
 
 void A32EmitA64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
@@ -781,137 +789,272 @@ void A32EmitA64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
     code.STR(INDEX_UNSIGNED, address, X28, offsetof(A32JitState, exclusive_address));
 }
 
-template <typename T, T (A32::UserCallbacks::*raw_fn)(A32::VAddr)>
-static void ReadMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) {
-    constexpr size_t bit_size = Common::BitSize<T>();
-    auto args = reg_alloc.GetArgumentInfo(inst);
-
-    if (!config.page_table) {
-        reg_alloc.HostCall(inst, {}, args[0]);
-        Devirtualize<raw_fn>(config.callbacks).EmitCall(code);
-        return;
-    }
-
-    reg_alloc.UseScratch(args[0], ABI_PARAM2);
-
-    Arm64Gen::ARM64Reg result = reg_alloc.ScratchGpr({ABI_RETURN});
-    Arm64Gen::ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
-    Arm64Gen::ARM64Reg page_index = reg_alloc.ScratchGpr();
-    Arm64Gen::ARM64Reg page_offset = reg_alloc.ScratchGpr();
-
-    FixupBranch abort, end;
-
-    code.MOVP2R(result, config.page_table);
-    code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
-    code.LDR(result, result, ArithOption{page_index, true});
-    abort = code.CBZ(result);
-    code.ANDI2R(DecodeReg(page_offset), DecodeReg(vaddr), 4095);
-    switch (bit_size) {
-    case 8:
-        code.LDRB(DecodeReg(result), result, ArithOption{ page_offset });
-        break;
-    case 16:
-        code.LDRH(DecodeReg(result), result, ArithOption{ page_offset });
-        break;
-    case 32:
-        code.LDR(DecodeReg(result), result, ArithOption{ page_offset });
-        break;
-    case 64:
-        code.LDR(result, result, ArithOption{ page_offset });
-        break;
-    default:
-        ASSERT_MSG(false, "Invalid bit_size");
-        break;
-    }
-    end = code.B();
-    code.SetJumpTarget(abort);
-    code.BL(wrapped_fn);
-    code.SetJumpTarget(end);
-
-    reg_alloc.DefineValue(inst, result);
+A32EmitA64::DoNotFastmemMarker A32EmitA64::GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst) {
+    return std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst));
 }
 
-template <typename T, void (A32::UserCallbacks::*raw_fn)(A32::VAddr, T)>
-static void WriteMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) {
-    constexpr size_t bit_size = Common::BitSize<T>();
-    auto args = reg_alloc.GetArgumentInfo(inst);
+bool A32EmitA64::ShouldFastmem(const DoNotFastmemMarker& marker) const {
+    return config.fastmem_pointer && exception_handler.SupportsFastmem() && do_not_fastmem.count(marker) == 0;
+}
 
-    if (!config.page_table) {
-        reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
-        Devirtualize<raw_fn>(config.callbacks).EmitCall(code);
+void A32EmitA64::DoNotFastmem(const DoNotFastmemMarker& marker) {
+    do_not_fastmem.emplace(marker);
+    InvalidateBasicBlocks({std::get<0>(marker)});
+}
+
+template <typename T>
+void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn) {
+    constexpr size_t bit_size = Common::BitSize<T>();
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
+    ctx.reg_alloc.ScratchGpr({ABI_RETURN});
+
+    ARM64Reg result = ctx.reg_alloc.ScratchGpr();
+    ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
+    ARM64Reg tmp = code.ABI_RETURN;
+
+    const auto do_not_fastmem_marker = GenerateDoNotFastmemMarker(ctx, inst);
+
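+    // Emits the page-table slow path; used inline when fastmem is not used and from the far-code thunk installed when a fastmem access faults.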
+    const auto page_table_lookup = [this, result, vaddr, tmp, callback_fn](FixupBranch& end) {
+        constexpr size_t bit_size = Common::BitSize<T>();
+
+        code.MOVP2R(result, config.page_table);
+        code.MOV(tmp, vaddr, ArithOption{vaddr, ST_LSR, 12});
+        code.LDR(result, result, ArithOption{tmp, true});
+        FixupBranch abort = code.CBZ(result);
+        code.ANDI2R(vaddr, vaddr, 4095);
+        switch (bit_size) {
+            case 8:
+                code.LDRB(DecodeReg(result), result, vaddr);
+                break;
+            case 16:
+                code.LDRH(DecodeReg(result), result, vaddr);
+                break;
+            case 32:
+                code.LDR(DecodeReg(result), result, vaddr);
+                break;
+            case 64:
+                code.LDR(result, result, vaddr);
+                break;
+            default:
+                ASSERT_MSG(false, "Invalid bit_size");
+                break;
+        }
+        end = code.B();
+        code.SetJumpTarget(abort);
+        code.BL(callback_fn);
+        code.MOV(result, code.ABI_RETURN);
+    };
+
+    if (ShouldFastmem(do_not_fastmem_marker)) {
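+        // Fastmem fast path: load directly through X27 (the fastmem base). A fault here is caught by the exception handler, which runs the patch callback registered below.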
+        const CodePtr patch_location = code.GetCodePtr();
+        switch (bit_size) {
+            case 8:
+                code.LDRB(DecodeReg(result), X27, vaddr);
+                break;
+            case 16:
+                code.LDRH(DecodeReg(result), X27, vaddr);
+                break;
+            case 32:
+                code.LDR(DecodeReg(result), X27, vaddr);
+                break;
+            case 64:
+                code.LDR(result, X27, vaddr);
+                break;
+            default:
+                ASSERT_MSG(false, "Invalid bit_size");
+                break;
+        }
+        code.EnsurePatchLocationSize(patch_location, 5);
+
+        fastmem_patch_info.emplace(
+                patch_location,
+                FastmemPatchInfo{
+                        [this, patch_location, page_table_lookup, callback_fn, result, do_not_fastmem_marker]{
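+                            // Replace the faulting load with a branch to far code containing the slow path, then blacklist this instruction so future compilations skip fastmem.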
+                            CodePtr save_code_ptr = code.GetCodePtr();
+                            code.SetCodePtr(patch_location);
+                            FixupBranch thunk = code.B();
+                            u8* end_ptr = code.GetWritableCodePtr();
+                            code.EnsurePatchLocationSize(patch_location, 5);
+                            code.FlushIcacheSection(reinterpret_cast<const u8*>(patch_location), code.GetCodePtr());
+
+                            code.SetCodePtr(save_code_ptr);
+                            code.SwitchToFarCode();
+                            code.SetJumpTarget(thunk);
+                            if (config.page_table) {
+                                FixupBranch end{};
+                                page_table_lookup(end);
+                                code.SetJumpTarget(end, end_ptr);
+                            } else {
+                                code.BL(callback_fn);
+                                code.MOV(result, code.ABI_RETURN);
+                            }
+                            code.B(end_ptr);
+                            code.FlushIcache();
+                            code.SwitchToNearCode();
+
+                            DoNotFastmem(do_not_fastmem_marker);
+                        }
+                });
+
+        ctx.reg_alloc.DefineValue(inst, result);
         return;
     }
 
-    reg_alloc.ScratchGpr({ABI_RETURN});
-    reg_alloc.UseScratch(args[0], ABI_PARAM2);
-    reg_alloc.UseScratch(args[1], ABI_PARAM3);
-
-    Arm64Gen::ARM64Reg addr = reg_alloc.ScratchGpr();
-    Arm64Gen::ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
-    Arm64Gen::ARM64Reg value = code.ABI_PARAM3;
-    Arm64Gen::ARM64Reg page_index = reg_alloc.ScratchGpr();
-    Arm64Gen::ARM64Reg page_offset = reg_alloc.ScratchGpr();
-
-    FixupBranch abort, end;
-
-    code.MOVI2R(addr, reinterpret_cast<u64>(config.page_table));
-    code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
-    code.LDR(addr, addr, ArithOption{ page_index, true });
-    abort = code.CBZ(addr);
-    code.ANDI2R(DecodeReg(page_offset), DecodeReg(vaddr), 4095);
-    switch (bit_size) {
-    case 8:
-        code.STRB(DecodeReg(value), addr, ArithOption{ page_offset });
-        break;
-    case 16:
-        code.STRH(DecodeReg(value), addr, ArithOption{ page_offset });
-        break;
-    case 32:
-        code.STR(DecodeReg(value), addr, ArithOption{ page_offset });
-        break;
-    case 64:
-        code.STR(value, addr, ArithOption{ page_offset });
-        break;
-    default:
-        ASSERT_MSG(false, "Invalid bit_size");
-        break;
+    if (!config.page_table) {
+        code.BL(callback_fn);
+        code.MOV(result, code.ABI_RETURN);
+        ctx.reg_alloc.DefineValue(inst, result);
+        return;
     }
-    end = code.B();
-    code.SetJumpTarget(abort);
-    code.BL(wrapped_fn);
+
+    FixupBranch end{};
+    page_table_lookup(end);
+    code.SetJumpTarget(end);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+template<typename T>
+void A32EmitA64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn) {
+    constexpr size_t bit_size = Common::BitSize<T>();
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    ctx.reg_alloc.ScratchGpr({ABI_RETURN});
+    ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
+    ctx.reg_alloc.UseScratch(args[1], ABI_PARAM3);
+
+    ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
+    ARM64Reg value = code.ABI_PARAM3;
+    ARM64Reg page_index = ctx.reg_alloc.ScratchGpr();
+    ARM64Reg addr = ctx.reg_alloc.ScratchGpr();
+
+    const auto do_not_fastmem_marker = GenerateDoNotFastmemMarker(ctx, inst);
+
+    const auto page_table_lookup = [this, vaddr, value, page_index, addr, callback_fn](FixupBranch& end) {
+        constexpr size_t bit_size = Common::BitSize<T>();
+
+        code.MOVP2R(addr, config.page_table);
+        code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
+        code.LDR(addr, addr, ArithOption{page_index, true});
+        FixupBranch abort = code.CBZ(addr);
+        code.ANDI2R(vaddr, vaddr, 4095);
+        switch (bit_size) {
+            case 8:
+                code.STRB(DecodeReg(value), addr, vaddr);
+                break;
+            case 16:
+                code.STRH(DecodeReg(value), addr, vaddr);
+                break;
+            case 32:
+                code.STR(DecodeReg(value), addr, vaddr);
+                break;
+            case 64:
+                code.STR(value, addr, vaddr);
+                break;
+            default:
+                ASSERT_MSG(false, "Invalid bit_size");
+                break;
+        }
+        end = code.B();
+        code.SetJumpTarget(abort);
+        code.BL(callback_fn);
+    };
+
+    if (ShouldFastmem(do_not_fastmem_marker)) {
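+        // Fastmem fast path: store directly through X27; a fault here triggers the patch callback registered below.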
+        const CodePtr patch_location = code.GetCodePtr();
+        switch (bit_size) {
+            case 8:
+                code.STRB(DecodeReg(value), X27, vaddr);
+                break;
+            case 16:
+                code.STRH(DecodeReg(value), X27, vaddr);
+                break;
+            case 32:
+                code.STR(DecodeReg(value), X27, vaddr);
+                break;
+            case 64:
+                code.STR(value, X27, vaddr);
+                break;
+            default:
+                ASSERT_MSG(false, "Invalid bit_size");
+                break;
+        }
+        code.EnsurePatchLocationSize(patch_location, 5);
+
+        fastmem_patch_info.emplace(
+                patch_location,
+                FastmemPatchInfo{
+                        [this, patch_location, page_table_lookup, callback_fn, do_not_fastmem_marker]{
+                            CodePtr save_code_ptr = code.GetCodePtr();
+                            code.SetCodePtr(patch_location);
+                            FixupBranch thunk = code.B();
+                            u8* end_ptr = code.GetWritableCodePtr();
+                            code.EnsurePatchLocationSize(patch_location, 5);
+                            code.FlushIcacheSection(reinterpret_cast<const u8*>(patch_location), code.GetCodePtr());
+
+                            code.SetCodePtr(save_code_ptr);
+                            code.SwitchToFarCode();
+                            code.SetJumpTarget(thunk);
+                            if (config.page_table) {
+                                FixupBranch end{};
+                                page_table_lookup(end);
+                                code.SetJumpTarget(end, end_ptr);
+                            } else {
+                                code.BL(callback_fn);
+                            }
+                            code.B(end_ptr);
+                            code.FlushIcache();
+                            code.SwitchToNearCode();
+
+                            DoNotFastmem(do_not_fastmem_marker);
+                        }
+                });
+        return;
+    }
+
+    if (!config.page_table) {
+        code.BL(callback_fn);
+        return;
+    }
+
+    FixupBranch end{};
+    page_table_lookup(end);
     code.SetJumpTarget(end);
 }
 
 void A32EmitA64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
-    ReadMemory<u8, &A32::UserCallbacks::MemoryRead8>(code, ctx.reg_alloc, inst, config, read_memory_8);
+    ReadMemory<u8>(ctx, inst, read_memory_8);
 }
 
 void A32EmitA64::EmitA32ReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
-    ReadMemory<u16, &A32::UserCallbacks::MemoryRead16>(code, ctx.reg_alloc, inst, config, read_memory_16);
+    ReadMemory<u16>(ctx, inst, read_memory_16);
 }
 
 void A32EmitA64::EmitA32ReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
-    ReadMemory<u32, &A32::UserCallbacks::MemoryRead32>(code, ctx.reg_alloc, inst, config, read_memory_32);
+    ReadMemory<u32>(ctx, inst, read_memory_32);
 }
 
 void A32EmitA64::EmitA32ReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
-    ReadMemory<u64, &A32::UserCallbacks::MemoryRead64>(code, ctx.reg_alloc, inst, config, read_memory_64);
+    ReadMemory<u64>(ctx, inst, read_memory_64);
 }
 
 void A32EmitA64::EmitA32WriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
-    WriteMemory<u8, &A32::UserCallbacks::MemoryWrite8>(code, ctx.reg_alloc, inst, config, write_memory_8);
+    WriteMemory<u8>(ctx, inst, write_memory_8);
 }
 
 void A32EmitA64::EmitA32WriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
-    WriteMemory<u16, &A32::UserCallbacks::MemoryWrite16>(code, ctx.reg_alloc, inst, config, write_memory_16);
+    WriteMemory<u16>(ctx, inst, write_memory_16);
 }
 
 void A32EmitA64::EmitA32WriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
-    WriteMemory<u32, &A32::UserCallbacks::MemoryWrite32>(code, ctx.reg_alloc, inst, config, write_memory_32);
+    WriteMemory<u32>(ctx, inst, write_memory_32);
 }
 
 void A32EmitA64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
-    WriteMemory<u64, &A32::UserCallbacks::MemoryWrite64>(code, ctx.reg_alloc, inst, config, write_memory_64);
+    WriteMemory<u64>(ctx, inst, write_memory_64);
 }
 
 template <typename T, void (A32::UserCallbacks::*fn)(A32::VAddr, T)>
@@ -1241,6 +1384,13 @@ std::string A32EmitA64::LocationDescriptorToFriendlyName(const IR::LocationDescr
                        descriptor.FPSCR().Value());
 }
 
+void A32EmitA64::FastmemCallback(CodePtr PC) {
+    const auto iter = fastmem_patch_info.find(PC);
+    ASSERT(iter != fastmem_patch_info.end());
+    iter->second.callback();
+    fastmem_patch_info.erase(iter);
+}
+
 void A32EmitA64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
     ASSERT_MSG(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag(), "Unimplemented");
     ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented");
diff --git a/src/backend/A64/a32_emit_a64.h b/src/backend/A64/a32_emit_a64.h
index 46beb730..99a4927a 100644
--- a/src/backend/A64/a32_emit_a64.h
+++ b/src/backend/A64/a32_emit_a64.h
@@ -7,12 +7,16 @@
 #pragma once
 
 #include <array>
-
+#include <functional>
 #include <optional>
+#include <set>
+#include <tuple>
+#include <unordered_map>
 
 #include "backend/A64/a32_jitstate.h"
 #include "backend/A64/block_range_information.h"
 #include "backend/A64/emit_a64.h"
+#include "backend/A64/exception_handler.h"
 #include "dynarmic/A32/a32.h"
 #include "dynarmic/A32/config.h"
 #include "frontend/A32/location_descriptor.h"
@@ -20,6 +24,7 @@
 
 namespace Dynarmic::BackendA64 {
 
+struct A64State;
 class RegAlloc;
 
 struct A32EmitContext final : public EmitContext {
@@ -29,6 +34,7 @@ struct A32EmitContext final : public EmitContext {
     u32 FPCR() const override;
     bool FPSCR_FTZ() const override;
     bool FPSCR_DN() const override;
+    std::ptrdiff_t GetInstOffset(IR::Inst* inst) const;
 };
 
 class A32EmitA64 final : public EmitA64 {
@@ -46,10 +52,13 @@ public:
 
     void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
 
+    void FastmemCallback(CodePtr PC);
+
 protected:
     const A32::UserConfig config;
     A32::Jit* jit_interface;
     BlockRangeInformation<u32> block_ranges;
+    ExceptionHandler exception_handler;
 
     struct FastDispatchEntry {
         u64 location_descriptor;
@@ -61,6 +70,12 @@ protected:
     std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
     void ClearFastDispatchTable();
 
+    using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
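+    // Marks memory instructions (by block descriptor and instruction offset) that must take the slow path after faulting once under fastmem.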
+    std::set<DoNotFastmemMarker> do_not_fastmem;
+    DoNotFastmemMarker GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst);
+    void DoNotFastmem(const DoNotFastmemMarker& marker);
+    bool ShouldFastmem(const DoNotFastmemMarker& marker) const;
+
     const void* read_memory_8;
     const void* read_memory_16;
     const void* read_memory_32;
@@ -70,6 +85,10 @@ protected:
     const void* write_memory_32;
     const void* write_memory_64;
     void GenMemoryAccessors();
+    template<typename T>
+    void ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
+    template<typename T>
+    void WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
 
     const void* terminal_handler_pop_rsb_hint;
     const void* terminal_handler_fast_dispatch_hint = nullptr;
@@ -87,6 +106,12 @@ protected:
     // Helpers
     std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
 
+    // Fastmem
+    struct FastmemPatchInfo {
+        std::function<void()> callback;
+    };
+    std::unordered_map<CodePtr, FastmemPatchInfo> fastmem_patch_info;
+
     // Terminal instruction emitters
     void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
     void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
diff --git a/src/backend/A64/a32_interface.cpp b/src/backend/A64/a32_interface.cpp
index 0d7b098c..db8fd2b9 100644
--- a/src/backend/A64/a32_interface.cpp
+++ b/src/backend/A64/a32_interface.cpp
@@ -31,17 +31,18 @@ namespace Dynarmic::A32 {
 
 using namespace BackendA64;
 
-static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
+static RunCodeCallbacks GenRunCodeCallbacks(const A32::UserConfig& config, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
     return RunCodeCallbacks{
         std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
-        std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)),
-        std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)),
+        std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks)),
+        std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks)),
+        reinterpret_cast<u64>(config.fastmem_pointer),
     };
 }
 
 struct Jit::Impl {
     Impl(Jit* jit, A32::UserConfig config)
-            : block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state})
+            : block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlock, this), JitStateInfo{jit_state})
             , emitter(block_of_code, config, jit)
             , config(std::move(config))
             , jit_interface(jit)
diff --git a/src/backend/A64/block_of_code.cpp b/src/backend/A64/block_of_code.cpp
index df78d06e..cb20736a 100644
--- a/src/backend/A64/block_of_code.cpp
+++ b/src/backend/A64/block_of_code.cpp
@@ -71,7 +71,6 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
     AllocCodeSpace(TOTAL_CODE_SIZE);
     EnableWriting();
     GenRunCode();
-    exception_handler.Register(*this);
 }
 
 void BlockOfCode::PreludeComplete() {
@@ -155,7 +154,8 @@ void BlockOfCode::GenRunCode() {
     ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
 
     MOV(Arm64Gen::X28, ABI_PARAM1);
-    MOV(Arm64Gen::X27, ABI_PARAM2); //  temporarily in non-volatile register
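+    // X27 is loaded once with cb.value_in_X27 (the fastmem base) and is reserved from the register allocator for the duration of generated code.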
+    MOVI2R(Arm64Gen::X27, cb.value_in_X27);
+    MOV(Arm64Gen::X26, ABI_PARAM2); // save temporarily in non-volatile register
 
     cb.GetTicksRemaining->EmitCall(*this);
 
@@ -163,7 +163,7 @@ void BlockOfCode::GenRunCode() {
     STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
 
     SwitchFpscrOnEntry();
-    BR(Arm64Gen::X27);
+    BR(Arm64Gen::X26);
 
     AlignCode16();
     run_code = (RunCodeFuncType) GetWritableCodePtr();
@@ -175,6 +175,7 @@ void BlockOfCode::GenRunCode() {
     ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
 
     MOV(Arm64Gen::X28, ABI_PARAM1);
+    MOVI2R(Arm64Gen::X27, cb.value_in_X27);
 
     cb.GetTicksRemaining->EmitCall(*this);
     STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
@@ -291,6 +292,14 @@ CodePtr BlockOfCode::GetCodeBegin() const {
     return near_code_begin;
 }
 
+u8* BlockOfCode::GetRegion() const {
+    return region;
+}
+
+std::size_t BlockOfCode::GetRegionSize() const {
+    return total_region_size;
+}
+
 void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {    
     ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");
 
@@ -323,4 +332,4 @@ void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
 //#endif
 //}
 
-} // namespace Dynarmic::BackendX64
+} // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/block_of_code.h b/src/backend/A64/block_of_code.h
index 8f7c499b..23c35b24 100644
--- a/src/backend/A64/block_of_code.h
+++ b/src/backend/A64/block_of_code.h
@@ -24,11 +24,14 @@ struct RunCodeCallbacks {
     std::unique_ptr<Callback> LookupBlock;
     std::unique_ptr<Callback> AddTicks;
     std::unique_ptr<Callback> GetTicksRemaining;
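+    // Loaded into X27 in the run-code prologue; the A32 frontend passes its fastmem base pointer here.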
+    u64 value_in_X27;
 };
 
 class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
 public:
     BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
+    BlockOfCode(const BlockOfCode&) = delete;
+
 
     /// Call when external emitters have finished emitting their preludes.
     void PreludeComplete();
@@ -74,6 +77,8 @@ public:
     void SwitchToNearCode();
 
     CodePtr GetCodeBegin() const;
+    u8* GetRegion() const;
+    std::size_t GetRegionSize() const;
 
     const void* GetReturnFromRunCodeAddress() const {
         return return_from_run_code[0];
@@ -137,20 +142,6 @@ private:
     std::array<const void*, 4> return_from_run_code;
     void GenRunCode();
 
-
-
-    class ExceptionHandler final {
-    public:
-        ExceptionHandler();
-        ~ExceptionHandler();
-
-        void Register(BlockOfCode& code);
-    private:
-        struct Impl;
-        std::unique_ptr<Impl> impl;
-    };
-    ExceptionHandler exception_handler;
-
     //Xbyak::util::Cpu cpu_info;
 };
 
diff --git a/src/backend/A64/emitter/a64_emitter.cpp b/src/backend/A64/emitter/a64_emitter.cpp
index 8ad3e6fe..efdd5ffc 100644
--- a/src/backend/A64/emitter/a64_emitter.cpp
+++ b/src/backend/A64/emitter/a64_emitter.cpp
@@ -857,10 +857,12 @@ void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64
 }
 
 // FixupBranch branching
-void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) {
+void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) {
+    if(!target)
+        target = m_code;
     bool Not = false;
     u32 inst = 0;
-    s64 distance = static_cast<s64>(m_code - branch.ptr);
+    s64 distance = static_cast<s64>(target - branch.ptr);
     distance >>= 2;
 
     switch (branch.type) {
@@ -891,7 +893,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) {
         inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) |
                ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
     } break;
-    case 5: // B (uncoditional)
+    case 5: // B (unconditional)
         ASSERT_MSG(IsInRangeImm26(distance), "%s(%d): Received too large distance: %" PRIx64,
                    __func__, branch.type, distance);
         inst = (0x5 << 26) | MaskImm26(distance);
diff --git a/src/backend/A64/emitter/a64_emitter.h b/src/backend/A64/emitter/a64_emitter.h
index 1f08f651..e7d84638 100644
--- a/src/backend/A64/emitter/a64_emitter.h
+++ b/src/backend/A64/emitter/a64_emitter.h
@@ -524,7 +524,7 @@ public:
     u8* GetWritableCodePtr();
 
     // FixupBranch branching
-    void SetJumpTarget(FixupBranch const& branch);
+    void SetJumpTarget(FixupBranch const& branch, u8* target = nullptr);
     FixupBranch CBZ(ARM64Reg Rt);
     FixupBranch CBNZ(ARM64Reg Rt);
     FixupBranch B(CCFlags cond);
diff --git a/src/backend/A64/exception_handler.h b/src/backend/A64/exception_handler.h
new file mode 100644
index 00000000..04eb7d0c
--- /dev/null
+++ b/src/backend/A64/exception_handler.h
@@ -0,0 +1,39 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <array>
+#include <functional>
+#include <memory>
+
+#include "backend/A64/a32_jitstate.h"
+#include "common/common_types.h"
+
+namespace Dynarmic::BackendA64 {
+
+class BlockOfCode;
+
+struct A64State {
+    std::array<u64, 32> X;
+    std::array<std::array<u64, 2>, 16> Q;
+};
+static_assert(sizeof(A64State) == sizeof(A64State::X) + sizeof(A64State::Q));
+
+class ExceptionHandler final {
+public:
+    ExceptionHandler();
+    ~ExceptionHandler();
+
+    void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);
+
+    bool SupportsFastmem() const;
+private:
+    struct Impl;
+    std::unique_ptr<Impl> impl;
+};
+
+} // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/exception_handler_generic.cpp b/src/backend/A64/exception_handler_generic.cpp
index 2e066fa4..c5b17c07 100644
--- a/src/backend/A64/exception_handler_generic.cpp
+++ b/src/backend/A64/exception_handler_generic.cpp
@@ -4,18 +4,22 @@
  * General Public License version 2 or any later version.
  */
 
-#include "backend/A64/block_of_code.h"
+#include "backend/A64/exception_handler.h"
 
 namespace Dynarmic::BackendA64 {
 
-struct BlockOfCode::ExceptionHandler::Impl final {
+struct ExceptionHandler::Impl final {
 };
 
-BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
-BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
+ExceptionHandler::ExceptionHandler() = default;
+ExceptionHandler::~ExceptionHandler() = default;
 
-void BlockOfCode::ExceptionHandler::Register(BlockOfCode&) {
+void ExceptionHandler::Register(BlockOfCode&, std::function<void(CodePtr)>) {
     // Do nothing
 }
 
+bool ExceptionHandler::SupportsFastmem() const {
+    return false;
+}
+
 } // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/exception_handler_posix.cpp b/src/backend/A64/exception_handler_posix.cpp
new file mode 100644
index 00000000..57f24045
--- /dev/null
+++ b/src/backend/A64/exception_handler_posix.cpp
@@ -0,0 +1,161 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2019 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+// Copyright 2008 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <mutex>
+#include <vector>
+
+#include <csignal>
+#include <ucontext.h>
+
+#include "backend/A64/a32_jitstate.h"
+#include "backend/A64/block_of_code.h"
+#include "backend/A64/exception_handler.h"
+#include "common/assert.h"
+#include "common/cast_util.h"
+#include "common/common_types.h"
+
+#include "jni.h"
+
+namespace Dynarmic::BackendA64 {
+
+namespace {
+
+struct CodeBlockInfo {
+    BlockOfCode* block;
+    std::function<void(CodePtr)> callback;
+};
+
+class SigHandler {
+public:
+    SigHandler();
+
+    ~SigHandler();
+
+    void AddCodeBlock(CodeBlockInfo info);
+
+    void RemoveCodeBlock(CodePtr PC);
+
+private:
+    auto FindCodeBlockInfo(CodePtr PC) {
+        return std::find_if(code_block_infos.begin(), code_block_infos.end(),
+                            [&](const CodeBlockInfo& x) { return x.block->GetRegion() <= PC && PC < x.block->GetRegion() + x.block->GetRegionSize(); });
+    }
+
+    std::vector<CodeBlockInfo> code_block_infos;
+    std::mutex code_block_infos_mutex;
+
+    struct sigaction old_sa_segv;
+    struct sigaction old_sa_bus;
+
+    static void SigAction(int sig, siginfo_t* info, void* raw_context);
+};
+
+SigHandler sig_handler;
+
+SigHandler::SigHandler() {
+    // Method below from dolphin.
+
+    constexpr std::size_t signal_stack_size =
+        static_cast<std::size_t>(std::max(SIGSTKSZ, 2 * 1024 * 1024));
+
+    stack_t signal_stack;
+    signal_stack.ss_sp = malloc(signal_stack_size);
+    signal_stack.ss_size = signal_stack_size;
+    signal_stack.ss_flags = 0;
+    ASSERT_MSG(sigaltstack(&signal_stack, nullptr) == 0,
+               "dynarmic: POSIX SigHandler: init failure at sigaltstack");
+
+    struct sigaction sa;
+    sa.sa_handler = nullptr;
+    sa.sa_sigaction = &SigHandler::SigAction;
+    sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
+    sigemptyset(&sa.sa_mask);
+    sigaction(SIGSEGV, &sa, &old_sa_segv);
+}
+
+SigHandler::~SigHandler() {
+    // No cleanup required.
+}
+
+void SigHandler::AddCodeBlock(CodeBlockInfo cb) {
+    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
+    ASSERT(FindCodeBlockInfo(cb.block->GetRegion()) == code_block_infos.end());
+    code_block_infos.push_back(std::move(cb));
+}
+
+void SigHandler::RemoveCodeBlock(CodePtr PC) {
+    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
+    const auto iter = FindCodeBlockInfo(PC);
+    ASSERT(iter != code_block_infos.end());
+    code_block_infos.erase(iter);
+}
+
+void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
+    ASSERT(sig == SIGSEGV || sig == SIGBUS);
+
+    std::lock_guard<std::mutex> guard(sig_handler.code_block_infos_mutex);
+    auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext.pc);
+    const auto iter = sig_handler.FindCodeBlockInfo(PC);
+    if (iter != sig_handler.code_block_infos.end()) {
+        iter->callback(PC);
+        return;
+    }
+
+    fmt::print(
+        stderr,
+        "dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
+        PC);
+
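+    // The fault was outside our code blocks: chain to the previously installed handler.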
+    struct sigaction* retry_sa =
+        sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
+    if (retry_sa->sa_flags & SA_SIGINFO) {
+        retry_sa->sa_sigaction(sig, info, raw_context);
+        return;
+    }
+    if (retry_sa->sa_handler == SIG_DFL) {
+        signal(sig, SIG_DFL);
+        return;
+    }
+    if (retry_sa->sa_handler == SIG_IGN) {
+        return;
+    }
+    retry_sa->sa_handler(sig);
+}
+
+} // anonymous namespace
+
+struct ExceptionHandler::Impl final {
+    Impl(BlockOfCode& code, std::function<void(CodePtr)> cb) {
+        code_begin = code.GetRegion();
+        sig_handler.AddCodeBlock({&code, std::move(cb)});
+    }
+
+    ~Impl() {
+        sig_handler.RemoveCodeBlock(code_begin);
+    }
+
+private:
+    CodePtr code_begin;
+};
+
+ExceptionHandler::ExceptionHandler() = default;
+
+ExceptionHandler::~ExceptionHandler() = default;
+
+void ExceptionHandler::Register(BlockOfCode& code, std::function<void(CodePtr)> cb) {
+    if (cb)
+        impl = std::make_unique<Impl>(code, std::move(cb));
+}
+
+bool ExceptionHandler::SupportsFastmem() const {
+    return static_cast<bool>(impl);
+}
+
+} // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/hostloc.h b/src/backend/A64/hostloc.h
index bf6e87b0..36a40f0d 100644
--- a/src/backend/A64/hostloc.h
+++ b/src/backend/A64/hostloc.h
@@ -130,22 +130,32 @@ inline size_t HostLocBitWidth(HostLoc loc) {
 
 using HostLocList = std::initializer_list<HostLoc>;
 
-
 // X18 may be reserved.(Windows and iOS)
+// X27 holds the emulated memory base pointer (used by fastmem).
 // X28 used for holding the JitState.
 // X30 is the link register.
-const HostLocList any_gpr = {
-    HostLoc::X0,  HostLoc::X1,  HostLoc::X2,  HostLoc::X3,  HostLoc::X4,  HostLoc::X5,  HostLoc::X6,  HostLoc::X7,
-    HostLoc::X8,  HostLoc::X9,  HostLoc::X10, HostLoc::X11, HostLoc::X12, HostLoc::X13, HostLoc::X14, HostLoc::X15, 
-    HostLoc::X16, HostLoc::X17, HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23, HostLoc::X24,
-    HostLoc::X25, HostLoc::X26, HostLoc::X27, // HostLoc::X29,
+// In order of desirability, based first on the ABI
+constexpr HostLocList any_gpr = {
+    HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
+    HostLoc::X24, HostLoc::X25, HostLoc::X26,
+
+    HostLoc::X8,  HostLoc::X9,  HostLoc::X10, HostLoc::X11, HostLoc::X12,
+    HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,
+
+    HostLoc::X7,  HostLoc::X6,  HostLoc::X5,  HostLoc::X4,  HostLoc::X3,
+    HostLoc::X2,  HostLoc::X1,  HostLoc::X0,
 };
 
-const HostLocList any_fpr = {
-    HostLoc::Q0,  HostLoc::Q1,  HostLoc::Q2,  HostLoc::Q3,  HostLoc::Q4,  HostLoc::Q5,  HostLoc::Q6,  HostLoc::Q7,
-    HostLoc::Q8,  HostLoc::Q9,  HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13, HostLoc::Q14, HostLoc::Q15,
-    HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21, HostLoc::Q22, HostLoc::Q23,
-    HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27, HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
+constexpr HostLocList any_fpr = {
+    HostLoc::Q8,  HostLoc::Q9,  HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13,
+    HostLoc::Q14, HostLoc::Q15,
+
+    HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21,
+    HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27,
+    HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
+
+    HostLoc::Q7,  HostLoc::Q6,  HostLoc::Q5,  HostLoc::Q4,  HostLoc::Q3,  HostLoc::Q2,
+    HostLoc::Q1,  HostLoc::Q0,
 };
 
 Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
@@ -156,9 +166,10 @@ size_t SpillToOpArg(HostLoc loc) {
     ASSERT(HostLocIsSpill(loc));
 
     size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
-    ASSERT_MSG(i < JitStateType::SpillCount, "Spill index greater than number of available spill locations");
+    ASSERT_MSG(i < JitStateType::SpillCount,
+               "Spill index greater than number of available spill locations");
 
     return JitStateType::GetSpillLocationOffsetFromIndex(i);
 }
 
-} // namespace Dynarmic::BackendX64
+} // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/reg_alloc.cpp b/src/backend/A64/reg_alloc.cpp
index e6f606f9..36b7f5ef 100644
--- a/src/backend/A64/reg_alloc.cpp
+++ b/src/backend/A64/reg_alloc.cpp
@@ -435,7 +435,7 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
      std::vector<HostLoc> candidates = desired_locations;
 
     // Find all locations that have not been allocated..
-    auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
+    const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
         return !this->LocInfo(loc).IsLocked();
     });
     candidates.erase(allocated_locs, candidates.end());