diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8ac83962..40b407c2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -373,7 +373,9 @@ elseif(ARCHITECTURE_Aarch64)
         backend/A64/reg_alloc.cpp
         backend/A64/reg_alloc.h
-        backend/A64/exception_handler_generic.cpp
+        backend/A64/exception_handler.h
+        #backend/A64/exception_handler_generic.cpp
+        backend/A64/exception_handler_posix.cpp
     )
 else()
     message(FATAL_ERROR "Unsupported architecture")
diff --git a/src/backend/A64/a32_emit_a64.cpp b/src/backend/A64/a32_emit_a64.cpp
index d5d1c440..8e0d35d7 100644
--- a/src/backend/A64/a32_emit_a64.cpp
+++ b/src/backend/A64/a32_emit_a64.cpp
@@ -4,6 +4,7 @@
  * General Public License version 2 or any later version.
  */
 
+#include <iterator>
 #include
 #include
 #include
@@ -76,12 +77,18 @@ bool A32EmitContext::FPSCR_DN() const {
     return Location().FPSCR().DN();
 }
 
+std::ptrdiff_t A32EmitContext::GetInstOffset(IR::Inst* inst) const {
+    return std::distance(block.begin(), IR::Block::iterator(inst));
+}
+
 A32EmitA64::A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface)
     : EmitA64(code), config(std::move(config)), jit_interface(jit_interface) {
+    exception_handler.Register(code, [this](CodePtr PC){ FastmemCallback(PC); });
     GenMemoryAccessors();
     GenTerminalHandlers();
     code.PreludeComplete();
     ClearFastDispatchTable();
+    fastmem_patch_info.clear();
 }
 
 A32EmitA64::~A32EmitA64() = default;
@@ -152,6 +159,7 @@ void A32EmitA64::ClearCache() {
     EmitA64::ClearCache();
     block_ranges.ClearCache();
     ClearFastDispatchTable();
+    fastmem_patch_info.clear();
 }
 
 void A32EmitA64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
@@ -781,137 +789,272 @@ void A32EmitA64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
     code.STR(INDEX_UNSIGNED, address, X28, offsetof(A32JitState, exclusive_address));
 }
 
-template <typename T, auto raw_fn>
-static void ReadMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) {
-    constexpr size_t bit_size = Common::BitSize<T>();
-    auto args = reg_alloc.GetArgumentInfo(inst);
-
-    if (!config.page_table) {
-        reg_alloc.HostCall(inst, {}, args[0]);
-        Devirtualize<raw_fn>(config.callbacks).EmitCall(code);
-        return;
-    }
-
-    reg_alloc.UseScratch(args[0], ABI_PARAM2);
-
-    Arm64Gen::ARM64Reg result = reg_alloc.ScratchGpr({ABI_RETURN});
-    Arm64Gen::ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
-    Arm64Gen::ARM64Reg page_index = reg_alloc.ScratchGpr();
-    Arm64Gen::ARM64Reg page_offset = reg_alloc.ScratchGpr();
-
-    FixupBranch abort, end;
-
-    code.MOVP2R(result, config.page_table);
-    code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
-    code.LDR(result, result, ArithOption{page_index, true});
-    abort = code.CBZ(result);
-    code.ANDI2R(DecodeReg(page_offset), DecodeReg(vaddr), 4095);
-    switch (bit_size) {
-    case 8:
-        code.LDRB(DecodeReg(result), result, ArithOption{ page_offset });
-        break;
-    case 16:
-        code.LDRH(DecodeReg(result), result, ArithOption{ page_offset });
-        break;
-    case 32:
-        code.LDR(DecodeReg(result), result, ArithOption{ page_offset });
-        break;
-    case 64:
-        code.LDR(result, result, ArithOption{ page_offset });
-        break;
-    default:
-        ASSERT_MSG(false, "Invalid bit_size");
-        break;
-    }
-    end = code.B();
-    code.SetJumpTarget(abort);
-    code.BL(wrapped_fn);
-    code.SetJumpTarget(end);
-
-    reg_alloc.DefineValue(inst, result);
+A32EmitA64::DoNotFastmemMarker A32EmitA64::GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst) {
+    return std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst));
 }
 
-template <typename T, auto raw_fn>
-static void WriteMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) {
-    constexpr size_t bit_size = Common::BitSize<T>();
-    auto args = reg_alloc.GetArgumentInfo(inst);
+bool A32EmitA64::ShouldFastmem(const DoNotFastmemMarker& marker) const {
+    return config.fastmem_pointer && exception_handler.SupportsFastmem() && do_not_fastmem.count(marker) == 0;
+}
 
-    if (!config.page_table) {
-        reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
-        Devirtualize<raw_fn>(config.callbacks).EmitCall(code);
+void A32EmitA64::DoNotFastmem(const DoNotFastmemMarker& marker) {
+    do_not_fastmem.emplace(marker);
+    InvalidateBasicBlocks({std::get<0>(marker)});
+}
+
+template <typename T>
+void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn) {
+    constexpr size_t bit_size = Common::BitSize<T>();
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
+    ctx.reg_alloc.ScratchGpr({ABI_RETURN});
+
+    ARM64Reg result = ctx.reg_alloc.ScratchGpr();
+    ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
+    ARM64Reg tmp = code.ABI_RETURN;
+
+    const auto do_not_fastmem_marker = GenerateDoNotFastmemMarker(ctx, inst);
+
+    const auto page_table_lookup = [this, result, vaddr, tmp, callback_fn](FixupBranch& end) {
+        constexpr size_t bit_size = Common::BitSize<T>();
+
+        code.MOVP2R(result, config.page_table);
+        code.MOV(tmp, vaddr, ArithOption{vaddr, ST_LSR, 12});
+        code.LDR(result, result, ArithOption{tmp, true});
+        FixupBranch abort = code.CBZ(result);
+        code.ANDI2R(vaddr, vaddr, 4095);
+        switch (bit_size) {
+        case 8:
+            code.LDRB(DecodeReg(result), result, vaddr);
+            break;
+        case 16:
+            code.LDRH(DecodeReg(result), result, vaddr);
+            break;
+        case 32:
+            code.LDR(DecodeReg(result), result, vaddr);
+            break;
+        case 64:
+            code.LDR(result, result, vaddr);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bit_size");
+            break;
+        }
+        end = code.B();
+        code.SetJumpTarget(abort);
+        code.BL(callback_fn);
+        code.MOV(result, code.ABI_RETURN);
+    };
+
+    if (ShouldFastmem(do_not_fastmem_marker)) {
+        const CodePtr patch_location = code.GetCodePtr();
+        switch (bit_size) {
+        case 8:
+            code.LDRB(DecodeReg(result), X27, vaddr);
+            break;
+        case 16:
+            code.LDRH(DecodeReg(result), X27, vaddr);
+            break;
+        case 32:
+            code.LDR(DecodeReg(result), X27, vaddr);
+            break;
+        case 64:
+            code.LDR(result, X27, vaddr);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bit_size");
+            break;
+        }
+        code.EnsurePatchLocationSize(patch_location, 5);
+
+        fastmem_patch_info.emplace(
+            patch_location,
+            FastmemPatchInfo{
+                [this, patch_location, page_table_lookup, callback_fn, result, do_not_fastmem_marker]{
+                    CodePtr save_code_ptr = code.GetCodePtr();
+                    code.SetCodePtr(patch_location);
+                    FixupBranch thunk = code.B();
+                    u8* end_ptr = code.GetWritableCodePtr();
+                    code.EnsurePatchLocationSize(patch_location, 5);
+                    code.FlushIcacheSection(reinterpret_cast<const u8*>(patch_location), code.GetCodePtr());
+
+                    code.SetCodePtr(save_code_ptr);
+                    code.SwitchToFarCode();
+                    code.SetJumpTarget(thunk);
+                    if (config.page_table) {
+                        FixupBranch end{};
+                        page_table_lookup(end);
+                        code.SetJumpTarget(end, end_ptr);
+                    } else {
+                        code.BL(callback_fn);
+                        code.MOV(result, code.ABI_RETURN);
+                    }
+                    code.B(end_ptr);
+                    code.FlushIcache();
+                    code.SwitchToNearCode();
+
+                    DoNotFastmem(do_not_fastmem_marker);
+                }
+            });
+
+        ctx.reg_alloc.DefineValue(inst, result);
         return;
     }
 
-    reg_alloc.ScratchGpr({ABI_RETURN});
-    reg_alloc.UseScratch(args[0], ABI_PARAM2);
-    reg_alloc.UseScratch(args[1], ABI_PARAM3);
-
-    Arm64Gen::ARM64Reg addr = reg_alloc.ScratchGpr();
-    Arm64Gen::ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
-    Arm64Gen::ARM64Reg value = code.ABI_PARAM3;
-    Arm64Gen::ARM64Reg page_index = reg_alloc.ScratchGpr();
-    Arm64Gen::ARM64Reg page_offset = reg_alloc.ScratchGpr();
-
-    FixupBranch abort, end;
-
-    code.MOVI2R(addr, reinterpret_cast<u64>(config.page_table));
-    code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
-    code.LDR(addr, addr, ArithOption{ page_index, true });
-    abort = code.CBZ(addr);
-    code.ANDI2R(DecodeReg(page_offset), DecodeReg(vaddr), 4095);
-    switch (bit_size) {
-    case 8:
-        code.STRB(DecodeReg(value), addr, ArithOption{ page_offset });
-        break;
-    case 16:
-        code.STRH(DecodeReg(value), addr, ArithOption{ page_offset });
-        break;
-    case 32:
-        code.STR(DecodeReg(value), addr, ArithOption{ page_offset });
-        break;
-    case 64:
-        code.STR(value, addr, ArithOption{ page_offset });
-        break;
-    default:
-        ASSERT_MSG(false, "Invalid bit_size");
-        break;
+    if (!config.page_table) {
+        code.BL(callback_fn);
+        code.MOV(result, code.ABI_RETURN);
+        ctx.reg_alloc.DefineValue(inst, result);
+        return;
     }
-    end = code.B();
-    code.SetJumpTarget(abort);
-    code.BL(wrapped_fn);
+
+    FixupBranch end{};
+    page_table_lookup(end);
+    code.SetJumpTarget(end);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+template <typename T>
+void A32EmitA64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn) {
+    constexpr size_t bit_size = Common::BitSize<T>();
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    ctx.reg_alloc.ScratchGpr({ABI_RETURN});
+    ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
+    ctx.reg_alloc.UseScratch(args[1], ABI_PARAM3);
+
+    ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
+    ARM64Reg value = code.ABI_PARAM3;
+    ARM64Reg page_index = ctx.reg_alloc.ScratchGpr();
+    ARM64Reg addr = ctx.reg_alloc.ScratchGpr();
+
+    const auto do_not_fastmem_marker = GenerateDoNotFastmemMarker(ctx, inst);
+
+    const auto page_table_lookup = [this, vaddr, value, page_index, addr, callback_fn](FixupBranch& end) {
+        constexpr size_t bit_size = Common::BitSize<T>();
+
+        code.MOVP2R(addr, config.page_table);
+        code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
+        code.LDR(addr, addr, ArithOption{page_index, true});
+        FixupBranch abort = code.CBZ(addr);
+        code.ANDI2R(vaddr, vaddr, 4095);
+        switch (bit_size) {
+        case 8:
+            code.STRB(DecodeReg(value), addr, vaddr);
+            break;
+        case 16:
+            code.STRH(DecodeReg(value), addr, vaddr);
+            break;
+        case 32:
+            code.STR(DecodeReg(value), addr, vaddr);
+            break;
+        case 64:
+            code.STR(value, addr, vaddr);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bit_size");
+            break;
+        }
+        end = code.B();
+        code.SetJumpTarget(abort);
+        code.BL(callback_fn);
+    };
+
+    if (ShouldFastmem(do_not_fastmem_marker)) {
+        const CodePtr patch_location = code.GetCodePtr();
+        switch (bit_size) {
+        case 8:
+            code.STRB(DecodeReg(value), X27, vaddr);
+            break;
+        case 16:
+            code.STRH(DecodeReg(value), X27, vaddr);
+            break;
+        case 32:
+            code.STR(DecodeReg(value), X27, vaddr);
+            break;
+        case 64:
+            code.STR(value, X27, vaddr);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bit_size");
+            break;
+        }
+        code.EnsurePatchLocationSize(patch_location, 5);
+
+        fastmem_patch_info.emplace(
+            patch_location,
+            FastmemPatchInfo{
+                [this, patch_location, page_table_lookup, callback_fn, do_not_fastmem_marker]{
+                    CodePtr save_code_ptr = code.GetCodePtr();
+                    code.SetCodePtr(patch_location);
+                    FixupBranch thunk = code.B();
+                    u8* end_ptr = code.GetWritableCodePtr();
+                    code.EnsurePatchLocationSize(patch_location, 5);
+                    code.FlushIcacheSection(reinterpret_cast<const u8*>(patch_location), code.GetCodePtr());
+
+                    code.SetCodePtr(save_code_ptr);
+                    code.SwitchToFarCode();
+                    code.SetJumpTarget(thunk);
+                    if (config.page_table) {
+                        FixupBranch end{};
+                        page_table_lookup(end);
+                        code.SetJumpTarget(end, end_ptr);
+                    } else {
+                        code.BL(callback_fn);
+                    }
+                    code.B(end_ptr);
+                    code.FlushIcache();
+                    code.SwitchToNearCode();
+
+                    DoNotFastmem(do_not_fastmem_marker);
+                }
+            });
+        return;
+    }
+
+    if (!config.page_table) {
+        code.BL(callback_fn);
+        return;
+    }
+
+    FixupBranch end{};
+    page_table_lookup(end);
     code.SetJumpTarget(end);
 }
 
 void A32EmitA64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
-    ReadMemory<u8, &A32::UserCallbacks::MemoryRead8>(code, ctx.reg_alloc, inst, config, read_memory_8);
+    ReadMemory<u8>(ctx, inst, read_memory_8);
 }
 
 void A32EmitA64::EmitA32ReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
-    ReadMemory<u16, &A32::UserCallbacks::MemoryRead16>(code, ctx.reg_alloc, inst, config, read_memory_16);
+    ReadMemory<u16>(ctx, inst, read_memory_16);
 }
 
 void A32EmitA64::EmitA32ReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
-    ReadMemory<u32, &A32::UserCallbacks::MemoryRead32>(code, ctx.reg_alloc, inst, config, read_memory_32);
+    ReadMemory<u32>(ctx, inst, read_memory_32);
 }
 
 void A32EmitA64::EmitA32ReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
-    ReadMemory<u64, &A32::UserCallbacks::MemoryRead64>(code, ctx.reg_alloc, inst, config, read_memory_64);
+    ReadMemory<u64>(ctx, inst, read_memory_64);
 }
 
 void A32EmitA64::EmitA32WriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
-    WriteMemory<u8, &A32::UserCallbacks::MemoryWrite8>(code, ctx.reg_alloc, inst, config, write_memory_8);
+    WriteMemory<u8>(ctx, inst, write_memory_8);
 }
 
 void A32EmitA64::EmitA32WriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
-    WriteMemory<u16, &A32::UserCallbacks::MemoryWrite16>(code, ctx.reg_alloc, inst, config, write_memory_16);
+    WriteMemory<u16>(ctx, inst, write_memory_16);
 }
 
 void A32EmitA64::EmitA32WriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
-    WriteMemory<u32, &A32::UserCallbacks::MemoryWrite32>(code, ctx.reg_alloc, inst, config, write_memory_32);
+    WriteMemory<u32>(ctx, inst, write_memory_32);
 }
 
 void A32EmitA64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
-    WriteMemory<u64, &A32::UserCallbacks::MemoryWrite64>(code, ctx.reg_alloc, inst, config, write_memory_64);
+    WriteMemory<u64>(ctx, inst, write_memory_64);
 }
 
 template
@@ -1241,6 +1384,13 @@ std::string A32EmitA64::LocationDescriptorToFriendlyName(const IR::LocationDescr
                        descriptor.FPSCR().Value());
 }
 
+void A32EmitA64::FastmemCallback(CodePtr PC) {
+    const auto iter = fastmem_patch_info.find(PC);
+    ASSERT(iter != fastmem_patch_info.end());
+    iter->second.callback();
+    fastmem_patch_info.erase(iter);
+}
+
 void A32EmitA64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
     ASSERT_MSG(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag(), "Unimplemented");
     ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented");
diff --git a/src/backend/A64/a32_emit_a64.h b/src/backend/A64/a32_emit_a64.h
index 46beb730..99a4927a 100644
--- a/src/backend/A64/a32_emit_a64.h
+++ b/src/backend/A64/a32_emit_a64.h
@@ -7,12 +7,16 @@
 #pragma once
 
 #include <array>
-
+#include <functional>
 #include <memory>
+#include <set>
+#include <tuple>
+#include <unordered_map>
 
 #include "backend/A64/a32_jitstate.h"
 #include "backend/A64/block_range_information.h"
 #include "backend/A64/emit_a64.h"
+#include "backend/A64/exception_handler.h"
 #include "dynarmic/A32/a32.h"
 #include "dynarmic/A32/config.h"
 #include "frontend/A32/location_descriptor.h"
@@ -20,6 +24,7 @@
 
 namespace Dynarmic::BackendA64 {
 
+struct A64State;
 class RegAlloc;
 
 struct A32EmitContext final : public EmitContext {
@@ -29,6 +34,7 @@ struct A32EmitContext final : public EmitContext {
     u32 FPCR() const override;
     bool FPSCR_FTZ() const override;
     bool FPSCR_DN() const override;
+    std::ptrdiff_t GetInstOffset(IR::Inst* inst) const;
 };
 
 class A32EmitA64 final : public EmitA64 {
@@ -46,10 +52,13 @@ public:
 
     void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
 
+    void FastmemCallback(CodePtr PC);
+
 protected:
     const A32::UserConfig config;
     A32::Jit* jit_interface;
     BlockRangeInformation<u32> block_ranges;
+    ExceptionHandler exception_handler;
 
     struct FastDispatchEntry {
         u64 location_descriptor;
@@ -61,6 +70,12 @@ protected:
     std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
     void ClearFastDispatchTable();
 
+    using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
+    std::set<DoNotFastmemMarker> do_not_fastmem;
+    DoNotFastmemMarker GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst);
+    void DoNotFastmem(const DoNotFastmemMarker& marker);
+    bool ShouldFastmem(const DoNotFastmemMarker& marker) const;
+
     const void* read_memory_8;
     const void* read_memory_16;
     const void* read_memory_32;
@@ -70,6 +85,10 @@ protected:
     const void* write_memory_32;
     const void* write_memory_64;
     void GenMemoryAccessors();
+    template <typename T>
+    void ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
+    template <typename T>
+    void WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
 
     const void* terminal_handler_pop_rsb_hint;
     const void* terminal_handler_fast_dispatch_hint = nullptr;
@@ -87,6 +106,12 @@ protected:
     // Helpers
     std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
 
+    // Fastmem
+    struct FastmemPatchInfo {
+        std::function<void()> callback;
+    };
+    std::unordered_map<CodePtr, FastmemPatchInfo> fastmem_patch_info;
+
     // Terminal instruction emitters
     void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
     void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
diff --git a/src/backend/A64/a32_interface.cpp b/src/backend/A64/a32_interface.cpp
index 0d7b098c..db8fd2b9 100644
--- a/src/backend/A64/a32_interface.cpp
+++ b/src/backend/A64/a32_interface.cpp
@@ -31,17 +31,18 @@ namespace Dynarmic::A32 {
 
 using namespace BackendA64;
 
-static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
+static RunCodeCallbacks GenRunCodeCallbacks(const A32::UserConfig& config, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
     return RunCodeCallbacks{
         std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
-        std::make_unique<SimpleCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)),
-        std::make_unique<SimpleCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)),
+        std::make_unique<SimpleCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks)),
+        std::make_unique<SimpleCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks)),
+        reinterpret_cast<u64>(config.fastmem_pointer),
     };
 }
 
 struct Jit::Impl {
     Impl(Jit* jit, A32::UserConfig config)
-        : block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state})
+        : block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlock, this), JitStateInfo{jit_state})
         , emitter(block_of_code, config, jit)
         , config(std::move(config))
         , jit_interface(jit)
diff --git a/src/backend/A64/block_of_code.cpp b/src/backend/A64/block_of_code.cpp
index df78d06e..cb20736a 100644
--- a/src/backend/A64/block_of_code.cpp
+++ b/src/backend/A64/block_of_code.cpp
@@ -71,7 +71,6 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
     AllocCodeSpace(TOTAL_CODE_SIZE);
     EnableWriting();
     GenRunCode();
-    exception_handler.Register(*this);
 }
 
 void BlockOfCode::PreludeComplete() {
@@ -155,7 +154,8 @@ void BlockOfCode::GenRunCode() {
     ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
 
     MOV(Arm64Gen::X28, ABI_PARAM1);
-    MOV(Arm64Gen::X27, ABI_PARAM2);  // temporarily in non-volatile register
+    MOVI2R(Arm64Gen::X27, cb.value_in_X27);
+    MOV(Arm64Gen::X26, ABI_PARAM2);  // save temporarily in non-volatile register
 
     cb.GetTicksRemaining->EmitCall(*this);
 
@@ -163,7 +163,7 @@ void BlockOfCode::GenRunCode() {
     STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
 
     SwitchFpscrOnEntry();
-    BR(Arm64Gen::X27);
+    BR(Arm64Gen::X26);
 
     AlignCode16();
     run_code = (RunCodeFuncType) GetWritableCodePtr();
@@ -175,6 +175,7 @@ void BlockOfCode::GenRunCode() {
     ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
 
     MOV(Arm64Gen::X28, ABI_PARAM1);
+    MOVI2R(Arm64Gen::X27, cb.value_in_X27);
 
     cb.GetTicksRemaining->EmitCall(*this);
     STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
@@ -291,6 +292,14 @@ CodePtr BlockOfCode::GetCodeBegin() const {
     return near_code_begin;
 }
 
+u8* BlockOfCode::GetRegion() const {
+    return region;
+}
+
+std::size_t BlockOfCode::GetRegionSize() const {
+    return total_region_size;
+}
+
 void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
     ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");
 
@@ -323,4 +332,4 @@ void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
 //#endif
 //}
 
-} // namespace Dynarmic::BackendX64
+} // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/block_of_code.h b/src/backend/A64/block_of_code.h
index 8f7c499b..23c35b24 100644
--- a/src/backend/A64/block_of_code.h
+++ b/src/backend/A64/block_of_code.h
@@ -24,11 +24,14 @@ struct RunCodeCallbacks {
     std::unique_ptr<Callback> LookupBlock;
     std::unique_ptr<Callback> AddTicks;
     std::unique_ptr<Callback> GetTicksRemaining;
+    u64 value_in_X27;
 };
 
 class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
 public:
     BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
+    BlockOfCode(const BlockOfCode&) = delete;
+
     /// Call when external emitters have finished emitting their preludes.
     void PreludeComplete();
@@ -74,6 +77,8 @@ public:
     void SwitchToNearCode();
 
     CodePtr GetCodeBegin() const;
+    u8* GetRegion() const;
+    std::size_t GetRegionSize() const;
 
     const void* GetReturnFromRunCodeAddress() const {
         return return_from_run_code[0];
@@ -137,20 +142,6 @@ private:
     std::array<const void*, 4> return_from_run_code;
     void GenRunCode();
-
-
-    class ExceptionHandler final {
-    public:
-        ExceptionHandler();
-        ~ExceptionHandler();
-
-        void Register(BlockOfCode& code);
-    private:
-        struct Impl;
-        std::unique_ptr<Impl> impl;
-    };
-    ExceptionHandler exception_handler;
-
     //Xbyak::util::Cpu cpu_info;
 };
diff --git a/src/backend/A64/emitter/a64_emitter.cpp b/src/backend/A64/emitter/a64_emitter.cpp
index 8ad3e6fe..efdd5ffc 100644
--- a/src/backend/A64/emitter/a64_emitter.cpp
+++ b/src/backend/A64/emitter/a64_emitter.cpp
@@ -857,10 +857,12 @@ void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64
 }
 
 // FixupBranch branching
-void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) {
+void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) {
+    if (!target)
+        target = m_code;
     bool Not = false;
     u32 inst = 0;
-    s64 distance = static_cast<s64>(m_code - branch.ptr);
+    s64 distance = static_cast<s64>(target - branch.ptr);
     distance >>= 2;
 
     switch (branch.type) {
@@ -891,7 +893,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) {
             inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) |
                    ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
         } break;
-        case 5:  // B (uncoditional)
+        case 5:  // B (unconditional)
             ASSERT_MSG(IsInRangeImm26(distance), "%s(%d): Received too large distance: %" PRIx64,
                        __func__, branch.type, distance);
             inst = (0x5 << 26) | MaskImm26(distance);
diff --git a/src/backend/A64/emitter/a64_emitter.h b/src/backend/A64/emitter/a64_emitter.h
index 1f08f651..e7d84638 100644
--- a/src/backend/A64/emitter/a64_emitter.h
+++ b/src/backend/A64/emitter/a64_emitter.h
@@ -524,7 +524,7 @@ public:
     u8* GetWritableCodePtr();
 
     // FixupBranch branching
-    void SetJumpTarget(FixupBranch const& branch);
+    void SetJumpTarget(FixupBranch const& branch, u8* target = nullptr);
     FixupBranch CBZ(ARM64Reg Rt);
     FixupBranch CBNZ(ARM64Reg Rt);
     FixupBranch B(CCFlags cond);
diff --git a/src/backend/A64/exception_handler.h b/src/backend/A64/exception_handler.h
new file mode 100644
index 00000000..04eb7d0c
--- /dev/null
+++ b/src/backend/A64/exception_handler.h
@@ -0,0 +1,39 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <array>
+#include <functional>
+#include <memory>
+
+#include "backend/A64/a32_jitstate.h"
+#include "common/common_types.h"
+
+namespace Dynarmic::BackendA64 {
+
+class BlockOfCode;
+
+struct A64State {
+    std::array<u64, 32> X;
+    std::array<std::array<u64, 2>, 16> Q;
+};
+static_assert(sizeof(A64State) == sizeof(A64State::X) + sizeof(A64State::Q));
+
+class ExceptionHandler final {
+public:
+    ExceptionHandler();
+    ~ExceptionHandler();
+
+    void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);
+
+    bool SupportsFastmem() const;
+private:
+    struct Impl;
+    std::unique_ptr<Impl> impl;
+};
+
+} // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/exception_handler_generic.cpp b/src/backend/A64/exception_handler_generic.cpp
index 2e066fa4..c5b17c07 100644
--- a/src/backend/A64/exception_handler_generic.cpp
+++ b/src/backend/A64/exception_handler_generic.cpp
@@ -4,18 +4,22 @@
  * General Public License version 2 or any later version.
  */
 
-#include "backend/A64/block_of_code.h"
+#include "backend/A64/exception_handler.h"
 
 namespace Dynarmic::BackendA64 {
 
-struct BlockOfCode::ExceptionHandler::Impl final {
+struct ExceptionHandler::Impl final {
 };
 
-BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
-BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
+ExceptionHandler::ExceptionHandler() = default;
+ExceptionHandler::~ExceptionHandler() = default;
 
-void BlockOfCode::ExceptionHandler::Register(BlockOfCode&) {
+void ExceptionHandler::Register(BlockOfCode&, std::function<void(CodePtr)>) {
     // Do nothing
 }
 
+bool ExceptionHandler::SupportsFastmem() const {
+    return false;
+}
+
 } // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/exception_handler_posix.cpp b/src/backend/A64/exception_handler_posix.cpp
new file mode 100644
index 00000000..57f24045
--- /dev/null
+++ b/src/backend/A64/exception_handler_posix.cpp
@@ -0,0 +1,161 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2019 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+// Copyright 2008 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <mutex>
+#include <vector>
+
+#include <csignal>
+#include <ucontext.h>
+
+#include "backend/A64/a32_jitstate.h"
+#include "backend/A64/block_of_code.h"
+#include "backend/A64/exception_handler.h"
+#include "common/assert.h"
+#include "common/cast_util.h"
+#include "common/common_types.h"
+
+#include "jni.h"
+
+namespace Dynarmic::BackendA64 {
+
+namespace {
+
+struct CodeBlockInfo {
+    BlockOfCode* block;
+    std::function<void(CodePtr)> callback;
+};
+
+class SigHandler {
+public:
+    SigHandler();
+
+    ~SigHandler();
+
+    void AddCodeBlock(CodeBlockInfo info);
+
+    void RemoveCodeBlock(CodePtr PC);
+
+private:
+    auto FindCodeBlockInfo(CodePtr PC) {
+        return std::find_if(code_block_infos.begin(), code_block_infos.end(),
+                            [&](const CodeBlockInfo& x) {
+                                return x.block->GetRegion() <= PC &&
+                                       PC < x.block->GetRegion() + x.block->GetRegionSize();
+                            });
+    }
+
+    std::vector<CodeBlockInfo> code_block_infos;
+    std::mutex code_block_infos_mutex;
+
+    struct sigaction old_sa_segv;
+    struct sigaction old_sa_bus;
+
+    static void SigAction(int sig, siginfo_t* info, void* raw_context);
+};
+
+SigHandler sig_handler;
+
+SigHandler::SigHandler() {
+    // Method below from dolphin.
+
+    constexpr std::size_t signal_stack_size =
+        static_cast<std::size_t>(std::max(SIGSTKSZ, 2 * 1024 * 1024));
+
+    stack_t signal_stack;
+    signal_stack.ss_sp = malloc(signal_stack_size);
+    signal_stack.ss_size = signal_stack_size;
+    signal_stack.ss_flags = 0;
+    ASSERT_MSG(sigaltstack(&signal_stack, nullptr) == 0,
+               "dynarmic: POSIX SigHandler: init failure at sigaltstack");
+
+    struct sigaction sa;
+    sa.sa_handler = nullptr;
+    sa.sa_sigaction = &SigHandler::SigAction;
+    sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
+    sigemptyset(&sa.sa_mask);
+    sigaction(SIGSEGV, &sa, &old_sa_segv);
+}
+
+SigHandler::~SigHandler() {
+    // No cleanup required.
+}
+
+void SigHandler::AddCodeBlock(CodeBlockInfo cb) {
+    std::lock_guard guard(code_block_infos_mutex);
+    ASSERT(FindCodeBlockInfo(cb.block->GetRegion()) == code_block_infos.end());
+    code_block_infos.push_back(std::move(cb));
+}
+
+void SigHandler::RemoveCodeBlock(CodePtr PC) {
+    std::lock_guard guard(code_block_infos_mutex);
+    const auto iter = FindCodeBlockInfo(PC);
+    ASSERT(iter != code_block_infos.end());
+    code_block_infos.erase(iter);
+}
+
+void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
+    ASSERT(sig == SIGSEGV || sig == SIGBUS);
+
+    std::lock_guard guard(sig_handler.code_block_infos_mutex);
+
+    auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext.pc);
+
+    const auto iter = sig_handler.FindCodeBlockInfo(PC);
+    if (iter != sig_handler.code_block_infos.end()) {
+        iter->callback(PC);
+        return;
+    }
+
+    fmt::print(
+        stderr,
+        "dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
+        PC);
+
+    struct sigaction* retry_sa =
+        sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
+    if (retry_sa->sa_flags & SA_SIGINFO) {
+        retry_sa->sa_sigaction(sig, info, raw_context);
+        return;
+    }
+    if (retry_sa->sa_handler == SIG_DFL) {
+        signal(sig, SIG_DFL);
+        return;
+    }
+    if (retry_sa->sa_handler == SIG_IGN) {
+        return;
+    }
+    retry_sa->sa_handler(sig);
+}
+
+} // anonymous namespace
+
+struct ExceptionHandler::Impl final {
+    Impl(BlockOfCode& code, std::function<void(CodePtr)> cb) {
+        code_begin = code.GetRegion();
+        sig_handler.AddCodeBlock({&code, std::move(cb)});
+    }
+
+    ~Impl() {
+        sig_handler.RemoveCodeBlock(code_begin);
+    }
+
+private:
+    CodePtr code_begin;
+};
+
+ExceptionHandler::ExceptionHandler() = default;
+
+ExceptionHandler::~ExceptionHandler() = default;
+
+void ExceptionHandler::Register(BlockOfCode& code, std::function<void(CodePtr)> cb) {
+    if (cb)
+        impl = std::make_unique<Impl>(code, std::move(cb));
+}
+
+bool ExceptionHandler::SupportsFastmem() const {
+    return static_cast<bool>(impl);
+}
+
+} // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/hostloc.h b/src/backend/A64/hostloc.h
index bf6e87b0..36a40f0d 100644
--- a/src/backend/A64/hostloc.h
+++ b/src/backend/A64/hostloc.h
@@ -130,22 +130,32 @@ inline size_t HostLocBitWidth(HostLoc loc) {
 
 using HostLocList = std::initializer_list<HostLoc>;
 
- // X18 may be reserved.(Windows and iOS)
+// X27 contains an emulated memory related pointer
 // X28 used for holding the JitState.
 // X30 is the link register.
-const HostLocList any_gpr = {
-    HostLoc::X0, HostLoc::X1, HostLoc::X2, HostLoc::X3, HostLoc::X4, HostLoc::X5, HostLoc::X6, HostLoc::X7,
-    HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12, HostLoc::X13, HostLoc::X14, HostLoc::X15,
-    HostLoc::X16, HostLoc::X17, HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23, HostLoc::X24,
-    HostLoc::X25, HostLoc::X26, HostLoc::X27, // HostLoc::X29,
+// In order of desirability, based first on the ABI
+constexpr HostLocList any_gpr = {
+    HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
+    HostLoc::X24, HostLoc::X25, HostLoc::X26,
+
+    HostLoc::X8,  HostLoc::X9,  HostLoc::X10, HostLoc::X11, HostLoc::X12,
+    HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,
+
+    HostLoc::X7,  HostLoc::X6,  HostLoc::X5,  HostLoc::X4,  HostLoc::X3,
+    HostLoc::X2,  HostLoc::X1,  HostLoc::X0,
 };
 
-const HostLocList any_fpr = {
-    HostLoc::Q0, HostLoc::Q1, HostLoc::Q2, HostLoc::Q3, HostLoc::Q4, HostLoc::Q5, HostLoc::Q6, HostLoc::Q7,
-    HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13, HostLoc::Q14, HostLoc::Q15,
-    HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21, HostLoc::Q22, HostLoc::Q23,
-    HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27, HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
+constexpr HostLocList any_fpr = {
+    HostLoc::Q8,  HostLoc::Q9,  HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13,
+    HostLoc::Q14, HostLoc::Q15,
+
+    HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21,
+    HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27,
+    HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
+
+    HostLoc::Q7,  HostLoc::Q6,  HostLoc::Q5,  HostLoc::Q4,  HostLoc::Q3,  HostLoc::Q2,
+    HostLoc::Q1,  HostLoc::Q0,
 };
 
 Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
@@ -156,9 +166,10 @@ size_t SpillToOpArg(HostLoc loc) {
     ASSERT(HostLocIsSpill(loc));
 
     size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
-    ASSERT_MSG(i < JitStateType::SpillCount, "Spill index greater than number of available spill locations");
+    ASSERT_MSG(i < JitStateType::SpillCount,
+               "Spill index greater than number of available spill locations");
 
     return JitStateType::GetSpillLocationOffsetFromIndex(i);
 }
 
-} // namespace Dynarmic::BackendX64
+} // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/reg_alloc.cpp b/src/backend/A64/reg_alloc.cpp
index e6f606f9..36b7f5ef 100644
--- a/src/backend/A64/reg_alloc.cpp
+++ b/src/backend/A64/reg_alloc.cpp
@@ -435,7 +435,7 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
     std::vector<HostLoc> candidates = desired_locations;
 
     // Find all locations that have not been allocated..
-    auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
+    const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
        return !this->LocInfo(loc).IsLocked();
    });
    candidates.erase(allocated_locs, candidates.end());
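
For context, the slow path that the `page_table_lookup` lambdas emit is an ordinary two-level translation: index a table of host pages with `vaddr >> 12`, fall back to a callback when the entry is null, and otherwise access the page at offset `vaddr & 4095`. Below is a minimal host-side C++ sketch of that same walk; the names `PageTable`, `ReadFallback`, and `Read32` are illustrative only, not dynarmic's API:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

using VAddr = std::uint32_t;

constexpr unsigned PAGE_BITS = 12;                           // matches the emitted LSR #12
constexpr std::size_t PAGE_SIZE = std::size_t(1) << PAGE_BITS;
constexpr std::size_t PAGE_COUNT = std::size_t(1) << (32 - PAGE_BITS);

// One host pointer per 4 KiB guest page; null means "not directly mapped".
using PageTable = std::vector<std::uint8_t*>;

// Stands in for the code.BL(callback_fn) slow-path call.
using ReadFallback = std::uint32_t (*)(VAddr);

std::uint32_t Read32(const PageTable& table, VAddr vaddr, ReadFallback fallback) {
    std::uint8_t* page = table[vaddr >> PAGE_BITS];          // LDR of the table entry
    if (!page)                                               // CBZ -> abort branch
        return fallback(vaddr);
    std::uint32_t value;                                     // ANDI2R vaddr, vaddr, 4095 + LDR
    std::memcpy(&value, page + (vaddr & (PAGE_SIZE - 1)), sizeof(value));
    return value;
}

int main() {
    PageTable table(PAGE_COUNT, nullptr);
    std::vector<std::uint8_t> backing(PAGE_SIZE, 0);
    backing[4] = 0xAB;
    table[0] = backing.data();                               // map guest page 0 only

    ReadFallback fallback = [](VAddr) -> std::uint32_t { return 0xDEADBEEF; };
    std::printf("%08X\n", static_cast<unsigned>(Read32(table, 0x0004, fallback))); // 000000AB
    std::printf("%08X\n", static_cast<unsigned>(Read32(table, 0x1004, fallback))); // DEADBEEF
    return 0;
}
```

The fastmem path in this patch removes even that table walk from the hot path: loads and stores go straight through the X27 base pointer and rely on the host MMU. Only when a guest access actually faults does the POSIX handler map the faulting PC back to its patch site via `FastmemCallback`, rewrite the 5-instruction site into a branch to the page-table/callback thunk, and record the instruction in `do_not_fastmem` so the block is recompiled without fastmem there.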