From 4b48391fd3e8d0a0efbc87603fb90354fa6d2404 Mon Sep 17 00:00:00 2001 From: SachinVin Date: Sat, 3 Aug 2019 10:32:20 +0530 Subject: [PATCH] backend/a64: Port block_of_code and emit_a64 --- src/backend/A64/block_of_code.cpp | 314 ++++++++++++++++++++++++++++++ src/backend/A64/block_of_code.h | 153 +++++++++++++++ src/backend/A64/emit_a64.cpp | 301 ++++++++++++++++++++++++++++ src/backend/A64/emit_a64.h | 125 ++++++++++++ 4 files changed, 893 insertions(+) create mode 100644 src/backend/A64/block_of_code.cpp create mode 100644 src/backend/A64/block_of_code.h create mode 100644 src/backend/A64/emit_a64.cpp create mode 100644 src/backend/A64/emit_a64.h diff --git a/src/backend/A64/block_of_code.cpp b/src/backend/A64/block_of_code.cpp new file mode 100644 index 00000000..8a8166bc --- /dev/null +++ b/src/backend/A64/block_of_code.cpp @@ -0,0 +1,314 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include +#include +#include + +#include "backend/A64/a32_jitstate.h" +#include "backend/A64/abi.h" +#include "backend/A64/block_of_code.h" +#include "backend/A64/perf_map.h" +#include "common/assert.h" + +#ifdef _WIN32 + #include +#else + #include +#endif + +namespace Dynarmic::BackendA64 { + +const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1; + +const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM2 = Arm64Gen::ARM64Reg::X1; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM3 = Arm64Gen::ARM64Reg::X2; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM4 = Arm64Gen::ARM64Reg::X3; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM5 = Arm64Gen::ARM64Reg::X4; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM6 = Arm64Gen::ARM64Reg::X5; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM7 = Arm64Gen::ARM64Reg::X6; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7; + +const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30; + +const std::array BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, + BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4, + BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6, + BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8}; + +namespace { + +constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024; +constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024; +constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024; + +#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT +void ProtectMemory(const void* base, size_t size, bool is_executable) { +#ifdef _WIN32 + DWORD oldProtect = 0; + VirtualProtect(const_cast(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect); +#else + static const size_t pageSize = sysconf(_SC_PAGESIZE); + const size_t iaddr = reinterpret_cast(base); + const size_t roundAddr = iaddr & ~(pageSize - static_cast(1)); + const int mode = is_executable ? 
(PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE); + mprotect(reinterpret_cast(roundAddr), size + (iaddr - roundAddr), mode); +#endif +} +#endif + +} // anonymous namespace + +BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi) + : fp_emitter(this) + , cb(std::move(cb)) + , jsi(jsi) + , constant_pool(*this, CONSTANT_POOL_SIZE) { + AllocCodeSpace(TOTAL_CODE_SIZE); + constant_pool.AllocatePool(); + EnableWriting(); + GenRunCode(); + exception_handler.Register(*this); +} + +void BlockOfCode::PreludeComplete() { + prelude_complete = true; + near_code_begin = GetCodePtr(); + far_code_begin = GetCodePtr() + FAR_CODE_OFFSET; + FlushIcache(); + ClearCache(); + DisableWriting(); +} + +void BlockOfCode::EnableWriting() { +#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT + ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, false); +#endif +} + +void BlockOfCode::DisableWriting() { +#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT + ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, true); +#endif +} + +void BlockOfCode::ClearCache() { + ASSERT(prelude_complete); + in_far_code = false; + near_code_ptr = near_code_begin; + far_code_ptr = far_code_begin; + SetCodePtr(near_code_begin); +} + +size_t BlockOfCode::SpaceRemaining() const { + ASSERT(prelude_complete); + // This function provides an underestimate of near-code-size but that's okay. + // (Why? The maximum size of near code should be measured from near_code_begin, not top_.) + // These are offsets from Xbyak::CodeArray::top_. + std::size_t far_code_offset, near_code_offset; + if (in_far_code) { + near_code_offset = static_cast(near_code_ptr) - static_cast(region); + far_code_offset = GetCodePtr() - static_cast(region); + } else { + near_code_offset = GetCodePtr() - static_cast(region); + far_code_offset = static_cast(far_code_ptr) - static_cast(region); + } + if (far_code_offset > TOTAL_CODE_SIZE) + return 0; + if (near_code_offset > FAR_CODE_OFFSET) + return 0; + return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset); +} + +void BlockOfCode::RunCode(void* jit_state) const { + run_code(jit_state); +} + +void BlockOfCode::RunCodeFrom(void* jit_state, CodePtr code_ptr) const { + run_code_from(jit_state, code_ptr); +} + +void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) { + size_t index = 0; + if (mxcsr_already_exited) + index |= MXCSR_ALREADY_EXITED; + B(return_from_run_code[index]); +} + +void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) { + size_t index = FORCE_RETURN; + if (mxcsr_already_exited) + index |= MXCSR_ALREADY_EXITED; //TODO: refactor to fpcr + B(return_from_run_code[index]); +} + +void BlockOfCode::GenRunCode() { + const u8* loop, *enter_mxcsr_then_loop; + + run_code_from = (RunCodeFromFuncType) const_cast(AlignCode16()); + + ABI_PushCalleeSaveRegistersAndAdjustStack(*this); + + MOV(Arm64Gen::X28, ABI_PARAM1); + MOV(Arm64Gen::X27, ABI_PARAM2); // temporarily in non-volatile register + + cb.GetTicksRemaining->EmitCall(*this); + + STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); + STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); + + SwitchMxcsrOnEntry(); + BR(Arm64Gen::X27); + + run_code = (RunCodeFuncType) const_cast(AlignCode16()); + + // This serves two purposes: + // 1. It saves all the registers we as a callee need to save. + // 2. It aligns the stack so that the code the JIT emits can assume + // that the stack is appropriately aligned for CALLs. 
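+    // X28 is pinned to the JitState pointer for as long as we are inside emitted
+    // code; every jsi.offsetof_* load/store below is relative to it.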
+ ABI_PushCalleeSaveRegistersAndAdjustStack(*this); + + MOV(Arm64Gen::X28, ABI_PARAM1); + + cb.GetTicksRemaining->EmitCall(*this); + STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); + STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); + + enter_mxcsr_then_loop = GetCodePtr(); + SwitchMxcsrOnEntry(); + loop = GetCodePtr(); + + cb.LookupBlock->EmitCall(*this); + BR(ABI_RETURN); + + // Return from run code variants + const auto emit_return_from_run_code = [this, &loop, &enter_mxcsr_then_loop](bool mxcsr_already_exited, bool force_return){ + if (!force_return) { + LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); + CMP(ABI_SCRATCH1, Arm64Gen::ZR); + B(CC_GT, mxcsr_already_exited ? enter_mxcsr_then_loop : loop); + } + + if (!mxcsr_already_exited) { + SwitchMxcsrOnExit(); + } + + cb.AddTicks->EmitCall(*this, [this](RegList param) { + LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run); + LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); + SUBS(param[0], param[0], ABI_SCRATCH1); + }); + + ABI_PopCalleeSaveRegistersAndAdjustStack(*this); + RET(); + }; + + return_from_run_code[0] = AlignCode16(); + emit_return_from_run_code(false, false); + + return_from_run_code[MXCSR_ALREADY_EXITED] = AlignCode16(); + emit_return_from_run_code(true, false); + + return_from_run_code[FORCE_RETURN] = AlignCode16(); + emit_return_from_run_code(false, true); + + return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16(); + emit_return_from_run_code(true, true); + + PerfMapRegister(run_code_from, GetCodePtr(), "dynarmic_dispatcher"); +} + +void BlockOfCode::SwitchMxcsrOnEntry() { + MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR); + STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR); + LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPCR); + _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1); +} + +void BlockOfCode::SwitchMxcsrOnExit() { + MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR); + STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPCR); + LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR); + _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1); +} + +void BlockOfCode::UpdateTicks() { + cb.AddTicks->EmitCall(*this, [this](RegList param) { + LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run); + LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); + SUBS(param[0], param[0], ABI_SCRATCH1); + }); + + cb.GetTicksRemaining->EmitCall(*this); + STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); + STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); +} + +void BlockOfCode::LookupBlock() { + cb.LookupBlock->EmitCall(*this); +} + +void* BlockOfCode::MConst(u64 lower, u64 upper) { + return constant_pool.GetConstant(lower, upper); +} + +void BlockOfCode::SwitchToFarCode() { + ASSERT(prelude_complete); + ASSERT(!in_far_code); + in_far_code = true; + near_code_ptr = GetCodePtr(); + SetCodePtr(far_code_ptr); + + ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!"); +} + +void BlockOfCode::SwitchToNearCode() { + ASSERT(prelude_complete); + ASSERT(in_far_code); + in_far_code = false; + far_code_ptr = GetCodePtr(); + SetCodePtr(near_code_ptr); +} + +CodePtr 
BlockOfCode::GetCodeBegin() const { + return near_code_begin; +} + +void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) { + ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG"); + + void* ret = const_cast(GetCodePtr()); + region_size += alloc_size; + SetCodePtr(GetCodePtr() + alloc_size); + memset(ret, 0, alloc_size); + return ret; +} + +void BlockOfCode::SetCodePtr(CodePtr code_ptr) { + u8* ptr = const_cast(reinterpret_cast(code_ptr)); + ARM64XEmitter::SetCodePtr(ptr); +} + +void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) { + size_t current_size = GetCodePtr() - reinterpret_cast(begin); + ASSERT(current_size <= size); + for (u32 i = 0; i < (size - current_size) / 4; i++) { + HINT(Arm64Gen::HINT_NOP); + } +} + +//bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const { +//#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION +// return cpu_info.has(type); +//#else +// (void)type; +// return false; +//#endif +//} + +} // namespace Dynarmic::BackendX64 diff --git a/src/backend/A64/block_of_code.h b/src/backend/A64/block_of_code.h new file mode 100644 index 00000000..7c48af7b --- /dev/null +++ b/src/backend/A64/block_of_code.h @@ -0,0 +1,153 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include +#include + +#include "backend/A64/callback.h" +#include "backend/A64/constant_pool.h" +#include "backend/A64/jitstate_info.h" +#include "backend/A64/emitter/a64_emitter.h" +#include "common/common_types.h" + +namespace Dynarmic::BackendA64 { + +using CodePtr = const void*; + +struct RunCodeCallbacks { + std::unique_ptr LookupBlock; + std::unique_ptr AddTicks; + std::unique_ptr GetTicksRemaining; +}; + +class BlockOfCode final : public Arm64Gen::ARM64CodeBlock { +public: + BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi); + + /// Call when external emitters have finished emitting their preludes. + void PreludeComplete(); + + /// Change permissions to RW. This is required to support systems with W^X enforced. + void EnableWriting(); + /// Change permissions to RX. This is required to support systems with W^X enforced. + void DisableWriting(); + + /// Clears this block of code and resets code pointer to beginning. + void ClearCache(); + /// Calculates how much space is remaining to use. This is the minimum of near code and far code. + size_t SpaceRemaining() const; + + /// Runs emulated code. + void RunCode(void* jit_state) const; + /// Runs emulated code from code_ptr. + void RunCodeFrom(void* jit_state, CodePtr code_ptr) const; + /// Code emitter: Returns to dispatcher + void ReturnFromRunCode(bool mxcsr_already_exited = false); + /// Code emitter: Returns to dispatcher, forces return to host + void ForceReturnFromRunCode(bool mxcsr_already_exited = false); + /// Code emitter: Makes guest MXCSR the current MXCSR + void SwitchMxcsrOnEntry(); + /// Code emitter: Makes saved host MXCSR the current MXCSR + void SwitchMxcsrOnExit(); + /// Code emitter: Updates cycles remaining my calling cb.AddTicks and cb.GetTicksRemaining + /// @note this clobbers ABI caller-save registers + void UpdateTicks(); + /// Code emitter: Performs a block lookup based on current state + /// @note this clobbers ABI caller-save registers + void LookupBlock(); + + void* MConst(u64 lower, u64 upper = 0); + + /// Far code sits far away from the near code. 
Execution remains primarily in near code. + /// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary. + void SwitchToFarCode(); + void SwitchToNearCode(); + + CodePtr GetCodeBegin() const; + + const void* GetReturnFromRunCodeAddress() const { + return return_from_run_code[0]; + } + + const void* GetForceReturnFromRunCodeAddress() const { + return return_from_run_code[FORCE_RETURN]; + } + + /// Allocate memory of `size` bytes from the same block of memory the code is in. + /// This is useful for objects that need to be placed close to or within code. + /// The lifetime of this memory is the same as the code around it. + void* AllocateFromCodeSpace(size_t size); + + void SetCodePtr(CodePtr code_ptr); + void EnsurePatchLocationSize(CodePtr begin, size_t size); + + Arm64Gen::ARM64FloatEmitter fp_emitter; + + // ABI registers + + static const Arm64Gen::ARM64Reg ABI_RETURN; + static const Arm64Gen::ARM64Reg ABI_RETURN2; + static const Arm64Gen::ARM64Reg ABI_PARAM1; + static const Arm64Gen::ARM64Reg ABI_PARAM2; + static const Arm64Gen::ARM64Reg ABI_PARAM3; + static const Arm64Gen::ARM64Reg ABI_PARAM4; + static const Arm64Gen::ARM64Reg ABI_PARAM5; + static const Arm64Gen::ARM64Reg ABI_PARAM6; + static const Arm64Gen::ARM64Reg ABI_PARAM7; + static const Arm64Gen::ARM64Reg ABI_PARAM8; + + static const Arm64Gen::ARM64Reg ABI_SCRATCH1; + + static const std::array ABI_PARAMS; + + // bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const; + + JitStateInfo GetJitStateInfo() const { return jsi; } + +private: + RunCodeCallbacks cb; + JitStateInfo jsi; + + bool prelude_complete = false; + CodePtr near_code_begin; + CodePtr far_code_begin; + + ConstantPool constant_pool; + + bool in_far_code = false; + CodePtr near_code_ptr; + CodePtr far_code_ptr; + + using RunCodeFuncType = void(*)(void*); + using RunCodeFromFuncType = void(*)(void*, CodePtr); + RunCodeFuncType run_code = nullptr; + RunCodeFromFuncType run_code_from = nullptr; + static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0; + static constexpr size_t FORCE_RETURN = 1 << 1; + std::array return_from_run_code; + void GenRunCode(); + + + + class ExceptionHandler final { + public: + ExceptionHandler(); + ~ExceptionHandler(); + + void Register(BlockOfCode& code); + private: + struct Impl; + std::unique_ptr impl; + }; + ExceptionHandler exception_handler; + + //Xbyak::util::Cpu cpu_info; +}; + +} // namespace Dynarmic::BackendX64 diff --git a/src/backend/A64/emit_a64.cpp b/src/backend/A64/emit_a64.cpp new file mode 100644 index 00000000..27ecc812 --- /dev/null +++ b/src/backend/A64/emit_a64.cpp @@ -0,0 +1,301 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include +#include + +#include "backend/A64/block_of_code.h" +#include "backend/A64/emit_a64.h" +#include "backend/A64/hostloc.h" +#include "backend/A64/perf_map.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/scope_exit.h" +#include "common/variant_util.h" +#include "frontend/ir/basic_block.h" +#include "frontend/ir/microinstruction.h" +#include "frontend/ir/opcodes.h" + +// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary. +// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart. 
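+// Overview: a frontend-specific emitter derived from EmitA64 drives code generation
+// for one IR block roughly as follows: EmitCondPrelude() emits the conditional-skip
+// check, each IR instruction is lowered by its Emit* member, EmitTerminal() emits the
+// block's exit, and RegisterBlock() records the finished entrypoint and back-patches
+// earlier references to this block.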
+ +namespace Dynarmic::BackendA64 { + +EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block) + : reg_alloc(reg_alloc), block(block) {} + +void EmitContext::EraseInstruction(IR::Inst* inst) { + block.Instructions().erase(inst); + inst->ClearArgs(); +} + +EmitA64::EmitA64(BlockOfCode& code) + : code(code) {} + +EmitA64::~EmitA64() = default; + +std::optional EmitA64::GetBasicBlock(IR::LocationDescriptor descriptor) const { + auto iter = block_descriptors.find(descriptor); + if (iter == block_descriptors.end()) + return std::nullopt; + return iter->second; +} + +void EmitA64::EmitVoid(EmitContext&, IR::Inst*) { +} + +void EmitA64::EmitBreakpoint(EmitContext&, IR::Inst*) { + code.BRK(0); +} + +void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (!args[0].IsImmediate()) { + ctx.reg_alloc.DefineValue(inst, args[0]); + } +} + +void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) { + auto iter = block_descriptors.find(target); + CodePtr target_code_ptr = iter != block_descriptors.end() + ? iter->second.entrypoint + : code.GetReturnFromRunCodeAddress(); + + code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr); + + code.MOVI2R(loc_desc_reg, target.Value()); + + patch_information[target].mov_x0.emplace_back(code.GetCodePtr()); + EmitPatchMovX0(target_code_ptr); + + code.ADD(code.ABI_SCRATCH1, X28, DecodeReg(index_reg), ArithOption{index_reg, ST_LSL, 3}); + code.STR(INDEX_UNSIGNED, loc_desc_reg, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_location_descriptors); + code.STR(INDEX_UNSIGNED, X0, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_codeptrs); + + code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1); + code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1); + code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr); +} + +void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[0].IsImmediate()); + u64 unique_hash_of_target = args[0].GetImmediateU64(); + + ctx.reg_alloc.ScratchGpr({HostLoc::X0}); + Arm64Gen::ARM64Reg loc_desc_reg = ctx.reg_alloc.ScratchGpr(); + Arm64Gen::ARM64Reg index_reg = ctx.reg_alloc.ScratchGpr(); + + PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target}); +} + +void EmitA64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) { + ASSERT_MSG(false, "should never happen"); +} + +void EmitA64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) { + ASSERT_MSG(false, "should never happen"); +} + +void EmitA64::EmitGetGEFromOp(EmitContext&, IR::Inst*) { + ASSERT_MSG(false, "should never happen"); +} + +void EmitA64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) { + ASSERT_MSG(false, "should never happen"); +} + +void EmitA64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) { + ASSERT_MSG(false, "should never happen"); +} + +void EmitA64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + Arm64Gen::ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr(); + Arm64Gen::ARM64Reg value = ctx.reg_alloc.UseGpr(args[0]); + code.CMP(value, ZR); + code.MRS(nzcv, FIELD_NZCV); + ctx.reg_alloc.DefineValue(inst, nzcv); +} + +void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + if (args[0].IsImmediate()) { + 
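+        // Immediate case: spread the guest NZCV bits (31-28) into the packed-flags
+        // layout (N -> bit 15, Z -> bit 14, C -> bit 8, V -> bit 0) at assembly time.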
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr()); + u32 value = 0; + value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0; + value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0; + value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0; + value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0; + code.MOVI2R(nzcv, value); + ctx.reg_alloc.DefineValue(inst, nzcv); + } else { + Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); + // TODO: Optimize + code.LSR(nzcv, nzcv, 28); + code.MOVI2R(code.ABI_SCRATCH1, 0b00010000'10000001); + code.MUL(nzcv, nzcv, code.ABI_SCRATCH1); + code.ANDI2R(nzcv,nzcv, 1, code.ABI_SCRATCH1); + ctx.reg_alloc.DefineValue(inst, nzcv); + } +} + +void EmitA64::EmitAddCycles(size_t cycles) { + ASSERT(cycles < std::numeric_limits::max()); + code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining); + code.SUBI2R(code.ABI_SCRATCH1, code.ABI_SCRATCH1, static_cast(cycles)); + code.STR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining); +} + +FixupBranch EmitA64::EmitCond(IR::Cond cond) { + FixupBranch label; + + const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1; + code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_CPSR_nzcv); + code._MSR(FIELD_NZCV, cpsr); + + switch (cond) { + case IR::Cond::EQ: //z + label = code.B(CC_EQ); + break; + case IR::Cond::NE: //!z + label = code.B(CC_NEQ); + break; + case IR::Cond::CS: //c + label = code.B(CC_CS); + break; + case IR::Cond::CC: //!c + label = code.B(CC_CC); + break; + case IR::Cond::MI: //n + label = code.B(CC_MI); + break; + case IR::Cond::PL: //!n + label = code.B(CC_PL); + break; + case IR::Cond::VS: //v + label = code.B(CC_VS); + break; + case IR::Cond::VC: //!v + label = code.B(CC_VC); + break; + case IR::Cond::HI: //c & !z + label = code.B(CC_HI); + break; + case IR::Cond::LS: //!c | z + label = code.B(CC_LS); + break; + case IR::Cond::GE: // n == v + label = code.B(CC_GE); + break; + case IR::Cond::LT: // n != v + label = code.B(CC_LT); + break; + case IR::Cond::GT: // !z & (n == v) + label = code.B(CC_GT); + break; + case IR::Cond::LE: // z | (n != v) + label = code.B(CC_LE); + break; + default: + ASSERT_MSG(false, "Unknown cond {}", static_cast(cond)); + break; + } + + return label; +} + +void EmitA64::EmitCondPrelude(const IR::Block& block) { + if (block.GetCondition() == IR::Cond::AL) { + ASSERT(!block.HasConditionFailedLocation()); + return; + } + + ASSERT(block.HasConditionFailedLocation()); + + FixupBranch pass = EmitCond(block.GetCondition()); + EmitAddCycles(block.ConditionFailedCycleCount()); + EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location()); + code.SetJumpTarget(pass); +} + +EmitA64::BlockDescriptor EmitA64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) { + PerfMapRegister(entrypoint, code.GetCodePtr(), LocationDescriptorToFriendlyName(descriptor)); + Patch(descriptor, entrypoint); + BlockDescriptor block_desc{entrypoint, size}; + + block_descriptors.emplace(descriptor.Value(), block_desc); + return block_desc; +} + +void EmitA64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) { + Common::VisitVariant(terminal, [this, &initial_location](auto x) { + using T = std::decay_t; + if constexpr (!std::is_same_v) { + this->EmitTerminalImpl(x, initial_location); + } else { + ASSERT_MSG(false, "Invalid terminal"); + } + 
}); +} + +void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) { + const CodePtr save_code_ptr = code.GetCodePtr(); + const PatchInformation& patch_info = patch_information[desc]; + + for (CodePtr location : patch_info.jg) { + code.SetCodePtr(location); + EmitPatchJg(desc, bb); + code.FlushIcache(); + } + + for (CodePtr location : patch_info.jmp) { + code.SetCodePtr(location); + EmitPatchJmp(desc, bb); + code.FlushIcache(); + } + + for (CodePtr location : patch_info.mov_x0) { + code.SetCodePtr(location); + EmitPatchMovX0(bb); + code.FlushIcache(); + } + + code.SetCodePtr(save_code_ptr); +} + +void EmitA64::Unpatch(const IR::LocationDescriptor& desc) { + Patch(desc, nullptr); +} + +void EmitA64::ClearCache() { + block_descriptors.clear(); + patch_information.clear(); + + PerfMapClear(); +} + +void EmitA64::InvalidateBasicBlocks(const std::unordered_set& locations) { + code.EnableWriting(); + SCOPE_EXIT { code.DisableWriting(); }; + + for (const auto &descriptor : locations) { + auto it = block_descriptors.find(descriptor); + if (it == block_descriptors.end()) { + continue; + } + + if (patch_information.count(descriptor)) { + Unpatch(descriptor); + } + block_descriptors.erase(it); + } +} + +} // namespace Dynarmic::BackendA64 diff --git a/src/backend/A64/emit_a64.h b/src/backend/A64/emit_a64.h new file mode 100644 index 00000000..85386e57 --- /dev/null +++ b/src/backend/A64/emit_a64.h @@ -0,0 +1,125 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "backend/A64/reg_alloc.h" +#include "backend/A64/emitter/a64_emitter.h" +#include "common/bit_util.h" +#include "common/fp/rounding_mode.h" +#include "frontend/ir/location_descriptor.h" +#include "frontend/ir/terminal.h" + +namespace Dynarmic::IR { +class Block; +class Inst; +} // namespace Dynarmic::IR + +namespace Dynarmic::BackendA64 { + +class BlockOfCode; + +using namespace Arm64Gen; + +using A64FullVectorWidth = std::integral_constant; + +// Array alias that always sizes itself according to the given type T +// relative to the size of a vector register. e.g. T = u32 would result +// in a std::array. +template +using VectorArray = std::array()>; + +struct EmitContext { + EmitContext(RegAlloc& reg_alloc, IR::Block& block); + + void EraseInstruction(IR::Inst* inst); + + virtual FP::RoundingMode FPSCR_RMode() const = 0; + virtual u32 FPCR() const = 0; + virtual bool FPSCR_FTZ() const = 0; + virtual bool FPSCR_DN() const = 0; + virtual bool AccurateNaN() const { return true; } + + RegAlloc& reg_alloc; + IR::Block& block; +}; + +class EmitA64 { +public: + struct BlockDescriptor { + CodePtr entrypoint; // Entrypoint of emitted code + size_t size; // Length in bytes of emitted code + }; + + EmitA64(BlockOfCode& code); + virtual ~EmitA64(); + + /// Looks up an emitted host block in the cache. + std::optional GetBasicBlock(IR::LocationDescriptor descriptor) const; + + /// Empties the entire cache. + virtual void ClearCache(); + + /// Invalidates a selection of basic blocks. + void InvalidateBasicBlocks(const std::unordered_set& locations); + +protected: + // Microinstruction emitters +#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst); +#define A32OPC(...) +#define A64OPC(...) 
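+// opcodes.inc is an X-macro list: including it below expands OPCODE(...) once per
+// shared IR opcode, declaring one Emit##name member for each. A32OPC/A64OPC expand
+// to nothing here; the frontend-specific emitters declare those opcodes themselves.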
+#include "backend/A64/opcodes.inc" +#undef OPCODE +#undef A32OPC +#undef A64OPC + + // Helpers + virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0; + void EmitAddCycles(size_t cycles); + FixupBranch EmitCond(IR::Cond cond); + void EmitCondPrelude(const IR::Block& block); + BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size); + void PushRSBHelper(Arm64Gen::ARM64Reg loc_desc_reg, Arm64Gen::ARM64Reg index_reg, IR::LocationDescriptor target); + + // Terminal instruction emitters + void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location); + virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) = 0; + virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) = 0; + virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) = 0; + virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) = 0; + virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) = 0; + virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location) = 0; + virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) = 0; + virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) = 0; + virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) = 0; + + // Patching + struct PatchInformation { + std::vector jg; + std::vector jmp; + std::vector mov_x0; + }; + void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr); + void Unpatch(const IR::LocationDescriptor& target_desc); + virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; + virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; + virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0; + + // State + BlockOfCode& code; + std::unordered_map block_descriptors; + std::unordered_map patch_information; +}; + +} // namespace Dynarmic::BackendX64