backend/a64: Port block_of_code and emit_a64

This commit is contained in:
SachinVin 2019-08-03 10:32:20 +05:30
parent 0708019057
commit 4b48391fd3
4 changed files with 893 additions and 0 deletions

314
src/backend/A64/block_of_code.cpp Normal file

@@ -0,0 +1,314 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include <cstring>
#include <limits>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h> // for sysconf
#endif
namespace Dynarmic::BackendA64 {
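// Host ABI registers, following the AArch64 procedure call standard (AAPCS64):
// X0-X7 carry integer arguments and return values. X30 (the link register) is
// repurposed as a scratch register.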
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM3 = Arm64Gen::ARM64Reg::X2;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM4 = Arm64Gen::ARM64Reg::X3;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM5 = Arm64Gen::ARM64Reg::X4;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM6 = Arm64Gen::ARM64Reg::X5;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM7 = Arm64Gen::ARM64Reg::X6;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30;
const std::array<Arm64Gen::ARM64Reg, 8> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2,
BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4,
BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6,
BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8};
namespace {
constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;
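// Near code is emitted from the end of the prelude; far code begins FAR_CODE_OFFSET
// bytes after that, within the same TOTAL_CODE_SIZE allocation (see PreludeComplete).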
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory(const void* base, size_t size, bool is_executable) {
#ifdef _WIN32
DWORD oldProtect = 0;
VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#else
static const size_t pageSize = sysconf(_SC_PAGESIZE);
const size_t iaddr = reinterpret_cast<size_t>(base);
const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif
}
#endif
} // anonymous namespace
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
: fp_emitter(this)
, cb(std::move(cb))
, jsi(jsi)
, constant_pool(*this, CONSTANT_POOL_SIZE) {
AllocCodeSpace(TOTAL_CODE_SIZE);
constant_pool.AllocatePool();
EnableWriting();
GenRunCode();
exception_handler.Register(*this);
}
void BlockOfCode::PreludeComplete() {
prelude_complete = true;
near_code_begin = GetCodePtr();
far_code_begin = GetCodePtr() + FAR_CODE_OFFSET;
FlushIcache();
ClearCache();
DisableWriting();
}
void BlockOfCode::EnableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
ProtectMemory(region, TOTAL_CODE_SIZE, false);
#endif
}
void BlockOfCode::DisableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
ProtectMemory(region, TOTAL_CODE_SIZE, true);
#endif
}
void BlockOfCode::ClearCache() {
ASSERT(prelude_complete);
in_far_code = false;
near_code_ptr = near_code_begin;
far_code_ptr = far_code_begin;
SetCodePtr(near_code_begin);
}
size_t BlockOfCode::SpaceRemaining() const {
ASSERT(prelude_complete);
// This function provides an underestimate of the remaining space, but that's okay.
// (Why? The maximum size of near code should be measured from near_code_begin, not from the start of the region.)
// These are offsets from the start of the allocated region.
std::size_t far_code_offset, near_code_offset;
if (in_far_code) {
near_code_offset = static_cast<const u8*>(near_code_ptr) - static_cast<const u8*>(region);
far_code_offset = GetCodePtr() - static_cast<const u8*>(region);
} else {
near_code_offset = GetCodePtr() - static_cast<const u8*>(region);
far_code_offset = static_cast<const u8*>(far_code_ptr) - static_cast<const u8*>(region);
}
if (far_code_offset > TOTAL_CODE_SIZE)
return 0;
if (near_code_offset > FAR_CODE_OFFSET)
return 0;
return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
}
void BlockOfCode::RunCode(void* jit_state) const {
run_code(jit_state);
}
void BlockOfCode::RunCodeFrom(void* jit_state, CodePtr code_ptr) const {
run_code_from(jit_state, code_ptr);
}
void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
size_t index = 0;
if (mxcsr_already_exited)
index |= MXCSR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
size_t index = FORCE_RETURN;
if (mxcsr_already_exited)
index |= MXCSR_ALREADY_EXITED; // TODO: refactor to FPCR
B(return_from_run_code[index]);
}
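// Generates the dispatcher: run_code_from jumps straight to a given code pointer,
// run_code enters the block-lookup loop, and four return stubs hand control back to the host.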
void BlockOfCode::GenRunCode() {
const u8* loop, *enter_mxcsr_then_loop;
run_code_from = (RunCodeFromFuncType) const_cast<u8*>(AlignCode16());
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOV(Arm64Gen::X27, ABI_PARAM2); // temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
SwitchMxcsrOnEntry();
BR(Arm64Gen::X27);
run_code = (RunCodeFuncType) const_cast<u8*>(AlignCode16());
// This serves two purposes:
// 1. It saves all the registers that we, as the callee, are required to save.
// 2. It aligns the stack so that the code the JIT emits can assume
//    that the stack is appropriately aligned for calls.
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
enter_mxcsr_then_loop = GetCodePtr();
SwitchMxcsrOnEntry();
loop = GetCodePtr();
cb.LookupBlock->EmitCall(*this);
BR(ABI_RETURN);
// Return from run code variants
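// The stubs are indexed by a bitfield: bit 0 = MXCSR_ALREADY_EXITED, bit 1 = FORCE_RETURN.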
const auto emit_return_from_run_code = [this, &loop, &enter_mxcsr_then_loop](bool mxcsr_already_exited, bool force_return){
if (!force_return) {
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
CMP(ABI_SCRATCH1, Arm64Gen::ZR);
B(CC_GT, mxcsr_already_exited ? enter_mxcsr_then_loop : loop);
}
if (!mxcsr_already_exited) {
SwitchMxcsrOnExit();
}
cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
SUBS(param[0], param[0], ABI_SCRATCH1);
});
ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
RET();
};
return_from_run_code[0] = AlignCode16();
emit_return_from_run_code(false, false);
return_from_run_code[MXCSR_ALREADY_EXITED] = AlignCode16();
emit_return_from_run_code(true, false);
return_from_run_code[FORCE_RETURN] = AlignCode16();
emit_return_from_run_code(false, true);
return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16();
emit_return_from_run_code(true, true);
PerfMapRegister(run_code_from, GetCodePtr(), "dynarmic_dispatcher");
}
void BlockOfCode::SwitchMxcsrOnEntry() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPCR);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}
void BlockOfCode::SwitchMxcsrOnExit() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPCR);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}
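// Reports the cycles executed so far (cycles_to_run - cycles_remaining) via AddTicks,
// then requeries GetTicksRemaining and resets both counters.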
void BlockOfCode::UpdateTicks() {
cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
SUBS(param[0], param[0], ABI_SCRATCH1);
});
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
}
void BlockOfCode::LookupBlock() {
cb.LookupBlock->EmitCall(*this);
}
void* BlockOfCode::MConst(u64 lower, u64 upper) {
return constant_pool.GetConstant(lower, upper);
}
void BlockOfCode::SwitchToFarCode() {
ASSERT(prelude_complete);
ASSERT(!in_far_code);
in_far_code = true;
near_code_ptr = GetCodePtr();
SetCodePtr(far_code_ptr);
ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}
void BlockOfCode::SwitchToNearCode() {
ASSERT(prelude_complete);
ASSERT(in_far_code);
in_far_code = false;
far_code_ptr = GetCodePtr();
SetCodePtr(near_code_ptr);
}
CodePtr BlockOfCode::GetCodeBegin() const {
return near_code_begin;
}
void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");
void* ret = const_cast<u8*>(GetCodePtr());
region_size += alloc_size;
SetCodePtr(GetCodePtr() + alloc_size);
memset(ret, 0, alloc_size);
return ret;
}
void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
u8* ptr = const_cast<u8*>(reinterpret_cast<const u8*>(code_ptr));
ARM64XEmitter::SetCodePtr(ptr);
}
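// Pads the code emitted since `begin` with NOPs until it occupies exactly `size` bytes,
// so that patch locations can later be rewritten in place.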
void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
size_t current_size = GetCodePtr() - reinterpret_cast<const u8*>(begin);
ASSERT(current_size <= size);
for (u32 i = 0; i < (size - current_size) / 4; i++) {
HINT(Arm64Gen::HINT_NOP);
}
}
//bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const {
//#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
// return cpu_info.has(type);
//#else
// (void)type;
// return false;
//#endif
//}
} // namespace Dynarmic::BackendA64

153
src/backend/A64/block_of_code.h Normal file

@@ -0,0 +1,153 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <type_traits>
#include "backend/A64/callback.h"
#include "backend/A64/constant_pool.h"
#include "backend/A64/jitstate_info.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using CodePtr = const void*;
struct RunCodeCallbacks {
std::unique_ptr<Callback> LookupBlock;
std::unique_ptr<Callback> AddTicks;
std::unique_ptr<Callback> GetTicksRemaining;
};
class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
public:
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
/// Call when external emitters have finished emitting their preludes.
void PreludeComplete();
/// Change permissions to RW. This is required to support systems with W^X enforced.
void EnableWriting();
/// Change permissions to RX. This is required to support systems with W^X enforced.
void DisableWriting();
/// Clears this block of code and resets the code pointer to the beginning.
void ClearCache();
/// Calculates how much space remains for emission. This is the minimum of the space remaining for near code and far code.
size_t SpaceRemaining() const;
/// Runs emulated code.
void RunCode(void* jit_state) const;
/// Runs emulated code from code_ptr.
void RunCodeFrom(void* jit_state, CodePtr code_ptr) const;
/// Code emitter: Returns to dispatcher
void ReturnFromRunCode(bool mxcsr_already_exited = false);
/// Code emitter: Returns to dispatcher, forces return to host
void ForceReturnFromRunCode(bool mxcsr_already_exited = false);
/// Code emitter: Makes guest FPCR the current FPCR (the MXCSR naming is a holdover from the x64 backend)
void SwitchMxcsrOnEntry();
/// Code emitter: Makes the saved host FPCR the current FPCR
void SwitchMxcsrOnExit();
/// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining
/// @note this clobbers ABI caller-save registers
void UpdateTicks();
/// Code emitter: Performs a block lookup based on current state
/// @note this clobbers ABI caller-save registers
void LookupBlock();
void* MConst(u64 lower, u64 upper = 0);
/// Far code sits far away from the near code. Execution remains primarily in near code.
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
void SwitchToFarCode();
void SwitchToNearCode();
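/// Typical emission pattern (illustrative):
///     SwitchToFarCode();   // emit a rarely-taken fallback
///     SwitchToNearCode();  // resume on the hot path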
CodePtr GetCodeBegin() const;
const void* GetReturnFromRunCodeAddress() const {
return return_from_run_code[0];
}
const void* GetForceReturnFromRunCodeAddress() const {
return return_from_run_code[FORCE_RETURN];
}
/// Allocate memory of `size` bytes from the same block of memory the code is in.
/// This is useful for objects that need to be placed close to or within code.
/// The lifetime of this memory is the same as the code around it.
void* AllocateFromCodeSpace(size_t size);
void SetCodePtr(CodePtr code_ptr);
void EnsurePatchLocationSize(CodePtr begin, size_t size);
Arm64Gen::ARM64FloatEmitter fp_emitter;
// ABI registers
static const Arm64Gen::ARM64Reg ABI_RETURN;
static const Arm64Gen::ARM64Reg ABI_RETURN2;
static const Arm64Gen::ARM64Reg ABI_PARAM1;
static const Arm64Gen::ARM64Reg ABI_PARAM2;
static const Arm64Gen::ARM64Reg ABI_PARAM3;
static const Arm64Gen::ARM64Reg ABI_PARAM4;
static const Arm64Gen::ARM64Reg ABI_PARAM5;
static const Arm64Gen::ARM64Reg ABI_PARAM6;
static const Arm64Gen::ARM64Reg ABI_PARAM7;
static const Arm64Gen::ARM64Reg ABI_PARAM8;
static const Arm64Gen::ARM64Reg ABI_SCRATCH1;
static const std::array<Arm64Gen::ARM64Reg, 8> ABI_PARAMS;
// bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;
JitStateInfo GetJitStateInfo() const { return jsi; }
private:
RunCodeCallbacks cb;
JitStateInfo jsi;
bool prelude_complete = false;
CodePtr near_code_begin;
CodePtr far_code_begin;
ConstantPool constant_pool;
bool in_far_code = false;
CodePtr near_code_ptr;
CodePtr far_code_ptr;
using RunCodeFuncType = void(*)(void*);
using RunCodeFromFuncType = void(*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr;
RunCodeFromFuncType run_code_from = nullptr;
static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;
static constexpr size_t FORCE_RETURN = 1 << 1;
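// return_from_run_code is indexed by a combination of the two flags above.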
std::array<const void*, 4> return_from_run_code;
void GenRunCode();
class ExceptionHandler final {
public:
ExceptionHandler();
~ExceptionHandler();
void Register(BlockOfCode& code);
private:
struct Impl;
std::unique_ptr<Impl> impl;
};
ExceptionHandler exception_handler;
//Xbyak::util::Cpu cpu_info;
};
} // namespace Dynarmic::BackendA64

301
src/backend/A64/emit_a64.cpp Normal file

@@ -0,0 +1,301 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_map>
#include <unordered_set>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/hostloc.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "common/variant_util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
namespace Dynarmic::BackendA64 {
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
: reg_alloc(reg_alloc), block(block) {}
void EmitContext::EraseInstruction(IR::Inst* inst) {
block.Instructions().erase(inst);
inst->ClearArgs();
}
EmitA64::EmitA64(BlockOfCode& code)
: code(code) {}
EmitA64::~EmitA64() = default;
std::optional<typename EmitA64::BlockDescriptor> EmitA64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
auto iter = block_descriptors.find(descriptor);
if (iter == block_descriptors.end())
return std::nullopt;
return iter->second;
}
void EmitA64::EmitVoid(EmitContext&, IR::Inst*) {
}
void EmitA64::EmitBreakpoint(EmitContext&, IR::Inst*) {
code.BRK(0);
}
void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (!args[0].IsImmediate()) {
ctx.reg_alloc.DefineValue(inst, args[0]);
}
}
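// Pushes a (location descriptor, code pointer) pair onto the return stack buffer (RSB),
// a small ring buffer used to predict return targets; index_reg holds the current slot
// and is advanced modulo the buffer size afterwards.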
void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) {
auto iter = block_descriptors.find(target);
CodePtr target_code_ptr = iter != block_descriptors.end()
? iter->second.entrypoint
: code.GetReturnFromRunCodeAddress();
code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
code.MOVI2R(loc_desc_reg, target.Value());
patch_information[target].mov_x0.emplace_back(code.GetCodePtr());
EmitPatchMovX0(target_code_ptr);
code.ADD(code.ABI_SCRATCH1, X28, DecodeReg(index_reg), ArithOption{index_reg, ST_LSL, 3});
code.STR(INDEX_UNSIGNED, loc_desc_reg, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_location_descriptors);
code.STR(INDEX_UNSIGNED, X0, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_codeptrs);
code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1);
code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1);
code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
}
void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
u64 unique_hash_of_target = args[0].GetImmediateU64();
ctx.reg_alloc.ScratchGpr({HostLoc::X0});
Arm64Gen::ARM64Reg loc_desc_reg = ctx.reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg index_reg = ctx.reg_alloc.ScratchGpr();
PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}
void EmitA64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg value = ctx.reg_alloc.UseGpr(args[0]);
code.CMP(value, ZR);
code.MRS(nzcv, FIELD_NZCV);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
u32 value = 0;
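// Packed-flags layout: N = bit 15, Z = bit 14, C = bit 8, V = bit 0.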
value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
code.MOVI2R(nzcv, value);
ctx.reg_alloc.DefineValue(inst, nzcv);
} else {
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
// TODO: Optimize
code.LSR(nzcv, nzcv, 28);
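// With NZCV now in bits 3:0, multiplying by 0b00010000'10000001 (summing copies
// shifted by 0, 7 and 12) lands N in bit 15, Z in bit 14, C in bit 8 and V in bit 0.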
code.MOVI2R(code.ABI_SCRATCH1, 0b00010000'10000001);
code.MUL(nzcv, nzcv, code.ABI_SCRATCH1);
code.ANDI2R(nzcv, nzcv, 1, code.ABI_SCRATCH1);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
}
void EmitA64::EmitAddCycles(size_t cycles) {
ASSERT(cycles < std::numeric_limits<u32>::max());
code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining);
code.SUBI2R(code.ABI_SCRATCH1, code.ABI_SCRATCH1, static_cast<u32>(cycles));
code.STR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining);
}
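// Loads the guest's NZCV into the host flags and emits a conditional branch that is
// taken when `cond` holds; the caller binds the branch target via SetJumpTarget.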
FixupBranch EmitA64::EmitCond(IR::Cond cond) {
FixupBranch label;
const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1;
code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_CPSR_nzcv);
code._MSR(FIELD_NZCV, cpsr);
switch (cond) {
case IR::Cond::EQ: //z
label = code.B(CC_EQ);
break;
case IR::Cond::NE: //!z
label = code.B(CC_NEQ);
break;
case IR::Cond::CS: //c
label = code.B(CC_CS);
break;
case IR::Cond::CC: //!c
label = code.B(CC_CC);
break;
case IR::Cond::MI: //n
label = code.B(CC_MI);
break;
case IR::Cond::PL: //!n
label = code.B(CC_PL);
break;
case IR::Cond::VS: //v
label = code.B(CC_VS);
break;
case IR::Cond::VC: //!v
label = code.B(CC_VC);
break;
case IR::Cond::HI: //c & !z
label = code.B(CC_HI);
break;
case IR::Cond::LS: //!c | z
label = code.B(CC_LS);
break;
case IR::Cond::GE: // n == v
label = code.B(CC_GE);
break;
case IR::Cond::LT: // n != v
label = code.B(CC_LT);
break;
case IR::Cond::GT: // !z & (n == v)
label = code.B(CC_GT);
break;
case IR::Cond::LE: // z | (n != v)
label = code.B(CC_LE);
break;
default:
ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
break;
}
return label;
}
void EmitA64::EmitCondPrelude(const IR::Block& block) {
if (block.GetCondition() == IR::Cond::AL) {
ASSERT(!block.HasConditionFailedLocation());
return;
}
ASSERT(block.HasConditionFailedLocation());
FixupBranch pass = EmitCond(block.GetCondition());
EmitAddCycles(block.ConditionFailedCycleCount());
EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());
code.SetJumpTarget(pass);
}
EmitA64::BlockDescriptor EmitA64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
PerfMapRegister(entrypoint, code.GetCodePtr(), LocationDescriptorToFriendlyName(descriptor));
Patch(descriptor, entrypoint);
BlockDescriptor block_desc{entrypoint, size};
block_descriptors.emplace(descriptor.Value(), block_desc);
return block_desc;
}
void EmitA64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
Common::VisitVariant<void>(terminal, [this, &initial_location](auto x) {
using T = std::decay_t<decltype(x)>;
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
this->EmitTerminalImpl(x, initial_location);
} else {
ASSERT_MSG(false, "Invalid terminal");
}
});
}
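// Rewrites every recorded patch location for `desc` to target `bb`; Unpatch passes
// nullptr to undo the links.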
void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
const CodePtr save_code_ptr = code.GetCodePtr();
const PatchInformation& patch_info = patch_information[desc];
for (CodePtr location : patch_info.jg) {
code.SetCodePtr(location);
EmitPatchJg(desc, bb);
code.FlushIcache();
}
for (CodePtr location : patch_info.jmp) {
code.SetCodePtr(location);
EmitPatchJmp(desc, bb);
code.FlushIcache();
}
for (CodePtr location : patch_info.mov_x0) {
code.SetCodePtr(location);
EmitPatchMovX0(bb);
code.FlushIcache();
}
code.SetCodePtr(save_code_ptr);
}
void EmitA64::Unpatch(const IR::LocationDescriptor& desc) {
Patch(desc, nullptr);
}
void EmitA64::ClearCache() {
block_descriptors.clear();
patch_information.clear();
PerfMapClear();
}
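// Removes the given blocks from the cache, first unlinking any code that was patched
// to jump to them.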
void EmitA64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); };
for (const auto &descriptor : locations) {
auto it = block_descriptors.find(descriptor);
if (it == block_descriptors.end()) {
continue;
}
if (patch_information.count(descriptor)) {
Unpatch(descriptor);
}
block_descriptors.erase(it);
}
}
} // namespace Dynarmic::BackendA64

125
src/backend/A64/emit_a64.h Normal file

@@ -0,0 +1,125 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "backend/A64/reg_alloc.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/bit_util.h"
#include "common/fp/rounding_mode.h"
#include "frontend/ir/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::IR {
class Block;
class Inst;
} // namespace Dynarmic::IR
namespace Dynarmic::BackendA64 {
class BlockOfCode;
using namespace Arm64Gen;
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
struct EmitContext {
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
void EraseInstruction(IR::Inst* inst);
virtual FP::RoundingMode FPSCR_RMode() const = 0;
virtual u32 FPCR() const = 0;
virtual bool FPSCR_FTZ() const = 0;
virtual bool FPSCR_DN() const = 0;
virtual bool AccurateNaN() const { return true; }
RegAlloc& reg_alloc;
IR::Block& block;
};
class EmitA64 {
public:
struct BlockDescriptor {
CodePtr entrypoint; // Entrypoint of emitted code
size_t size; // Length in bytes of emitted code
};
EmitA64(BlockOfCode& code);
virtual ~EmitA64();
/// Looks up an emitted host block in the cache.
std::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;
/// Empties the entire cache.
virtual void ClearCache();
/// Invalidates a selection of basic blocks.
void InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations);
protected:
// Microinstruction emitters
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
#define A32OPC(...)
#define A64OPC(...)
#include "backend/A64/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
// Helpers
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
void EmitAddCycles(size_t cycles);
FixupBranch EmitCond(IR::Cond cond);
void EmitCondPrelude(const IR::Block& block);
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
void PushRSBHelper(Arm64Gen::ARM64Reg loc_desc_reg, Arm64Gen::ARM64Reg index_reg, IR::LocationDescriptor target);
// Terminal instruction emitters
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location);
virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) = 0;
// Patching
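// Records the locations of patchable branches (jg, jmp) and address loads (mov_x0)
// per target block, so they can be rewritten when the target is compiled or
// invalidated (the jg/jmp names are holdovers from the x64 backend).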
struct PatchInformation {
std::vector<CodePtr> jg;
std::vector<CodePtr> jmp;
std::vector<CodePtr> mov_x0;
};
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
void Unpatch(const IR::LocationDescriptor& target_desc);
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0;
// State
BlockOfCode& code;
std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
};
} // namespace Dynarmic::BackendA64