backend/a64: Port block_of_code and emit_a64
parent 0708019057
commit 4b48391fd3

src/backend/A64/block_of_code.cpp | 314 (new file)
@@ -0,0 +1,314 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include <algorithm>
#include <array>
#include <cstring>
#include <limits>

#include "backend/A64/a32_jitstate.h"
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif

namespace Dynarmic::BackendA64 {

const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1;

const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM3 = Arm64Gen::ARM64Reg::X2;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM4 = Arm64Gen::ARM64Reg::X3;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM5 = Arm64Gen::ARM64Reg::X4;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM6 = Arm64Gen::ARM64Reg::X5;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM7 = Arm64Gen::ARM64Reg::X6;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7;

const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30;

const std::array<Arm64Gen::ARM64Reg, 8> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2,
                                                                   BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4,
                                                                   BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6,
                                                                   BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8};

namespace {

constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;

#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory(const void* base, size_t size, bool is_executable) {
#ifdef _WIN32
    DWORD oldProtect = 0;
    VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#else
    static const size_t pageSize = sysconf(_SC_PAGESIZE);
    const size_t iaddr = reinterpret_cast<size_t>(base);
    const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
    const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
    mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif
}
#endif

} // anonymous namespace

BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
    : fp_emitter(this)
    , cb(std::move(cb))
    , jsi(jsi)
    , constant_pool(*this, CONSTANT_POOL_SIZE) {
    AllocCodeSpace(TOTAL_CODE_SIZE);
    constant_pool.AllocatePool();
    EnableWriting();
    GenRunCode();
    exception_handler.Register(*this);
}

void BlockOfCode::PreludeComplete() {
    prelude_complete = true;
    near_code_begin = GetCodePtr();
    far_code_begin = GetCodePtr() + FAR_CODE_OFFSET;
    FlushIcache();
    ClearCache();
    DisableWriting();
}

void BlockOfCode::EnableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, false);
#endif
}

void BlockOfCode::DisableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, true);
#endif
}

void BlockOfCode::ClearCache() {
    ASSERT(prelude_complete);
    in_far_code = false;
    near_code_ptr = near_code_begin;
    far_code_ptr = far_code_begin;
    SetCodePtr(near_code_begin);
}

size_t BlockOfCode::SpaceRemaining() const {
    ASSERT(prelude_complete);
    // This function provides an underestimate of near-code size, but that's okay.
    // (Why? The maximum size of near code should be measured from near_code_begin, not the start of the region.)
    // These are offsets from the start of the allocated code region.
    std::size_t far_code_offset, near_code_offset;
    if (in_far_code) {
        near_code_offset = static_cast<const u8*>(near_code_ptr) - static_cast<const u8*>(region);
        far_code_offset = GetCodePtr() - static_cast<const u8*>(region);
    } else {
        near_code_offset = GetCodePtr() - static_cast<const u8*>(region);
        far_code_offset = static_cast<const u8*>(far_code_ptr) - static_cast<const u8*>(region);
    }
    if (far_code_offset > TOTAL_CODE_SIZE)
        return 0;
    if (near_code_offset > FAR_CODE_OFFSET)
        return 0;
    return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
}

void BlockOfCode::RunCode(void* jit_state) const {
    run_code(jit_state);
}

void BlockOfCode::RunCodeFrom(void* jit_state, CodePtr code_ptr) const {
    run_code_from(jit_state, code_ptr);
}

void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
    size_t index = 0;
    if (mxcsr_already_exited)
        index |= MXCSR_ALREADY_EXITED;
    B(return_from_run_code[index]);
}

void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
    size_t index = FORCE_RETURN;
    if (mxcsr_already_exited)
        index |= MXCSR_ALREADY_EXITED; // TODO: Refactor MXCSR naming (an x64 leftover) to FPCR.
    B(return_from_run_code[index]);
}

void BlockOfCode::GenRunCode() {
    const u8* loop, *enter_mxcsr_then_loop;

    run_code_from = (RunCodeFromFuncType) const_cast<u8*>(AlignCode16());

    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);

    MOV(Arm64Gen::X28, ABI_PARAM1);
    MOV(Arm64Gen::X27, ABI_PARAM2); // temporarily in non-volatile register

    cb.GetTicksRemaining->EmitCall(*this);

    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);

    SwitchMxcsrOnEntry();
    BR(Arm64Gen::X27);

    run_code = (RunCodeFuncType) const_cast<u8*>(AlignCode16());

    // This serves two purposes:
    // 1. It saves all the registers we as a callee need to save.
    // 2. It aligns the stack so that the code the JIT emits can assume
    //    that the stack is appropriately aligned for calls.
    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);

    MOV(Arm64Gen::X28, ABI_PARAM1);

    cb.GetTicksRemaining->EmitCall(*this);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);

    enter_mxcsr_then_loop = GetCodePtr();
    SwitchMxcsrOnEntry();
    loop = GetCodePtr();

    cb.LookupBlock->EmitCall(*this);
    BR(ABI_RETURN);

    // Return from run code variants
    const auto emit_return_from_run_code = [this, &loop, &enter_mxcsr_then_loop](bool mxcsr_already_exited, bool force_return) {
        if (!force_return) {
            LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
            CMP(ABI_SCRATCH1, Arm64Gen::ZR);
            B(CC_GT, mxcsr_already_exited ? enter_mxcsr_then_loop : loop);
        }

        if (!mxcsr_already_exited) {
            SwitchMxcsrOnExit();
        }

        cb.AddTicks->EmitCall(*this, [this](RegList param) {
            LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
            LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
            SUBS(param[0], param[0], ABI_SCRATCH1);
        });

        ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
        RET();
    };

    return_from_run_code[0] = AlignCode16();
    emit_return_from_run_code(false, false);

    return_from_run_code[MXCSR_ALREADY_EXITED] = AlignCode16();
    emit_return_from_run_code(true, false);

    return_from_run_code[FORCE_RETURN] = AlignCode16();
    emit_return_from_run_code(false, true);

    return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16();
    emit_return_from_run_code(true, true);

    PerfMapRegister(run_code_from, GetCodePtr(), "dynarmic_dispatcher");
}

void BlockOfCode::SwitchMxcsrOnEntry() {
    MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPCR);
    _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}

void BlockOfCode::SwitchMxcsrOnExit() {
    MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPCR);
    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
    _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}

void BlockOfCode::UpdateTicks() {
    cb.AddTicks->EmitCall(*this, [this](RegList param) {
        LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
        LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
        SUBS(param[0], param[0], ABI_SCRATCH1);
    });

    cb.GetTicksRemaining->EmitCall(*this);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
}

void BlockOfCode::LookupBlock() {
    cb.LookupBlock->EmitCall(*this);
}

void* BlockOfCode::MConst(u64 lower, u64 upper) {
    return constant_pool.GetConstant(lower, upper);
}

void BlockOfCode::SwitchToFarCode() {
    ASSERT(prelude_complete);
    ASSERT(!in_far_code);
    in_far_code = true;
    near_code_ptr = GetCodePtr();
    SetCodePtr(far_code_ptr);

    ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}

void BlockOfCode::SwitchToNearCode() {
    ASSERT(prelude_complete);
    ASSERT(in_far_code);
    in_far_code = false;
    far_code_ptr = GetCodePtr();
    SetCodePtr(near_code_ptr);
}

CodePtr BlockOfCode::GetCodeBegin() const {
    return near_code_begin;
}

void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
    ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");

    void* ret = const_cast<u8*>(GetCodePtr());
    region_size += alloc_size;
    SetCodePtr(GetCodePtr() + alloc_size);
    memset(ret, 0, alloc_size);
    return ret;
}

void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
    u8* ptr = const_cast<u8*>(reinterpret_cast<const u8*>(code_ptr));
    ARM64XEmitter::SetCodePtr(ptr);
}

void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
    size_t current_size = GetCodePtr() - reinterpret_cast<const u8*>(begin);
    ASSERT(current_size <= size);
    for (u32 i = 0; i < (size - current_size) / 4; i++) {
        HINT(Arm64Gen::HINT_NOP);
    }
}

//bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const {
//#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
//    return cpu_info.has(type);
//#else
//    (void)type;
//    return false;
//#endif
//}

} // namespace Dynarmic::BackendA64
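For reference, the four dispatcher exit stubs emitted by GenRunCode() above are selected by combining two flag bits. Below is a minimal standalone sketch of that indexing; the flag names mirror block_of_code.h, while the strings are purely illustrative stand-ins for the emitted stubs.

#include <array>
#include <cstddef>
#include <cstdio>

int main() {
    // Flag bits, as defined in block_of_code.h.
    constexpr std::size_t MXCSR_ALREADY_EXITED = 1 << 0;
    constexpr std::size_t FORCE_RETURN = 1 << 1;

    // GenRunCode() emits one exit stub per flag combination; plain strings
    // stand in here for the code pointers stored in return_from_run_code.
    const std::array<const char*, 4> return_from_run_code{
        "cycles left? loop : restore host FPCR and return",        // 0
        "cycles left? re-enter guest FPCR and loop : return",      // MXCSR_ALREADY_EXITED
        "restore host FPCR and return unconditionally",            // FORCE_RETURN
        "return unconditionally (FPCR already restored)",          // MXCSR_ALREADY_EXITED | FORCE_RETURN
    };

    // ReturnFromRunCode(true) computes index 1; ForceReturnFromRunCode(false) computes index 2.
    std::size_t index = 0;
    const bool mxcsr_already_exited = true;
    if (mxcsr_already_exited)
        index |= MXCSR_ALREADY_EXITED;
    std::printf("ReturnFromRunCode(true) -> stub %zu: %s\n", index, return_from_run_code[index]);
}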
src/backend/A64/block_of_code.h | 153 (new file)
@@ -0,0 +1,153 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <array>
#include <memory>
#include <type_traits>

#include "backend/A64/callback.h"
#include "backend/A64/constant_pool.h"
#include "backend/A64/jitstate_info.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"

namespace Dynarmic::BackendA64 {

using CodePtr = const void*;

struct RunCodeCallbacks {
    std::unique_ptr<Callback> LookupBlock;
    std::unique_ptr<Callback> AddTicks;
    std::unique_ptr<Callback> GetTicksRemaining;
};

class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
public:
    BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);

    /// Call when external emitters have finished emitting their preludes.
    void PreludeComplete();

    /// Change permissions to RW. This is required to support systems with W^X enforced.
    void EnableWriting();
    /// Change permissions to RX. This is required to support systems with W^X enforced.
    void DisableWriting();

    /// Clears this block of code and resets code pointer to beginning.
    void ClearCache();
    /// Calculates how much space is remaining to use. This is the minimum of near code and far code.
    size_t SpaceRemaining() const;

    /// Runs emulated code.
    void RunCode(void* jit_state) const;
    /// Runs emulated code from code_ptr.
    void RunCodeFrom(void* jit_state, CodePtr code_ptr) const;
    /// Code emitter: Returns to dispatcher
    void ReturnFromRunCode(bool mxcsr_already_exited = false);
    /// Code emitter: Returns to dispatcher, forces return to host
    void ForceReturnFromRunCode(bool mxcsr_already_exited = false);
    /// Code emitter: Makes guest FPCR the current FPCR
    void SwitchMxcsrOnEntry();
    /// Code emitter: Makes saved host FPCR the current FPCR
    void SwitchMxcsrOnExit();
    /// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining
    /// @note this clobbers ABI caller-save registers
    void UpdateTicks();
    /// Code emitter: Performs a block lookup based on current state
    /// @note this clobbers ABI caller-save registers
    void LookupBlock();

    void* MConst(u64 lower, u64 upper = 0);

    /// Far code sits far away from the near code. Execution remains primarily in near code.
    /// "Cold" / rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
    void SwitchToFarCode();
    void SwitchToNearCode();

    CodePtr GetCodeBegin() const;

    const void* GetReturnFromRunCodeAddress() const {
        return return_from_run_code[0];
    }

    const void* GetForceReturnFromRunCodeAddress() const {
        return return_from_run_code[FORCE_RETURN];
    }

    /// Allocate memory of `size` bytes from the same block of memory the code is in.
    /// This is useful for objects that need to be placed close to or within code.
    /// The lifetime of this memory is the same as the code around it.
    void* AllocateFromCodeSpace(size_t size);

    void SetCodePtr(CodePtr code_ptr);
    void EnsurePatchLocationSize(CodePtr begin, size_t size);

    Arm64Gen::ARM64FloatEmitter fp_emitter;

    // ABI registers

    static const Arm64Gen::ARM64Reg ABI_RETURN;
    static const Arm64Gen::ARM64Reg ABI_RETURN2;
    static const Arm64Gen::ARM64Reg ABI_PARAM1;
    static const Arm64Gen::ARM64Reg ABI_PARAM2;
    static const Arm64Gen::ARM64Reg ABI_PARAM3;
    static const Arm64Gen::ARM64Reg ABI_PARAM4;
    static const Arm64Gen::ARM64Reg ABI_PARAM5;
    static const Arm64Gen::ARM64Reg ABI_PARAM6;
    static const Arm64Gen::ARM64Reg ABI_PARAM7;
    static const Arm64Gen::ARM64Reg ABI_PARAM8;

    static const Arm64Gen::ARM64Reg ABI_SCRATCH1;

    static const std::array<Arm64Gen::ARM64Reg, 8> ABI_PARAMS;

    // bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;

    JitStateInfo GetJitStateInfo() const { return jsi; }

private:
    RunCodeCallbacks cb;
    JitStateInfo jsi;

    bool prelude_complete = false;
    CodePtr near_code_begin;
    CodePtr far_code_begin;

    ConstantPool constant_pool;

    bool in_far_code = false;
    CodePtr near_code_ptr;
    CodePtr far_code_ptr;

    using RunCodeFuncType = void(*)(void*);
    using RunCodeFromFuncType = void(*)(void*, CodePtr);
    RunCodeFuncType run_code = nullptr;
    RunCodeFromFuncType run_code_from = nullptr;
    static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;
    static constexpr size_t FORCE_RETURN = 1 << 1;
    std::array<const void*, 4> return_from_run_code;
    void GenRunCode();

    class ExceptionHandler final {
    public:
        ExceptionHandler();
        ~ExceptionHandler();

        void Register(BlockOfCode& code);
    private:
        struct Impl;
        std::unique_ptr<Impl> impl;
    };
    ExceptionHandler exception_handler;

    // Xbyak::util::Cpu cpu_info;
};

} // namespace Dynarmic::BackendA64
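The near/far split declared above carves one 128 MiB allocation into a 100 MiB near region and a 28 MiB far region, and SpaceRemaining() reports the smaller of the two headrooms so a cache flush happens before either region can overrun the other. A minimal standalone sketch of that arithmetic follows; the two constants are copied from block_of_code.cpp, while the offsets are made-up sample values.

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
    // Constants from block_of_code.cpp.
    constexpr std::size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
    constexpr std::size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;

    // Hypothetical emit positions, measured from the start of the region:
    // near code grows from offset 0, far code grows from FAR_CODE_OFFSET.
    const std::size_t near_code_offset = 12 * 1024 * 1024;
    const std::size_t far_code_offset = FAR_CODE_OFFSET + 3 * 1024 * 1024;

    // Minimum of the two remaining headrooms, as SpaceRemaining() computes it.
    std::size_t remaining = 0;
    if (far_code_offset <= TOTAL_CODE_SIZE && near_code_offset <= FAR_CODE_OFFSET) {
        remaining = std::min(TOTAL_CODE_SIZE - far_code_offset,
                             FAR_CODE_OFFSET - near_code_offset);
    }
    std::printf("space remaining: %zu MiB\n", remaining / (1024 * 1024));
}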
src/backend/A64/emit_a64.cpp | 301 (new file)
@@ -0,0 +1,301 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include <limits>
#include <optional>
#include <unordered_map>
#include <unordered_set>

#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/hostloc.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "common/variant_util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"

// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.

namespace Dynarmic::BackendA64 {

EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
    : reg_alloc(reg_alloc), block(block) {}

void EmitContext::EraseInstruction(IR::Inst* inst) {
    block.Instructions().erase(inst);
    inst->ClearArgs();
}

EmitA64::EmitA64(BlockOfCode& code)
    : code(code) {}

EmitA64::~EmitA64() = default;

std::optional<typename EmitA64::BlockDescriptor> EmitA64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
    auto iter = block_descriptors.find(descriptor);
    if (iter == block_descriptors.end())
        return std::nullopt;
    return iter->second;
}

void EmitA64::EmitVoid(EmitContext&, IR::Inst*) {
}

void EmitA64::EmitBreakpoint(EmitContext&, IR::Inst*) {
    code.BRK(0);
}

void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    if (!args[0].IsImmediate()) {
        ctx.reg_alloc.DefineValue(inst, args[0]);
    }
}

void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) {
    auto iter = block_descriptors.find(target);
    CodePtr target_code_ptr = iter != block_descriptors.end()
                              ? iter->second.entrypoint
                              : code.GetReturnFromRunCodeAddress();

    code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);

    code.MOVI2R(loc_desc_reg, target.Value());

    patch_information[target].mov_x0.emplace_back(code.GetCodePtr());
    EmitPatchMovX0(target_code_ptr);

    code.ADD(code.ABI_SCRATCH1, X28, DecodeReg(index_reg), ArithOption{index_reg, ST_LSL, 3});
    code.STR(INDEX_UNSIGNED, loc_desc_reg, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_location_descriptors);
    code.STR(INDEX_UNSIGNED, X0, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_codeptrs);

    code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1);
    code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1);
    code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
}

void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(args[0].IsImmediate());
    u64 unique_hash_of_target = args[0].GetImmediateU64();

    ctx.reg_alloc.ScratchGpr({HostLoc::X0});
    Arm64Gen::ARM64Reg loc_desc_reg = ctx.reg_alloc.ScratchGpr();
    Arm64Gen::ARM64Reg index_reg = ctx.reg_alloc.ScratchGpr();

    PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}

void EmitA64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitA64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitA64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitA64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitA64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitA64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Arm64Gen::ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
    Arm64Gen::ARM64Reg value = ctx.reg_alloc.UseGpr(args[0]);
    code.CMP(value, ZR);
    code.MRS(nzcv, FIELD_NZCV);
    ctx.reg_alloc.DefineValue(inst, nzcv);
}

void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (args[0].IsImmediate()) {
        Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
        u32 value = 0;
        value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
        value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
        value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
        value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
        code.MOVI2R(nzcv, value);
        ctx.reg_alloc.DefineValue(inst, nzcv);
    } else {
        Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
        // TODO: Optimize
        code.LSR(nzcv, nzcv, 28);
        code.MOVI2R(code.ABI_SCRATCH1, 0b00010000'10000001);
        code.MUL(nzcv, nzcv, code.ABI_SCRATCH1);
        code.ANDI2R(nzcv, nzcv, 1, code.ABI_SCRATCH1);
        ctx.reg_alloc.DefineValue(inst, nzcv);
    }
}

void EmitA64::EmitAddCycles(size_t cycles) {
    ASSERT(cycles < std::numeric_limits<u32>::max());
    code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining);
    code.SUBI2R(code.ABI_SCRATCH1, code.ABI_SCRATCH1, static_cast<u32>(cycles));
    code.STR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining);
}

FixupBranch EmitA64::EmitCond(IR::Cond cond) {
    FixupBranch label;

    const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1;
    code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_CPSR_nzcv);
    code._MSR(FIELD_NZCV, cpsr);

    switch (cond) {
    case IR::Cond::EQ: // z
        label = code.B(CC_EQ);
        break;
    case IR::Cond::NE: // !z
        label = code.B(CC_NEQ);
        break;
    case IR::Cond::CS: // c
        label = code.B(CC_CS);
        break;
    case IR::Cond::CC: // !c
        label = code.B(CC_CC);
        break;
    case IR::Cond::MI: // n
        label = code.B(CC_MI);
        break;
    case IR::Cond::PL: // !n
        label = code.B(CC_PL);
        break;
    case IR::Cond::VS: // v
        label = code.B(CC_VS);
        break;
    case IR::Cond::VC: // !v
        label = code.B(CC_VC);
        break;
    case IR::Cond::HI: // c & !z
        label = code.B(CC_HI);
        break;
    case IR::Cond::LS: // !c | z
        label = code.B(CC_LS);
        break;
    case IR::Cond::GE: // n == v
        label = code.B(CC_GE);
        break;
    case IR::Cond::LT: // n != v
        label = code.B(CC_LT);
        break;
    case IR::Cond::GT: // !z & (n == v)
        label = code.B(CC_GT);
        break;
    case IR::Cond::LE: // z | (n != v)
        label = code.B(CC_LE);
        break;
    default:
        ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
        break;
    }

    return label;
}

void EmitA64::EmitCondPrelude(const IR::Block& block) {
    if (block.GetCondition() == IR::Cond::AL) {
        ASSERT(!block.HasConditionFailedLocation());
        return;
    }

    ASSERT(block.HasConditionFailedLocation());

    FixupBranch pass = EmitCond(block.GetCondition());
    EmitAddCycles(block.ConditionFailedCycleCount());
    EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());
    code.SetJumpTarget(pass);
}

EmitA64::BlockDescriptor EmitA64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
    PerfMapRegister(entrypoint, code.GetCodePtr(), LocationDescriptorToFriendlyName(descriptor));
    Patch(descriptor, entrypoint);
    BlockDescriptor block_desc{entrypoint, size};

    block_descriptors.emplace(descriptor.Value(), block_desc);
    return block_desc;
}

void EmitA64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
    Common::VisitVariant<void>(terminal, [this, &initial_location](auto x) {
        using T = std::decay_t<decltype(x)>;
        if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
            this->EmitTerminalImpl(x, initial_location);
        } else {
            ASSERT_MSG(false, "Invalid terminal");
        }
    });
}

void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
    const CodePtr save_code_ptr = code.GetCodePtr();
    const PatchInformation& patch_info = patch_information[desc];

    for (CodePtr location : patch_info.jg) {
        code.SetCodePtr(location);
        EmitPatchJg(desc, bb);
        code.FlushIcache();
    }

    for (CodePtr location : patch_info.jmp) {
        code.SetCodePtr(location);
        EmitPatchJmp(desc, bb);
        code.FlushIcache();
    }

    for (CodePtr location : patch_info.mov_x0) {
        code.SetCodePtr(location);
        EmitPatchMovX0(bb);
        code.FlushIcache();
    }

    code.SetCodePtr(save_code_ptr);
}

void EmitA64::Unpatch(const IR::LocationDescriptor& desc) {
    Patch(desc, nullptr);
}

void EmitA64::ClearCache() {
    block_descriptors.clear();
    patch_information.clear();

    PerfMapClear();
}

void EmitA64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
    code.EnableWriting();
    SCOPE_EXIT { code.DisableWriting(); };

    for (const auto& descriptor : locations) {
        auto it = block_descriptors.find(descriptor);
        if (it == block_descriptors.end()) {
            continue;
        }

        if (patch_information.count(descriptor)) {
            Unpatch(descriptor);
        }
        block_descriptors.erase(it);
    }
}

} // namespace Dynarmic::BackendA64
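The packed-flags conversion in EmitNZCVFromPackedFlags() above places N, Z, C and V at bits 15, 14, 8 and 0, as the immediate path shows; the register path gets the same layout by shifting NZCV down to bits 3..0 and multiplying by 0b00010000'10000001, which replicates the 4-bit field at shifts 0, 7 and 12. A standalone check that the two paths agree follows; the final mask is illustrative (the emitted code clears the stray bits with ANDI2R).

#include <cassert>
#include <cstdint>
#include <cstdio>

// Packed layout used by the immediate path: N -> bit 15, Z -> bit 14, C -> bit 8, V -> bit 0.
static std::uint32_t PackImmediate(std::uint32_t cpsr) {
    std::uint32_t value = 0;
    value |= ((cpsr >> 31) & 1) ? (1u << 15) : 0;
    value |= ((cpsr >> 30) & 1) ? (1u << 14) : 0;
    value |= ((cpsr >> 29) & 1) ? (1u << 8) : 0;
    value |= ((cpsr >> 28) & 1) ? (1u << 0) : 0;
    return value;
}

// Register path: LSR #28, then the multiply spreads the four bits onto the same positions.
static std::uint32_t PackMultiply(std::uint32_t cpsr) {
    std::uint32_t nzcv = cpsr >> 28;
    nzcv *= 0b00010000'10000001;
    // Keep only the four significant bits (illustrative mask).
    return nzcv & ((1u << 15) | (1u << 14) | (1u << 8) | (1u << 0));
}

int main() {
    for (std::uint32_t nzcv = 0; nzcv < 16; ++nzcv) {
        const std::uint32_t cpsr = nzcv << 28;
        assert(PackImmediate(cpsr) == PackMultiply(cpsr));
    }
    std::printf("multiply trick matches the immediate path for all 16 NZCV values\n");
}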
src/backend/A64/emit_a64.h | 125 (new file)
@@ -0,0 +1,125 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <array>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "backend/A64/reg_alloc.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/bit_util.h"
#include "common/fp/rounding_mode.h"
#include "frontend/ir/location_descriptor.h"
#include "frontend/ir/terminal.h"

namespace Dynarmic::IR {
class Block;
class Inst;
} // namespace Dynarmic::IR

namespace Dynarmic::BackendA64 {

class BlockOfCode;

using namespace Arm64Gen;

using A64FullVectorWidth = std::integral_constant<size_t, 128>;

// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;

struct EmitContext {
    EmitContext(RegAlloc& reg_alloc, IR::Block& block);

    void EraseInstruction(IR::Inst* inst);

    virtual FP::RoundingMode FPSCR_RMode() const = 0;
    virtual u32 FPCR() const = 0;
    virtual bool FPSCR_FTZ() const = 0;
    virtual bool FPSCR_DN() const = 0;
    virtual bool AccurateNaN() const { return true; }

    RegAlloc& reg_alloc;
    IR::Block& block;
};

class EmitA64 {
public:
    struct BlockDescriptor {
        CodePtr entrypoint;  // Entrypoint of emitted code
        size_t size;         // Length in bytes of emitted code
    };

    EmitA64(BlockOfCode& code);
    virtual ~EmitA64();

    /// Looks up an emitted host block in the cache.
    std::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;

    /// Empties the entire cache.
    virtual void ClearCache();

    /// Invalidates a selection of basic blocks.
    void InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations);

protected:
    // Microinstruction emitters
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
#define A32OPC(...)
#define A64OPC(...)
#include "backend/A64/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC

    // Helpers
    virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
    void EmitAddCycles(size_t cycles);
    FixupBranch EmitCond(IR::Cond cond);
    void EmitCondPrelude(const IR::Block& block);
    BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
    void PushRSBHelper(Arm64Gen::ARM64Reg loc_desc_reg, Arm64Gen::ARM64Reg index_reg, IR::LocationDescriptor target);

    // Terminal instruction emitters
    void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location);
    virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) = 0;
    virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) = 0;
    virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) = 0;
    virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) = 0;
    virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) = 0;
    virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location) = 0;
    virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) = 0;
    virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) = 0;
    virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) = 0;

    // Patching
    struct PatchInformation {
        std::vector<CodePtr> jg;
        std::vector<CodePtr> jmp;
        std::vector<CodePtr> mov_x0;
    };
    void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
    void Unpatch(const IR::LocationDescriptor& target_desc);
    virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
    virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
    virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0;

    // State
    BlockOfCode& code;
    std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
    std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
};

} // namespace Dynarmic::BackendA64
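As a quick check of the VectorArray alias defined above, the following standalone snippet reproduces it with a local stand-in for Common::BitSize and confirms the sizing described in the comment (a 128-bit vector register holds four u32 lanes or sixteen u8 lanes).

#include <array>
#include <cstddef>
#include <cstdint>
#include <type_traits>

using A64FullVectorWidth = std::integral_constant<std::size_t, 128>;

// Local stand-in for Common::BitSize<T>().
template <typename T>
constexpr std::size_t BitSize() {
    return sizeof(T) * 8;
}

template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / BitSize<T>()>;

static_assert(std::is_same_v<VectorArray<std::uint32_t>, std::array<std::uint32_t, 4>>);
static_assert(std::is_same_v<VectorArray<std::uint8_t>, std::array<std::uint8_t, 16>>);

int main() {}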