backend/A64: add fastmem support

fix crash on game close

fix generic exception handler

reorder hostloc gpr list

use temp register instead of X0 for writes

go back to regular std::partition
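
For readers skimming the diff below: fastmem maps the emulated address space at a fixed host base (kept in X27 by the generated code), so most guest loads and stores become a single host access, and only faulting accesses are patched to fall back to the page table or the user callbacks. A minimal, self-contained C++ sketch of that idea follows; the names (fastmem_read8, PageTable, page_table_read8) are illustrative only and are not dynarmic APIs.

#include <cstdint>
#include <vector>

// Fast path: guest memory is mapped contiguously at a host base pointer
// (the value_in_X27 / fastmem_pointer introduced in this commit), so a guest
// read is one host load. If the page is not mapped, the load faults and the
// SIGSEGV handler patches the emitted site to use the slow path instead.
std::uint8_t fastmem_read8(const std::uint8_t* fastmem_base, std::uint32_t vaddr) {
    return fastmem_base[vaddr];
}

// Slow path: the 4 KiB page-table lookup that the patched code falls back to,
// mirroring the page_table_lookup lambdas in the emitter diff below.
using PageTable = std::vector<std::uint8_t*>;  // one host pointer per page

std::uint8_t page_table_read8(const PageTable& table, std::uint32_t vaddr) {
    if (std::uint8_t* page = table[vaddr >> 12])  // page index
        return page[vaddr & 4095];                // page offset
    return 0;  // the real code calls the MemoryRead8 user callback here
}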
BreadFish64 2019-10-25 21:07:19 -05:00 committed by SachinVin
parent 45a758a6f2
commit b6733a089a
13 changed files with 541 additions and 146 deletions

View File

@ -373,7 +373,9 @@ elseif(ARCHITECTURE_Aarch64)
backend/A64/reg_alloc.cpp
backend/A64/reg_alloc.h
backend/A64/exception_handler_generic.cpp
backend/A64/exception_handler.h
#backend/A64/exception_handler_generic.cpp
backend/A64/exception_handler_posix.cpp
)
else()
message(FATAL_ERROR "Unsupported architecture")

View File

@ -4,6 +4,7 @@
* General Public License version 2 or any later version.
*/
#include <iterator>
#include <unordered_map>
#include <unordered_set>
#include <utility>
@ -76,12 +77,18 @@ bool A32EmitContext::FPSCR_DN() const {
return Location().FPSCR().DN();
}
std::ptrdiff_t A32EmitContext::GetInstOffset(IR::Inst* inst) const {
return std::distance(block.begin(), IR::Block::iterator(inst));
}
A32EmitA64::A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface)
: EmitA64(code), config(std::move(config)), jit_interface(jit_interface) {
exception_handler.Register(code, [this](CodePtr PC){FastmemCallback(PC);});
GenMemoryAccessors();
GenTerminalHandlers();
code.PreludeComplete();
ClearFastDispatchTable();
fastmem_patch_info.clear();
}
A32EmitA64::~A32EmitA64() = default;
@ -152,6 +159,7 @@ void A32EmitA64::ClearCache() {
EmitA64::ClearCache();
block_ranges.ClearCache();
ClearFastDispatchTable();
fastmem_patch_info.clear();
}
void A32EmitA64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
@ -781,137 +789,272 @@ void A32EmitA64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
code.STR(INDEX_UNSIGNED, address, X28, offsetof(A32JitState, exclusive_address));
}
template <typename T, T (A32::UserCallbacks::*raw_fn)(A32::VAddr)>
static void ReadMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) {
constexpr size_t bit_size = Common::BitSize<T>();
auto args = reg_alloc.GetArgumentInfo(inst);
if (!config.page_table) {
reg_alloc.HostCall(inst, {}, args[0]);
Devirtualize<raw_fn>(config.callbacks).EmitCall(code);
return;
}
reg_alloc.UseScratch(args[0], ABI_PARAM2);
Arm64Gen::ARM64Reg result = reg_alloc.ScratchGpr({ABI_RETURN});
Arm64Gen::ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
Arm64Gen::ARM64Reg page_index = reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg page_offset = reg_alloc.ScratchGpr();
FixupBranch abort, end;
code.MOVP2R(result, config.page_table);
code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
code.LDR(result, result, ArithOption{page_index, true});
abort = code.CBZ(result);
code.ANDI2R(DecodeReg(page_offset), DecodeReg(vaddr), 4095);
switch (bit_size) {
case 8:
code.LDRB(DecodeReg(result), result, ArithOption{ page_offset });
break;
case 16:
code.LDRH(DecodeReg(result), result, ArithOption{ page_offset });
break;
case 32:
code.LDR(DecodeReg(result), result, ArithOption{ page_offset });
break;
case 64:
code.LDR(result, result, ArithOption{ page_offset });
break;
default:
ASSERT_MSG(false, "Invalid bit_size");
break;
}
end = code.B();
code.SetJumpTarget(abort);
code.BL(wrapped_fn);
code.SetJumpTarget(end);
reg_alloc.DefineValue(inst, result);
A32EmitA64::DoNotFastmemMarker A32EmitA64::GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst) {
return std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst));
}
template <typename T, void (A32::UserCallbacks::*raw_fn)(A32::VAddr, T)>
static void WriteMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) {
constexpr size_t bit_size = Common::BitSize<T>();
auto args = reg_alloc.GetArgumentInfo(inst);
bool A32EmitA64::ShouldFastmem(const DoNotFastmemMarker& marker) const {
return config.fastmem_pointer && exception_handler.SupportsFastmem() && do_not_fastmem.count(marker) == 0;
}
if (!config.page_table) {
reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
Devirtualize<raw_fn>(config.callbacks).EmitCall(code);
void A32EmitA64::DoNotFastmem(const DoNotFastmemMarker& marker) {
do_not_fastmem.emplace(marker);
InvalidateBasicBlocks({std::get<0>(marker)});
}
template <typename T>
void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn) {
constexpr size_t bit_size = Common::BitSize<T>();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
ctx.reg_alloc.ScratchGpr({ABI_RETURN});
ARM64Reg result = ctx.reg_alloc.ScratchGpr();
ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
ARM64Reg tmp = code.ABI_RETURN;
const auto do_not_fastmem_marker = GenerateDoNotFastmemMarker(ctx, inst);
const auto page_table_lookup = [this, result, vaddr, tmp, callback_fn](FixupBranch& end) {
constexpr size_t bit_size = Common::BitSize<T>();
code.MOVP2R(result, config.page_table);
code.MOV(tmp, vaddr, ArithOption{vaddr, ST_LSR, 12});
code.LDR(result, result, ArithOption{tmp, true});
FixupBranch abort = code.CBZ(result);
code.ANDI2R(vaddr, vaddr, 4095);
switch (bit_size) {
case 8:
code.LDRB(DecodeReg(result), result, vaddr);
break;
case 16:
code.LDRH(DecodeReg(result), result, vaddr);
break;
case 32:
code.LDR(DecodeReg(result), result, vaddr);
break;
case 64:
code.LDR(result, result, vaddr);
break;
default:
ASSERT_MSG(false, "Invalid bit_size");
break;
}
end = code.B();
code.SetJumpTarget(abort);
code.BL(callback_fn);
code.MOV(result, code.ABI_RETURN);
};
if (ShouldFastmem(do_not_fastmem_marker)) {
const CodePtr patch_location = code.GetCodePtr();
switch (bit_size) {
case 8:
code.LDRB(DecodeReg(result), X27, vaddr);
break;
case 16:
code.LDRH(DecodeReg(result), X27, vaddr);
break;
case 32:
code.LDR(DecodeReg(result), X27, vaddr);
break;
case 64:
code.LDR(result, X27, vaddr);
break;
default:
ASSERT_MSG(false, "Invalid bit_size");
break;
}
code.EnsurePatchLocationSize(patch_location, 5);
fastmem_patch_info.emplace(
patch_location,
FastmemPatchInfo{
[this, patch_location, page_table_lookup, callback_fn, result, do_not_fastmem_marker]{
CodePtr save_code_ptr = code.GetCodePtr();
code.SetCodePtr(patch_location);
FixupBranch thunk = code.B();
u8* end_ptr = code.GetWritableCodePtr();
code.EnsurePatchLocationSize(patch_location, 5);
code.FlushIcacheSection(reinterpret_cast<const u8*>(patch_location), code.GetCodePtr());
code.SetCodePtr(save_code_ptr);
code.SwitchToFarCode();
code.SetJumpTarget(thunk);
if (config.page_table) {
FixupBranch end{};
page_table_lookup(end);
code.SetJumpTarget(end, end_ptr);
} else {
code.BL(callback_fn);
code.MOV(result, code.ABI_RETURN);
}
code.B(end_ptr);
code.FlushIcache();
code.SwitchToNearCode();
DoNotFastmem(do_not_fastmem_marker);
}
});
ctx.reg_alloc.DefineValue(inst, result);
return;
}
reg_alloc.ScratchGpr({ABI_RETURN});
reg_alloc.UseScratch(args[0], ABI_PARAM2);
reg_alloc.UseScratch(args[1], ABI_PARAM3);
Arm64Gen::ARM64Reg addr = reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
Arm64Gen::ARM64Reg value = code.ABI_PARAM3;
Arm64Gen::ARM64Reg page_index = reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg page_offset = reg_alloc.ScratchGpr();
FixupBranch abort, end;
code.MOVI2R(addr, reinterpret_cast<u64>(config.page_table));
code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
code.LDR(addr, addr, ArithOption{ page_index, true });
abort = code.CBZ(addr);
code.ANDI2R(DecodeReg(page_offset), DecodeReg(vaddr), 4095);
switch (bit_size) {
case 8:
code.STRB(DecodeReg(value), addr, ArithOption{ page_offset });
break;
case 16:
code.STRH(DecodeReg(value), addr, ArithOption{ page_offset });
break;
case 32:
code.STR(DecodeReg(value), addr, ArithOption{ page_offset });
break;
case 64:
code.STR(value, addr, ArithOption{ page_offset });
break;
default:
ASSERT_MSG(false, "Invalid bit_size");
break;
if (!config.page_table) {
code.BL(callback_fn);
code.MOV(result, code.ABI_RETURN);
ctx.reg_alloc.DefineValue(inst, result);
return;
}
end = code.B();
code.SetJumpTarget(abort);
code.BL(wrapped_fn);
FixupBranch end{};
page_table_lookup(end);
code.SetJumpTarget(end);
ctx.reg_alloc.DefineValue(inst, result);
}
template<typename T>
void A32EmitA64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn) {
constexpr size_t bit_size = Common::BitSize<T>();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr({ABI_RETURN});
ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
ctx.reg_alloc.UseScratch(args[1], ABI_PARAM3);
ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2);
ARM64Reg value = code.ABI_PARAM3;
ARM64Reg page_index = ctx.reg_alloc.ScratchGpr();
ARM64Reg addr = ctx.reg_alloc.ScratchGpr();
const auto do_not_fastmem_marker = GenerateDoNotFastmemMarker(ctx, inst);
const auto page_table_lookup = [this, vaddr, value, page_index, addr, callback_fn](FixupBranch& end) {
constexpr size_t bit_size = Common::BitSize<T>();
code.MOVP2R(addr, config.page_table);
code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12});
code.LDR(addr, addr, ArithOption{page_index, true});
FixupBranch abort = code.CBZ(addr);
code.ANDI2R(vaddr, vaddr, 4095);
switch (bit_size) {
case 8:
code.STRB(DecodeReg(value), addr, vaddr);
break;
case 16:
code.STRH(DecodeReg(value), addr, vaddr);
break;
case 32:
code.STR(DecodeReg(value), addr, vaddr);
break;
case 64:
code.STR(value, addr, vaddr);
break;
default:
ASSERT_MSG(false, "Invalid bit_size");
break;
}
end = code.B();
code.SetJumpTarget(abort);
code.BL(callback_fn);
};
if (ShouldFastmem(do_not_fastmem_marker)) {
const CodePtr patch_location = code.GetCodePtr();
switch (bit_size) {
case 8:
code.STRB(DecodeReg(value), X27, vaddr);
break;
case 16:
code.STRH(DecodeReg(value), X27, vaddr);
break;
case 32:
code.STR(DecodeReg(value), X27, vaddr);
break;
case 64:
code.STR(value, X27, vaddr);
break;
default:
ASSERT_MSG(false, "Invalid bit_size");
break;
}
code.EnsurePatchLocationSize(patch_location, 5);
fastmem_patch_info.emplace(
patch_location,
FastmemPatchInfo{
[this, patch_location, page_table_lookup, callback_fn, do_not_fastmem_marker]{
CodePtr save_code_ptr = code.GetCodePtr();
code.SetCodePtr(patch_location);
FixupBranch thunk = code.B();
u8* end_ptr = code.GetWritableCodePtr();
code.EnsurePatchLocationSize(patch_location, 5);
code.FlushIcacheSection(reinterpret_cast<const u8*>(patch_location), code.GetCodePtr());
code.SetCodePtr(save_code_ptr);
code.SwitchToFarCode();
code.SetJumpTarget(thunk);
if (config.page_table) {
FixupBranch end{};
page_table_lookup(end);
code.SetJumpTarget(end, end_ptr);
} else {
code.BL(callback_fn);
}
code.B(end_ptr);
code.FlushIcache();
code.SwitchToNearCode();
DoNotFastmem(do_not_fastmem_marker);
}
});
return;
}
if (!config.page_table) {
code.BL(callback_fn);
return;
}
FixupBranch end{};
page_table_lookup(end);
code.SetJumpTarget(end);
}
void A32EmitA64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
ReadMemory<u8, &A32::UserCallbacks::MemoryRead8>(code, ctx.reg_alloc, inst, config, read_memory_8);
ReadMemory<u8>(ctx, inst, read_memory_8);
}
void A32EmitA64::EmitA32ReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
ReadMemory<u16, &A32::UserCallbacks::MemoryRead16>(code, ctx.reg_alloc, inst, config, read_memory_16);
ReadMemory<u16>(ctx, inst, read_memory_16);
}
void A32EmitA64::EmitA32ReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
ReadMemory<u32, &A32::UserCallbacks::MemoryRead32>(code, ctx.reg_alloc, inst, config, read_memory_32);
ReadMemory<u32>(ctx, inst, read_memory_32);
}
void A32EmitA64::EmitA32ReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
ReadMemory<u64, &A32::UserCallbacks::MemoryRead64>(code, ctx.reg_alloc, inst, config, read_memory_64);
ReadMemory<u64>(ctx, inst, read_memory_64);
}
void A32EmitA64::EmitA32WriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
WriteMemory<u8, &A32::UserCallbacks::MemoryWrite8>(code, ctx.reg_alloc, inst, config, write_memory_8);
WriteMemory<u8>(ctx, inst, write_memory_8);
}
void A32EmitA64::EmitA32WriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
WriteMemory<u16, &A32::UserCallbacks::MemoryWrite16>(code, ctx.reg_alloc, inst, config, write_memory_16);
WriteMemory<u16>(ctx, inst, write_memory_16);
}
void A32EmitA64::EmitA32WriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
WriteMemory<u32, &A32::UserCallbacks::MemoryWrite32>(code, ctx.reg_alloc, inst, config, write_memory_32);
WriteMemory<u32>(ctx, inst, write_memory_32);
}
void A32EmitA64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
WriteMemory<u64, &A32::UserCallbacks::MemoryWrite64>(code, ctx.reg_alloc, inst, config, write_memory_64);
WriteMemory<u64>(ctx, inst, write_memory_64);
}
template <typename T, void (A32::UserCallbacks::*fn)(A32::VAddr, T)>
@ -1241,6 +1384,13 @@ std::string A32EmitA64::LocationDescriptorToFriendlyName(const IR::LocationDescr
descriptor.FPSCR().Value());
}
void A32EmitA64::FastmemCallback(CodePtr PC) {
const auto iter = fastmem_patch_info.find(PC);
ASSERT(iter != fastmem_patch_info.end());
iter->second.callback();
fastmem_patch_info.erase(iter);
}
void A32EmitA64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
ASSERT_MSG(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag(), "Unimplemented");
ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented");
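
To recap the patch mechanism above in host terms: every fastmem access records a FastmemPatchInfo callback keyed by its patch location, and FastmemCallback runs exactly one callback for the faulting PC and then forgets it. A stand-alone sketch of that bookkeeping follows (not dynarmic code; main() and the dummy site are purely illustrative).

#include <cassert>
#include <cstdio>
#include <functional>
#include <unordered_map>

using CodePtr = const void*;

// Mirrors fastmem_patch_info/FastmemCallback: map each emitted access site to
// the callback that rewrites it into a branch to the slow path.
std::unordered_map<CodePtr, std::function<void()>> patch_info;

void OnFault(CodePtr pc) {
    const auto iter = patch_info.find(pc);
    assert(iter != patch_info.end());
    iter->second();          // patch the site (far-code thunk in the real emitter)
    patch_info.erase(iter);  // a site is only ever patched once
}

int main() {
    int dummy = 0;
    CodePtr site = &dummy;  // stands in for the address of the emitted load/store
    patch_info.emplace(site, [] { std::puts("rewritten to use the slow path"); });
    OnFault(site);  // what the POSIX signal handler forwards to
}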

View File

@ -7,12 +7,16 @@
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <set>
#include <tuple>
#include <unordered_map>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_range_information.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/exception_handler.h"
#include "dynarmic/A32/a32.h"
#include "dynarmic/A32/config.h"
#include "frontend/A32/location_descriptor.h"
@ -20,6 +24,7 @@
namespace Dynarmic::BackendA64 {
struct A64State;
class RegAlloc;
struct A32EmitContext final : public EmitContext {
@ -29,6 +34,7 @@ struct A32EmitContext final : public EmitContext {
u32 FPCR() const override;
bool FPSCR_FTZ() const override;
bool FPSCR_DN() const override;
std::ptrdiff_t GetInstOffset(IR::Inst* inst) const;
};
class A32EmitA64 final : public EmitA64 {
@ -46,10 +52,13 @@ public:
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
void FastmemCallback(CodePtr PC);
protected:
const A32::UserConfig config;
A32::Jit* jit_interface;
BlockRangeInformation<u32> block_ranges;
ExceptionHandler exception_handler;
struct FastDispatchEntry {
u64 location_descriptor;
@ -61,6 +70,12 @@ protected:
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
void ClearFastDispatchTable();
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
std::set<DoNotFastmemMarker> do_not_fastmem;
DoNotFastmemMarker GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst);
void DoNotFastmem(const DoNotFastmemMarker& marker);
bool ShouldFastmem(const DoNotFastmemMarker& marker) const;
const void* read_memory_8;
const void* read_memory_16;
const void* read_memory_32;
@ -70,6 +85,10 @@ protected:
const void* write_memory_32;
const void* write_memory_64;
void GenMemoryAccessors();
template<typename T>
void ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
template<typename T>
void WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_fast_dispatch_hint = nullptr;
@ -87,6 +106,12 @@ protected:
// Helpers
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Fastmem
struct FastmemPatchInfo {
std::function<void()> callback;
};
std::unordered_map<CodePtr, FastmemPatchInfo> fastmem_patch_info;
// Terminal instruction emitters
void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;

View File

@ -31,17 +31,18 @@ namespace Dynarmic::A32 {
using namespace BackendA64;
static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
static RunCodeCallbacks GenRunCodeCallbacks(const A32::UserConfig& config, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks)),
reinterpret_cast<u64>(config.fastmem_pointer),
};
}
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig config)
: block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state})
: block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlock, this), JitStateInfo{jit_state})
, emitter(block_of_code, config, jit)
, config(std::move(config))
, jit_interface(jit)

View File

@ -71,7 +71,6 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
AllocCodeSpace(TOTAL_CODE_SIZE);
EnableWriting();
GenRunCode();
exception_handler.Register(*this);
}
void BlockOfCode::PreludeComplete() {
@ -155,7 +154,8 @@ void BlockOfCode::GenRunCode() {
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOV(Arm64Gen::X27, ABI_PARAM2); // temporarily in non-volatile register
MOVI2R(Arm64Gen::X27, cb.value_in_X27);
MOV(Arm64Gen::X26, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this);
@ -163,7 +163,7 @@ void BlockOfCode::GenRunCode() {
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining);
SwitchFpscrOnEntry();
BR(Arm64Gen::X27);
BR(Arm64Gen::X26);
AlignCode16();
run_code = (RunCodeFuncType) GetWritableCodePtr();
@ -175,6 +175,7 @@ void BlockOfCode::GenRunCode() {
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOVI2R(Arm64Gen::X27, cb.value_in_X27);
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
@ -291,6 +292,14 @@ CodePtr BlockOfCode::GetCodeBegin() const {
return near_code_begin;
}
u8* BlockOfCode::GetRegion() const {
return region;
}
std::size_t BlockOfCode::GetRegionSize() const {
return total_region_size;
}
void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");
@ -323,4 +332,4 @@ void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
//#endif
//}
} // namespace Dynarmic::BackendX64
} // namespace Dynarmic::BackendA64

View File

@ -24,11 +24,14 @@ struct RunCodeCallbacks {
std::unique_ptr<Callback> LookupBlock;
std::unique_ptr<Callback> AddTicks;
std::unique_ptr<Callback> GetTicksRemaining;
u64 value_in_X27;
};
class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
public:
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
BlockOfCode(const BlockOfCode&) = delete;
/// Call when external emitters have finished emitting their preludes.
void PreludeComplete();
@ -74,6 +77,8 @@ public:
void SwitchToNearCode();
CodePtr GetCodeBegin() const;
u8* GetRegion() const;
std::size_t GetRegionSize() const;
const void* GetReturnFromRunCodeAddress() const {
return return_from_run_code[0];
@ -137,20 +142,6 @@ private:
std::array<const void*, 4> return_from_run_code;
void GenRunCode();
class ExceptionHandler final {
public:
ExceptionHandler();
~ExceptionHandler();
void Register(BlockOfCode& code);
private:
struct Impl;
std::unique_ptr<Impl> impl;
};
ExceptionHandler exception_handler;
//Xbyak::util::Cpu cpu_info;
};

View File

@ -857,10 +857,12 @@ void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64
}
// FixupBranch branching
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) {
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) {
if(!target)
target = m_code;
bool Not = false;
u32 inst = 0;
s64 distance = static_cast<s64>(m_code - branch.ptr);
s64 distance = static_cast<s64>(target - branch.ptr);
distance >>= 2;
switch (branch.type) {
@ -891,7 +893,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) {
inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) |
((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
} break;
case 5: // B (uncoditional)
case 5: // B (unconditional)
ASSERT_MSG(IsInRangeImm26(distance), "%s(%d): Received too large distance: %" PRIx64,
__func__, branch.type, distance);
inst = (0x5 << 26) | MaskImm26(distance);

View File

@ -524,7 +524,7 @@ public:
u8* GetWritableCodePtr();
// FixupBranch branching
void SetJumpTarget(FixupBranch const& branch);
void SetJumpTarget(FixupBranch const& branch, u8* target = nullptr);
FixupBranch CBZ(ARM64Reg Rt);
FixupBranch CBNZ(ARM64Reg Rt);
FixupBranch B(CCFlags cond);

View File

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <functional>
#include "backend/A64/a32_jitstate.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
struct A64State {
std::array<u64, 32> X;
std::array<std::array<u64, 2>, 16> Q;
};
static_assert(sizeof(A64State) == sizeof(A64State::X) + sizeof(A64State::Q));
class ExceptionHandler final {
public:
ExceptionHandler();
~ExceptionHandler();
void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);
bool SupportsFastmem() const;
private:
struct Impl;
std::unique_ptr<Impl> impl;
};
} // namespace Dynarmic::BackendA64

View File

@ -4,18 +4,22 @@
* General Public License version 2 or any later version.
*/
#include "backend/A64/block_of_code.h"
#include "backend/A64/exception_handler.h"
namespace Dynarmic::BackendA64 {
struct BlockOfCode::ExceptionHandler::Impl final {
struct ExceptionHandler::Impl final {
};
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
void BlockOfCode::ExceptionHandler::Register(BlockOfCode&) {
void ExceptionHandler::Register(BlockOfCode&, std::function<void(CodePtr)>) {
// Do nothing
}
bool ExceptionHandler::SupportsFastmem() const {
return false;
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,161 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <mutex>
#include <vector>
#include <csignal>
#include <ucontext.h>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/exception_handler.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
#include "jni.h"
namespace Dynarmic::BackendA64 {
namespace {
struct CodeBlockInfo {
BlockOfCode* block;
std::function<void(CodePtr)> callback;
};
class SigHandler {
public:
SigHandler();
~SigHandler();
void AddCodeBlock(CodeBlockInfo info);
void RemoveCodeBlock(CodePtr PC);
private:
auto FindCodeBlockInfo(CodePtr PC) {
return std::find_if(code_block_infos.begin(), code_block_infos.end(),
[&](const CodeBlockInfo& x) { return x.block->GetRegion() <= PC && PC < x.block->GetRegion() + x.block->GetRegionSize(); });
}
std::vector<CodeBlockInfo> code_block_infos;
std::mutex code_block_infos_mutex;
struct sigaction old_sa_segv;
struct sigaction old_sa_bus;
static void SigAction(int sig, siginfo_t* info, void* raw_context);
};
SigHandler sig_handler;
SigHandler::SigHandler() {
// Method below from dolphin.
constexpr std::size_t signal_stack_size =
static_cast<std::size_t>(std::max(SIGSTKSZ, 2 * 1024 * 1024));
stack_t signal_stack;
signal_stack.ss_sp = malloc(signal_stack_size);
signal_stack.ss_size = signal_stack_size;
signal_stack.ss_flags = 0;
ASSERT_MSG(sigaltstack(&signal_stack, nullptr) == 0,
"dynarmic: POSIX SigHandler: init failure at sigaltstack");
struct sigaction sa;
sa.sa_handler = nullptr;
sa.sa_sigaction = &SigHandler::SigAction;
sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, &old_sa_segv);
}
SigHandler::~SigHandler() {
// No cleanup required.
}
void SigHandler::AddCodeBlock(CodeBlockInfo cb) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
ASSERT(FindCodeBlockInfo(cb.block->GetRegion()) == code_block_infos.end());
code_block_infos.push_back(std::move(cb));
}
void SigHandler::RemoveCodeBlock(CodePtr PC) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
const auto iter = FindCodeBlockInfo(PC);
ASSERT(iter != code_block_infos.end());
code_block_infos.erase(iter);
}
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
ASSERT(sig == SIGSEGV || sig == SIGBUS);
std::lock_guard<std::mutex> guard(sig_handler.code_block_infos_mutex);
auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext.pc);
const auto iter = sig_handler.FindCodeBlockInfo(PC);
if (iter != sig_handler.code_block_infos.end()) {
iter->callback(PC);
return;
}
fmt::print(
stderr,
"dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
PC);
struct sigaction* retry_sa =
sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
if (retry_sa->sa_flags & SA_SIGINFO) {
retry_sa->sa_sigaction(sig, info, raw_context);
return;
}
if (retry_sa->sa_handler == SIG_DFL) {
signal(sig, SIG_DFL);
return;
}
if (retry_sa->sa_handler == SIG_IGN) {
return;
}
retry_sa->sa_handler(sig);
}
} // anonymous namespace
struct ExceptionHandler::Impl final {
Impl(BlockOfCode& code, std::function<void(CodePtr)> cb) {
code_begin = code.GetRegion();
sig_handler.AddCodeBlock({&code, std::move(cb)});
}
~Impl() {
sig_handler.RemoveCodeBlock(code_begin);
}
private:
CodePtr code_begin;
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
void ExceptionHandler::Register(BlockOfCode& code, std::function<void(CodePtr)> cb) {
if (cb)
impl = std::make_unique<Impl>(code, std::move(cb));
}
bool ExceptionHandler::SupportsFastmem() const {
return static_cast<bool>(impl);
}
} // namespace Dynarmic::BackendA64

View File

@ -130,22 +130,32 @@ inline size_t HostLocBitWidth(HostLoc loc) {
using HostLocList = std::initializer_list<HostLoc>;
// X18 may be reserved. (Windows and iOS)
// X27 contains an emulated-memory-related pointer (the fastmem base)
// X28 is used for holding the JitState.
// X30 is the link register.
const HostLocList any_gpr = {
HostLoc::X0, HostLoc::X1, HostLoc::X2, HostLoc::X3, HostLoc::X4, HostLoc::X5, HostLoc::X6, HostLoc::X7,
HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12, HostLoc::X13, HostLoc::X14, HostLoc::X15,
HostLoc::X16, HostLoc::X17, HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23, HostLoc::X24,
HostLoc::X25, HostLoc::X26, HostLoc::X27, // HostLoc::X29,
// In order of desirability, based first on the ABI
constexpr HostLocList any_gpr = {
HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
HostLoc::X24, HostLoc::X25, HostLoc::X26,
HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12,
HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,
HostLoc::X7, HostLoc::X6, HostLoc::X5, HostLoc::X4, HostLoc::X3,
HostLoc::X2, HostLoc::X1, HostLoc::X0,
};
const HostLocList any_fpr = {
HostLoc::Q0, HostLoc::Q1, HostLoc::Q2, HostLoc::Q3, HostLoc::Q4, HostLoc::Q5, HostLoc::Q6, HostLoc::Q7,
HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13, HostLoc::Q14, HostLoc::Q15,
HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21, HostLoc::Q22, HostLoc::Q23,
HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27, HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
constexpr HostLocList any_fpr = {
HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13,
HostLoc::Q14, HostLoc::Q15,
HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21,
HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27,
HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
HostLoc::Q7, HostLoc::Q6, HostLoc::Q5, HostLoc::Q4, HostLoc::Q3, HostLoc::Q2,
HostLoc::Q1, HostLoc::Q0,
};
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
@ -156,9 +166,10 @@ size_t SpillToOpArg(HostLoc loc) {
ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
ASSERT_MSG(i < JitStateType::SpillCount, "Spill index greater than number of available spill locations");
ASSERT_MSG(i < JitStateType::SpillCount,
"Spill index greater than number of available spill locations");
return JitStateType::GetSpillLocationOffsetFromIndex(i);
}
} // namespace Dynarmic::BackendX64
} // namespace Dynarmic::BackendA64

View File

@ -435,7 +435,7 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
std::vector<HostLoc> candidates = desired_locations;
// Find all locations that have not been allocated.
auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
return !this->LocInfo(loc).IsLocked();
});
candidates.erase(allocated_locs, candidates.end());
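
As an aside on the "go back to regular std::partition" line in the commit message: the idiom above keeps only the unlocked candidates by partitioning and erasing the tail, and std::partition does not guarantee the relative order of the kept elements (std::stable_partition does). A self-contained illustration of the idiom, using integer stand-ins for HostLoc values:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
    std::vector<int> candidates{3, 1, 4, 1, 5, 9, 2, 6};
    // Keep only candidates satisfying the predicate (here: even values);
    // everything past the partition point is erased, as in SelectARegister.
    const auto kept_end = std::partition(candidates.begin(), candidates.end(),
                                         [](int loc) { return loc % 2 == 0; });
    candidates.erase(kept_end, candidates.end());
    assert(candidates.size() == 3);  // 4, 2, 6, in unspecified relative order
}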