Debt: backport A64 backend

enable W^X on Apple silicon
SachinVin 2022-05-22 23:26:14 +05:30
parent 97edb626c7
commit df9d373a84
40 changed files with 13387 additions and 53 deletions

View File

@@ -8,9 +8,25 @@ if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(MASTER_PROJECT ON)
endif()
# Add the module directory to the list of paths
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
# Arch detection
include(DetectArchitecture)
if (NOT DEFINED ARCHITECTURE)
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
message(STATUS "Target architecture: ${ARCHITECTURE}")
set(REQUIRES_NO_EXECUTE_SUPPORT OFF)
# Apple Silicon chips require W^X
if(APPLE AND ARCHITECTURE STREQUAL "arm64")
set(REQUIRES_NO_EXECUTE_SUPPORT ON)
endif()
# Dynarmic project options
option(DYNARMIC_ENABLE_CPU_FEATURE_DETECTION "Turning this off causes dynarmic to assume the host CPU doesn't support anything later than SSE3" ON)
option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" OFF)
option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" ${REQUIRES_NO_EXECUTE_SUPPORT})
option(DYNARMIC_FATAL_ERRORS "Errors are fatal" OFF)
option(DYNARMIC_IGNORE_ASSERTS "Ignore asserts" OFF)
option(DYNARMIC_TESTS "Build tests" ${MASTER_PROJECT})
@@ -39,9 +55,6 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
message(SEND_ERROR "In-source builds are not allowed.")
endif()
# Add the module directory to the list of paths
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
# Compiler flags
if (MSVC)
set(DYNARMIC_CXX_FLAGS
@@ -105,13 +118,6 @@ else()
endif()
endif()
# Arch detection
include(DetectArchitecture)
if (NOT DEFINED ARCHITECTURE)
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
message(STATUS "Target architecture: ${ARCHITECTURE}")
# Include Boost
if (NOT TARGET boost)
if (NOT Boost_INCLUDE_DIRS)
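The DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT option added above exists because Apple Silicon enforces W^X on JIT memory: a MAP_JIT mapping is writable or executable for a given thread, never both at once, and the thread toggles between the two states. A standalone sketch of the underlying macOS mechanism the backend builds on (not dynarmic code; mmap with MAP_JIT, pthread_jit_write_protect_np and sys_icache_invalidate are Apple APIs, and hardened builds may additionally need the JIT entitlement):

#include <cstdint>
#include <cstring>
#include <libkern/OSCacheControl.h>
#include <pthread.h>
#include <sys/mman.h>

int main() {
    // Reserve JIT memory; with MAP_JIT, writability and executability are toggled per thread.
    void* mem = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0);
    if (mem == MAP_FAILED)
        return 1;
    pthread_jit_write_protect_np(0);               // region is now writable, not executable
    const std::uint32_t ret_insn = 0xD65F03C0;     // AArch64 RET
    std::memcpy(mem, &ret_insn, sizeof(ret_insn));
    pthread_jit_write_protect_np(1);               // region is now executable, not writable
    sys_icache_invalidate(mem, sizeof(ret_insn));  // flush the instruction cache before running
    reinterpret_cast<void (*)()>(mem)();           // execute the generated RET
}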

View File

@@ -365,55 +365,66 @@ if (ARCHITECTURE STREQUAL "x86_64")
else()
target_sources(dynarmic PRIVATE backend/x64/exception_handler_generic.cpp)
endif()
elseif(ARCHITECTURE STREQUAL "arm64")
target_link_libraries(dynarmic PRIVATE $<BUILD_INTERFACE:merry::oaknut>)
target_sources(dynarmic PRIVATE
backend/arm64/a32_jitstate.cpp
backend/arm64/a32_jitstate.h
backend/arm64/abi.cpp
backend/arm64/abi.h
backend/arm64/devirtualize.h
backend/arm64/emit_arm64.cpp
backend/arm64/emit_arm64.h
backend/arm64/emit_arm64_a32.cpp
backend/arm64/emit_arm64_a32_coprocessor.cpp
backend/arm64/emit_arm64_a32_memory.cpp
backend/arm64/emit_arm64_a64.cpp
backend/arm64/emit_arm64_a64_memory.cpp
backend/arm64/emit_arm64_cryptography.cpp
backend/arm64/emit_arm64_data_processing.cpp
backend/arm64/emit_arm64_floating_point.cpp
backend/arm64/emit_arm64_packed.cpp
backend/arm64/emit_arm64_saturation.cpp
backend/arm64/emit_arm64_vector.cpp
backend/arm64/emit_arm64_vector_floating_point.cpp
backend/arm64/emit_arm64_vector_saturation.cpp
backend/arm64/emit_context.h
backend/arm64/exclusive_monitor.cpp
backend/arm64/fpsr_manager.cpp
backend/arm64/fpsr_manager.h
backend/arm64/reg_alloc.cpp
backend/arm64/reg_alloc.h
backend/arm64/stack_layout.h
common/spin_lock_arm64.cpp
common/spin_lock_arm64.h
backend/A64/emitter/a64_emitter.cpp
backend/A64/emitter/a64_emitter.h
backend/A64/emitter/arm_common.h
backend/A64/emitter/code_block.h
# backend/A64/a64_emit_a64.cpp
# backend/A64/a64_emit_a64.h
# backend/A64/a64_exclusive_monitor.cpp
# backend/A64/a64_interface.cpp
# backend/A64/a64_jitstate.cpp
# backend/A64/a64_jitstate.h
backend/A64/abi.cpp
backend/A64/abi.h
backend/A64/block_of_code.cpp
backend/A64/block_of_code.h
backend/A64/block_range_information.cpp
backend/A64/block_range_information.h
backend/A64/callback.cpp
backend/A64/callback.h
backend/A64/constant_pool.cpp
backend/A64/constant_pool.h
backend/A64/devirtualize.h
backend/A64/emit_a64.cpp
backend/A64/emit_a64.h
# backend/A64/emit_a64_aes.cpp
# backend/A64/emit_a64_crc32.cpp
backend/A64/emit_a64_data_processing.cpp
backend/A64/emit_a64_floating_point.cpp
backend/A64/emit_a64_packed.cpp
backend/A64/emit_a64_saturation.cpp
# backend/A64/emit_a64_sm4.cpp
# backend/A64/emit_a64_vector.cpp
# backend/A64/emit_a64_vector_floating_point.cpp
backend/A64/exception_handler.h
backend/A64/hostloc.cpp
backend/A64/hostloc.h
backend/A64/jitstate_info.h
backend/A64/opcodes.inc
backend/A64/perf_map.cpp
backend/A64/perf_map.h
backend/A64/reg_alloc.cpp
backend/A64/reg_alloc.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE
backend/arm64/a32_address_space.cpp
backend/arm64/a32_address_space.h
backend/arm64/a32_core.h
backend/arm64/a32_interface.cpp
# Move this to the list below when implemented
backend/arm64/a64_interface.cpp
backend/A64/a32_emit_a64.cpp
backend/A64/a32_emit_a64.h
backend/A64/a32_interface.cpp
backend/A64/a32_jitstate.cpp
backend/A64/a32_jitstate.h
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
if (UNIX)
target_sources(dynarmic PRIVATE backend/A64/exception_handler_posix.cpp)
else()
target_sources(dynarmic PRIVATE backend/A64/exception_handler_generic.cpp)
endif()
else()
message(FATAL_ERROR "Unsupported architecture")

File diff suppressed because it is too large

View File

@@ -0,0 +1,138 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <set>
#include <tuple>
#include <unordered_map>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_range_information.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/exception_handler.h"
#include "dynarmic/A32/a32.h"
#include "dynarmic/A32/config.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::BackendA64 {
struct A64State;
class RegAlloc;
struct A32EmitContext final : public EmitContext {
A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
A32::LocationDescriptor Location() const;
bool IsSingleStep() const;
FP::RoundingMode FPSCR_RMode() const override;
u32 FPCR() const override;
bool FPSCR_FTZ() const override;
bool FPSCR_DN() const override;
std::ptrdiff_t GetInstOffset(IR::Inst* inst) const;
};
class A32EmitA64 final : public EmitA64 {
public:
A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface);
~A32EmitA64() override;
/**
* Emit host machine code for a basic block with intermediate representation `ir`.
* @note ir is modified.
*/
BlockDescriptor Emit(IR::Block& ir);
void ClearCache() override;
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
void FastmemCallback(CodePtr PC);
protected:
const A32::UserConfig config;
A32::Jit* jit_interface;
BlockRangeInformation<u32> block_ranges;
ExceptionHandler exception_handler;
void EmitCondPrelude(const A32EmitContext& ctx);
struct FastDispatchEntry {
u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
const void* code_ptr = nullptr;
};
static_assert(sizeof(FastDispatchEntry) == 0x10);
static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
static constexpr size_t fast_dispatch_table_size = 0x10000;
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
void ClearFastDispatchTable();
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
std::set<DoNotFastmemMarker> do_not_fastmem;
DoNotFastmemMarker GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst);
void DoNotFastmem(const DoNotFastmemMarker& marker);
bool ShouldFastmem(const DoNotFastmemMarker& marker) const;
const void* read_memory_8;
const void* read_memory_16;
const void* read_memory_32;
const void* read_memory_64;
const void* write_memory_8;
const void* write_memory_16;
const void* write_memory_32;
const void* write_memory_64;
void GenMemoryAccessors();
template<typename T>
void ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
template<typename T>
void WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_fast_dispatch_hint = nullptr;
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
void GenTerminalHandlers();
// Microinstruction emitters
#define OPCODE(...)
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
#define A64OPC(...)
#include "frontend/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
// Helpers
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Fastmem
struct FastmemPatchInfo {
std::function<void()> callback;
};
std::unordered_map<CodePtr, FastmemPatchInfo> fastmem_patch_info;
// Terminal instruction emitters
void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
// Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) override;
};
} // namespace Dynarmic::BackendA64
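For context on the fast-dispatch constants declared above: each FastDispatchEntry is 0x10 bytes and the table holds 0x10000 entries, so masking a location-descriptor hash with 0xFFFF0 yields a 16-byte-aligned byte offset straight into the table. The emitted lookup itself is generated in GenTerminalHandlers and is not part of this diff; the following is only a standalone sketch of that indexing arithmetic:

#include <array>
#include <cstddef>
#include <cstdint>

struct FastDispatchEntry {
    std::uint64_t location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
    const void* code_ptr = nullptr;
};
static_assert(sizeof(FastDispatchEntry) == 0x10);

// hash & 0xFFFF0 == ((hash >> 4) mod 0x10000) * sizeof(FastDispatchEntry)
FastDispatchEntry& Lookup(std::array<FastDispatchEntry, 0x10000>& table, std::uint64_t hash) {
    const std::size_t byte_offset = static_cast<std::size_t>(hash & 0xFFFF0);
    auto* base = reinterpret_cast<unsigned char*>(table.data());
    return *reinterpret_cast<FastDispatchEntry*>(base + byte_offset);
}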

View File

@@ -0,0 +1,323 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <memory>
#include <boost/icl/interval_set.hpp>
#include <fmt/format.h>
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/context.h>
#include "backend/A64/a32_emit_a64.h"
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/callback.h"
#include "backend/A64/devirtualize.h"
#include "backend/A64/jitstate_info.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/llvm_disassemble.h"
#include "common/scope_exit.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/location_descriptor.h"
#include "ir_opt/passes.h"
namespace Dynarmic::A32 {
using namespace BackendA64;
static RunCodeCallbacks GenRunCodeCallbacks(const A32::UserConfig& config, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks)),
reinterpret_cast<u64>(config.fastmem_pointer),
};
}
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig config)
: block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
, emitter(block_of_code, config, jit)
, config(std::move(config))
, jit_interface(jit)
{}
A32JitState jit_state;
BlockOfCode block_of_code;
A32EmitA64 emitter;
const A32::UserConfig config;
// Requests made during execution to invalidate the cache are queued up here.
size_t invalid_cache_generation = 0;
boost::icl::interval_set<u32> invalid_cache_ranges;
bool invalidate_entire_cache = false;
void Execute() {
const CodePtr current_codeptr = [this]{
// RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
jit_state.rsb_ptr = new_rsb_ptr;
return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
}
return GetCurrentBlock();
}();
block_of_code.RunCode(&jit_state, current_codeptr);
}
void Step() {
block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
}
std::string Disassemble(const IR::LocationDescriptor& descriptor) {
auto block = GetBasicBlock(descriptor);
std::string result = fmt::format("address: {}\nsize: {} bytes\n", block.entrypoint, block.size);
#ifdef DYNARMIC_USE_LLVM
for (const u32* pos = reinterpret_cast<const u32*>(block.entrypoint);
reinterpret_cast<const u8*>(pos) < reinterpret_cast<const u8*>(block.entrypoint) + block.size; pos += 1) {
fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast<u64>(pos), *pos);
fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos)));
result += Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos));
}
#endif
return result;
}
void PerformCacheInvalidation() {
if (invalidate_entire_cache) {
jit_state.ResetRSB();
block_of_code.ClearCache();
emitter.ClearCache();
invalid_cache_ranges.clear();
invalidate_entire_cache = false;
invalid_cache_generation++;
return;
}
if (invalid_cache_ranges.empty()) {
return;
}
jit_state.ResetRSB();
emitter.InvalidateCacheRanges(invalid_cache_ranges);
invalid_cache_ranges.clear();
invalid_cache_generation++;
}
void RequestCacheInvalidation() {
if (jit_interface->is_executing) {
jit_state.halt_requested = true;
return;
}
PerformCacheInvalidation();
}
private:
Jit* jit_interface;
static CodePtr GetCurrentBlockThunk(void* this_voidptr) {
Jit::Impl& this_ = *static_cast<Jit::Impl*>(this_voidptr);
return this_.GetCurrentBlock();
}
IR::LocationDescriptor GetCurrentLocation() const {
return IR::LocationDescriptor{jit_state.GetUniqueHash()};
}
CodePtr GetCurrentBlock() {
return GetBasicBlock(GetCurrentLocation()).entrypoint;
}
CodePtr GetCurrentSingleStep() {
return GetBasicBlock(A32::LocationDescriptor{GetCurrentLocation()}.SetSingleStepping(true)).entrypoint;
}
A32EmitA64::BlockDescriptor GetBasicBlock(IR::LocationDescriptor descriptor) {
auto block = emitter.GetBasicBlock(descriptor);
if (block)
return *block;
constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
invalidate_entire_cache = true;
PerformCacheInvalidation();
}
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); }, {config.define_unpredictable_behaviour, config.hook_hint_instructions});
if (config.enable_optimizations) {
Optimization::A32GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32ConstantMemoryReads(ir_block, config.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32MergeInterpretBlocksPass(ir_block, config.callbacks);
}
Optimization::VerificationPass(ir_block);
return emitter.Emit(ir_block);
}
};
Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, std::move(config))) {}
Jit::~Jit() = default;
void Jit::Run() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = false;
impl->Execute();
impl->PerformCacheInvalidation();
}
void Jit::Step() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = true;
impl->Step();
impl->PerformCacheInvalidation();
}
void Jit::ClearCache() {
impl->invalidate_entire_cache = true;
impl->RequestCacheInvalidation();
}
void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
impl->invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
impl->RequestCacheInvalidation();
}
void Jit::Reset() {
ASSERT(!is_executing);
impl->jit_state = {};
}
void Jit::HaltExecution() {
impl->jit_state.halt_requested = true;
}
std::array<u32, 16>& Jit::Regs() {
return impl->jit_state.Reg;
}
const std::array<u32, 16>& Jit::Regs() const {
return impl->jit_state.Reg;
}
std::array<u32, 64>& Jit::ExtRegs() {
return impl->jit_state.ExtReg;
}
const std::array<u32, 64>& Jit::ExtRegs() const {
return impl->jit_state.ExtReg;
}
u32 Jit::Cpsr() const {
return impl->jit_state.Cpsr();
}
void Jit::SetCpsr(u32 value) {
return impl->jit_state.SetCpsr(value);
}
u32 Jit::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Jit::SetFpscr(u32 value) {
return impl->jit_state.SetFpscr(value);
}
Context Jit::SaveContext() const {
Context ctx;
SaveContext(ctx);
return ctx;
}
struct Context::Impl {
A32JitState jit_state;
size_t invalid_cache_generation;
};
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
Context::~Context() = default;
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) {
*impl = *ctx.impl;
return *this;
}
Context& Context::operator=(Context&& ctx) noexcept {
impl = std::move(ctx.impl);
return *this;
}
std::array<std::uint32_t, 16>& Context::Regs() {
return impl->jit_state.Reg;
}
const std::array<std::uint32_t, 16>& Context::Regs() const {
return impl->jit_state.Reg;
}
std::array<std::uint32_t, 64>& Context::ExtRegs() {
return impl->jit_state.ExtReg;
}
const std::array<std::uint32_t, 64>& Context::ExtRegs() const {
return impl->jit_state.ExtReg;
}
std::uint32_t Context::Cpsr() const {
return impl->jit_state.Cpsr();
}
void Context::SetCpsr(std::uint32_t value) {
impl->jit_state.SetCpsr(value);
}
std::uint32_t Context::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Context::SetFpscr(std::uint32_t value) {
return impl->jit_state.SetFpscr(value);
}
void Jit::SaveContext(Context& ctx) const {
ctx.impl->jit_state.TransferJitState(impl->jit_state, false);
ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
}
void Jit::LoadContext(const Context& ctx) {
bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
impl->jit_state.TransferJitState(ctx.impl->jit_state, reset_rsb);
}
std::string Jit::Disassemble() const {
std::string result;
#ifdef DYNARMIC_USE_LLVM
for (const u32* pos = reinterpret_cast<const u32*>(impl->block_of_code.GetCodeBegin());
reinterpret_cast<const u8*>(pos) < reinterpret_cast<const u8*>(impl->block_of_code.GetCodePtr()); pos += 1) {
fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast<u64>(pos), *pos);
fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos)));
result += Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos));
}
#endif
return result;
}
} // namespace Dynarmic::A32
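The "RSB optimization" at the top of Impl::Execute is the pop half of a small return stack buffer: emitted call-like branches push (unique hash, host code pointer) pairs via PushRSBHelper (its beginning is visible at the end of this diff), and Execute checks the newest entry to skip a full block lookup when returning to the predicted location. A standalone sketch of that arithmetic, assuming the emitted push stores into the current slot and then advances rsb_ptr:

#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>

struct Rsb {
    static constexpr std::size_t size = 8;  // must be a power of two
    static constexpr std::uint32_t mask = size - 1;
    std::uint32_t ptr = 0;
    std::array<std::uint64_t, size> location_descriptors{};
    std::array<std::uint64_t, size> code_ptrs{};

    // Call-like branch: remember where execution will resume and its host code.
    void Push(std::uint64_t descriptor, std::uint64_t host_code_ptr) {
        location_descriptors[ptr] = descriptor;
        code_ptrs[ptr] = host_code_ptr;
        ptr = (ptr + 1) & mask;
    }

    // Return: pop only when the newest entry matches the current guest state,
    // exactly like the check at the top of Impl::Execute.
    std::optional<std::uint64_t> Predict(std::uint64_t current_descriptor) {
        const std::uint32_t newest = (ptr - 1) & mask;
        if (location_descriptors[newest] != current_descriptor)
            return std::nullopt;  // mispredict: fall back to the full block lookup
        ptr = newest;
        return code_ptrs[newest];
    }
};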

View File

@@ -0,0 +1,172 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/A32/location_descriptor.h"
namespace Dynarmic::BackendA64 {
/**
* CPSR Bits
* =========
*
* ARM CPSR flags
* --------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
* Q bit 27 Saturation flag
* IT[1:0] bits 25-26 If-Then execution state (lower 2 bits)
* J bit 24 Jazelle instruction set flag
* GE bits 16-19 Greater than or Equal flags
* IT[7:2] bits 10-15 If-Then execution state (upper 6 bits)
* E bit 9 Data Endianness flag
* A bit 8 Disable imprecise Aborts
* I bit 7 Disable IRQ interrupts
* F bit 6 Disable FIQ interrupts
* T bit 5 Thumb instruction set flag
* M bits 0-4 Processor Mode bits
*
* A64 flags
* -------------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
*/
u32 A32JitState::Cpsr() const {
DEBUG_ASSERT((cpsr_nzcv & ~0xF0000000) == 0);
DEBUG_ASSERT((cpsr_q & ~1) == 0);
DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);
u32 cpsr = 0;
// NZCV flags
cpsr |= cpsr_nzcv;
// Q flag
cpsr |= cpsr_q ? 1 << 27 : 0;
// GE flags
cpsr |= Common::Bit<31>(cpsr_ge) ? 1 << 19 : 0;
cpsr |= Common::Bit<23>(cpsr_ge) ? 1 << 18 : 0;
cpsr |= Common::Bit<15>(cpsr_ge) ? 1 << 17 : 0;
cpsr |= Common::Bit<7>(cpsr_ge) ? 1 << 16 : 0;
// E flag, T flag
cpsr |= Common::Bit<1>(upper_location_descriptor) ? 1 << 9 : 0;
cpsr |= Common::Bit<0>(upper_location_descriptor) ? 1 << 5 : 0;
// IT state
cpsr |= static_cast<u32>(upper_location_descriptor & 0b11111100'00000000);
cpsr |= static_cast<u32>(upper_location_descriptor & 0b00000011'00000000) << 17;
// Other flags
cpsr |= cpsr_jaifm;
return cpsr;
}
void A32JitState::SetCpsr(u32 cpsr) {
// NZCV flags
cpsr_nzcv = cpsr & 0xF0000000;
// Q flag
cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;
// GE flags
cpsr_ge = 0;
cpsr_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
cpsr_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
cpsr_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
cpsr_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
upper_location_descriptor &= 0xFFFF0000;
// E flag, T flag
upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0;
upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0;
// IT state
upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;
// Other flags
cpsr_jaifm = cpsr & 0x010001DF;
}
void A32JitState::ResetRSB() {
rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
rsb_codeptrs.fill(0);
}
/**
* FPSCR
* =========================
*
* VFP FPSCR cumulative exception bits
* -----------------------------------
* IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPSCR.FTZ = 1
* IXC bit 4 Inexact cumulative exception bit
* UFC bit 3 Underflow cumulative exception bit
* OFC bit 2 Overflow cumulative exception bit
* DZC bit 1 Division by Zero cumulative exception bit
* IOC bit 0 Invalid Operation cumulative exception bit
*
* VFP FPSCR exception trap enables
* --------------------------------
* IDE bit 15 Input Denormal exception trap enable
* IXE bit 12 Inexact exception trap enable
* UFE bit 11 Underflow exception trap enable
* OFE bit 10 Overflow exception trap enable
* DZE bit 9 Division by Zero exception trap enable
* IOE bit 8 Invalid Operation exception trap enable
*
* VFP FPSCR mode bits
* -------------------
* AHP bit 26 Alternate half-precision
* DN bit 25 Default NaN
* FZ bit 24 Flush to Zero
* RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
* Stride bits 20-21 Vector stride
* Len bits 16-18 Vector length
*/
// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
constexpr u32 FPSCR_MODE_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
u32 A32JitState::Fpscr() const {
DEBUG_ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);
const u32 fpcr_mode = static_cast<u32>(upper_location_descriptor) & FPSCR_MODE_MASK;
u32 FPSCR = fpcr_mode | fpsr_nzcv;
FPSCR |= (guest_fpsr & 0x1F);
FPSCR |= fpsr_exc;
return FPSCR;
}
void A32JitState::SetFpscr(u32 FPSCR) {
// Ensure that only upper half of upper_location_descriptor is used for FPSCR bits.
static_assert((FPSCR_MODE_MASK & 0xFFFF0000) == FPSCR_MODE_MASK);
upper_location_descriptor &= 0x0000FFFF;
upper_location_descriptor |= FPSCR & FPSCR_MODE_MASK;
fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK;
guest_fpcr = 0;
guest_fpsr = 0;
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
fpsr_exc = FPSCR & 0x9F;
// Mode Bits
guest_fpcr |= FPSCR & 0x07C09F00;
// Exceptions
guest_fpsr |= FPSCR & 0x9F;
}
} // namespace Dynarmic::BackendA64
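The GE handling above is the reason cpsr_ge exists as a separate field: each of the four CPSR.GE flags is widened to a whole byte (0x00 or 0xFF) so emitted parallel add/subtract and select code can use them directly as byte masks, and Cpsr() only has to read back the top bit of each byte. A standalone sketch of the same packing and unpacking:

#include <cassert>
#include <cstdint>

// CPSR.GE occupies bits 19-16; cpsr_ge stores one byte mask per flag.
std::uint32_t PackGe(std::uint32_t cpsr) {
    std::uint32_t ge = 0;
    ge |= (cpsr & (1u << 19)) ? 0xFF000000 : 0;
    ge |= (cpsr & (1u << 18)) ? 0x00FF0000 : 0;
    ge |= (cpsr & (1u << 17)) ? 0x0000FF00 : 0;
    ge |= (cpsr & (1u << 16)) ? 0x000000FF : 0;
    return ge;
}

// Cpsr() only needs the top bit of each byte mask (bits 31, 23, 15, 7).
std::uint32_t UnpackGe(std::uint32_t ge) {
    std::uint32_t cpsr = 0;
    cpsr |= (ge & (1u << 31)) ? (1u << 19) : 0;
    cpsr |= (ge & (1u << 23)) ? (1u << 18) : 0;
    cpsr |= (ge & (1u << 15)) ? (1u << 17) : 0;
    cpsr |= (ge & (1u << 7)) ? (1u << 16) : 0;
    return cpsr;
}

int main() {
    const std::uint32_t ge_bits = 0b1101u << 16;   // GE = 0b1101
    assert(PackGe(ge_bits) == 0xFFFF00FF);         // widened to byte masks
    assert(UnpackGe(PackGe(ge_bits)) == ge_bits);  // round-trips losslessly
}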

View File

@@ -0,0 +1,111 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
#endif
struct A32JitState {
using ProgramCounterType = u32;
A32JitState() { ResetRSB(); }
std::array<u32, 16> Reg{}; // Current register file.
// TODO: Mode-specific register sets unimplemented.
u32 upper_location_descriptor = 0;
u32 cpsr_ge = 0;
u32 cpsr_q = 0;
u32 cpsr_nzcv = 0;
u32 cpsr_jaifm = 0;
u32 Cpsr() const;
void SetCpsr(u32 cpsr);
alignas(u64) std::array<u32, 64> ExtReg{}; // Extension registers.
static constexpr size_t SpillCount = 64;
std::array<u64, SpillCount> Spill{}; // Spill.
static size_t GetSpillLocationOffsetFromIndex(size_t i) {
return static_cast<u64>(offsetof(A32JitState, Spill) + i * sizeof(u64));
}
// For internal use (See: BlockOfCode::RunCode)
u64 guest_fpcr = 0;
u64 guest_fpsr = 0;
u64 save_host_FPCR = 0;
s64 cycles_to_run = 0;
s64 cycles_remaining = 0;
bool halt_requested = false;
bool check_bit = false;
// Exclusive state
static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
u32 exclusive_state = 0;
u32 exclusive_address = 0;
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
static constexpr size_t RSBPtrMask = RSBSize - 1;
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB();
u32 fpsr_exc = 0;
u32 fpsr_qc = 0; // Dummy value
u32 fpsr_nzcv = 0;
u32 Fpscr() const;
void SetFpscr(u32 FPSCR);
u64 GetUniqueHash() const noexcept {
return (static_cast<u64>(upper_location_descriptor) << 32) | (static_cast<u64>(Reg[15]));
}
void TransferJitState(const A32JitState& src, bool reset_rsb) {
Reg = src.Reg;
upper_location_descriptor = src.upper_location_descriptor;
cpsr_ge = src.cpsr_ge;
cpsr_q = src.cpsr_q;
cpsr_nzcv = src.cpsr_nzcv;
cpsr_jaifm = src.cpsr_jaifm;
ExtReg = src.ExtReg;
guest_fpcr = src.guest_fpcr;
guest_fpsr = src.guest_fpsr;
fpsr_exc = src.fpsr_exc;
fpsr_qc = src.fpsr_qc;
fpsr_nzcv = src.fpsr_nzcv;
exclusive_state = 0;
exclusive_address = 0;
if (reset_rsb) {
ResetRSB();
} else {
rsb_ptr = src.rsb_ptr;
rsb_location_descriptors = src.rsb_location_descriptors;
rsb_codeptrs = src.rsb_codeptrs;
}
}
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
using CodePtr = const void*;
} // namespace Dynarmic::BackendA64
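For reference, GetUniqueHash above doubles as the A32 location descriptor used as the key for block lookup, the RSB and the fast dispatch table: the low 32 bits are the PC (Reg[15]) and the high 32 bits are upper_location_descriptor, i.e. the T/E/IT bits and FPSCR mode bits folded in by SetCpsr/SetFpscr. A tiny illustrative sketch (values are hypothetical):

#include <cstdint>

std::uint64_t MakeUniqueHash(std::uint32_t upper_location_descriptor, std::uint32_t pc) {
    return (static_cast<std::uint64_t>(upper_location_descriptor) << 32) | pc;
}

// Thumb mode (bit 0 of the upper half) at PC 0x1000:
//   MakeUniqueHash(0x00000001, 0x1000) == 0x0000000100001000
// Two blocks at the same PC but with different Thumb/endian/FPSCR state hash
// to different keys and therefore get separate host code.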

View File

@@ -0,0 +1,87 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// 20th Sep 2018: This code was modified for Dynarmic.
#include <algorithm>
#include <vector>
#include "backend/A64/abi.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/iterator_util.h"
namespace Dynarmic::BackendA64 {
template<typename RegisterArrayT>
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
u32 gprs = 0 , fprs = 0;
for (HostLoc reg : regs) {
if (HostLocIsGPR(reg)) {
gprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
} else if (HostLocIsFPR(reg)) {
fprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
}
}
code.fp_emitter.ABI_PushRegisters(fprs);
code.ABI_PushRegisters(gprs);
}
template<typename RegisterArrayT>
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
u32 gprs = 0, fprs = 0;
for (HostLoc reg : regs) {
if (HostLocIsGPR(reg)) {
gprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
} else if (HostLocIsFPR(reg)) {
fprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
}
}
code.ABI_PopRegisters(gprs);
code.fp_emitter.ABI_PopRegisters(fprs);
}
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
std::vector<HostLoc> regs;
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
ABI_PushRegistersAndAdjustStack(code, regs);
}
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
std::vector<HostLoc> regs;
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
ABI_PopRegistersAndAdjustStack(code, regs);
}
} // namespace Dynarmic::BackendA64
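ABI_PushRegistersAndAdjustStack and ABI_PopRegistersAndAdjustStack above reduce the register list to two bitmasks, one bit per GPR index and one per FPR index, before handing them to the emitter's push/pop helpers. A standalone sketch of that mask building, assuming DecodeReg returns the plain 0-31 register index:

#include <cstdint>
#include <initializer_list>

std::uint32_t BuildRegisterMask(std::initializer_list<unsigned> register_indices) {
    std::uint32_t mask = 0;
    for (const unsigned index : register_indices)
        mask |= 1u << index;  // one bit per register index
    return mask;
}

// Saving X19, X20 and Q8 would produce:
//   gprs == BuildRegisterMask({19, 20}) == 0x00180000
//   fprs == BuildRegisterMask({8})      == 0x00000100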

View File

@@ -0,0 +1,110 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
namespace Dynarmic::BackendA64 {
constexpr HostLoc ABI_RETURN = HostLoc::X0;
constexpr HostLoc ABI_PARAM1 = HostLoc::X0;
constexpr HostLoc ABI_PARAM2 = HostLoc::X1;
constexpr HostLoc ABI_PARAM3 = HostLoc::X2;
constexpr HostLoc ABI_PARAM4 = HostLoc::X3;
constexpr HostLoc ABI_PARAM5 = HostLoc::X4;
constexpr HostLoc ABI_PARAM6 = HostLoc::X5;
constexpr HostLoc ABI_PARAM7 = HostLoc::X6;
constexpr HostLoc ABI_PARAM8 = HostLoc::X7;
constexpr std::array<HostLoc, 43> ABI_ALL_CALLER_SAVE = {
HostLoc::X0,
HostLoc::X1,
HostLoc::X2,
HostLoc::X3,
HostLoc::X4,
HostLoc::X5,
HostLoc::X6,
HostLoc::X7,
HostLoc::X8,
HostLoc::X9,
HostLoc::X10,
HostLoc::X11,
HostLoc::X12,
HostLoc::X13,
HostLoc::X14,
HostLoc::X15,
HostLoc::X16,
HostLoc::X17,
HostLoc::X18,
HostLoc::Q0,
HostLoc::Q1,
HostLoc::Q2,
HostLoc::Q3,
HostLoc::Q4,
HostLoc::Q5,
HostLoc::Q6,
HostLoc::Q7,
HostLoc::Q16,
HostLoc::Q17,
HostLoc::Q18,
HostLoc::Q19,
HostLoc::Q20,
HostLoc::Q21,
HostLoc::Q22,
HostLoc::Q23,
HostLoc::Q24,
HostLoc::Q25,
HostLoc::Q26,
HostLoc::Q27,
HostLoc::Q28,
HostLoc::Q29,
HostLoc::Q30,
HostLoc::Q31,
};
constexpr std::array<HostLoc, 20> ABI_ALL_CALLEE_SAVE = {
HostLoc::X19,
HostLoc::X20,
HostLoc::X21,
HostLoc::X22,
HostLoc::X23,
HostLoc::X24,
HostLoc::X25,
HostLoc::X26,
HostLoc::X27,
HostLoc::X28,
HostLoc::X29,
HostLoc::X30,
HostLoc::Q8,
HostLoc::Q9,
HostLoc::Q10,
HostLoc::Q11,
HostLoc::Q12,
HostLoc::Q13,
HostLoc::Q14,
HostLoc::Q15,
};
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 63, "Invalid total number of registers");
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,336 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include <cstring>
#include <limits>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>  // for sysconf
#endif
#ifdef __APPLE__
#include <pthread.h>
#endif
namespace Dynarmic::BackendA64 {
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM3 = Arm64Gen::ARM64Reg::X2;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM4 = Arm64Gen::ARM64Reg::X3;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM5 = Arm64Gen::ARM64Reg::X4;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM6 = Arm64Gen::ARM64Reg::X5;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM7 = Arm64Gen::ARM64Reg::X6;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30;
const std::array<Arm64Gen::ARM64Reg, 8> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2,
BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4,
BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6,
BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8};
namespace {
constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory([[maybe_unused]] const void* base, [[maybe_unused]] size_t size, bool is_executable) {
#if defined(_WIN32)
DWORD oldProtect = 0;
VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#elif defined(__APPLE__)
pthread_jit_write_protect_np(is_executable);
#else
static const size_t pageSize = sysconf(_SC_PAGESIZE);
const size_t iaddr = reinterpret_cast<size_t>(base);
const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif
}
#endif
} // anonymous namespace
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
: fp_emitter(this)
, cb(std::move(cb))
, jsi(jsi)
, constant_pool(*this) {
AllocCodeSpace(TOTAL_CODE_SIZE);
EnableWriting();
GenRunCode();
}
void BlockOfCode::PreludeComplete() {
prelude_complete = true;
near_code_begin = GetCodePtr();
far_code_begin = GetCodePtr() + FAR_CODE_OFFSET;
FlushIcache();
ClearCache();
DisableWriting();
}
void BlockOfCode::EnableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, false);
#endif
}
void BlockOfCode::DisableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, true);
#endif
}
void BlockOfCode::ClearCache() {
ASSERT(prelude_complete);
in_far_code = false;
near_code_ptr = near_code_begin;
far_code_ptr = far_code_begin;
SetCodePtr(near_code_begin);
constant_pool.Clear();
}
size_t BlockOfCode::SpaceRemaining() const {
ASSERT(prelude_complete);
// This function provides an underestimate of near-code-size but that's okay.
// (Why? The maximum size of near code should be measured from near_code_begin, not from the start of the region.)
// These offsets are measured from the start of the code region.
std::size_t far_code_offset, near_code_offset;
if (in_far_code) {
near_code_offset = static_cast<const u8*>(near_code_ptr) - static_cast<const u8*>(region);
far_code_offset = GetCodePtr() - static_cast<const u8*>(region);
} else {
near_code_offset = GetCodePtr() - static_cast<const u8*>(region);
far_code_offset = static_cast<const u8*>(far_code_ptr) - static_cast<const u8*>(region);
}
if (far_code_offset > TOTAL_CODE_SIZE)
return 0;
if (near_code_offset > FAR_CODE_OFFSET)
return 0;
return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
}
void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
run_code(jit_state, code_ptr);
}
void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
step_code(jit_state, code_ptr);
}
void BlockOfCode::ReturnFromRunCode(bool fpscr_already_exited) {
size_t index = 0;
if (fpscr_already_exited)
index |= FPSCR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
void BlockOfCode::ForceReturnFromRunCode(bool fpscr_already_exited) {
size_t index = FORCE_RETURN;
if (fpscr_already_exited)
index |= FPSCR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
void BlockOfCode::GenRunCode() {
const u8* loop, *enter_fpscr_then_loop;
AlignCode16();
run_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
// This serves two purposes:
// 1. It saves all the registers we as a callee need to save.
// 2. It aligns the stack so that the code the JIT emits can assume
// that the stack is appropriately aligned for CALLs.
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOVI2R(Arm64Gen::X27, cb.value_in_X27);
MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
MOV(Arm64Gen::X26, ABI_RETURN);
SwitchFpscrOnEntry();
BR(Arm64Gen::X25);
AlignCode16();
step_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOVI2R(Arm64Gen::X26, 1);
STR(Arm64Gen::INDEX_UNSIGNED, Arm64Gen::X26, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SwitchFpscrOnEntry();
BR(ABI_PARAM2);
enter_fpscr_then_loop = GetCodePtr();
SwitchFpscrOnEntry();
loop = GetCodePtr();
cb.LookupBlock->EmitCall(*this);
BR(ABI_RETURN);
// Return from run code variants
const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){
if (!force_return) {
CMP(Arm64Gen::X26, Arm64Gen::ZR);
B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop);
}
if (!fpscr_already_exited) {
SwitchFpscrOnExit();
}
cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SUB(param[0], param[0], Arm64Gen::X26);
});
ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
RET();
};
return_from_run_code[0] = AlignCode16();
emit_return_from_run_code(false, false);
return_from_run_code[FPSCR_ALREADY_EXITED] = AlignCode16();
emit_return_from_run_code(true, false);
return_from_run_code[FORCE_RETURN] = AlignCode16();
emit_return_from_run_code(false, true);
return_from_run_code[FPSCR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16();
emit_return_from_run_code(true, true);
PerfMapRegister(run_code, GetCodePtr(), "dynarmic_dispatcher");
}
void BlockOfCode::SwitchFpscrOnEntry() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
_MSR(Arm64Gen::FIELD_FPSR, ABI_SCRATCH1);
}
void BlockOfCode::SwitchFpscrOnExit() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPSR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}
void BlockOfCode::UpdateTicks() {
cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SUB(param[0], param[0], Arm64Gen::X26);
});
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
MOV(Arm64Gen::X26, ABI_RETURN);
}
void BlockOfCode::LookupBlock() {
cb.LookupBlock->EmitCall(*this);
}
void BlockOfCode::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
ASSERT_MSG(!in_far_code, "Can't patch when in far code, yet!");
constant_pool.EmitPatchLDR(Rt, lower, upper);
}
void BlockOfCode::PatchConstPool() {
constant_pool.PatchPool();
}
void BlockOfCode::SwitchToFarCode() {
ASSERT(prelude_complete);
ASSERT(!in_far_code);
in_far_code = true;
near_code_ptr = GetCodePtr();
SetCodePtr(far_code_ptr);
ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}
void BlockOfCode::SwitchToNearCode() {
ASSERT(prelude_complete);
ASSERT(in_far_code);
in_far_code = false;
far_code_ptr = GetCodePtr();
SetCodePtr(near_code_ptr);
}
CodePtr BlockOfCode::GetCodeBegin() const {
return near_code_begin;
}
u8* BlockOfCode::GetRegion() const {
return region;
}
std::size_t BlockOfCode::GetRegionSize() const {
return total_region_size;
}
void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");
void* ret = GetWritableCodePtr();
region_size += alloc_size;
SetCodePtr(GetCodePtr() + alloc_size);
memset(ret, 0, alloc_size);
return ret;
}
void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
u8* ptr = const_cast<u8*>(reinterpret_cast<const u8*>(code_ptr));
ARM64XEmitter::SetCodePtr(ptr);
}
void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
size_t current_size = GetCodePtr() - reinterpret_cast<const u8*>(begin);
ASSERT(current_size <= size);
for (u32 i = 0; i < (size - current_size) / 4; i++) {
HINT(Arm64Gen::HINT_NOP);
}
}
//bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const {
//#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
// return cpu_info.has(type);
//#else
// (void)type;
// return false;
//#endif
//}
} // namespace Dynarmic::BackendA64
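The near/far split above keeps rarely executed paths about 100 MiB away from the hot path: near code grows from the start of the region up to FAR_CODE_OFFSET, far code grows from FAR_CODE_OFFSET to the end of the region, and SwitchToFarCode/SwitchToNearCode swap between the two write cursors. A standalone sketch of the space accounting this layout implies, mirroring SpaceRemaining above:

#include <algorithm>
#include <cstddef>

constexpr std::size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr std::size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;

// near_offset/far_offset are byte offsets of the two write cursors from the
// start of the code region, as in BlockOfCode::SpaceRemaining above.
std::size_t SpaceRemaining(std::size_t near_offset, std::size_t far_offset) {
    if (far_offset > TOTAL_CODE_SIZE || near_offset > FAR_CODE_OFFSET)
        return 0;
    return std::min(TOTAL_CODE_SIZE - far_offset, FAR_CODE_OFFSET - near_offset);
}

// e.g. SpaceRemaining(0, FAR_CODE_OFFSET) == 28 MiB: a fresh region is limited
// by the 28 MiB reserved for far code, not by the 100 MiB of near space.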

View File

@@ -0,0 +1,147 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <type_traits>
#include "backend/A64/callback.h"
#include "backend/A64/constant_pool.h"
#include "backend/A64/jitstate_info.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using CodePtr = const void*;
struct RunCodeCallbacks {
std::unique_ptr<Callback> LookupBlock;
std::unique_ptr<Callback> AddTicks;
std::unique_ptr<Callback> GetTicksRemaining;
u64 value_in_X27;
};
class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
public:
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
BlockOfCode(const BlockOfCode&) = delete;
/// Call when external emitters have finished emitting their preludes.
void PreludeComplete();
/// Change permissions to RW. This is required to support systems with W^X enforced.
void EnableWriting();
/// Change permissions to RX. This is required to support systems with W^X enforced.
void DisableWriting();
/// Clears this block of code and resets code pointer to beginning.
void ClearCache();
/// Calculates how much space is remaining to use. This is the minimum of near code and far code.
size_t SpaceRemaining() const;
/// Runs emulated code from code_ptr.
void RunCode(void* jit_state, CodePtr code_ptr) const;
/// Runs emulated code from code_ptr for a single cycle.
void StepCode(void* jit_state, CodePtr code_ptr) const;
/// Code emitter: Returns to dispatcher
void ReturnFromRunCode(bool fpscr_already_exited = false);
/// Code emitter: Returns to dispatcher, forces return to host
void ForceReturnFromRunCode(bool fpscr_already_exited = false);
/// Code emitter: Makes guest FPSR and FPCR the current FPSR and FPCR
void SwitchFpscrOnEntry();
/// Code emitter: Makes saved host FPCR the current FPCR
void SwitchFpscrOnExit();
/// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining
/// @note this clobbers ABI caller-save registers
void UpdateTicks();
/// Code emitter: Performs a block lookup based on current state
/// @note this clobbers ABI caller-save registers
void LookupBlock();
u64 MConst(u64 lower, u64 upper = 0);
void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);
void PatchConstPool();
/// Far code sits far away from the near code. Execution remains primarily in near code.
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
void SwitchToFarCode();
void SwitchToNearCode();
CodePtr GetCodeBegin() const;
u8* GetRegion() const;
std::size_t GetRegionSize() const;
const void* GetReturnFromRunCodeAddress() const {
return return_from_run_code[0];
}
const void* GetForceReturnFromRunCodeAddress() const {
return return_from_run_code[FORCE_RETURN];
}
/// Allocate memory of `size` bytes from the same block of memory the code is in.
/// This is useful for objects that need to be placed close to or within code.
/// The lifetime of this memory is the same as the code around it.
void* AllocateFromCodeSpace(size_t size);
void SetCodePtr(CodePtr code_ptr);
void EnsurePatchLocationSize(CodePtr begin, size_t size);
Arm64Gen::ARM64FloatEmitter fp_emitter;
// ABI registers
static const Arm64Gen::ARM64Reg ABI_RETURN;
static const Arm64Gen::ARM64Reg ABI_RETURN2;
static const Arm64Gen::ARM64Reg ABI_PARAM1;
static const Arm64Gen::ARM64Reg ABI_PARAM2;
static const Arm64Gen::ARM64Reg ABI_PARAM3;
static const Arm64Gen::ARM64Reg ABI_PARAM4;
static const Arm64Gen::ARM64Reg ABI_PARAM5;
static const Arm64Gen::ARM64Reg ABI_PARAM6;
static const Arm64Gen::ARM64Reg ABI_PARAM7;
static const Arm64Gen::ARM64Reg ABI_PARAM8;
static const Arm64Gen::ARM64Reg ABI_SCRATCH1;
static const std::array<Arm64Gen::ARM64Reg, 8> ABI_PARAMS;
// bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;
JitStateInfo GetJitStateInfo() const { return jsi; }
private:
RunCodeCallbacks cb;
JitStateInfo jsi;
bool prelude_complete = false;
CodePtr near_code_begin;
CodePtr far_code_begin;
ConstantPool constant_pool;
bool in_far_code = false;
CodePtr near_code_ptr;
CodePtr far_code_ptr;
using RunCodeFuncType = void(*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr;
RunCodeFuncType step_code = nullptr;
static constexpr size_t FPSCR_ALREADY_EXITED = 1 << 0;
static constexpr size_t FORCE_RETURN = 1 << 1;
std::array<const void*, 4> return_from_run_code;
void GenRunCode();
//Xbyak::util::Cpu cpu_info;
};
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,45 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "backend/A64/block_range_information.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
}
template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
block_ranges.clear();
}
template <typename ProgramCounterType>
std::unordered_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
std::unordered_set<IR::LocationDescriptor> erase_locations;
for (auto invalidate_interval : ranges) {
auto pair = block_ranges.equal_range(invalidate_interval);
for (auto it = pair.first; it != pair.second; ++it) {
for (const auto &descriptor : it->second) {
erase_locations.insert(descriptor);
}
}
}
// TODO: EFFICIENCY: Remove ranges that are to be erased.
return erase_locations;
}
template class BlockRangeInformation<u32>;
template class BlockRangeInformation<u64>;
} // namespace Dynarmic::BackendA64
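BlockRangeInformation maps guest address intervals to the set of blocks that touch them: boost::icl's interval_map aggregates overlapping insertions by set union, and equal_range over an invalidation interval returns every overlapping segment, which is what InvalidateRanges walks above. A standalone sketch with plain integers standing in for IR::LocationDescriptor:

#include <cstdint>
#include <set>
#include <utility>
#include <boost/icl/interval_map.hpp>

int main() {
    boost::icl::interval_map<std::uint32_t, std::set<int>> block_ranges;

    // Block 1 covers [0x1000, 0x1007], block 2 covers [0x1004, 0x100B]; they overlap.
    block_ranges.add(std::make_pair(boost::icl::discrete_interval<std::uint32_t>::closed(0x1000, 0x1007), std::set<int>{1}));
    block_ranges.add(std::make_pair(boost::icl::discrete_interval<std::uint32_t>::closed(0x1004, 0x100B), std::set<int>{2}));

    // Invalidating the single address 0x1006 must erase both blocks.
    std::set<int> erase_locations;
    const auto pair = block_ranges.equal_range(boost::icl::discrete_interval<std::uint32_t>::closed(0x1006, 0x1006));
    for (auto it = pair.first; it != pair.second; ++it)
        for (const int descriptor : it->second)
            erase_locations.insert(descriptor);
    // erase_locations == {1, 2}
}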

View File

@@ -0,0 +1,29 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "frontend/ir/location_descriptor.h"
namespace Dynarmic::BackendA64 {
template <typename ProgramCounterType>
class BlockRangeInformation {
public:
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
void ClearCache();
std::unordered_set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);
private:
boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
};
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,41 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/callback.h"
#include "backend/A64/block_of_code.h"
namespace Dynarmic::BackendA64 {
Callback::~Callback() = default;
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
l({code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.QuickCallFunction(fn);
}
void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
l(code.ABI_PARAM1, {code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.QuickCallFunction(fn);
}
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
l({code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM1, arg);
code.QuickCallFunction(fn);
}
void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
#if defined(WIN32) && !defined(__MINGW64__)
l(code.ABI_PARAM2, {code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM1, arg);
#else
l(code.ABI_PARAM1, {code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM2, arg);
#endif
code.QuickCallFunction(fn);
}
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,54 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <functional>
#include <vector>
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using RegList = std::vector<Arm64Gen::ARM64Reg>;
class BlockOfCode;
class Callback {
public:
virtual ~Callback();
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const = 0;
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const = 0;
};
class SimpleCallback final : public Callback {
public:
template <typename Function>
SimpleCallback(Function fn) : fn(reinterpret_cast<void (*)()>(fn)) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;
private:
void (*fn)();
};
class ArgCallback final : public Callback {
public:
template <typename Function>
ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;
private:
void (*fn)();
u64 arg;
};
} // namespace Dynarmic::BackendA64
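A hedged sketch of how an emitter calls a host function through this interface: the lambda receives the parameter registers so the caller can load arguments just before the call is emitted. LogValue and EmitLogValue are hypothetical names; SimpleCallback, EmitCall, RegList, MOVI2R and ABI_PARAM1 are the pieces declared above and used throughout block_of_code.cpp:

#include <cstdint>
#include <cstdio>

#include "backend/A64/block_of_code.h"
#include "backend/A64/callback.h"

namespace Dynarmic::BackendA64 {

static void LogValue(std::uint64_t value) {
    std::printf("value = %llu\n", static_cast<unsigned long long>(value));
}

void EmitLogValue(BlockOfCode& code, std::uint64_t value) {
    SimpleCallback callback{&LogValue};
    callback.EmitCall(code, [&](RegList params) {
        code.MOVI2R(params[0], value);  // params[0] is ABI_PARAM1 (X0)
    });
}

}  // namespace Dynarmic::BackendA64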

View File

@@ -0,0 +1,65 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstring>
#include "backend/A64/block_of_code.h"
#include "backend/A64/constant_pool.h"
#include "common/assert.h"
namespace Dynarmic::BackendA64 {
ConstantPool::ConstantPool(BlockOfCode& code) : code(code) {}
void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
const auto constant = std::make_tuple(lower, upper);
auto iter = constant_info.find(constant);
if (iter == constant_info.end()) {
struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
patch_info.emplace_back(p);
code.BRK(0);
return;
}
const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());
if (!(offset >= -0x40000 && offset <= 0x3FFFF)) {
constant_info.erase(constant);
struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
patch_info.emplace_back(p);
code.BRK(0x42);
return;
}
DEBUG_ASSERT((offset & 3) == 0);
code.LDR(Rt, offset / 4);
}
void ConstantPool::PatchPool() {
u8* pool_ptr = code.GetWritableCodePtr();
for (PatchInfo patch : patch_info) {
auto iter = constant_info.find(patch.constant);
if (iter == constant_info.end()) {
std::memcpy(pool_ptr, &std::get<0>(patch.constant), sizeof(u64));
std::memcpy(pool_ptr + sizeof(u64), &std::get<1>(patch.constant), sizeof(u64));
iter = constant_info.emplace(patch.constant, pool_ptr).first;
pool_ptr += align_size;
}
code.SetCodePtr(patch.ptr);
const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());
DEBUG_ASSERT((offset & 3) == 0);
code.LDR(patch.Rt, offset / 4);
}
patch_info.clear();
code.SetCodePtr(pool_ptr);
}
void ConstantPool::Clear() {
constant_info.clear();
patch_info.clear();
}
} // namespace Dynarmic::BackendA64
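In terms of usage, EmitPatchLDR either emits a PC-relative LDR (literal) immediately, when the constant is already pooled and within range, or emits a BRK placeholder and records it; PatchPool then appends the 16-byte pool entries after the code and rewrites each placeholder into the real literal load. A hedged usage sketch from an emitter's point of view (where PatchPool is actually invoked is not part of this diff):

// Hypothetical helper inside the backend (assumes the repo's include paths):
#include "backend/A64/block_of_code.h"

namespace Dynarmic::BackendA64 {

void EmitLoadBigConstants(BlockOfCode& code) {
    code.EmitPatchLDR(Arm64Gen::X0, 0x0123456789ABCDEFull);  // may emit a BRK placeholder
    code.EmitPatchLDR(Arm64Gen::X1, 0x0123456789ABCDEFull);  // same constant, so it patches to the same pool slot
    // ... emit the rest of the block ...
    code.PatchConstPool();  // lay down the pool entries and turn placeholders into LDR (literal)
}

}  // namespace Dynarmic::BackendA64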

View File

@@ -0,0 +1,47 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <map>
#include <tuple>
#include <vector>
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
/// ConstantPool allocates a block of memory from BlockOfCode.
/// It places constants into this block of memory, returning the address
/// of the memory location where the constant is placed. If the constant
/// already exists, its memory location is reused.
class ConstantPool final {
public:
ConstantPool(BlockOfCode& code);
void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);
void PatchPool();
void Clear();
private:
static constexpr size_t align_size = 16; // bytes
std::map<std::tuple<u64, u64>, void*> constant_info;
BlockOfCode& code;
struct PatchInfo {
const void* ptr;
Arm64Gen::ARM64Reg Rt;
std::tuple<u64, u64> constant;
};
std::vector<PatchInfo> patch_info;
};
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,77 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstring>
#include <memory>
#include <mp/traits/function_info.h>
#include "backend/A64/callback.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace impl {
template <typename FunctionType, FunctionType mfp>
struct ThunkBuilder;
template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
struct ThunkBuilder<R(C::*)(Args...), mfp> {
static R Thunk(C* this_, Args... args) {
return (this_->*mfp)(std::forward<Args>(args)...);
}
};
} // namespace impl
template<auto mfp>
ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) {
return ArgCallback{&impl::ThunkBuilder<decltype(mfp), mfp>::Thunk, reinterpret_cast<u64>(this_)};
}
template<auto mfp>
ArgCallback DevirtualizeWindows(mp::class_type<decltype(mfp)>* this_) {
static_assert(sizeof(mfp) == 8);
return ArgCallback{Common::BitCast<u64>(mfp), reinterpret_cast<u64>(this_)};
}
template<auto mfp>
ArgCallback DevirtualizeAarch64(mp::class_type<decltype(mfp)>* this_) {
struct MemberFunctionPointer {
/// For a non-virtual function, this is a simple function pointer.
/// For a virtual function, it is virtual table offset in bytes.
u64 ptr;
/// Twice the required adjustment to `this`, plus 1 if the member function is virtual.
u64 adj;
} mfp_struct = Common::BitCast<MemberFunctionPointer>(mfp);
static_assert(sizeof(MemberFunctionPointer) == 16);
static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));
u64 fn_ptr = mfp_struct.ptr;
u64 this_ptr = reinterpret_cast<u64>(this_) + mfp_struct.adj / 2;
if (mfp_struct.adj & 1) {
u64 vtable = Common::BitCastPointee<u64>(this_ptr);
fn_ptr = Common::BitCastPointee<u64>(vtable + fn_ptr);
}
return ArgCallback{fn_ptr, this_ptr};
}
template<auto mfp>
ArgCallback Devirtualize(mp::class_type<decltype(mfp)>* this_) {
#if defined(linux) || defined(__linux) || defined(__linux__)
return DevirtualizeAarch64<mfp>(this_);
#else
return DevirtualizeGeneric<mfp>(this_);
#endif
}
} // namespace Dynarmic::BackendA64
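As a hedged illustration of the member-function-pointer layout documented above (the ExampleCallbacks type and MakeReadCallbackSketch helper are hypothetical, not part of this commit):

namespace Dynarmic::BackendA64 {
// Sketch only: shows what Devirtualize yields for a virtual member function.
struct ExampleCallbacks {
    virtual ~ExampleCallbacks() = default;
    virtual u32 MemoryRead32(u32 vaddr) = 0;
};

inline ArgCallback MakeReadCallbackSketch(ExampleCallbacks* callbacks) {
    // Under the AArch64 Itanium ABI path, &ExampleCallbacks::MemoryRead32
    // decodes to {vtable offset, adj | 1}; Devirtualize follows the vtable to
    // recover a plain code pointer plus the adjusted `this` for a direct call.
    return Devirtualize<&ExampleCallbacks::MemoryRead32>(callbacks);
}
} // namespace Dynarmic::BackendA64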

View File

@ -0,0 +1,286 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_map>
#include <unordered_set>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/hostloc.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "common/variant_util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
namespace Dynarmic::BackendA64 {
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
: reg_alloc(reg_alloc), block(block) {}
void EmitContext::EraseInstruction(IR::Inst* inst) {
block.Instructions().erase(inst);
inst->ClearArgs();
}
EmitA64::EmitA64(BlockOfCode& code)
: code(code) {}
EmitA64::~EmitA64() = default;
std::optional<typename EmitA64::BlockDescriptor> EmitA64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
auto iter = block_descriptors.find(descriptor);
if (iter == block_descriptors.end())
return std::nullopt;
return iter->second;
}
void EmitA64::EmitVoid(EmitContext&, IR::Inst*) {
}
void EmitA64::EmitBreakpoint(EmitContext&, IR::Inst*) {
code.BRK(0);
}
void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (!args[0].IsImmediate()) {
ctx.reg_alloc.DefineValue(inst, args[0]);
}
}
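// PushRSBHelper writes one entry into the return stack buffer (RSB) held in
// the JIT state: it records the target's location descriptor alongside its
// host code pointer (or the return-from-run-code thunk if the target has not
// been compiled yet), then advances the ring-buffer index modulo its mask.
// The MOV into X0 is recorded in patch_information so it can be retargeted
// when the target block is later compiled or invalidated.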
void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) {
auto iter = block_descriptors.find(target);
CodePtr target_code_ptr = iter != block_descriptors.end()
? iter->second.entrypoint
: code.GetReturnFromRunCodeAddress();
code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
code.MOVI2R(loc_desc_reg, target.Value());
patch_information[target].mov_x0.emplace_back(code.GetCodePtr());
EmitPatchMovX0(target_code_ptr);
code.ADD(code.ABI_SCRATCH1, X28, DecodeReg(index_reg), ArithOption{index_reg, ST_LSL, 3});
code.STR(INDEX_UNSIGNED, loc_desc_reg, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_location_descriptors);
code.STR(INDEX_UNSIGNED, X0, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_codeptrs);
code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1);
code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1);
code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
}
void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
u64 unique_hash_of_target = args[0].GetImmediateU64();
ctx.reg_alloc.ScratchGpr({HostLoc::X0});
Arm64Gen::ARM64Reg loc_desc_reg = ctx.reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg index_reg = ctx.reg_alloc.ScratchGpr();
PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}
void EmitA64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
ASSERT_FALSE("should never happen");
}
void EmitA64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg value = ctx.reg_alloc.UseGpr(args[0]);
code.CMP(value, ZR);
code.MRS(nzcv, FIELD_NZCV);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
u32 value = 0;
value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
code.MOVI2R(nzcv, value);
ctx.reg_alloc.DefineValue(inst, nzcv);
} else {
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
Arm64Gen::ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
// TODO: Optimize
code.LSR(nzcv, nzcv, 28);
code.MOVI2R(scratch, 0b00010000'10000001);
code.MUL(nzcv, nzcv, scratch);
code.ANDI2R(nzcv, nzcv, 1, scratch);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
}
void EmitA64::EmitAddCycles(size_t cycles) {
ASSERT(cycles < std::numeric_limits<u32>::max());
code.SUBI2R(X26, X26, static_cast<u32>(cycles));
}
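// EmitCond reloads the guest NZCV from the JIT state into the host flags via
// MSR, then emits a conditional branch that is taken when `cond` holds; the
// caller resolves the returned FixupBranch once the destination is known.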
FixupBranch EmitA64::EmitCond(IR::Cond cond) {
FixupBranch label;
const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1;
code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_cpsr_nzcv);
code._MSR(FIELD_NZCV, cpsr);
switch (cond) {
case IR::Cond::EQ: //z
label = code.B(CC_EQ);
break;
case IR::Cond::NE: //!z
label = code.B(CC_NEQ);
break;
case IR::Cond::CS: //c
label = code.B(CC_CS);
break;
case IR::Cond::CC: //!c
label = code.B(CC_CC);
break;
case IR::Cond::MI: //n
label = code.B(CC_MI);
break;
case IR::Cond::PL: //!n
label = code.B(CC_PL);
break;
case IR::Cond::VS: //v
label = code.B(CC_VS);
break;
case IR::Cond::VC: //!v
label = code.B(CC_VC);
break;
case IR::Cond::HI: //c & !z
label = code.B(CC_HI);
break;
case IR::Cond::LS: //!c | z
label = code.B(CC_LS);
break;
case IR::Cond::GE: // n == v
label = code.B(CC_GE);
break;
case IR::Cond::LT: // n != v
label = code.B(CC_LT);
break;
case IR::Cond::GT: // !z & (n == v)
label = code.B(CC_GT);
break;
case IR::Cond::LE: // z | (n != v)
label = code.B(CC_LE);
break;
default:
ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
break;
}
return label;
}
EmitA64::BlockDescriptor EmitA64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
PerfMapRegister(entrypoint, code.GetCodePtr(), LocationDescriptorToFriendlyName(descriptor));
Patch(descriptor, entrypoint);
BlockDescriptor block_desc{entrypoint, size};
block_descriptors.emplace(descriptor.Value(), block_desc);
return block_desc;
}
void EmitA64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
using T = std::decay_t<decltype(x)>;
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
this->EmitTerminalImpl(x, initial_location, is_single_step);
} else {
ASSERT_MSG(false, "Invalid terminal");
}
});
}
void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
const CodePtr save_code_ptr = code.GetCodePtr();
const PatchInformation& patch_info = patch_information[desc];
for (CodePtr location : patch_info.jg) {
code.SetCodePtr(location);
EmitPatchJg(desc, bb);
code.FlushIcache();
}
for (CodePtr location : patch_info.jmp) {
code.SetCodePtr(location);
EmitPatchJmp(desc, bb);
code.FlushIcache();
}
for (CodePtr location : patch_info.mov_x0) {
code.SetCodePtr(location);
EmitPatchMovX0(bb);
code.FlushIcache();
}
code.SetCodePtr(save_code_ptr);
}
void EmitA64::Unpatch(const IR::LocationDescriptor& desc) {
Patch(desc, nullptr);
}
void EmitA64::ClearCache() {
block_descriptors.clear();
patch_information.clear();
PerfMapClear();
}
void EmitA64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); };
for (const auto &descriptor : locations) {
auto it = block_descriptors.find(descriptor);
if (it == block_descriptors.end()) {
continue;
}
if (patch_information.count(descriptor)) {
Unpatch(descriptor);
}
block_descriptors.erase(it);
}
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,124 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "backend/A64/reg_alloc.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/bit_util.h"
#include "common/fp/rounding_mode.h"
#include "frontend/ir/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::IR {
class Block;
class Inst;
} // namespace Dynarmic::IR
namespace Dynarmic::BackendA64 {
class BlockOfCode;
using namespace Arm64Gen;
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
struct EmitContext {
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
void EraseInstruction(IR::Inst* inst);
virtual FP::RoundingMode FPSCR_RMode() const = 0;
virtual u32 FPCR() const = 0;
virtual bool FPSCR_FTZ() const = 0;
virtual bool FPSCR_DN() const = 0;
virtual bool AccurateNaN() const { return true; }
RegAlloc& reg_alloc;
IR::Block& block;
};
class EmitA64 {
public:
struct BlockDescriptor {
CodePtr entrypoint; // Entrypoint of emitted code
size_t size; // Length in bytes of emitted code
};
EmitA64(BlockOfCode& code);
virtual ~EmitA64();
/// Looks up an emitted host block in the cache.
std::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;
/// Empties the entire cache.
virtual void ClearCache();
/// Invalidates a selection of basic blocks.
void InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations);
protected:
// Microinstruction emitters
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
#define A32OPC(...)
#define A64OPC(...)
#include "backend/A64/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
// Helpers
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
void EmitAddCycles(size_t cycles);
FixupBranch EmitCond(IR::Cond cond);
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
void PushRSBHelper(Arm64Gen::ARM64Reg loc_desc_reg, Arm64Gen::ARM64Reg index_reg, IR::LocationDescriptor target);
// Terminal instruction emitters
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
// Patching
struct PatchInformation {
std::vector<CodePtr> jg;
std::vector<CodePtr> jmp;
std::vector<CodePtr> mov_x0;
};
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
virtual void Unpatch(const IR::LocationDescriptor& target_desc);
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0;
// State
BlockOfCode& code;
std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
};
} // namespace Dynarmic::BackendA64

File diff suppressed because it is too large

View File

@ -0,0 +1,471 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <optional>
#include <type_traits>
#include <utility>
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op.h"
#include "common/fp/rounding_mode.h"
#include "common/fp/util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
namespace {
Arm64Gen::RoundingMode ConvertRoundingModeToA64RoundingMode(FP::RoundingMode rounding_mode) {
switch (rounding_mode) {
case FP::RoundingMode::ToNearest_TieEven:
return RoundingMode::ROUND_N;
case FP::RoundingMode::TowardsPlusInfinity:
return RoundingMode::ROUND_P;
case FP::RoundingMode::TowardsMinusInfinity:
return RoundingMode::ROUND_M;
case FP::RoundingMode::TowardsZero:
return RoundingMode::ROUND_Z;
case FP::RoundingMode::ToNearest_TieAwayFromZero:
return RoundingMode::ROUND_A;
default:
UNREACHABLE();
}
}
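// FPTwoOp/FPThreeOp factor out the common handler shape: fetch the arguments,
// narrow the register views to S or D according to fsize, invoke either an
// ARM64FloatEmitter member function or a plain callable, and define the first
// operand's register as the result.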
template <size_t fsize, typename Function>
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.fp_emitter.*fn)(result, result);
} else {
fn(result);
}
ctx.reg_alloc.DefineValue(inst, result);
}
template <size_t fsize, typename Function>
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(args[1]);
result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
operand = fsize == 32 ? EncodeRegToSingle(operand) : EncodeRegToDouble(operand);
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.fp_emitter.*fn)(result, result, operand);
}
else {
fn(result, result, operand);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // anonymous namespace
//void EmitA64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
//
// code.pand(result, code.MConst(xword, f16_non_sign_mask));
//
// ctx.reg_alloc.DefineValue(inst, result);
//}
void EmitA64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FABS(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FABS(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
//void EmitA64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
//
// code.pxor(result, code.MConst(xword, f16_negative_zero));
//
// ctx.reg_alloc.DefineValue(inst, result);
//}
void EmitA64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FNEG(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FNEG(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}
void EmitA64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}
void EmitA64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}
void EmitA64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}
void EmitA64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}
void EmitA64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}
void EmitA64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
FPTwoOp<32>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
}
void EmitA64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) {
FPTwoOp<64>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
}
void EmitA64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
void EmitA64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
static ARM64Reg SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
// FCMP/FCMPE write the comparison result directly to the integer NZCV flags; read them back with MRS.
code.MRS(nzcv, FIELD_NZCV);
return nzcv;
}
void EmitA64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg reg_a = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[0]));
ARM64Reg reg_b = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[1]));
bool exc_on_qnan = args[2].GetImmediateU1();
if (exc_on_qnan) {
code.fp_emitter.FCMPE(reg_a, reg_b);
} else {
code.fp_emitter.FCMP(reg_a, reg_b);
}
ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg reg_a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[0]));
const ARM64Reg reg_b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
bool exc_on_qnan = args[2].GetImmediateU1();
if (exc_on_qnan) {
code.fp_emitter.FCMPE(reg_a, reg_b);
} else {
code.fp_emitter.FCMP(reg_a, reg_b);
}
ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(64, 16, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(32, 16, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(64, 32, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(16, 32, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(16, 64, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(32, 64, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
template<size_t fsize, bool unsigned_, size_t isize>
static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t fbits = args[1].GetImmediateU8();
const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const auto round_imm = ConvertRoundingModeToA64RoundingMode(rounding_mode);
ASSERT_MSG(fbits == 0, "fixed point conversions are not supported yet");
ARM64Reg src = ctx.reg_alloc.UseScratchFpr(args[0]);
ARM64Reg result = ctx.reg_alloc.ScratchGpr();
src = fsize == 64 ? EncodeRegToDouble(src) : EncodeRegToSingle(src);
result = isize == 64 ? result : DecodeReg(result);
if constexpr (unsigned_) {
code.fp_emitter.FCVTU(result, src, round_imm);
}
else {
code.fp_emitter.FCVTS(result, src, round_imm);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, false, 32>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, false, 64>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, true, 32>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, true, 64>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, false, 32>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, false, 64>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, true, 32>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, true, 64>(code, ctx, inst);
}
void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,469 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.ADD(B, sum, sum, b);
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHI(B, ge, b, sum);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
ctx.reg_alloc.DefineValue(inst, sum);
}
void EmitA64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQADD(B, ge, a, b);
code.fp_emitter.CMGE_zero(B, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.ADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.ADD(H, sum, sum, b);
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHI(H, ge, b, sum);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
ctx.reg_alloc.DefineValue(inst, sum);
}
void EmitA64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQADD(H, ge, a, b);
code.fp_emitter.CMGE_zero(H, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.ADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHS(B, ge, a, b);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQSUB(B, ge, a, b);
code.fp_emitter.CMGE_zero(B, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHS(H, ge, a, b);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQSUB(H, ge, a, b);
code.fp_emitter.CMGE_zero(H, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
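// Shared helper for the packed add/sub-exchange (ASX/SAX) family: both
// operands are split into halfwords, one cross pair is added and the other
// subtracted. hi_is_sum selects which result goes to the high halfword,
// is_halving shifts both results right by one, and the optional GE pseudo-op
// receives per-halfword masks derived from the intermediate sum and difference.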
void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg reg_a_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
const ARM64Reg reg_b_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[1]));
const ARM64Reg reg_a_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_b_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
ARM64Reg reg_sum, reg_diff;
if (is_signed) {
code.SXTH(reg_a_lo, reg_a_hi);
code.SXTH(reg_b_lo, reg_b_hi);
code.ASR(reg_a_hi, reg_a_hi, 16);
code.ASR(reg_b_hi, reg_b_hi, 16);
} else {
code.UXTH(reg_a_lo, reg_a_hi);
code.UXTH(reg_b_lo, reg_b_hi);
code.LSR(reg_a_hi, reg_a_hi, 16);
code.LSR(reg_b_hi, reg_b_hi, 16);
}
if (hi_is_sum) {
code.SUB(reg_a_lo, reg_a_lo, reg_b_hi);
code.ADD(reg_a_hi, reg_a_hi, reg_b_lo);
reg_diff = reg_a_lo;
reg_sum = reg_a_hi;
} else {
code.ADD(reg_a_lo, reg_a_lo, reg_b_hi);
code.SUB(reg_a_hi, reg_a_hi, reg_b_lo);
reg_diff = reg_a_hi;
reg_sum = reg_a_lo;
}
if (ge_inst) {
// The reg_b registers are no longer required.
const ARM64Reg ge_sum = reg_b_hi;
const ARM64Reg ge_diff = reg_b_lo;
if (!is_signed) {
code.LSL(ge_sum, reg_sum, 15);
code.ASR(ge_sum, ge_sum, 31);
} else {
code.MVN(ge_sum, reg_sum);
code.ASR(ge_sum, ge_sum, 31);
}
code.MVN(ge_diff, reg_diff);
code.ASR(ge_diff, ge_diff, 31);
code.ANDI2R(ge_sum, ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
code.ANDI2R(ge_diff, ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
code.ORR(ge_sum, ge_sum, ge_diff);
ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
ctx.EraseInstruction(ge_inst);
}
if (is_halving) {
code.LSR(reg_a_hi, reg_a_hi, 1);
code.LSR(reg_a_lo, reg_a_lo, 1);
}
// reg_a_lo now contains the low word and reg_a_hi now contains the high word.
// Merge them.
code.BFM(reg_a_lo, reg_a_hi, 16, 15);
ctx.reg_alloc.DefineValue(inst, reg_a_lo);
}
void EmitA64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, false, false);
}
void EmitA64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, true, false);
}
void EmitA64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, false, false);
}
void EmitA64::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, true, false);
}
void EmitA64::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, false, true);
}
void EmitA64::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, true, true);
}
void EmitA64::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, false, true);
}
void EmitA64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, true, true);
}
void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UABD(B, a, a, b);
code.fp_emitter.UADDLV(B, a, a);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[2]));
code.fp_emitter.BSL(ge, b, a);
ctx.reg_alloc.DefineValue(inst, ge);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <limits>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
namespace {
enum class Op {
Add,
Sub,
};
template<Op op, size_t size>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
ARM64Reg addend = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if constexpr (op == Op::Add) {
code.fp_emitter.SQADD(size, result, result, addend);
}
else {
code.fp_emitter.SQSUB(size, result, result, addend);
}
if (overflow_inst) {
ARM64Reg overflow = ctx.reg_alloc.ScratchGpr();
code.MRS(overflow, FIELD_FPSR);
code.UBFX(overflow, overflow, 27, 1);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // anonymous namespace
void EmitA64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
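// Saturates the signed input to N bits (1 <= N <= 32). Adding 1 << (N - 1)
// maps every in-range value into [0, 2^N - 1], so a single unsigned compare
// against that mask decides between keeping the original value and selecting
// the precomputed positive/negative saturation bound; the optional overflow
// pseudo-op is set whenever clamping occurred.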
void EmitA64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N >= 1 && N <= 32);
if (N == 32) {
if (overflow_inst) {
const auto no_overflow = IR::Value(false);
overflow_inst->ReplaceUsesWith(no_overflow);
}
ctx.reg_alloc.DefineValue(inst, args[0]);
return;
}
const u32 mask = (1u << N) - 1;
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
const u32 negative_saturated_value = 1u << (N - 1);
const u32 sext_negative_saturated_value = Common::SignExtend(N, negative_saturated_value);
const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg tmp = DecodeReg(ctx.reg_alloc.ScratchGpr());
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
code.ADDI2R(overflow, reg_a, negative_saturated_value, overflow);
// Put the appropriate saturated value in result
code.MOVI2R(tmp, positive_saturated_value);
code.CMP(reg_a, tmp);
code.MOVI2R(result, sext_negative_saturated_value);
code.CSEL(result, tmp, result, CC_GT);
// Do the saturation
code.CMPI2R(overflow, mask, tmp);
code.CSEL(result, reg_a, result, CC_LS);
if (overflow_inst) {
code.CSET(overflow, CC_HI);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N <= 31);
const u32 saturated_value = (1u << N) - 1;
const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
code.MOVI2R(result, saturated_value);
code.CMP(reg_a, result);
code.CSEL(result, WZR, result, CC_LE);
code.CSEL(result, reg_a, result, CC_LS);
if (overflow_inst) {
code.CSET(overflow, CC_HI);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendA64

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,28 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
enum CCFlags {
CC_EQ = 0, // Equal
CC_NEQ, // Not equal
CC_CS, // Carry Set
CC_CC, // Carry Clear
CC_MI, // Minus (Negative)
CC_PL, // Plus
CC_VS, // Overflow
CC_VC, // No Overflow
CC_HI, // Unsigned higher
CC_LS, // Unsigned lower or same
CC_GE, // Signed greater than or equal
CC_LT, // Signed less than
CC_GT, // Signed greater than
CC_LE, // Signed less than or equal
CC_AL, // Always (unconditional) 14
CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same
CC_LO = CC_CC, // Alias of CC_CC Unsigned lower
};
const u32 NO_COND = 0xE0000000;
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,139 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions
// without having to prefix them with gen-> or something similar. Example
// implementation: class JIT : public CodeBlock<ARMXEmitter> {}
template <class T>
class CodeBlock : public T {
private:
// A privately used function to set the executable RAM space to something
// invalid. For debugging usefulness it should be used to set the RAM to a
// host specific breakpoint instruction
virtual void PoisonMemory() = 0;
protected:
u8* region = nullptr;
// Size of region we can use.
size_t region_size = 0;
// Original size of the region we allocated.
size_t total_region_size = 0;
bool m_is_child = false;
std::vector<CodeBlock*> m_children;
public:
CodeBlock() = default;
virtual ~CodeBlock() {
if (region)
FreeCodeSpace();
}
CodeBlock(const CodeBlock&) = delete;
CodeBlock& operator=(const CodeBlock&) = delete;
CodeBlock(CodeBlock&&) = delete;
CodeBlock& operator=(CodeBlock&&) = delete;
// Call this before you generate any code.
void AllocCodeSpace(size_t size) {
region_size = size;
total_region_size = size;
#if defined(_WIN32)
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
#if defined(__APPLE__)
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
#else
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif
if (ptr == MAP_FAILED)
ptr = nullptr;
#endif
ASSERT_MSG(ptr != nullptr, "Failed to allocate executable memory");
region = static_cast<u8*>(ptr);
T::SetCodePtr(region);
}
// Always clear code space with breakpoints, so that if someone accidentally
// executes uninitialized, it just breaks into the debugger.
void ClearCodeSpace() {
PoisonMemory();
ResetCodePtr();
}
// Call this when shutting down. Don't rely on the destructor, even though
// it'll do the job.
void FreeCodeSpace() {
ASSERT(!m_is_child);
#ifdef _WIN32
ASSERT(VirtualFree(region, 0, MEM_RELEASE));
#else
ASSERT(munmap(region, total_region_size) == 0);
#endif
region = nullptr;
region_size = 0;
total_region_size = 0;
for (CodeBlock* child : m_children) {
child->region = nullptr;
child->region_size = 0;
child->total_region_size = 0;
}
}
bool IsInSpace(const u8* ptr) const {
return ptr >= region && ptr < (region + region_size);
}
// Cannot currently be undone. Will write protect the entire code region.
// Start over if you need to change the code (call FreeCodeSpace(),
// AllocCodeSpace()).
void WriteProtect() {
ASSERT(mprotect(region, region_size, PROT_READ | PROT_EXEC) == 0);
}
void ResetCodePtr() {
T::SetCodePtr(region);
}
size_t GetSpaceLeft() const {
ASSERT(static_cast<size_t>(T::GetCodePtr() - region) < region_size);
return region_size - (T::GetCodePtr() - region);
}
bool IsAlmostFull() const {
// This should be bigger than the biggest block ever.
return GetSpaceLeft() < 0x10000;
}
bool HasChildren() const {
return region_size != total_region_size;
}
u8* AllocChildCodeSpace(size_t child_size) {
ASSERT_MSG(child_size < GetSpaceLeft(), "Insufficient space for child allocation.");
u8* child_region = region + region_size - child_size;
region_size -= child_size;
return child_region;
}
void AddChildCodeSpace(CodeBlock* child, size_t child_size) {
u8* child_region = AllocChildCodeSpace(child_size);
child->m_is_child = true;
child->region = child_region;
child->region_size = child_size;
child->total_region_size = child_size;
child->ResetCodePtr();
m_children.emplace_back(child);
}
};
} // namespace Dynarmic::BackendA64
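On Apple silicon the MAP_JIT mapping above comes with W^X enforcement: a thread may either write to or execute such pages, not both at once. The BlockOfCode changes that pair with this allocator are in a suppressed diff above, so the following is only a sketch of the usual pattern, on the assumption that its EnableWriting/DisableWriting calls (seen around block invalidation) wrap the per-thread toggle:

// Sketch only: illustrates the macOS W^X toggle, not code from this commit.
#include <pthread.h>

inline void EnableWritingSketch() {
#if defined(__APPLE__) && defined(__aarch64__)
    pthread_jit_write_protect_np(0);  // MAP_JIT pages become writable, not executable
#endif
}

inline void DisableWritingSketch() {
#if defined(__APPLE__) && defined(__aarch64__)
    pthread_jit_write_protect_np(1);  // restore execute permission before running JIT code
#endif
}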

View File

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <functional>
#include "backend/A64/a32_jitstate.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
struct A64State {
std::array<u64, 32> X;
std::array<std::array<u64, 2>, 16> Q;
};
static_assert(sizeof(A64State) == sizeof(A64State::X) + sizeof(A64State::Q));
class ExceptionHandler final {
public:
ExceptionHandler();
~ExceptionHandler();
void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);
bool SupportsFastmem() const;
private:
struct Impl;
std::unique_ptr<Impl> impl;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,25 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/exception_handler.h"
namespace Dynarmic::BackendA64 {
struct ExceptionHandler::Impl final {
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
void ExceptionHandler::Register(BlockOfCode&, std::function<void(CodePtr)>) {
// Do nothing
}
bool ExceptionHandler::SupportsFastmem() const {
return false;
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,166 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <mutex>
#include <vector>
#include <csignal>
#ifdef __APPLE__
#include <sys/ucontext.h>
#else
#include <ucontext.h>
#endif
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/exception_handler.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace {
struct CodeBlockInfo {
BlockOfCode* block;
std::function<void(CodePtr)> callback;
};
class SigHandler {
public:
SigHandler();
~SigHandler();
void AddCodeBlock(CodeBlockInfo info);
void RemoveCodeBlock(CodePtr PC);
private:
auto FindCodeBlockInfo(CodePtr PC) {
return std::find_if(code_block_infos.begin(), code_block_infos.end(),
[&](const CodeBlockInfo& x) { return x.block->GetRegion() <= PC && x.block->GetRegion() + x.block->GetRegionSize() > PC; });
}
std::vector<CodeBlockInfo> code_block_infos;
std::mutex code_block_infos_mutex;
struct sigaction old_sa_segv;
struct sigaction old_sa_bus;
static void SigAction(int sig, siginfo_t* info, void* raw_context);
};
SigHandler sig_handler;
SigHandler::SigHandler() {
// Method below is from Dolphin.
const size_t signal_stack_size = std::max<size_t>(SIGSTKSZ, 2 * 1024 * 1024);
stack_t signal_stack;
signal_stack.ss_sp = malloc(signal_stack_size);
signal_stack.ss_size = signal_stack_size;
signal_stack.ss_flags = 0;
ASSERT_MSG(sigaltstack(&signal_stack, nullptr) == 0,
"dynarmic: POSIX SigHandler: init failure at sigaltstack");
struct sigaction sa;
sa.sa_handler = nullptr;
sa.sa_sigaction = &SigHandler::SigAction;
sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, &old_sa_segv);
#ifdef __APPLE__
sigaction(SIGBUS, &sa, &old_sa_bus);
#endif
}
SigHandler::~SigHandler() {
// No cleanup required.
}
void SigHandler::AddCodeBlock(CodeBlockInfo cb) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
ASSERT(FindCodeBlockInfo(cb.block->GetRegion()) == code_block_infos.end());
code_block_infos.push_back(std::move(cb));
}
void SigHandler::RemoveCodeBlock(CodePtr PC) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
const auto iter = FindCodeBlockInfo(PC);
ASSERT(iter != code_block_infos.end());
code_block_infos.erase(iter);
}
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
ASSERT(sig == SIGSEGV || sig == SIGBUS);
std::lock_guard<std::mutex> guard(sig_handler.code_block_infos_mutex);
#ifdef __APPLE__
auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext->__ss.__pc);
#else
auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext.pc);
#endif
const auto iter = sig_handler.FindCodeBlockInfo(PC);
if (iter != sig_handler.code_block_infos.end()) {
iter->callback(PC);
return;
}
fmt::print(
stderr,
"dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
PC);
struct sigaction* retry_sa =
sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
if (retry_sa->sa_flags & SA_SIGINFO) {
retry_sa->sa_sigaction(sig, info, raw_context);
return;
}
if (retry_sa->sa_handler == SIG_DFL) {
signal(sig, SIG_DFL);
return;
}
if (retry_sa->sa_handler == SIG_IGN) {
return;
}
retry_sa->sa_handler(sig);
}
} // anonymous namespace
struct ExceptionHandler::Impl final {
Impl(BlockOfCode& code, std::function<void(CodePtr)> cb) {
code_begin = code.GetRegion();
sig_handler.AddCodeBlock({&code, std::move(cb)});
}
~Impl() {
sig_handler.RemoveCodeBlock(code_begin);
}
private:
CodePtr code_begin;
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
void ExceptionHandler::Register(BlockOfCode& code, std::function<void(CodePtr)> cb) {
if (cb)
impl = std::make_unique<Impl>(code, std::move(cb));
}
bool ExceptionHandler::SupportsFastmem() const {
return static_cast<bool>(impl);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,21 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/hostloc.h"
namespace Dynarmic::BackendA64 {
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc) {
ASSERT(HostLocIsGPR(loc));
return static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(Arm64Gen::X0) + static_cast<int>(loc));
}
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc) {
ASSERT(HostLocIsFPR(loc));
return EncodeRegToQuad(static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(loc) - static_cast<int>(HostLoc::Q0)));
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,176 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "backend/A64/emitter/a64_emitter.h"
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
enum class HostLoc {
// Ordering of the registers is intentional. See also: HostLocToA64.
// 64bit GPR registers
X0,
X1,
X2,
X3,
X4,
X5,
X6,
X7,
X8,
X9,
X10,
X11,
X12,
X13,
X14,
X15,
X16,
X17,
X18,
X19,
X20,
X21,
X22,
X23,
X24,
X25,
X26,
X27,
X28,
X29,
X30,
SP, // 64bit stack pointer
// Qword FPR registers
Q0,
Q1,
Q2,
Q3,
Q4,
Q5,
Q6,
Q7,
Q8,
Q9,
Q10,
Q11,
Q12,
Q13,
Q14,
Q15,
Q16,
Q17,
Q18,
Q19,
Q20,
Q21,
Q22,
Q23,
Q24,
Q25,
Q26,
Q27,
Q28,
Q29,
Q30,
Q31,
FirstSpill,
};
constexpr size_t NonSpillHostLocCount = static_cast<size_t>(HostLoc::FirstSpill);
inline bool HostLocIsGPR(HostLoc reg) {
return reg >= HostLoc::X0 && reg <= HostLoc::X30;
}
inline bool HostLocIsFPR(HostLoc reg) {
return reg >= HostLoc::Q0 && reg <= HostLoc::Q31;
}
inline bool HostLocIsRegister(HostLoc reg) {
return HostLocIsGPR(reg) || HostLocIsFPR(reg);
}
inline HostLoc HostLocRegIdx(int idx) {
ASSERT(idx >= 0 && idx <= 30);
return static_cast<HostLoc>(idx);
}
inline HostLoc HostLocFprIdx(int idx) {
ASSERT(idx >= 0 && idx <= 31);
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::Q0) + idx);
}
inline HostLoc HostLocSpill(size_t i) {
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
}
inline bool HostLocIsSpill(HostLoc reg) {
return reg >= HostLoc::FirstSpill;
}
inline size_t HostLocBitWidth(HostLoc loc) {
if (HostLocIsGPR(loc))
return 64;
if (HostLocIsFPR(loc))
return 128;
if (HostLocIsSpill(loc))
return 128;
UNREACHABLE();
}
using HostLocList = std::initializer_list<HostLoc>;
// X18 may be reserved (Windows and iOS).
// X26 holds the cycle counter.
// X27 holds a pointer related to emulated memory.
// X28 holds the JitState pointer.
// X30 is the link register.
// In order of desirability, based first on the ABI:
constexpr HostLocList any_gpr = {
HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
HostLoc::X24, HostLoc::X25,
HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12,
HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,
HostLoc::X7, HostLoc::X6, HostLoc::X5, HostLoc::X4, HostLoc::X3,
HostLoc::X2, HostLoc::X1, HostLoc::X0,
};
constexpr HostLocList any_fpr = {
HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13,
HostLoc::Q14, HostLoc::Q15,
HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21,
HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27,
HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
HostLoc::Q7, HostLoc::Q6, HostLoc::Q5, HostLoc::Q4, HostLoc::Q3, HostLoc::Q2,
HostLoc::Q1, HostLoc::Q0,
};
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc);
template <typename JitStateType>
size_t SpillToOpArg(HostLoc loc) {
ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
ASSERT_MSG(i < JitStateType::SpillCount,
"Spill index greater than number of available spill locations");
return JitStateType::GetSpillLocationOffsetFromIndex(i);
}
} // namespace Dynarmic::BackendA64
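SpillToOpArg<> above turns a spill HostLoc into a byte offset inside the jit-state block, which the emitter addresses through X28 (per the register comments above). A small self-contained sketch of that contract with a hypothetical jit-state layout (the name, the 256-byte base offset and the 16-byte slot size are illustrative, not dynarmic's real A32JitState):

#include <cstddef>

struct ExampleJitState {
    static constexpr std::size_t SpillCount = 64;           // number of spill slots
    static constexpr std::size_t spill_base_offset = 256;   // assumed start of the spill area

    // Mirrors the JitStateType interface that SpillToOpArg<> expects.
    static constexpr std::size_t GetSpillLocationOffsetFromIndex(std::size_t i) {
        return spill_base_offset + i * 16;  // each slot holds a 128-bit value
    }
};

// HostLoc::FirstSpill + 3 would resolve to byte offset 256 + 3 * 16 = 304
// from the jit-state pointer held in X28.
static_assert(ExampleJitState::GetSpillLocationOffsetFromIndex(3) == 304);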

View File

@ -0,0 +1,44 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
namespace Dynarmic::BackendA64 {
struct JitStateInfo {
template <typename JitStateType>
JitStateInfo(const JitStateType&)
: offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
, offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
, offsetof_save_host_FPCR(offsetof(JitStateType, save_host_FPCR))
, offsetof_guest_fpcr(offsetof(JitStateType, guest_fpcr))
, offsetof_guest_fpsr(offsetof(JitStateType, guest_fpsr))
, offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
, rsb_ptr_mask(JitStateType::RSBPtrMask)
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
{}
const size_t offsetof_cycles_remaining;
const size_t offsetof_cycles_to_run;
const size_t offsetof_save_host_FPCR;
const size_t offsetof_guest_fpcr;
const size_t offsetof_guest_fpsr;
const size_t offsetof_rsb_ptr;
const size_t rsb_ptr_mask;
const size_t offsetof_rsb_location_descriptors;
const size_t offsetof_rsb_codeptrs;
const size_t offsetof_cpsr_nzcv;
const size_t offsetof_fpsr_exc;
const size_t offsetof_fpsr_qc;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,651 @@
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
OPCODE(Void, Void, )
OPCODE(Identity, Opaque, Opaque )
OPCODE(Breakpoint, Void, )
// A32 Context getters/setters
A32OPC(SetCheckBit, Void, U1 )
A32OPC(GetRegister, U32, A32Reg )
A32OPC(GetExtendedRegister32, U32, A32ExtReg )
A32OPC(GetExtendedRegister64, U64, A32ExtReg )
A32OPC(SetRegister, Void, A32Reg, U32 )
A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 )
A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 )
A32OPC(GetCpsr, U32, )
A32OPC(SetCpsr, Void, U32 )
A32OPC(SetCpsrNZCVRaw, Void, U32 )
A32OPC(SetCpsrNZCV, Void, NZCV )
A32OPC(SetCpsrNZCVQ, Void, U32 )
A32OPC(GetNFlag, U1, )
A32OPC(SetNFlag, Void, U1 )
A32OPC(GetZFlag, U1, )
A32OPC(SetZFlag, Void, U1 )
A32OPC(GetCFlag, U1, )
A32OPC(SetCFlag, Void, U1 )
A32OPC(GetVFlag, U1, )
A32OPC(SetVFlag, Void, U1 )
A32OPC(OrQFlag, Void, U1 )
A32OPC(GetGEFlags, U32, )
A32OPC(SetGEFlags, Void, U32 )
A32OPC(SetGEFlagsCompressed, Void, U32 )
A32OPC(BXWritePC, Void, U32 )
A32OPC(CallSupervisor, Void, U32 )
A32OPC(ExceptionRaised, Void, U32, U64 )
A32OPC(GetFpscr, U32, )
A32OPC(SetFpscr, Void, U32, )
A32OPC(GetFpscrNZCV, U32, )
A32OPC(SetFpscrNZCV, Void, NZCV )
// A64 Context getters/setters
//A64OPC(SetCheckBit, Void, U1 )
//A64OPC(GetCFlag, U1, )
//A64OPC(GetNZCVRaw, U32, )
//A64OPC(SetNZCVRaw, Void, U32 )
//A64OPC(SetNZCV, Void, NZCV )
//A64OPC(GetW, U32, A64Reg )
//A64OPC(GetX, U64, A64Reg )
//A64OPC(GetS, U128, A64Vec )
//A64OPC(GetD, U128, A64Vec )
//A64OPC(GetQ, U128, A64Vec )
//A64OPC(GetSP, U64, )
//A64OPC(GetFPCR, U32, )
//A64OPC(GetFPSR, U32, )
//A64OPC(SetW, Void, A64Reg, U32 )
//A64OPC(SetX, Void, A64Reg, U64 )
//A64OPC(SetS, Void, A64Vec, U128 )
//A64OPC(SetD, Void, A64Vec, U128 )
//A64OPC(SetQ, Void, A64Vec, U128 )
//A64OPC(SetSP, Void, U64 )
//A64OPC(SetFPCR, Void, U32 )
//A64OPC(SetFPSR, Void, U32 )
//A64OPC(OrQC, Void, U1 )
//A64OPC(SetPC, Void, U64 )
//A64OPC(CallSupervisor, Void, U32 )
//A64OPC(ExceptionRaised, Void, U64, U64 )
//A64OPC(DataCacheOperationRaised, Void, U64, U64 )
//A64OPC(DataSynchronizationBarrier, Void, )
//A64OPC(DataMemoryBarrier, Void, )
//A64OPC(InstructionSynchronizationBarrier, Void, )
//A64OPC(GetCNTFRQ, U32, )
//A64OPC(GetCNTPCT, U64, )
//A64OPC(GetCTR, U32, )
//A64OPC(GetDCZID, U32, )
//A64OPC(GetTPIDR, U64, )
//A64OPC(GetTPIDRRO, U64, )
//A64OPC(SetTPIDR, Void, U64 )
// Hints
OPCODE(PushRSB, Void, U64 )
// Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, U1, Opaque )
OPCODE(GetOverflowFromOp, U1, Opaque )
OPCODE(GetGEFromOp, U32, Opaque )
OPCODE(GetNZCVFromOp, NZCV, Opaque )
OPCODE(GetUpperFromOp, U128, Opaque )
OPCODE(GetLowerFromOp, U128, Opaque )
OPCODE(NZCVFromPackedFlags, NZCV, U32 )
// Calculations
OPCODE(Pack2x32To1x64, U64, U32, U32 )
//OPCODE(Pack2x64To1x128, U128, U64, U64 )
OPCODE(LeastSignificantWord, U32, U64 )
OPCODE(MostSignificantWord, U32, U64 )
OPCODE(LeastSignificantHalf, U16, U32 )
OPCODE(LeastSignificantByte, U8, U32 )
OPCODE(MostSignificantBit, U1, U32 )
OPCODE(IsZero32, U1, U32 )
OPCODE(IsZero64, U1, U64 )
OPCODE(TestBit, U1, U64, U8 )
OPCODE(ConditionalSelect32, U32, Cond, U32, U32 )
OPCODE(ConditionalSelect64, U64, Cond, U64, U64 )
OPCODE(ConditionalSelectNZCV, NZCV, Cond, NZCV, NZCV )
OPCODE(LogicalShiftLeft32, U32, U32, U8, U1 )
OPCODE(LogicalShiftLeft64, U64, U64, U8 )
OPCODE(LogicalShiftRight32, U32, U32, U8, U1 )
OPCODE(LogicalShiftRight64, U64, U64, U8 )
OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 )
//OPCODE(ArithmeticShiftRight64, U64, U64, U8 )
OPCODE(RotateRight32, U32, U32, U8, U1 )
OPCODE(RotateRight64, U64, U64, U8 )
OPCODE(RotateRightExtended, U32, U32, U1 )
OPCODE(Add32, U32, U32, U32, U1 )
OPCODE(Add64, U64, U64, U64, U1 )
OPCODE(Sub32, U32, U32, U32, U1 )
OPCODE(Sub64, U64, U64, U64, U1 )
OPCODE(Mul32, U32, U32, U32 )
OPCODE(Mul64, U64, U64, U64 )
//OPCODE(SignedMultiplyHigh64, U64, U64, U64 )
//OPCODE(UnsignedMultiplyHigh64, U64, U64, U64 )
OPCODE(UnsignedDiv32, U32, U32, U32 )
OPCODE(UnsignedDiv64, U64, U64, U64 )
OPCODE(SignedDiv32, U32, U32, U32 )
OPCODE(SignedDiv64, U64, U64, U64 )
OPCODE(And32, U32, U32, U32 )
OPCODE(And64, U64, U64, U64 )
OPCODE(Eor32, U32, U32, U32 )
OPCODE(Eor64, U64, U64, U64 )
OPCODE(Or32, U32, U32, U32 )
OPCODE(Or64, U64, U64, U64 )
OPCODE(Not32, U32, U32 )
OPCODE(Not64, U64, U64 )
OPCODE(SignExtendByteToWord, U32, U8 )
OPCODE(SignExtendHalfToWord, U32, U16 )
OPCODE(SignExtendByteToLong, U64, U8 )
OPCODE(SignExtendHalfToLong, U64, U16 )
OPCODE(SignExtendWordToLong, U64, U32 )
OPCODE(ZeroExtendByteToWord, U32, U8 )
OPCODE(ZeroExtendHalfToWord, U32, U16 )
OPCODE(ZeroExtendByteToLong, U64, U8 )
OPCODE(ZeroExtendHalfToLong, U64, U16 )
OPCODE(ZeroExtendWordToLong, U64, U32 )
//OPCODE(ZeroExtendLongToQuad, U128, U64 )
//OPCODE(ByteReverseDual, U64, U64 )
OPCODE(ByteReverseWord, U32, U32 )
OPCODE(ByteReverseHalf, U16, U16 )
OPCODE(CountLeadingZeros32, U32, U32 )
OPCODE(CountLeadingZeros64, U64, U64 )
//OPCODE(ExtractRegister32, U32, U32, U32, U8 )
//OPCODE(ExtractRegister64, U64, U64, U64, U8 )
//OPCODE(MaxSigned32, U32, U32, U32 )
//OPCODE(MaxSigned64, U64, U64, U64 )
//OPCODE(MaxUnsigned32, U32, U32, U32 )
//OPCODE(MaxUnsigned64, U64, U64, U64 )
//OPCODE(MinSigned32, U32, U32, U32 )
//OPCODE(MinSigned64, U64, U64, U64 )
//OPCODE(MinUnsigned32, U32, U32, U32 )
//OPCODE(MinUnsigned64, U64, U64, U64 )
// Saturated instructions
OPCODE(SignedSaturatedAdd8, U8, U8, U8 )
OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
OPCODE(SignedSaturatedAdd64, U64, U64, U64 )
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 )
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 )
OPCODE(SignedSaturatedSub8, U8, U8, U8 )
OPCODE(SignedSaturatedSub16, U16, U16, U16 )
OPCODE(SignedSaturatedSub32, U32, U32, U32 )
OPCODE(SignedSaturatedSub64, U64, U64, U64 )
OPCODE(SignedSaturation, U32, U32, U8 )
//OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedAdd64, U64, U64, U64 )
//OPCODE(UnsignedSaturatedSub8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
OPCODE(UnsignedSaturation, U32, U32, U8 )
// Packed instructions
OPCODE(PackedAddU8, U32, U32, U32 )
OPCODE(PackedAddS8, U32, U32, U32 )
OPCODE(PackedSubU8, U32, U32, U32 )
OPCODE(PackedSubS8, U32, U32, U32 )
OPCODE(PackedAddU16, U32, U32, U32 )
OPCODE(PackedAddS16, U32, U32, U32 )
OPCODE(PackedSubU16, U32, U32, U32 )
OPCODE(PackedSubS16, U32, U32, U32 )
OPCODE(PackedAddSubU16, U32, U32, U32 )
OPCODE(PackedAddSubS16, U32, U32, U32 )
OPCODE(PackedSubAddU16, U32, U32, U32 )
OPCODE(PackedSubAddS16, U32, U32, U32 )
OPCODE(PackedHalvingAddU8, U32, U32, U32 )
OPCODE(PackedHalvingAddS8, U32, U32, U32 )
OPCODE(PackedHalvingSubU8, U32, U32, U32 )
OPCODE(PackedHalvingSubS8, U32, U32, U32 )
OPCODE(PackedHalvingAddU16, U32, U32, U32 )
OPCODE(PackedHalvingAddS16, U32, U32, U32 )
OPCODE(PackedHalvingSubU16, U32, U32, U32 )
OPCODE(PackedHalvingSubS16, U32, U32, U32 )
OPCODE(PackedHalvingAddSubU16, U32, U32, U32 )
OPCODE(PackedHalvingAddSubS16, U32, U32, U32 )
OPCODE(PackedHalvingSubAddU16, U32, U32, U32 )
OPCODE(PackedHalvingSubAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
OPCODE(PackedSaturatedSubU8, U32, U32, U32 )
OPCODE(PackedSaturatedSubS8, U32, U32, U32 )
OPCODE(PackedSaturatedAddU16, U32, U32, U32 )
OPCODE(PackedSaturatedAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedSubU16, U32, U32, U32 )
OPCODE(PackedSaturatedSubS16, U32, U32, U32 )
OPCODE(PackedAbsDiffSumS8, U32, U32, U32 )
OPCODE(PackedSelect, U32, U32, U32, U32 )
// CRC instructions
//OPCODE(CRC32Castagnoli8, U32, U32, U32 )
//OPCODE(CRC32Castagnoli16, U32, U32, U32 )
//OPCODE(CRC32Castagnoli32, U32, U32, U32 )
//OPCODE(CRC32Castagnoli64, U32, U32, U64 )
//OPCODE(CRC32ISO8, U32, U32, U32 )
//OPCODE(CRC32ISO16, U32, U32, U32 )
//OPCODE(CRC32ISO32, U32, U32, U32 )
//OPCODE(CRC32ISO64, U32, U32, U64 )
// AES instructions
//OPCODE(AESDecryptSingleRound, U128, U128 )
//OPCODE(AESEncryptSingleRound, U128, U128 )
//OPCODE(AESInverseMixColumns, U128, U128 )
//OPCODE(AESMixColumns, U128, U128 )
// SM4 instructions
//OPCODE(SM4AccessSubstitutionBox, U8, U8 )
// Vector instructions
//OPCODE(VectorGetElement8, U8, U128, U8 )
//OPCODE(VectorGetElement16, U16, U128, U8 )
//OPCODE(VectorGetElement32, U32, U128, U8 )
//OPCODE(VectorGetElement64, U64, U128, U8 )
//OPCODE(VectorSetElement8, U128, U128, U8, U8 )
//OPCODE(VectorSetElement16, U128, U128, U8, U16 )
//OPCODE(VectorSetElement32, U128, U128, U8, U32 )
//OPCODE(VectorSetElement64, U128, U128, U8, U64 )
//OPCODE(VectorAbs8, U128, U128 )
//OPCODE(VectorAbs16, U128, U128 )
//OPCODE(VectorAbs32, U128, U128 )
//OPCODE(VectorAbs64, U128, U128 )
//OPCODE(VectorAdd8, U128, U128, U128 )
//OPCODE(VectorAdd16, U128, U128, U128 )
//OPCODE(VectorAdd32, U128, U128, U128 )
//OPCODE(VectorAdd64, U128, U128, U128 )
//OPCODE(VectorAnd, U128, U128, U128 )
//OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 )
//OPCODE(VectorArithmeticVShift8, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift16, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift32, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift64, U128, U128, U128 )
//OPCODE(VectorBroadcastLower8, U128, U8 )
//OPCODE(VectorBroadcastLower16, U128, U16 )
//OPCODE(VectorBroadcastLower32, U128, U32 )
//OPCODE(VectorBroadcast8, U128, U8 )
//OPCODE(VectorBroadcast16, U128, U16 )
//OPCODE(VectorBroadcast32, U128, U32 )
//OPCODE(VectorBroadcast64, U128, U64 )
//OPCODE(VectorCountLeadingZeros8, U128, U128 )
//OPCODE(VectorCountLeadingZeros16, U128, U128 )
//OPCODE(VectorCountLeadingZeros32, U128, U128 )
//OPCODE(VectorDeinterleaveEven8, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven16, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven32, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven64, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd8, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd16, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd32, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd64, U128, U128, U128 )
//OPCODE(VectorEor, U128, U128, U128 )
//OPCODE(VectorEqual8, U128, U128, U128 )
//OPCODE(VectorEqual16, U128, U128, U128 )
//OPCODE(VectorEqual32, U128, U128, U128 )
//OPCODE(VectorEqual64, U128, U128, U128 )
//OPCODE(VectorEqual128, U128, U128, U128 )
//OPCODE(VectorExtract, U128, U128, U128, U8 )
//OPCODE(VectorExtractLower, U128, U128, U128, U8 )
//OPCODE(VectorGreaterS8, U128, U128, U128 )
//OPCODE(VectorGreaterS16, U128, U128, U128 )
//OPCODE(VectorGreaterS32, U128, U128, U128 )
//OPCODE(VectorGreaterS64, U128, U128, U128 )
//OPCODE(VectorHalvingAddS8, U128, U128, U128 )
//OPCODE(VectorHalvingAddS16, U128, U128, U128 )
//OPCODE(VectorHalvingAddS32, U128, U128, U128 )
//OPCODE(VectorHalvingAddU8, U128, U128, U128 )
//OPCODE(VectorHalvingAddU16, U128, U128, U128 )
//OPCODE(VectorHalvingAddU32, U128, U128, U128 )
//OPCODE(VectorHalvingSubS8, U128, U128, U128 )
//OPCODE(VectorHalvingSubS16, U128, U128, U128 )
//OPCODE(VectorHalvingSubS32, U128, U128, U128 )
//OPCODE(VectorHalvingSubU8, U128, U128, U128 )
//OPCODE(VectorHalvingSubU16, U128, U128, U128 )
//OPCODE(VectorHalvingSubU32, U128, U128, U128 )
//OPCODE(VectorInterleaveLower8, U128, U128, U128 )
//OPCODE(VectorInterleaveLower16, U128, U128, U128 )
//OPCODE(VectorInterleaveLower32, U128, U128, U128 )
//OPCODE(VectorInterleaveLower64, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper8, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper16, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper32, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper64, U128, U128, U128 )
//OPCODE(VectorLogicalShiftLeft8, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft16, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft32, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft64, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight8, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight16, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight32, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight64, U128, U128, U8 )
//OPCODE(VectorLogicalVShift8, U128, U128, U128 )
//OPCODE(VectorLogicalVShift16, U128, U128, U128 )
//OPCODE(VectorLogicalVShift32, U128, U128, U128 )
//OPCODE(VectorLogicalVShift64, U128, U128, U128 )
//OPCODE(VectorMaxS8, U128, U128, U128 )
//OPCODE(VectorMaxS16, U128, U128, U128 )
//OPCODE(VectorMaxS32, U128, U128, U128 )
//OPCODE(VectorMaxS64, U128, U128, U128 )
//OPCODE(VectorMaxU8, U128, U128, U128 )
//OPCODE(VectorMaxU16, U128, U128, U128 )
//OPCODE(VectorMaxU32, U128, U128, U128 )
//OPCODE(VectorMaxU64, U128, U128, U128 )
//OPCODE(VectorMinS8, U128, U128, U128 )
//OPCODE(VectorMinS16, U128, U128, U128 )
//OPCODE(VectorMinS32, U128, U128, U128 )
//OPCODE(VectorMinS64, U128, U128, U128 )
//OPCODE(VectorMinU8, U128, U128, U128 )
//OPCODE(VectorMinU16, U128, U128, U128 )
//OPCODE(VectorMinU32, U128, U128, U128 )
//OPCODE(VectorMinU64, U128, U128, U128 )
//OPCODE(VectorMultiply8, U128, U128, U128 )
//OPCODE(VectorMultiply16, U128, U128, U128 )
//OPCODE(VectorMultiply32, U128, U128, U128 )
//OPCODE(VectorMultiply64, U128, U128, U128 )
//OPCODE(VectorNarrow16, U128, U128 )
//OPCODE(VectorNarrow32, U128, U128 )
//OPCODE(VectorNarrow64, U128, U128 )
//OPCODE(VectorNot, U128, U128 )
//OPCODE(VectorOr, U128, U128, U128 )
//OPCODE(VectorPairedAddLower8, U128, U128, U128 )
//OPCODE(VectorPairedAddLower16, U128, U128, U128 )
//OPCODE(VectorPairedAddLower32, U128, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden8, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden16, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden32, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden8, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden16, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden32, U128, U128 )
//OPCODE(VectorPairedAdd8, U128, U128, U128 )
//OPCODE(VectorPairedAdd16, U128, U128, U128 )
//OPCODE(VectorPairedAdd32, U128, U128, U128 )
//OPCODE(VectorPairedAdd64, U128, U128, U128 )
//OPCODE(VectorPairedMaxS8, U128, U128, U128 )
//OPCODE(VectorPairedMaxS16, U128, U128, U128 )
//OPCODE(VectorPairedMaxS32, U128, U128, U128 )
//OPCODE(VectorPairedMaxU8, U128, U128, U128 )
//OPCODE(VectorPairedMaxU16, U128, U128, U128 )
//OPCODE(VectorPairedMaxU32, U128, U128, U128 )
//OPCODE(VectorPairedMinS8, U128, U128, U128 )
//OPCODE(VectorPairedMinS16, U128, U128, U128 )
//OPCODE(VectorPairedMinS32, U128, U128, U128 )
//OPCODE(VectorPairedMinU8, U128, U128, U128 )
//OPCODE(VectorPairedMinU16, U128, U128, U128 )
//OPCODE(VectorPairedMinU32, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiply8, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 )
//OPCODE(VectorPopulationCount, U128, U128 )
//OPCODE(VectorReverseBits, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU8, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU16, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS8, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS16, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS64, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU8, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
//OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 )
//OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 )
//OPCODE(VectorShuffleWords, U128, U128, U8 )
//OPCODE(VectorSignExtend8, U128, U128 )
//OPCODE(VectorSignExtend16, U128, U128 )
//OPCODE(VectorSignExtend32, U128, U128 )
//OPCODE(VectorSignExtend64, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference8, U128, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference16, U128, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference32, U128, U128, U128 )
//OPCODE(VectorSignedMultiply16, Void, U128, U128 )
//OPCODE(VectorSignedMultiply32, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs8, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs16, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs32, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs64, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg8, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg16, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg32, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg64, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft64, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned64, U128, U128, U128 )
//OPCODE(VectorSub8, U128, U128, U128 )
//OPCODE(VectorSub16, U128, U128, U128 )
//OPCODE(VectorSub32, U128, U128, U128 )
//OPCODE(VectorSub64, U128, U128, U128 )
//OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque )
//OPCODE(VectorTableLookup, U128, U128, Table, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
//OPCODE(VectorUnsignedMultiply16, Void, U128, U128 )
//OPCODE(VectorUnsignedMultiply32, Void, U128, U128 )
//OPCODE(VectorUnsignedRecipEstimate, U128, U128 )
//OPCODE(VectorUnsignedRecipSqrtEstimate, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned8, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned16, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned32, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned64, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft8, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft16, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft32, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft64, U128, U128, U128 )
//OPCODE(VectorZeroExtend8, U128, U128 )
//OPCODE(VectorZeroExtend16, U128, U128 )
//OPCODE(VectorZeroExtend32, U128, U128 )
//OPCODE(VectorZeroExtend64, U128, U128 )
//OPCODE(VectorZeroUpper, U128, U128 )
//OPCODE(ZeroVector, U128, )
// Floating-point operations
//OPCODE(FPAbs16, U16, U16 )
OPCODE(FPAbs32, U32, U32 )
OPCODE(FPAbs64, U64, U64 )
OPCODE(FPAdd32, U32, U32, U32 )
OPCODE(FPAdd64, U64, U64, U64 )
OPCODE(FPCompare32, NZCV, U32, U32, U1 )
OPCODE(FPCompare64, NZCV, U64, U64, U1 )
OPCODE(FPDiv32, U32, U32, U32 )
OPCODE(FPDiv64, U64, U64, U64 )
//OPCODE(FPMax32, U32, U32, U32 )
//OPCODE(FPMax64, U64, U64, U64 )
//OPCODE(FPMaxNumeric32, U32, U32, U32 )
//OPCODE(FPMaxNumeric64, U64, U64, U64 )
//OPCODE(FPMin32, U32, U32, U32 )
//OPCODE(FPMin64, U64, U64, U64 )
//OPCODE(FPMinNumeric32, U32, U32, U32 )
//OPCODE(FPMinNumeric64, U64, U64, U64 )
OPCODE(FPMul32, U32, U32, U32 )
OPCODE(FPMul64, U64, U64, U64 )
//OPCODE(FPMulAdd16, U16, U16, U16, U16 )
//OPCODE(FPMulAdd32, U32, U32, U32, U32 )
//OPCODE(FPMulAdd64, U64, U64, U64, U64 )
//OPCODE(FPMulX32, U32, U32, U32 )
//OPCODE(FPMulX64, U64, U64, U64 )
//OPCODE(FPNeg16, U16, U16 )
OPCODE(FPNeg32, U32, U32 )
OPCODE(FPNeg64, U64, U64 )
//OPCODE(FPRecipEstimate16, U16, U16 )
//OPCODE(FPRecipEstimate32, U32, U32 )
//OPCODE(FPRecipEstimate64, U64, U64 )
//OPCODE(FPRecipExponent16, U16, U16 )
//OPCODE(FPRecipExponent32, U32, U32 )
//OPCODE(FPRecipExponent64, U64, U64 )
//OPCODE(FPRecipStepFused16, U16, U16, U16 )
//OPCODE(FPRecipStepFused32, U32, U32, U32 )
//OPCODE(FPRecipStepFused64, U64, U64, U64 )
//OPCODE(FPRoundInt16, U16, U16, U8, U1 )
//OPCODE(FPRoundInt32, U32, U32, U8, U1 )
//OPCODE(FPRoundInt64, U64, U64, U8, U1 )
//OPCODE(FPRSqrtEstimate16, U16, U16 )
//OPCODE(FPRSqrtEstimate32, U32, U32 )
//OPCODE(FPRSqrtEstimate64, U64, U64 )
//OPCODE(FPRSqrtStepFused16, U16, U16, U16 )
//OPCODE(FPRSqrtStepFused32, U32, U32, U32 )
//OPCODE(FPRSqrtStepFused64, U64, U64, U64 )
OPCODE(FPSqrt32, U32, U32 )
OPCODE(FPSqrt64, U64, U64 )
OPCODE(FPSub32, U32, U32, U32 )
OPCODE(FPSub64, U64, U64, U64 )
// Floating-point conversions
OPCODE(FPHalfToDouble, U64, U16, U8 )
OPCODE(FPHalfToSingle, U32, U16, U8 )
OPCODE(FPSingleToDouble, U64, U32, U8 )
OPCODE(FPSingleToHalf, U16, U32, U8 )
OPCODE(FPDoubleToHalf, U16, U64, U8 )
OPCODE(FPDoubleToSingle, U32, U64, U8 )
OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
//OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 )
//OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 )
//OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 )
//OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 )
OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 )
OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 )
OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 )
OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 )
OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 )
OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 )
OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 )
OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 )
OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 )
// Floating-point vector instructions
//OPCODE(FPVectorAbs16, U128, U128 )
//OPCODE(FPVectorAbs32, U128, U128 )
//OPCODE(FPVectorAbs64, U128, U128 )
//OPCODE(FPVectorAdd32, U128, U128, U128 )
//OPCODE(FPVectorAdd64, U128, U128, U128 )
//OPCODE(FPVectorDiv32, U128, U128, U128 )
//OPCODE(FPVectorDiv64, U128, U128, U128 )
//OPCODE(FPVectorEqual32, U128, U128, U128 )
//OPCODE(FPVectorEqual64, U128, U128, U128 )
//OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorGreater32, U128, U128, U128 )
//OPCODE(FPVectorGreater64, U128, U128, U128 )
//OPCODE(FPVectorGreaterEqual32, U128, U128, U128 )
//OPCODE(FPVectorGreaterEqual64, U128, U128, U128 )
//OPCODE(FPVectorMax32, U128, U128, U128 )
//OPCODE(FPVectorMax64, U128, U128, U128 )
//OPCODE(FPVectorMin32, U128, U128, U128 )
//OPCODE(FPVectorMin64, U128, U128, U128 )
//OPCODE(FPVectorMul32, U128, U128, U128 )
//OPCODE(FPVectorMul64, U128, U128, U128 )
//OPCODE(FPVectorMulAdd16, U128, U128, U128, U128 )
//OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 )
//OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 )
//OPCODE(FPVectorMulX32, U128, U128, U128 )
//OPCODE(FPVectorMulX64, U128, U128, U128 )
//OPCODE(FPVectorNeg16, U128, U128 )
//OPCODE(FPVectorNeg32, U128, U128 )
//OPCODE(FPVectorNeg64, U128, U128 )
//OPCODE(FPVectorPairedAdd32, U128, U128, U128 )
//OPCODE(FPVectorPairedAdd64, U128, U128, U128 )
//OPCODE(FPVectorPairedAddLower32, U128, U128, U128 )
//OPCODE(FPVectorPairedAddLower64, U128, U128, U128 )
//OPCODE(FPVectorRecipEstimate16, U128, U128 )
//OPCODE(FPVectorRecipEstimate32, U128, U128 )
//OPCODE(FPVectorRecipEstimate64, U128, U128 )
//OPCODE(FPVectorRecipStepFused16, U128, U128, U128 )
//OPCODE(FPVectorRecipStepFused32, U128, U128, U128 )
//OPCODE(FPVectorRecipStepFused64, U128, U128, U128 )
//OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
//OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
//OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
//OPCODE(FPVectorRSqrtEstimate16, U128, U128 )
//OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
//OPCODE(FPVectorRSqrtEstimate64, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused16, U128, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 )
//OPCODE(FPVectorSqrt32, U128, U128 )
//OPCODE(FPVectorSqrt64, U128, U128 )
//OPCODE(FPVectorSub32, U128, U128, U128 )
//OPCODE(FPVectorSub64, U128, U128, U128 )
//OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8 )
//OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed16, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 )
// A32 Memory access
A32OPC(ClearExclusive, Void, )
A32OPC(SetExclusive, Void, U32, U8 )
A32OPC(ReadMemory8, U8, U32 )
A32OPC(ReadMemory16, U16, U32 )
A32OPC(ReadMemory32, U32, U32 )
A32OPC(ReadMemory64, U64, U32 )
A32OPC(WriteMemory8, Void, U32, U8 )
A32OPC(WriteMemory16, Void, U32, U16 )
A32OPC(WriteMemory32, Void, U32, U32 )
A32OPC(WriteMemory64, Void, U32, U64 )
A32OPC(ExclusiveWriteMemory8, U32, U32, U8 )
A32OPC(ExclusiveWriteMemory16, U32, U32, U16 )
A32OPC(ExclusiveWriteMemory32, U32, U32, U32 )
A32OPC(ExclusiveWriteMemory64, U32, U32, U64 )
// A64 Memory access
//A64OPC(ClearExclusive, Void, )
//A64OPC(SetExclusive, Void, U64, U8 )
//A64OPC(ReadMemory8, U8, U64 )
//A64OPC(ReadMemory16, U16, U64 )
//A64OPC(ReadMemory32, U32, U64 )
//A64OPC(ReadMemory64, U64, U64 )
//A64OPC(ReadMemory128, U128, U64 )
//A64OPC(WriteMemory8, Void, U64, U8 )
//A64OPC(WriteMemory16, Void, U64, U16 )
//A64OPC(WriteMemory32, Void, U64, U32 )
//A64OPC(WriteMemory64, Void, U64, U64 )
//A64OPC(WriteMemory128, Void, U64, U128 )
//A64OPC(ExclusiveWriteMemory8, U32, U64, U8 )
//A64OPC(ExclusiveWriteMemory16, U32, U64, U16 )
//A64OPC(ExclusiveWriteMemory32, U32, U64, U32 )
//A64OPC(ExclusiveWriteMemory64, U32, U64, U64 )
//A64OPC(ExclusiveWriteMemory128, U32, U64, U128 )
// Coprocessor
A32OPC(CoprocInternalOperation, Void, CoprocInfo )
A32OPC(CoprocSendOneWord, Void, CoprocInfo, U32 )
A32OPC(CoprocSendTwoWords, Void, CoprocInfo, U32, U32 )
A32OPC(CoprocGetOneWord, U32, CoprocInfo )
A32OPC(CoprocGetTwoWords, U64, CoprocInfo )
A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 )
A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )
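The list above is an X-macro table: each OPCODE/A32OPC entry names an IR opcode together with its return type and argument types, and the commented-out entries appear to be opcodes this backend does not implement yet. A minimal sketch of how such a table is typically consumed (a trimmed-down inline list stands in for including opcodes.inc, and the type tags are ignored here):

#include <cstddef>
#include <string_view>

// Tiny stand-in for opcodes.inc; the real consumer defines OPCODE/A32OPC
// and then includes the file to expand every entry.
#define OPCODE_LIST(X)            \
    X(Void, Void)                 \
    X(Add32, U32, U32, U32, U1)   \
    X(Sub32, U32, U32, U32, U1)

enum class Opcode {
#define OPCODE_ENUM(name, ...) name,
    OPCODE_LIST(OPCODE_ENUM)
#undef OPCODE_ENUM
};

constexpr std::string_view opcode_names[] = {
#define OPCODE_NAME(name, ...) #name,
    OPCODE_LIST(OPCODE_NAME)
#undef OPCODE_NAME
};

static_assert(opcode_names[static_cast<std::size_t>(Opcode::Add32)] == "Add32");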

View File

@ -0,0 +1,89 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstddef>
#include <string>
#ifdef __linux__
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <sys/types.h>
#include <unistd.h>
#include <fmt/format.h>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace {
std::mutex mutex;
std::FILE* file = nullptr;
void OpenFile() {
const char* perf_dir = std::getenv("PERF_BUILDID_DIR");
if (!perf_dir) {
file = nullptr;
return;
}
const pid_t pid = getpid();
const std::string filename = fmt::format("{:s}/perf-{:d}.map", perf_dir, pid);
file = std::fopen(filename.c_str(), "w");
if (!file) {
return;
}
std::setvbuf(file, nullptr, _IONBF, 0);
}
} // anonymous namespace
namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) {
std::lock_guard guard{mutex};
if (!file) {
OpenFile();
if (!file) {
return;
}
}
const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
}
} // namespace detail
void PerfMapClear() {
std::lock_guard guard{mutex};
if (!file) {
return;
}
std::fclose(file);
file = nullptr;
OpenFile();
}
} // namespace Dynarmic::BackendA64
#else
namespace Dynarmic::BackendA64 {
namespace detail {
void PerfMapRegister(const void*, const void*, const std::string&) {}
} // namespace detail
void PerfMapClear() {}
} // namespace Dynarmic::BackendA64
#endif
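For reference, the mapping file written above follows the simple perf JIT-map convention: one "<start> <size> <name>" line per symbol, hex-encoded, written to $PERF_BUILDID_DIR/perf-<pid>.map (perf conventionally also looks for /tmp/perf-<pid>.map). A standalone sketch producing one such line, with made-up values:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint64_t start = 0x7f0000001000;  // start address of the emitted block
    const std::uint64_t size = 0x40;             // size of the block in bytes
    std::printf("%016llx %016llx %s\n",
                static_cast<unsigned long long>(start),
                static_cast<unsigned long long>(size),
                "dynarmic_block_example");
}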

View File

@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
#include <string>
#include "common/cast_util.h"
namespace Dynarmic::BackendA64 {
namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name);
} // namespace detail
template<typename T>
void PerfMapRegister(T start, const void* end, const std::string& friendly_name) {
detail::PerfMapRegister(Common::BitCast<const void*>(start), end, friendly_name);
}
void PerfMapClear();
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,650 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <algorithm>
#include <numeric>
#include <utility>
#include <fmt/ostream.h>
#include "backend/A64/abi.h"
#include "backend/A64/reg_alloc.h"
#include "common/assert.h"
namespace Dynarmic::BackendA64 {
static u64 ImmediateToU64(const IR::Value& imm) {
switch (imm.GetType()) {
case IR::Type::U1:
return u64(imm.GetU1());
case IR::Type::U8:
return u64(imm.GetU8());
case IR::Type::U16:
return u64(imm.GetU16());
case IR::Type::U32:
return u64(imm.GetU32());
case IR::Type::U64:
return u64(imm.GetU64());
default:
ASSERT_FALSE("This should never happen.");
}
}
static bool CanExchange(HostLoc a, HostLoc b) {
return HostLocIsGPR(a) && HostLocIsGPR(b);
}
// Minimum number of bits required to represent a type
static size_t GetBitWidth(IR::Type type) {
switch (type) {
case IR::Type::A32Reg:
case IR::Type::A32ExtReg:
case IR::Type::A64Reg:
case IR::Type::A64Vec:
case IR::Type::CoprocInfo:
case IR::Type::Cond:
case IR::Type::Void:
case IR::Type::Table:
ASSERT_FALSE("Type {} cannot be represented at runtime", type);
return 0;
case IR::Type::Opaque:
ASSERT_FALSE("Not a concrete type");
return 0;
case IR::Type::U1:
return 8;
case IR::Type::U8:
return 8;
case IR::Type::U16:
return 16;
case IR::Type::U32:
return 32;
case IR::Type::U64:
return 64;
case IR::Type::U128:
return 128;
case IR::Type::NZCVFlags:
return 32; // TODO: Update to 16 when flags optimization is done
}
UNREACHABLE();
return 0;
}
static bool IsValuelessType(IR::Type type) {
switch (type) {
case IR::Type::Table:
return true;
default:
return false;
}
}
bool HostLocInfo::IsLocked() const {
return is_being_used_count > 0;
}
bool HostLocInfo::IsEmpty() const {
return is_being_used_count == 0 && values.empty();
}
bool HostLocInfo::IsLastUse() const {
return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
}
void HostLocInfo::ReadLock() {
ASSERT(!is_scratch);
is_being_used_count++;
}
void HostLocInfo::WriteLock() {
ASSERT(is_being_used_count == 0);
is_being_used_count++;
is_scratch = true;
}
void HostLocInfo::AddArgReference() {
current_references++;
ASSERT(accumulated_uses + current_references <= total_uses);
}
void HostLocInfo::ReleaseOne() {
is_being_used_count--;
is_scratch = false;
if (current_references == 0)
return;
accumulated_uses++;
current_references--;
if (current_references == 0)
ReleaseAll();
}
void HostLocInfo::ReleaseAll() {
accumulated_uses += current_references;
current_references = 0;
ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));
if (total_uses == accumulated_uses) {
values.clear();
accumulated_uses = 0;
total_uses = 0;
max_bit_width = 0;
}
is_being_used_count = 0;
is_scratch = false;
}
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
return std::find(values.begin(), values.end(), inst) != values.end();
}
size_t HostLocInfo::GetMaxBitWidth() const {
return max_bit_width;
}
void HostLocInfo::AddValue(IR::Inst* inst) {
values.push_back(inst);
total_uses += inst->UseCount();
max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
}
IR::Type Argument::GetType() const {
return value.GetType();
}
bool Argument::IsImmediate() const {
return value.IsImmediate();
}
bool Argument::IsVoid() const {
return GetType() == IR::Type::Void;
}
bool Argument::FitsInImmediateU32() const {
if (!IsImmediate())
return false;
u64 imm = ImmediateToU64(value);
return imm < 0x100000000;
}
bool Argument::FitsInImmediateS32() const {
if (!IsImmediate())
return false;
s64 imm = static_cast<s64>(ImmediateToU64(value));
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
}
bool Argument::GetImmediateU1() const {
return value.GetU1();
}
u8 Argument::GetImmediateU8() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x100);
return u8(imm);
}
u16 Argument::GetImmediateU16() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x10000);
return u16(imm);
}
u32 Argument::GetImmediateU32() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x100000000);
return u32(imm);
}
u64 Argument::GetImmediateS32() const {
ASSERT(FitsInImmediateS32());
u64 imm = ImmediateToU64(value);
return imm;
}
u64 Argument::GetImmediateU64() const {
return ImmediateToU64(value);
}
IR::Cond Argument::GetImmediateCond() const {
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
return value.GetCond();
}
bool Argument::IsInGpr() const {
if (IsImmediate())
return false;
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
}
bool Argument::IsInFpr() const {
if (IsImmediate())
return false;
return HostLocIsFPR(*reg_alloc.ValueLocation(value.GetInst()));
}
bool Argument::IsInMemory() const {
if (IsImmediate())
return false;
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value& arg = inst->GetArg(i);
ret[i].value = arg;
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already have been defined");
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
}
}
return ret;
}
Arm64Gen::ARM64Reg RegAlloc::UseGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseImpl(arg.value, any_gpr));
}
Arm64Gen::ARM64Reg RegAlloc::UseFpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToFpr(UseImpl(arg.value, any_fpr));
}
//OpArg RegAlloc::UseOpArg(Argument& arg) {
// return UseGpr(arg);
//}
void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
ASSERT(!arg.allocated);
arg.allocated = true;
UseImpl(arg.value, {host_loc});
}
Arm64Gen::ARM64Reg RegAlloc::UseScratchGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseScratchImpl(arg.value, any_gpr));
}
Arm64Gen::ARM64Reg RegAlloc::UseScratchFpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToFpr(UseScratchImpl(arg.value, any_fpr));
}
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
ASSERT(!arg.allocated);
arg.allocated = true;
UseScratchImpl(arg.value, {host_loc});
}
void RegAlloc::DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg) {
ASSERT(IsVector(reg) || IsGPR(reg));
HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
DefineValueImpl(inst, hostloc);
}
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
DefineValueImpl(inst, arg.value);
}
void RegAlloc::Release(const Arm64Gen::ARM64Reg& reg) {
ASSERT(IsVector(reg) || IsGPR(reg));
const HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
LocInfo(hostloc).ReleaseOne();
}
Arm64Gen::ARM64Reg RegAlloc::ScratchGpr(HostLocList desired_locations) {
return HostLocToReg64(ScratchImpl(desired_locations));
}
Arm64Gen::ARM64Reg RegAlloc::ScratchFpr(HostLocList desired_locations) {
return HostLocToFpr(ScratchImpl(desired_locations));
}
HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
const IR::Inst* use_inst = use_value.GetInst();
const HostLoc current_location = *ValueLocation(use_inst);
const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
if (can_use_current_location) {
LocInfo(current_location).ReadLock();
return current_location;
}
if (LocInfo(current_location).IsLocked()) {
return UseScratchImpl(use_value, desired_locations);
}
const HostLoc destination_location = SelectARegister(desired_locations);
if (max_bit_width > HostLocBitWidth(destination_location)) {
return UseScratchImpl(use_value, desired_locations);
} else if (CanExchange(destination_location, current_location)) {
Exchange(destination_location, current_location);
} else {
MoveOutOfTheWay(destination_location);
Move(destination_location, current_location);
}
LocInfo(destination_location).ReadLock();
return destination_location;
}
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
const IR::Inst* use_inst = use_value.GetInst();
const HostLoc current_location = *ValueLocation(use_inst);
const size_t bit_width = GetBitWidth(use_inst->GetType());
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
if (!LocInfo(current_location).IsLastUse()) {
MoveOutOfTheWay(current_location);
}
LocInfo(current_location).WriteLock();
return current_location;
}
const HostLoc destination_location = SelectARegister(desired_locations);
MoveOutOfTheWay(destination_location);
CopyToScratch(bit_width, destination_location, current_location);
LocInfo(destination_location).WriteLock();
return destination_location;
}
HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
HostLoc location = SelectARegister(desired_locations);
MoveOutOfTheWay(location);
LocInfo(location).WriteLock();
return location;
}
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0,
std::optional<Argument::copyable_reference> arg1,
std::optional<Argument::copyable_reference> arg2,
std::optional<Argument::copyable_reference> arg3,
std::optional<Argument::copyable_reference> arg4,
std::optional<Argument::copyable_reference> arg5,
std::optional<Argument::copyable_reference> arg6,
std::optional<Argument::copyable_reference> arg7) {
constexpr size_t args_count = 8;
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8 };
const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7};
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
for (auto hostloc : args_hostloc)
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
return ret;
}();
for (size_t i = 0; i < args_count; i++) {
if (args[i]) {
UseScratch(*args[i], args_hostloc[i]);
}
}
for (size_t i = 0; i < args_count; i++) {
if (!args[i]) {
// TODO: Force spill
ScratchGpr({args_hostloc[i]});
}
}
for (HostLoc caller_saved : other_caller_save) {
ScratchImpl({caller_saved});
}
if (result_def) {
DefineValueImpl(result_def, ABI_RETURN);
}
}
void RegAlloc::EndOfAllocScope() {
for (auto& iter : hostloc_info) {
iter.ReleaseAll();
}
}
void RegAlloc::AssertNoMoreUses() {
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
}
HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
std::vector<HostLoc> candidates = desired_locations;
// Find all locations that have not been allocated.
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
return !this->LocInfo(loc).IsLocked();
});
candidates.erase(allocated_locs, candidates.end());
ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
// Selects the best location out of the available locations.
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
std::partition(candidates.begin(), candidates.end(), [this](auto loc){
return this->LocInfo(loc).IsEmpty();
});
return candidates.front();
}
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
for (size_t i = 0; i < hostloc_info.size(); i++)
if (hostloc_info[i].ContainsValue(value))
return static_cast<HostLoc>(i);
return std::nullopt;
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
LocInfo(host_loc).AddValue(def_inst);
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
if (use_inst.IsImmediate()) {
HostLoc location = ScratchImpl(any_gpr);
DefineValueImpl(def_inst, location);
LoadImmediate(use_inst, location);
return;
}
ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
HostLoc location = *ValueLocation(use_inst.GetInst());
DefineValueImpl(def_inst, location);
}
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
if (HostLocIsGPR(host_loc)) {
Arm64Gen::ARM64Reg reg = HostLocToReg64(host_loc);
u64 imm_value = ImmediateToU64(imm);
code.MOVI2R(reg, imm_value);
return host_loc;
}
if (HostLocIsFPR(host_loc)) {
Arm64Gen::ARM64Reg reg = Arm64Gen::EncodeRegToDouble(HostLocToFpr(host_loc));
u64 imm_value = ImmediateToU64(imm);
if (imm_value == 0)
code.fp_emitter.FMOV(reg, 0);
else {
code.EmitPatchLDR(reg, imm_value);
}
return host_loc;
}
UNREACHABLE();
}
void RegAlloc::Move(HostLoc to, HostLoc from) {
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
ASSERT(bit_width <= HostLocBitWidth(to));
if (LocInfo(from).IsEmpty()) {
return;
}
EmitMove(bit_width, to, from);
LocInfo(to) = std::exchange(LocInfo(from), {});
}
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
EmitMove(bit_width, to, from);
}
void RegAlloc::Exchange(HostLoc a, HostLoc b) {
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
if (LocInfo(a).IsEmpty()) {
Move(a, b);
return;
}
if (LocInfo(b).IsEmpty()) {
Move(b, a);
return;
}
EmitExchange(a, b);
std::swap(LocInfo(a), LocInfo(b));
}
void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
ASSERT(!LocInfo(reg).IsLocked());
if (!LocInfo(reg).IsEmpty()) {
SpillRegister(reg);
}
}
void RegAlloc::SpillRegister(HostLoc loc) {
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
HostLoc new_loc = FindFreeSpill();
Move(new_loc, loc);
}
HostLoc RegAlloc::FindFreeSpill() const {
for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
HostLoc loc = static_cast<HostLoc>(i);
if (LocInfo(loc).IsEmpty())
return loc;
}
ASSERT_FALSE("All spill locations are full");
}
HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
return hostloc_info[static_cast<size_t>(loc)];
}
const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
return hostloc_info[static_cast<size_t>(loc)];
}
void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
if (HostLocIsFPR(to) && HostLocIsFPR(from)) {
// bit_width == 128
//mov(HostLocToFpr(to), HostLocToFpr(from));
ASSERT_FALSE("Unimplemented");
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.MOV(HostLocToReg64(to), HostLocToReg64(from));
} else {
code.MOV(DecodeReg(HostLocToReg64(to)), DecodeReg(HostLocToReg64(from)));
}
} else if (HostLocIsFPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.fp_emitter.FMOV(EncodeRegToDouble(HostLocToFpr(to)), HostLocToReg64(from));
} else {
code.fp_emitter.FMOV(EncodeRegToSingle(HostLocToFpr(to)), DecodeReg(HostLocToReg64(from)));
}
} else if (HostLocIsGPR(to) && HostLocIsFPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.fp_emitter.FMOV(HostLocToReg64(to), EncodeRegToDouble(HostLocToFpr(from)));
} else {
code.fp_emitter.FMOV(DecodeReg(HostLocToReg64(to)), EncodeRegToSingle(HostLocToFpr(from)));
}
} else if (HostLocIsFPR(to) && HostLocIsSpill(from)) {
s32 spill_addr = spill_to_addr(from);
// ASSERT(spill_addr.getBit() >= bit_width);
code.fp_emitter.LDR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(to), Arm64Gen::X28, spill_addr);
} else if (HostLocIsSpill(to) && HostLocIsFPR(from)) {
s32 spill_addr = spill_to_addr(to);
// ASSERT(spill_addr.getBit() >= bit_width);
code.fp_emitter.STR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(from), Arm64Gen::X28, spill_addr);
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.LDR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(to), Arm64Gen::X28, spill_to_addr(from));
} else {
code.LDR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(to)), Arm64Gen::X28, spill_to_addr(from));
}
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.STR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(from), Arm64Gen::X28, spill_to_addr(to));
} else {
code.STR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(from)), Arm64Gen::X28, spill_to_addr(to));
}
} else {
ASSERT_FALSE("Invalid RegAlloc::EmitMove");
}
}
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
// Is this the best way to do it?
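// Three EORs perform an XOR swap, exchanging the two GPRs without needing a scratch register.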
code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
code.EOR(HostLocToReg64(b), HostLocToReg64(a), HostLocToReg64(b));
code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
} else if (HostLocIsFPR(a) && HostLocIsFPR(b)) {
ASSERT_FALSE("Check your code: Exchanging XMM registers is unnecessary");
} else {
ASSERT_FALSE("Invalid RegAlloc::EmitExchange");
}
}
} // namespace Dynarmic::BackendA64

@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <utility>
#include <vector>
#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
//#include "backend/A64/oparg.h"
#include "common/common_types.h"
#include "frontend/ir/cond.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/value.h"
namespace Dynarmic::BackendA64 {
class RegAlloc;
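// Bookkeeping for a single host location: which IR values currently live there, how many locks and references are outstanding, and the widest value it holds.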
struct HostLocInfo {
public:
bool IsLocked() const;
bool IsEmpty() const;
bool IsLastUse() const;
void ReadLock();
void WriteLock();
void AddArgReference();
void ReleaseOne();
void ReleaseAll();
bool ContainsValue(const IR::Inst* inst) const;
size_t GetMaxBitWidth() const;
void AddValue(IR::Inst* inst);
private:
// Current instruction state
size_t is_being_used_count = 0;
bool is_scratch = false;
// Block state
size_t current_references = 0;
size_t accumulated_uses = 0;
size_t total_uses = 0;
// Value state
std::vector<IR::Inst*> values;
size_t max_bit_width = 0;
};
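// A view of one argument of an IR instruction, obtained via RegAlloc::GetArgumentInfo and passed back to the Use*/UseScratch*/DefineValue methods.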
struct Argument {
public:
using copyable_reference = std::reference_wrapper<Argument>;
IR::Type GetType() const;
bool IsImmediate() const;
bool IsVoid() const;
bool FitsInImmediateU32() const;
bool FitsInImmediateS32() const;
bool GetImmediateU1() const;
u8 GetImmediateU8() const;
u16 GetImmediateU16() const;
u32 GetImmediateU32() const;
u64 GetImmediateS32() const;
u64 GetImmediateU64() const;
IR::Cond GetImmediateCond() const;
/// Is this value currently in a GPR?
bool IsInGpr() const;
/// Is this value currently in a FPR?
bool IsInFpr() const;
/// Is this value currently in memory?
bool IsInMemory() const;
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
bool allocated = false;
RegAlloc& reg_alloc;
IR::Value value;
};
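// Register allocator for the A64 backend: tracks which host register or spill slot holds each IR value and emits the moves, spills and exchanges needed to satisfy each instruction's operand constraints.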
class RegAlloc final {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
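// num_spills is the number of stack spill slots available; spill_to_addr maps a spill HostLoc to its offset from the spill base register (X28 in EmitMove).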
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<u64(HostLoc)> spill_to_addr)
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
Arm64Gen::ARM64Reg UseGpr(Argument& arg);
Arm64Gen::ARM64Reg UseFpr(Argument& arg);
//OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
Arm64Gen::ARM64Reg UseScratchGpr(Argument& arg);
Arm64Gen::ARM64Reg UseScratchFpr(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
void DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg);
void DefineValue(IR::Inst* inst, Argument& arg);
void Release(const Arm64Gen::ARM64Reg& reg);
Arm64Gen::ARM64Reg ScratchGpr(HostLocList desired_locations = any_gpr);
Arm64Gen::ARM64Reg ScratchFpr(HostLocList desired_locations = any_fpr);
void HostCall(IR::Inst* result_def = nullptr, std::optional<Argument::copyable_reference> arg0 = {},
std::optional<Argument::copyable_reference> arg1 = {},
std::optional<Argument::copyable_reference> arg2 = {},
std::optional<Argument::copyable_reference> arg3 = {},
std::optional<Argument::copyable_reference> arg4 = {},
std::optional<Argument::copyable_reference> arg5 = {},
std::optional<Argument::copyable_reference> arg6 = {},
std::optional<Argument::copyable_reference> arg7 = {});
// TODO: Values in host flags
void EndOfAllocScope();
void AssertNoMoreUses();
private:
friend struct Argument;
HostLoc SelectARegister(HostLocList desired_locations) const;
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
HostLoc ScratchImpl(HostLocList desired_locations);
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);
HostLoc LoadImmediate(IR::Value imm, HostLoc reg);
void Move(HostLoc to, HostLoc from);
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
void Exchange(HostLoc a, HostLoc b);
void MoveOutOfTheWay(HostLoc reg);
void SpillRegister(HostLoc loc);
HostLoc FindFreeSpill() const;
std::vector<HostLocInfo> hostloc_info;
HostLocInfo& LocInfo(HostLoc loc);
const HostLocInfo& LocInfo(HostLoc loc) const;
BlockOfCode& code;
std::function<u64(HostLoc)> spill_to_addr;
void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
void EmitExchange(HostLoc a, HostLoc b);
};
} // namespace Dynarmic::BackendA64

@ -44,4 +44,9 @@ u8 RecipEstimate(u64 a);
*/
u8 RecipSqrtEstimate(u64 a);
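/// True iff imm is a positive power of two, e.g. IsPow2(8) == true, IsPow2(12) == false, IsPow2(0) == false.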
template <typename T>
constexpr bool IsPow2(T imm) {
return imm > 0 && (imm & (imm - 1)) == 0;
}
} // namespace Dynarmic::Common