Debt: backport A64 backend
enable W^X on Apple silicon

parent 97edb626c7
commit df9d373a84
@@ -8,9 +8,25 @@ if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
     set(MASTER_PROJECT ON)
 endif()
 
+# Add the module directory to the list of paths
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
+
+# Arch detection
+include(DetectArchitecture)
+if (NOT DEFINED ARCHITECTURE)
+    message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
+endif()
+message(STATUS "Target architecture: ${ARCHITECTURE}")
+
+set(REQUIRES_NO_EXECUTE_SUPPORT OFF)
+# Apple Silicon chips require W^X
+if(APPLE AND ARCHITECTURE STREQUAL "arm64")
+    set(REQUIRES_NO_EXECUTE_SUPPORT ON)
+endif()
+
 # Dynarmic project options
 option(DYNARMIC_ENABLE_CPU_FEATURE_DETECTION "Turning this off causes dynarmic to assume the host CPU doesn't support anything later than SSE3" ON)
-option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" OFF)
+option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" ${REQUIRES_NO_EXECUTE_SUPPORT})
 option(DYNARMIC_FATAL_ERRORS "Errors are fatal" OFF)
 option(DYNARMIC_IGNORE_ASSERTS "Ignore asserts" OFF)
 option(DYNARMIC_TESTS "Build tests" ${MASTER_PROJECT})
@@ -39,9 +55,6 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
     message(SEND_ERROR "In-source builds are not allowed.")
 endif()
 
-# Add the module directory to the list of paths
-list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
-
 # Compiler flags
 if (MSVC)
     set(DYNARMIC_CXX_FLAGS
@@ -105,13 +118,6 @@ else()
     endif()
 endif()
 
-# Arch detection
-include(DetectArchitecture)
-if (NOT DEFINED ARCHITECTURE)
-    message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
-endif()
-message(STATUS "Target architecture: ${ARCHITECTURE}")
-
 # Include Boost
 if (NOT TARGET boost)
     if (NOT Boost_INCLUDE_DIRS)
@@ -365,55 +365,66 @@ if (ARCHITECTURE STREQUAL "x86_64")
     else()
         target_sources(dynarmic PRIVATE backend/x64/exception_handler_generic.cpp)
     endif()
 
 elseif(ARCHITECTURE STREQUAL "arm64")
-    target_link_libraries(dynarmic PRIVATE $<BUILD_INTERFACE:merry::oaknut>)
-
     target_sources(dynarmic PRIVATE
-        backend/arm64/a32_jitstate.cpp
-        backend/arm64/a32_jitstate.h
-        backend/arm64/abi.cpp
-        backend/arm64/abi.h
-        backend/arm64/devirtualize.h
-        backend/arm64/emit_arm64.cpp
-        backend/arm64/emit_arm64.h
-        backend/arm64/emit_arm64_a32.cpp
-        backend/arm64/emit_arm64_a32_coprocessor.cpp
-        backend/arm64/emit_arm64_a32_memory.cpp
-        backend/arm64/emit_arm64_a64.cpp
-        backend/arm64/emit_arm64_a64_memory.cpp
-        backend/arm64/emit_arm64_cryptography.cpp
-        backend/arm64/emit_arm64_data_processing.cpp
-        backend/arm64/emit_arm64_floating_point.cpp
-        backend/arm64/emit_arm64_packed.cpp
-        backend/arm64/emit_arm64_saturation.cpp
-        backend/arm64/emit_arm64_vector.cpp
-        backend/arm64/emit_arm64_vector_floating_point.cpp
-        backend/arm64/emit_arm64_vector_saturation.cpp
-        backend/arm64/emit_context.h
-        backend/arm64/exclusive_monitor.cpp
-        backend/arm64/fpsr_manager.cpp
-        backend/arm64/fpsr_manager.h
-        backend/arm64/reg_alloc.cpp
-        backend/arm64/reg_alloc.h
-        backend/arm64/stack_layout.h
-        common/spin_lock_arm64.cpp
-        common/spin_lock_arm64.h
+        backend/A64/emitter/a64_emitter.cpp
+        backend/A64/emitter/a64_emitter.h
+        backend/A64/emitter/arm_common.h
+        backend/A64/emitter/code_block.h
+        # backend/A64/a64_emit_a64.cpp
+        # backend/A64/a64_emit_a64.h
+        # backend/A64/a64_exclusive_monitor.cpp
+        # backend/A64/a64_interface.cpp
+        # backend/A64/a64_jitstate.cpp
+        # backend/A64/a64_jitstate.h
+        backend/A64/abi.cpp
+        backend/A64/abi.h
+        backend/A64/block_of_code.cpp
+        backend/A64/block_of_code.h
+        backend/A64/block_range_information.cpp
+        backend/A64/block_range_information.h
+        backend/A64/callback.cpp
+        backend/A64/callback.h
+        backend/A64/constant_pool.cpp
+        backend/A64/constant_pool.h
+        backend/A64/devirtualize.h
+        backend/A64/emit_a64.cpp
+        backend/A64/emit_a64.h
+        # backend/A64/emit_a64_aes.cpp
+        # backend/A64/emit_a64_crc32.cpp
+        backend/A64/emit_a64_data_processing.cpp
+        backend/A64/emit_a64_floating_point.cpp
+        backend/A64/emit_a64_packed.cpp
+        backend/A64/emit_a64_saturation.cpp
+        # backend/A64/emit_a64_sm4.cpp
+        # backend/A64/emit_a64_vector.cpp
+        # backend/A64/emit_a64_vector_floating_point.cpp
+        backend/A64/exception_handler.h
+        backend/A64/hostloc.cpp
+        backend/A64/hostloc.h
+        backend/A64/jitstate_info.h
+        backend/A64/opcodes.inc
+        backend/A64/perf_map.cpp
+        backend/A64/perf_map.h
+        backend/A64/reg_alloc.cpp
+        backend/A64/reg_alloc.h
     )
 
     if ("A32" IN_LIST DYNARMIC_FRONTENDS)
         target_sources(dynarmic PRIVATE
-            backend/arm64/a32_address_space.cpp
-            backend/arm64/a32_address_space.h
-            backend/arm64/a32_core.h
-            backend/arm64/a32_interface.cpp
-            # Move this to the list below when implemented
-            backend/arm64/a64_interface.cpp
+            backend/A64/a32_emit_a64.cpp
+            backend/A64/a32_emit_a64.h
+            backend/A64/a32_interface.cpp
+            backend/A64/a32_jitstate.cpp
+            backend/A64/a32_jitstate.h
         )
     endif()
 
-    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-        message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
+    if (UNIX)
+        target_sources(dynarmic PRIVATE backend/A64/exception_handler_posix.cpp)
+    else()
+        target_sources(dynarmic PRIVATE backend/A64/exception_handler_generic.cpp)
     endif()
 else()
     message(FATAL_ERROR "Unsupported architecture")

src/dynarmic/backend/A64/a32_emit_a64.cpp (new file, +1594)
File diff suppressed because it is too large.

src/dynarmic/backend/A64/a32_emit_a64.h (new file, +138)
@@ -0,0 +1,138 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <array>
#include <functional>
#include <optional>
#include <set>
#include <tuple>
#include <unordered_map>

#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_range_information.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/exception_handler.h"
#include "dynarmic/A32/a32.h"
#include "dynarmic/A32/config.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/ir/terminal.h"

namespace Dynarmic::BackendA64 {

struct A64State;
class RegAlloc;

struct A32EmitContext final : public EmitContext {
    A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
    A32::LocationDescriptor Location() const;
    bool IsSingleStep() const;
    FP::RoundingMode FPSCR_RMode() const override;
    u32 FPCR() const override;
    bool FPSCR_FTZ() const override;
    bool FPSCR_DN() const override;
    std::ptrdiff_t GetInstOffset(IR::Inst* inst) const;
};

class A32EmitA64 final : public EmitA64 {
public:
    A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface);
    ~A32EmitA64() override;

    /**
     * Emit host machine code for a basic block with intermediate representation `ir`.
     * @note ir is modified.
     */
    BlockDescriptor Emit(IR::Block& ir);

    void ClearCache() override;

    void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);

    void FastmemCallback(CodePtr PC);

protected:
    const A32::UserConfig config;
    A32::Jit* jit_interface;
    BlockRangeInformation<u32> block_ranges;
    ExceptionHandler exception_handler;

    void EmitCondPrelude(const A32EmitContext& ctx);

    struct FastDispatchEntry {
        u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
        const void* code_ptr = nullptr;
    };
    static_assert(sizeof(FastDispatchEntry) == 0x10);
    static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
    static constexpr size_t fast_dispatch_table_size = 0x10000;
    std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
    void ClearFastDispatchTable();

    using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
    std::set<DoNotFastmemMarker> do_not_fastmem;
    DoNotFastmemMarker GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst);
    void DoNotFastmem(const DoNotFastmemMarker& marker);
    bool ShouldFastmem(const DoNotFastmemMarker& marker) const;

    const void* read_memory_8;
    const void* read_memory_16;
    const void* read_memory_32;
    const void* read_memory_64;
    const void* write_memory_8;
    const void* write_memory_16;
    const void* write_memory_32;
    const void* write_memory_64;
    void GenMemoryAccessors();
    template<typename T>
    void ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
    template<typename T>
    void WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);

    const void* terminal_handler_pop_rsb_hint;
    const void* terminal_handler_fast_dispatch_hint = nullptr;
    FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
    void GenTerminalHandlers();

    // Microinstruction emitters
#define OPCODE(...)
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
#define A64OPC(...)
#include "frontend/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC

    // Helpers
    std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;

    // Fastmem
    struct FastmemPatchInfo {
        std::function<void()> callback;
    };
    std::unordered_map<CodePtr, FastmemPatchInfo> fastmem_patch_info;

    // Terminal instruction emitters
    void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
    void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;

    // Patching
    void Unpatch(const IR::LocationDescriptor& target_desc) override;
    void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) override;
};

} // namespace Dynarmic::BackendA64

src/dynarmic/backend/A64/a32_interface.cpp (new file, +323)
@@ -0,0 +1,323 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include <memory>

#include <boost/icl/interval_set.hpp>
#include <fmt/format.h>

#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/context.h>

#include "backend/A64/a32_emit_a64.h"
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/callback.h"
#include "backend/A64/devirtualize.h"
#include "backend/A64/jitstate_info.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/llvm_disassemble.h"
#include "common/scope_exit.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/location_descriptor.h"
#include "ir_opt/passes.h"

namespace Dynarmic::A32 {

using namespace BackendA64;

static RunCodeCallbacks GenRunCodeCallbacks(const A32::UserConfig& config, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
    return RunCodeCallbacks{
        std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
        std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks)),
        std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks)),
        reinterpret_cast<u64>(config.fastmem_pointer),
    };
}

struct Jit::Impl {
    Impl(Jit* jit, A32::UserConfig config)
        : block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
        , emitter(block_of_code, config, jit)
        , config(std::move(config))
        , jit_interface(jit)
    {}

    A32JitState jit_state;
    BlockOfCode block_of_code;
    A32EmitA64 emitter;

    const A32::UserConfig config;

    // Requests made during execution to invalidate the cache are queued up here.
    size_t invalid_cache_generation = 0;
    boost::icl::interval_set<u32> invalid_cache_ranges;
    bool invalidate_entire_cache = false;

    void Execute() {
        const CodePtr current_codeptr = [this]{
            // RSB optimization
            const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
            if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
                jit_state.rsb_ptr = new_rsb_ptr;
                return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
            }

            return GetCurrentBlock();
        }();

        block_of_code.RunCode(&jit_state, current_codeptr);
    }

    void Step() {
        block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
    }

    std::string Disassemble(const IR::LocationDescriptor& descriptor) {
        auto block = GetBasicBlock(descriptor);
        std::string result = fmt::format("address: {}\nsize: {} bytes\n", block.entrypoint, block.size);
#ifdef DYNARMIC_USE_LLVM
        for (const u32* pos = reinterpret_cast<const u32*>(block.entrypoint);
             reinterpret_cast<const u8*>(pos) < reinterpret_cast<const u8*>(block.entrypoint) + block.size; pos += 1) {
            fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast<u64>(pos), *pos);
            fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos)));
            result += Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos));
        }
#endif
        return result;
    }

    void PerformCacheInvalidation() {
        if (invalidate_entire_cache) {
            jit_state.ResetRSB();
            block_of_code.ClearCache();
            emitter.ClearCache();

            invalid_cache_ranges.clear();
            invalidate_entire_cache = false;
            invalid_cache_generation++;
            return;
        }

        if (invalid_cache_ranges.empty()) {
            return;
        }

        jit_state.ResetRSB();
        emitter.InvalidateCacheRanges(invalid_cache_ranges);
        invalid_cache_ranges.clear();
        invalid_cache_generation++;
    }

    void RequestCacheInvalidation() {
        if (jit_interface->is_executing) {
            jit_state.halt_requested = true;
            return;
        }

        PerformCacheInvalidation();
    }

private:
    Jit* jit_interface;

    static CodePtr GetCurrentBlockThunk(void* this_voidptr) {
        Jit::Impl& this_ = *static_cast<Jit::Impl*>(this_voidptr);
        return this_.GetCurrentBlock();
    }

    IR::LocationDescriptor GetCurrentLocation() const {
        return IR::LocationDescriptor{jit_state.GetUniqueHash()};
    }

    CodePtr GetCurrentBlock() {
        return GetBasicBlock(GetCurrentLocation()).entrypoint;
    }

    CodePtr GetCurrentSingleStep() {
        return GetBasicBlock(A32::LocationDescriptor{GetCurrentLocation()}.SetSingleStepping(true)).entrypoint;
    }

    A32EmitA64::BlockDescriptor GetBasicBlock(IR::LocationDescriptor descriptor) {
        auto block = emitter.GetBasicBlock(descriptor);
        if (block)
            return *block;

        constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
        if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
            invalidate_entire_cache = true;
            PerformCacheInvalidation();
        }

        IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); }, {config.define_unpredictable_behaviour, config.hook_hint_instructions});
        if (config.enable_optimizations) {
            Optimization::A32GetSetElimination(ir_block);
            Optimization::DeadCodeElimination(ir_block);
            Optimization::A32ConstantMemoryReads(ir_block, config.callbacks);
            Optimization::ConstantPropagation(ir_block);
            Optimization::DeadCodeElimination(ir_block);
            Optimization::A32MergeInterpretBlocksPass(ir_block, config.callbacks);
        }
        Optimization::VerificationPass(ir_block);
        return emitter.Emit(ir_block);
    }
};

Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, std::move(config))) {}

Jit::~Jit() = default;

void Jit::Run() {
    ASSERT(!is_executing);
    is_executing = true;
    SCOPE_EXIT { this->is_executing = false; };

    impl->jit_state.halt_requested = false;

    impl->Execute();

    impl->PerformCacheInvalidation();
}

void Jit::Step() {
    ASSERT(!is_executing);
    is_executing = true;
    SCOPE_EXIT { this->is_executing = false; };

    impl->jit_state.halt_requested = true;

    impl->Step();

    impl->PerformCacheInvalidation();
}

void Jit::ClearCache() {
    impl->invalidate_entire_cache = true;
    impl->RequestCacheInvalidation();
}

void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
    impl->invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
    impl->RequestCacheInvalidation();
}

void Jit::Reset() {
    ASSERT(!is_executing);
    impl->jit_state = {};
}

void Jit::HaltExecution() {
    impl->jit_state.halt_requested = true;
}

std::array<u32, 16>& Jit::Regs() {
    return impl->jit_state.Reg;
}
const std::array<u32, 16>& Jit::Regs() const {
    return impl->jit_state.Reg;
}

std::array<u32, 64>& Jit::ExtRegs() {
    return impl->jit_state.ExtReg;
}

const std::array<u32, 64>& Jit::ExtRegs() const {
    return impl->jit_state.ExtReg;
}

u32 Jit::Cpsr() const {
    return impl->jit_state.Cpsr();
}

void Jit::SetCpsr(u32 value) {
    return impl->jit_state.SetCpsr(value);
}

u32 Jit::Fpscr() const {
    return impl->jit_state.Fpscr();
}

void Jit::SetFpscr(u32 value) {
    return impl->jit_state.SetFpscr(value);
}

Context Jit::SaveContext() const {
    Context ctx;
    SaveContext(ctx);
    return ctx;
}

struct Context::Impl {
    A32JitState jit_state;
    size_t invalid_cache_generation;
};

Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
Context::~Context() = default;
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) {
    *impl = *ctx.impl;
    return *this;
}
Context& Context::operator=(Context&& ctx) noexcept {
    impl = std::move(ctx.impl);
    return *this;
}

std::array<std::uint32_t, 16>& Context::Regs() {
    return impl->jit_state.Reg;
}
const std::array<std::uint32_t, 16>& Context::Regs() const {
    return impl->jit_state.Reg;
}
std::array<std::uint32_t, 64>& Context::ExtRegs() {
    return impl->jit_state.ExtReg;
}
const std::array<std::uint32_t, 64>& Context::ExtRegs() const {
    return impl->jit_state.ExtReg;
}

std::uint32_t Context::Cpsr() const {
    return impl->jit_state.Cpsr();
}
void Context::SetCpsr(std::uint32_t value) {
    impl->jit_state.SetCpsr(value);
}

std::uint32_t Context::Fpscr() const {
    return impl->jit_state.Fpscr();
}
void Context::SetFpscr(std::uint32_t value) {
    return impl->jit_state.SetFpscr(value);
}

void Jit::SaveContext(Context& ctx) const {
    ctx.impl->jit_state.TransferJitState(impl->jit_state, false);
    ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
}

void Jit::LoadContext(const Context& ctx) {
    bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
    impl->jit_state.TransferJitState(ctx.impl->jit_state, reset_rsb);
}

std::string Jit::Disassemble() const {
    std::string result;
#ifdef DYNARMIC_USE_LLVM
    for (const u32* pos = reinterpret_cast<const u32*>(impl->block_of_code.GetCodeBegin());
         reinterpret_cast<const u8*>(pos) < reinterpret_cast<const u8*>(impl->block_of_code.GetCodePtr()); pos += 1) {
        fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast<u64>(pos), *pos);
        fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos)));
        result += Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos));
    }
#endif
    return result;
}

} // namespace Dynarmic::A32

src/dynarmic/backend/A64/a32_jitstate.cpp (new file, +172)
@@ -0,0 +1,172 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/A32/location_descriptor.h"

namespace Dynarmic::BackendA64 {

/**
 * CPSR Bits
 * =========
 *
 * ARM CPSR flags
 * --------------
 * N        bit 31       Negative flag
 * Z        bit 30       Zero flag
 * C        bit 29       Carry flag
 * V        bit 28       oVerflow flag
 * Q        bit 27       Saturation flag
 * IT[1:0]  bits 25-26   If-Then execution state (lower 2 bits)
 * J        bit 24       Jazelle instruction set flag
 * GE       bits 16-19   Greater than or Equal flags
 * IT[7:2]  bits 10-15   If-Then execution state (upper 6 bits)
 * E        bit 9        Data Endianness flag
 * A        bit 8        Disable imprecise Aborts
 * I        bit 7        Disable IRQ interrupts
 * F        bit 6        Disable FIQ interrupts
 * T        bit 5        Thumb instruction set flag
 * M        bits 0-4     Processor Mode bits
 *
 * A64 flags
 * -------------------
 * N    bit 31       Negative flag
 * Z    bit 30       Zero flag
 * C    bit 29       Carry flag
 * V    bit 28       oVerflow flag
 */

u32 A32JitState::Cpsr() const {
    DEBUG_ASSERT((cpsr_nzcv & ~0xF0000000) == 0);
    DEBUG_ASSERT((cpsr_q & ~1) == 0);
    DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);

    u32 cpsr = 0;

    // NZCV flags
    cpsr |= cpsr_nzcv;
    // Q flag
    cpsr |= cpsr_q ? 1 << 27 : 0;
    // GE flags
    cpsr |= Common::Bit<31>(cpsr_ge) ? 1 << 19 : 0;
    cpsr |= Common::Bit<23>(cpsr_ge) ? 1 << 18 : 0;
    cpsr |= Common::Bit<15>(cpsr_ge) ? 1 << 17 : 0;
    cpsr |= Common::Bit<7>(cpsr_ge) ? 1 << 16 : 0;
    // E flag, T flag
    cpsr |= Common::Bit<1>(upper_location_descriptor) ? 1 << 9 : 0;
    cpsr |= Common::Bit<0>(upper_location_descriptor) ? 1 << 5 : 0;
    // IT state
    cpsr |= static_cast<u32>(upper_location_descriptor & 0b11111100'00000000);
    cpsr |= static_cast<u32>(upper_location_descriptor & 0b00000011'00000000) << 17;
    // Other flags
    cpsr |= cpsr_jaifm;

    return cpsr;
}

void A32JitState::SetCpsr(u32 cpsr) {
    // NZCV flags
    cpsr_nzcv = cpsr & 0xF0000000;
    // Q flag
    cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;
    // GE flags
    cpsr_ge = 0;
    cpsr_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
    cpsr_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
    cpsr_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
    cpsr_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;

    upper_location_descriptor &= 0xFFFF0000;
    // E flag, T flag
    upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0;
    upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0;
    // IT state
    upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
    upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;

    // Other flags
    cpsr_jaifm = cpsr & 0x010001DF;
}

void A32JitState::ResetRSB() {
    rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
    rsb_codeptrs.fill(0);
}

/**
 * FPSCR
 * =========================
 *
 * VFP FPSCR cumulative exception bits
 * -----------------------------------
 * IDC  bit 7   Input Denormal cumulative exception bit   // Only ever set when FPSCR.FTZ = 1
 * IXC  bit 4   Inexact cumulative exception bit
 * UFC  bit 3   Underflow cumulative exception bit
 * OFC  bit 2   Overflow cumulative exception bit
 * DZC  bit 1   Division by Zero cumulative exception bit
 * IOC  bit 0   Invalid Operation cumulative exception bit
 *
 * VFP FPSCR exception trap enables
 * --------------------------------
 * IDE  bit 15  Input Denormal exception trap enable
 * IXE  bit 12  Inexact exception trap enable
 * UFE  bit 11  Underflow exception trap enable
 * OFE  bit 10  Overflow exception trap enable
 * DZE  bit 9   Division by Zero exception trap enable
 * IOE  bit 8   Invalid Operation exception trap enable
 *
 * VFP FPSCR mode bits
 * -------------------
 * AHP      bit 26      Alternate half-precision
 * DN       bit 25      Default NaN
 * FZ       bit 24      Flush to Zero
 * RMode    bits 22-23  Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
 * Stride   bits 20-21  Vector stride
 * Len      bits 16-18  Vector length
 */

// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
constexpr u32 FPSCR_MODE_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;

u32 A32JitState::Fpscr() const {
    DEBUG_ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);

    const u32 fpcr_mode = static_cast<u32>(upper_location_descriptor) & FPSCR_MODE_MASK;

    u32 FPSCR = fpcr_mode | fpsr_nzcv;
    FPSCR |= (guest_fpsr & 0x1F);
    FPSCR |= fpsr_exc;

    return FPSCR;
}

void A32JitState::SetFpscr(u32 FPSCR) {
    // Ensure that only upper half of upper_location_descriptor is used for FPSCR bits.
    static_assert((FPSCR_MODE_MASK & 0xFFFF0000) == FPSCR_MODE_MASK);

    upper_location_descriptor &= 0x0000FFFF;
    upper_location_descriptor |= FPSCR & FPSCR_MODE_MASK;

    fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK;
    guest_fpcr = 0;
    guest_fpsr = 0;

    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
    fpsr_exc = FPSCR & 0x9F;

    // Mode Bits
    guest_fpcr |= FPSCR & 0x07C09F00;

    // Exceptions
    guest_fpsr |= FPSCR & 0x9F;
}

} // namespace Dynarmic::BackendA64

src/dynarmic/backend/A64/a32_jitstate.h (new file, +111)
@@ -0,0 +1,111 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <array>

#include "common/common_types.h"

namespace Dynarmic::BackendA64 {

class BlockOfCode;

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
#endif

struct A32JitState {
    using ProgramCounterType = u32;

    A32JitState() { ResetRSB(); }

    std::array<u32, 16> Reg{}; // Current register file.
    // TODO: Mode-specific register sets unimplemented.

    u32 upper_location_descriptor = 0;

    u32 cpsr_ge = 0;
    u32 cpsr_q = 0;
    u32 cpsr_nzcv = 0;
    u32 cpsr_jaifm = 0;
    u32 Cpsr() const;
    void SetCpsr(u32 cpsr);

    alignas(u64) std::array<u32, 64> ExtReg{}; // Extension registers.

    static constexpr size_t SpillCount = 64;
    std::array<u64, SpillCount> Spill{}; // Spill.
    static size_t GetSpillLocationOffsetFromIndex(size_t i) {
        return static_cast<u64>(offsetof(A32JitState, Spill) + i * sizeof(u64));
    }

    // For internal use (See: BlockOfCode::RunCode)
    u64 guest_fpcr = 0;
    u64 guest_fpsr = 0;
    u64 save_host_FPCR = 0;
    s64 cycles_to_run = 0;
    s64 cycles_remaining = 0;
    bool halt_requested = false;
    bool check_bit = false;

    // Exclusive state
    static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
    u32 exclusive_state = 0;
    u32 exclusive_address = 0;

    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
    static constexpr size_t RSBPtrMask = RSBSize - 1;
    u32 rsb_ptr = 0;
    std::array<u64, RSBSize> rsb_location_descriptors;
    std::array<u64, RSBSize> rsb_codeptrs;
    void ResetRSB();

    u32 fpsr_exc = 0;
    u32 fpsr_qc = 0; // Dummy value
    u32 fpsr_nzcv = 0;
    u32 Fpscr() const;
    void SetFpscr(u32 FPSCR);

    u64 GetUniqueHash() const noexcept {
        return (static_cast<u64>(upper_location_descriptor) << 32) | (static_cast<u64>(Reg[15]));
    }

    void TransferJitState(const A32JitState& src, bool reset_rsb) {
        Reg = src.Reg;
        upper_location_descriptor = src.upper_location_descriptor;
        cpsr_ge = src.cpsr_ge;
        cpsr_q = src.cpsr_q;
        cpsr_nzcv = src.cpsr_nzcv;
        cpsr_jaifm = src.cpsr_jaifm;
        ExtReg = src.ExtReg;
        guest_fpcr = src.guest_fpcr;
        guest_fpsr = src.guest_fpsr;
        fpsr_exc = src.fpsr_exc;
        fpsr_qc = src.fpsr_qc;
        fpsr_nzcv = src.fpsr_nzcv;

        exclusive_state = 0;
        exclusive_address = 0;

        if (reset_rsb) {
            ResetRSB();
        } else {
            rsb_ptr = src.rsb_ptr;
            rsb_location_descriptors = src.rsb_location_descriptors;
            rsb_codeptrs = src.rsb_codeptrs;
        }
    }
};

#ifdef _MSC_VER
#pragma warning(pop)
#endif

using CodePtr = const void*;

} // namespace Dynarmic::BackendA64

src/dynarmic/backend/A64/abi.cpp (new file, +87)
@@ -0,0 +1,87 @@
// Copyright (C) 2003 Dolphin Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// 20th Sep 2018: This code was modified for Dynarmic.

#include <algorithm>
#include <vector>

#include "backend/A64/abi.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/iterator_util.h"

namespace Dynarmic::BackendA64 {

template<typename RegisterArrayT>
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
    u32 gprs = 0, fprs = 0;

    for (HostLoc reg : regs) {
        if (HostLocIsGPR(reg)) {
            gprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
        } else if (HostLocIsFPR(reg)) {
            fprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
        }
    }

    code.fp_emitter.ABI_PushRegisters(fprs);
    code.ABI_PushRegisters(gprs);
}

template<typename RegisterArrayT>
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
    u32 gprs = 0, fprs = 0;

    for (HostLoc reg : regs) {
        if (HostLocIsGPR(reg)) {
            gprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
        } else if (HostLocIsFPR(reg)) {
            fprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
        }
    }

    code.ABI_PopRegisters(gprs);
    code.fp_emitter.ABI_PopRegisters(fprs);
}

void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
    ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}

void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
    ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}

void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
    ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}

void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
    ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}

void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
    std::vector<HostLoc> regs;
    std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
    ABI_PushRegistersAndAdjustStack(code, regs);
}

void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
    std::vector<HostLoc> regs;
    std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
    ABI_PopRegistersAndAdjustStack(code, regs);
}

} // namespace Dynarmic::BackendX64

src/dynarmic/backend/A64/abi.h (new file, +110)
@@ -0,0 +1,110 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
#pragma once

#include <array>

#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"

namespace Dynarmic::BackendA64 {

constexpr HostLoc ABI_RETURN = HostLoc::X0;

constexpr HostLoc ABI_PARAM1 = HostLoc::X0;
constexpr HostLoc ABI_PARAM2 = HostLoc::X1;
constexpr HostLoc ABI_PARAM3 = HostLoc::X2;
constexpr HostLoc ABI_PARAM4 = HostLoc::X3;
constexpr HostLoc ABI_PARAM5 = HostLoc::X4;
constexpr HostLoc ABI_PARAM6 = HostLoc::X5;
constexpr HostLoc ABI_PARAM7 = HostLoc::X6;
constexpr HostLoc ABI_PARAM8 = HostLoc::X7;

constexpr std::array<HostLoc, 43> ABI_ALL_CALLER_SAVE = {
    HostLoc::X0,
    HostLoc::X1,
    HostLoc::X2,
    HostLoc::X3,
    HostLoc::X4,
    HostLoc::X5,
    HostLoc::X6,
    HostLoc::X7,
    HostLoc::X8,
    HostLoc::X9,
    HostLoc::X10,
    HostLoc::X11,
    HostLoc::X12,
    HostLoc::X13,
    HostLoc::X14,
    HostLoc::X15,
    HostLoc::X16,
    HostLoc::X17,
    HostLoc::X18,

    HostLoc::Q0,
    HostLoc::Q1,
    HostLoc::Q2,
    HostLoc::Q3,
    HostLoc::Q4,
    HostLoc::Q5,
    HostLoc::Q6,
    HostLoc::Q7,

    HostLoc::Q16,
    HostLoc::Q17,
    HostLoc::Q18,
    HostLoc::Q19,
    HostLoc::Q20,
    HostLoc::Q21,
    HostLoc::Q22,
    HostLoc::Q23,
    HostLoc::Q24,
    HostLoc::Q25,
    HostLoc::Q26,
    HostLoc::Q27,
    HostLoc::Q28,
    HostLoc::Q29,
    HostLoc::Q30,
    HostLoc::Q31,
};

constexpr std::array<HostLoc, 20> ABI_ALL_CALLEE_SAVE = {
    HostLoc::X19,
    HostLoc::X20,
    HostLoc::X21,
    HostLoc::X22,
    HostLoc::X23,
    HostLoc::X24,
    HostLoc::X25,
    HostLoc::X26,
    HostLoc::X27,
    HostLoc::X28,
    HostLoc::X29,
    HostLoc::X30,

    HostLoc::Q8,
    HostLoc::Q9,
    HostLoc::Q10,
    HostLoc::Q11,
    HostLoc::Q12,
    HostLoc::Q13,
    HostLoc::Q14,
    HostLoc::Q15,
};

constexpr size_t ABI_SHADOW_SPACE = 0; // bytes

static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 63, "Invalid total number of registers");

void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code);

void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);

} // namespace Dynarmic::BackendX64

src/dynarmic/backend/A64/block_of_code.cpp (new file, +336)
@@ -0,0 +1,336 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include <array>
#include <cstring>
#include <limits>

#include "backend/A64/a32_jitstate.h"
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif

#ifdef __APPLE__
#include <pthread.h>
#endif

namespace Dynarmic::BackendA64 {

const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1;

const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM3 = Arm64Gen::ARM64Reg::X2;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM4 = Arm64Gen::ARM64Reg::X3;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM5 = Arm64Gen::ARM64Reg::X4;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM6 = Arm64Gen::ARM64Reg::X5;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM7 = Arm64Gen::ARM64Reg::X6;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7;

const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30;

const std::array<Arm64Gen::ARM64Reg, 8> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2,
                                                                   BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4,
                                                                   BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6,
                                                                   BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8};

namespace {

constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;

#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory([[maybe_unused]] const void* base, [[maybe_unused]] size_t size, bool is_executable) {
#if defined(_WIN32)
    DWORD oldProtect = 0;
    VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#elif defined(__APPLE__)
    pthread_jit_write_protect_np(is_executable);
#else
    static const size_t pageSize = sysconf(_SC_PAGESIZE);
    const size_t iaddr = reinterpret_cast<size_t>(base);
    const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
    const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
    mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif
}
#endif

} // anonymous namespace

BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
    : fp_emitter(this)
    , cb(std::move(cb))
    , jsi(jsi)
    , constant_pool(*this) {
    AllocCodeSpace(TOTAL_CODE_SIZE);
    EnableWriting();
    GenRunCode();
}

void BlockOfCode::PreludeComplete() {
    prelude_complete = true;
    near_code_begin = GetCodePtr();
    far_code_begin = GetCodePtr() + FAR_CODE_OFFSET;
    FlushIcache();
    ClearCache();
    DisableWriting();
}

void BlockOfCode::EnableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, false);
#endif
}

void BlockOfCode::DisableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, true);
#endif
}

void BlockOfCode::ClearCache() {
    ASSERT(prelude_complete);
    in_far_code = false;
    near_code_ptr = near_code_begin;
    far_code_ptr = far_code_begin;
    SetCodePtr(near_code_begin);
    constant_pool.Clear();
}

size_t BlockOfCode::SpaceRemaining() const {
    ASSERT(prelude_complete);
    // This function provides an underestimate of near-code-size but that's okay.
    // (Why? The maximum size of near code should be measured from near_code_begin, not top_.)
    // These are offsets from Xbyak::CodeArray::top_.
    std::size_t far_code_offset, near_code_offset;
    if (in_far_code) {
        near_code_offset = static_cast<const u8*>(near_code_ptr) - static_cast<const u8*>(region);
        far_code_offset = GetCodePtr() - static_cast<const u8*>(region);
    } else {
        near_code_offset = GetCodePtr() - static_cast<const u8*>(region);
        far_code_offset = static_cast<const u8*>(far_code_ptr) - static_cast<const u8*>(region);
    }
    if (far_code_offset > TOTAL_CODE_SIZE)
        return 0;
    if (near_code_offset > FAR_CODE_OFFSET)
        return 0;
    return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
}

void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
    run_code(jit_state, code_ptr);
}

void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
    step_code(jit_state, code_ptr);
}

void BlockOfCode::ReturnFromRunCode(bool fpscr_already_exited) {
    size_t index = 0;
    if (fpscr_already_exited)
        index |= FPSCR_ALREADY_EXITED;
    B(return_from_run_code[index]);
}

void BlockOfCode::ForceReturnFromRunCode(bool fpscr_already_exited) {
    size_t index = FORCE_RETURN;
    if (fpscr_already_exited)
        index |= FPSCR_ALREADY_EXITED;
    B(return_from_run_code[index]);
}

void BlockOfCode::GenRunCode() {
    const u8* loop, *enter_fpscr_then_loop;

    AlignCode16();
    run_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());

    // This serves two purposes:
    // 1. It saves all the registers we as a callee need to save.
    // 2. It aligns the stack so that the code the JIT emits can assume
    //    that the stack is appropriately aligned for CALLs.
    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);

    MOV(Arm64Gen::X28, ABI_PARAM1);
    MOVI2R(Arm64Gen::X27, cb.value_in_X27);
    MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register

    cb.GetTicksRemaining->EmitCall(*this);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
    MOV(Arm64Gen::X26, ABI_RETURN);

    SwitchFpscrOnEntry();
    BR(Arm64Gen::X25);

    AlignCode16();
    step_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);

    MOV(Arm64Gen::X28, ABI_PARAM1);

    MOVI2R(Arm64Gen::X26, 1);
    STR(Arm64Gen::INDEX_UNSIGNED, Arm64Gen::X26, Arm64Gen::X28, jsi.offsetof_cycles_to_run);

    SwitchFpscrOnEntry();
    BR(ABI_PARAM2);

    enter_fpscr_then_loop = GetCodePtr();
    SwitchFpscrOnEntry();
    loop = GetCodePtr();
    cb.LookupBlock->EmitCall(*this);
    BR(ABI_RETURN);

    // Return from run code variants
    const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){
        if (!force_return) {
            CMP(Arm64Gen::X26, Arm64Gen::ZR);
            B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop);
        }

        if (!fpscr_already_exited) {
            SwitchFpscrOnExit();
        }

        cb.AddTicks->EmitCall(*this, [this](RegList param) {
            LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
            SUB(param[0], param[0], Arm64Gen::X26);
        });

        ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
        RET();
    };

    return_from_run_code[0] = AlignCode16();
    emit_return_from_run_code(false, false);

    return_from_run_code[FPSCR_ALREADY_EXITED] = AlignCode16();
    emit_return_from_run_code(true, false);

    return_from_run_code[FORCE_RETURN] = AlignCode16();
    emit_return_from_run_code(false, true);

    return_from_run_code[FPSCR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16();
    emit_return_from_run_code(true, true);

    PerfMapRegister(run_code, GetCodePtr(), "dynarmic_dispatcher");
}

void BlockOfCode::SwitchFpscrOnEntry() {
    MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);

    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
    _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
    _MSR(Arm64Gen::FIELD_FPSR, ABI_SCRATCH1);
}

void BlockOfCode::SwitchFpscrOnExit() {
    MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
    MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPSR);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);

    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
    _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}

void BlockOfCode::UpdateTicks() {
    cb.AddTicks->EmitCall(*this, [this](RegList param) {
        LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
        SUB(param[0], param[0], Arm64Gen::X26);
    });

    cb.GetTicksRemaining->EmitCall(*this);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
    MOV(Arm64Gen::X26, ABI_RETURN);
}

void BlockOfCode::LookupBlock() {
    cb.LookupBlock->EmitCall(*this);
}

void BlockOfCode::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
    ASSERT_MSG(!in_far_code, "Can't patch when in far code, yet!");
    constant_pool.EmitPatchLDR(Rt, lower, upper);
}

void BlockOfCode::PatchConstPool() {
    constant_pool.PatchPool();
}

void BlockOfCode::SwitchToFarCode() {
    ASSERT(prelude_complete);
    ASSERT(!in_far_code);
    in_far_code = true;
    near_code_ptr = GetCodePtr();
    SetCodePtr(far_code_ptr);

    ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}

void BlockOfCode::SwitchToNearCode() {
    ASSERT(prelude_complete);
    ASSERT(in_far_code);
    in_far_code = false;
    far_code_ptr = GetCodePtr();
    SetCodePtr(near_code_ptr);
}

CodePtr BlockOfCode::GetCodeBegin() const {
    return near_code_begin;
}

u8* BlockOfCode::GetRegion() const {
    return region;
}

std::size_t BlockOfCode::GetRegionSize() const {
    return total_region_size;
}

void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
    ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");

    void* ret = GetWritableCodePtr();
    region_size += alloc_size;
    SetCodePtr(GetCodePtr() + alloc_size);
    memset(ret, 0, alloc_size);
    return ret;
}

void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
    u8* ptr = const_cast<u8*>(reinterpret_cast<const u8*>(code_ptr));
    ARM64XEmitter::SetCodePtr(ptr);
}

void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
    size_t current_size = GetCodePtr() - reinterpret_cast<const u8*>(begin);
    ASSERT(current_size <= size);
    for (u32 i = 0; i < (size - current_size) / 4; i++) {
        HINT(Arm64Gen::HINT_NOP);
    }
}

//bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const {
//#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
//    return cpu_info.has(type);
//#else
//    (void)type;
//    return false;
//#endif
//}

} // namespace Dynarmic::BackendA64
147
src/dynarmic/backend/A64/block_of_code.h
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <memory>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include "backend/A64/callback.h"
|
||||||
|
#include "backend/A64/constant_pool.h"
|
||||||
|
#include "backend/A64/jitstate_info.h"
|
||||||
|
#include "backend/A64/emitter/a64_emitter.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
using CodePtr = const void*;
|
||||||
|
|
||||||
|
struct RunCodeCallbacks {
|
||||||
|
std::unique_ptr<Callback> LookupBlock;
|
||||||
|
std::unique_ptr<Callback> AddTicks;
|
||||||
|
std::unique_ptr<Callback> GetTicksRemaining;
|
||||||
|
u64 value_in_X27;
|
||||||
|
};
|
||||||
|
|
||||||
|
class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
|
||||||
|
public:
|
||||||
|
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
|
||||||
|
BlockOfCode(const BlockOfCode&) = delete;
|
||||||
|
|
||||||
|
|
||||||
|
/// Call when external emitters have finished emitting their preludes.
|
||||||
|
void PreludeComplete();
|
||||||
|
|
||||||
|
/// Change permissions to RW. This is required to support systems with W^X enforced.
|
||||||
|
void EnableWriting();
|
||||||
|
/// Change permissions to RX. This is required to support systems with W^X enforced.
|
||||||
|
void DisableWriting();
|
||||||
|
|
||||||
|
/// Clears this block of code and resets code pointer to beginning.
|
||||||
|
void ClearCache();
|
||||||
|
/// Calculates how much space is remaining to use. This is the minimum of near code and far code.
|
||||||
|
size_t SpaceRemaining() const;
|
||||||
|
|
||||||
|
/// Runs emulated code from code_ptr.
|
||||||
|
void RunCode(void* jit_state, CodePtr code_ptr) const;
|
||||||
|
/// Runs emulated code from code_ptr for a single cycle.
|
||||||
|
void StepCode(void* jit_state, CodePtr code_ptr) const;
|
||||||
|
/// Code emitter: Returns to dispatcher
|
||||||
|
void ReturnFromRunCode(bool fpscr_already_exited = false);
|
||||||
|
/// Code emitter: Returns to dispatcher, forces return to host
|
||||||
|
void ForceReturnFromRunCode(bool fpscr_already_exited = false);
|
||||||
|
/// Code emitter: Makes guest FPSR and FPCR the current FPSR and FPCR
|
||||||
|
void SwitchFpscrOnEntry();
|
||||||
|
/// Code emitter: Makes saved host FPCR the current FPCR
|
||||||
|
void SwitchFpscrOnExit();
|
||||||
|
/// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining
|
||||||
|
/// @note this clobbers ABI caller-save registers
|
||||||
|
void UpdateTicks();
|
||||||
|
/// Code emitter: Performs a block lookup based on current state
|
||||||
|
/// @note this clobbers ABI caller-save registers
|
||||||
|
void LookupBlock();
|
||||||
|
|
||||||
|
u64 MConst(u64 lower, u64 upper = 0);
|
||||||
|
|
||||||
|
void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);
|
||||||
|
|
||||||
|
void PatchConstPool();
|
||||||
|
|
||||||
|
/// Far code sits far away from the near code. Execution remains primarily in near code.
|
||||||
|
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
|
||||||
|
void SwitchToFarCode();
|
||||||
|
void SwitchToNearCode();
|
||||||
|
|
||||||
|
CodePtr GetCodeBegin() const;
|
||||||
|
u8* GetRegion() const;
|
||||||
|
std::size_t GetRegionSize() const;
|
||||||
|
|
||||||
|
const void* GetReturnFromRunCodeAddress() const {
|
||||||
|
return return_from_run_code[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
const void* GetForceReturnFromRunCodeAddress() const {
|
||||||
|
return return_from_run_code[FORCE_RETURN];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Allocate memory of `size` bytes from the same block of memory the code is in.
|
||||||
|
/// This is useful for objects that need to be placed close to or within code.
|
||||||
|
/// The lifetime of this memory is the same as the code around it.
|
||||||
|
void* AllocateFromCodeSpace(size_t size);
|
||||||
|
|
||||||
|
void SetCodePtr(CodePtr code_ptr);
|
||||||
|
void EnsurePatchLocationSize(CodePtr begin, size_t size);
|
||||||
|
|
||||||
|
Arm64Gen::ARM64FloatEmitter fp_emitter;
|
||||||
|
|
||||||
|
// ABI registers
|
||||||
|
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_RETURN;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_RETURN2;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_PARAM1;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_PARAM2;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_PARAM3;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_PARAM4;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_PARAM5;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_PARAM6;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_PARAM7;
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_PARAM8;
|
||||||
|
|
||||||
|
static const Arm64Gen::ARM64Reg ABI_SCRATCH1;
|
||||||
|
|
||||||
|
static const std::array<Arm64Gen::ARM64Reg, 8> ABI_PARAMS;
|
||||||
|
|
||||||
|
// bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;
|
||||||
|
|
||||||
|
JitStateInfo GetJitStateInfo() const { return jsi; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
RunCodeCallbacks cb;
|
||||||
|
JitStateInfo jsi;
|
||||||
|
|
||||||
|
bool prelude_complete = false;
|
||||||
|
CodePtr near_code_begin;
|
||||||
|
CodePtr far_code_begin;
|
||||||
|
|
||||||
|
ConstantPool constant_pool;
|
||||||
|
|
||||||
|
bool in_far_code = false;
|
||||||
|
CodePtr near_code_ptr;
|
||||||
|
CodePtr far_code_ptr;
|
||||||
|
|
||||||
|
using RunCodeFuncType = void(*)(void*, CodePtr);
|
||||||
|
RunCodeFuncType run_code = nullptr;
|
||||||
|
RunCodeFuncType step_code = nullptr;
|
||||||
|
static constexpr size_t FPSCR_ALREADY_EXITED = 1 << 0;
|
||||||
|
static constexpr size_t FORCE_RETURN = 1 << 1;
|
||||||
|
std::array<const void*, 4> return_from_run_code;
|
||||||
|
void GenRunCode();
|
||||||
|
|
||||||
|
//Xbyak::util::Cpu cpu_info;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Dynarmic::BackendA64
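A minimal sketch of the writing-window pattern that EnableWriting/DisableWriting exist for on W^X hosts (illustrative only; RepatchExample is an invented name, and SCOPE_EXIT is the macro from common/scope_exit.h used elsewhere in this backend):

void RepatchExample(BlockOfCode& code) {
    code.EnableWriting();                   // flip the code region to RW (platform-specific)
    SCOPE_EXIT { code.DisableWriting(); };  // restore RX even on early return
    // ... patch or invalidate previously emitted code here ...
}

EmitA64::InvalidateBasicBlocks later in this commit follows exactly this shape.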
|
45
src/dynarmic/backend/A64/block_range_information.cpp
Normal file
@ -0,0 +1,45 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include <unordered_set>

#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>

#include "backend/A64/block_range_information.h"
#include "common/common_types.h"

namespace Dynarmic::BackendA64 {

template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
    block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
}

template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
    block_ranges.clear();
}

template <typename ProgramCounterType>
std::unordered_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
    std::unordered_set<IR::LocationDescriptor> erase_locations;
    for (auto invalidate_interval : ranges) {
        auto pair = block_ranges.equal_range(invalidate_interval);
        for (auto it = pair.first; it != pair.second; ++it) {
            for (const auto& descriptor : it->second) {
                erase_locations.insert(descriptor);
            }
        }
    }
    // TODO: EFFICIENCY: Remove ranges that are to be erased.
    return erase_locations;
}

template class BlockRangeInformation<u32>;
template class BlockRangeInformation<u64>;

} // namespace Dynarmic::BackendA64
29
src/dynarmic/backend/A64/block_range_information.h
Normal file
@ -0,0 +1,29 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <unordered_set>

#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>

#include "frontend/ir/location_descriptor.h"

namespace Dynarmic::BackendA64 {

template <typename ProgramCounterType>
class BlockRangeInformation {
public:
    void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
    void ClearCache();
    std::unordered_set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);

private:
    boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
};

} // namespace Dynarmic::BackendA64
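A minimal usage sketch for the interval bookkeeping above (illustrative only; the variable names are invented, the types are the boost::icl aliases this header already uses):

BlockRangeInformation<u32> block_ranges;
// ... AddRange(...) is called as each block is emitted ...
boost::icl::interval_set<u32> to_invalidate;
to_invalidate.add(boost::icl::discrete_interval<u32>::closed(0x1000, 0x1FFF));
const auto dirty = block_ranges.InvalidateRanges(to_invalidate);  // locations whose host code must be discarded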
41
src/dynarmic/backend/A64/callback.cpp
Normal file
@ -0,0 +1,41 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include "backend/A64/callback.h"
#include "backend/A64/block_of_code.h"

namespace Dynarmic::BackendA64 {

Callback::~Callback() = default;

void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
    l({code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.QuickCallFunction(fn);
}

void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
    l(code.ABI_PARAM1, {code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.QuickCallFunction(fn);
}

void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
    l({code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.MOVI2R(code.ABI_PARAM1, arg);
    code.QuickCallFunction(fn);
}

void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
#if defined(WIN32) && !defined(__MINGW64__)
    l(code.ABI_PARAM2, {code.ABI_PARAM3, code.ABI_PARAM4});
    code.MOVI2R(code.ABI_PARAM1, arg);
#else
    l(code.ABI_PARAM1, {code.ABI_PARAM3, code.ABI_PARAM4});
    code.MOVI2R(code.ABI_PARAM2, arg);
#endif
    code.QuickCallFunction(fn);
}
} // namespace Dynarmic::BackendA64
54
src/dynarmic/backend/A64/callback.h
Normal file
@ -0,0 +1,54 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <functional>
#include <vector>

#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"

namespace Dynarmic::BackendA64 {

using RegList = std::vector<Arm64Gen::ARM64Reg>;

class BlockOfCode;

class Callback {
public:
    virtual ~Callback();

    virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const = 0;
    virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const = 0;
};

class SimpleCallback final : public Callback {
public:
    template <typename Function>
    SimpleCallback(Function fn) : fn(reinterpret_cast<void (*)()>(fn)) {}

    void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
    void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;

private:
    void (*fn)();
};

class ArgCallback final : public Callback {
public:
    template <typename Function>
    ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}

    void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
    void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;

private:
    void (*fn)();
    u64 arg;
};

} // namespace Dynarmic::BackendA64
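A minimal sketch of how these callbacks are emitted (illustrative only; host_add_ticks and user_data are invented names, and the lambda mirrors the one used by BlockOfCode::UpdateTicks earlier in this commit):

void host_add_ticks(u64 user_data, u64 ticks);  // hypothetical host-side function

void EmitAddTicksCall(BlockOfCode& code, const JitStateInfo& jsi, u64 user_data) {
    ArgCallback add_ticks{&host_add_ticks, user_data};  // the bound arg is preloaded into the first parameter register
    add_ticks.EmitCall(code, [&](RegList param) {
        // param[] holds the remaining argument registers; load the tick count from the JIT state.
        code.LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
    });
}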
65
src/dynarmic/backend/A64/constant_pool.cpp
Normal file
@ -0,0 +1,65 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include <cstring>

#include "backend/A64/block_of_code.h"
#include "backend/A64/constant_pool.h"
#include "common/assert.h"

namespace Dynarmic::BackendA64 {

ConstantPool::ConstantPool(BlockOfCode& code) : code(code) {}

void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
    const auto constant = std::make_tuple(lower, upper);
    auto iter = constant_info.find(constant);
    if (iter == constant_info.end()) {
        struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
        patch_info.emplace_back(p);
        code.BRK(0);
        return;
    }

    const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());

    if (!(offset >= -0x40000 && offset <= 0x3FFFF)) {
        constant_info.erase(constant);
        struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
        patch_info.emplace_back(p);
        code.BRK(0x42);
        return;
    }
    DEBUG_ASSERT((offset & 3) == 0);
    code.LDR(Rt, offset / 4);
}

void ConstantPool::PatchPool() {
    u8* pool_ptr = code.GetWritableCodePtr();
    for (PatchInfo patch : patch_info) {
        auto iter = constant_info.find(patch.constant);
        if (iter == constant_info.end()) {
            std::memcpy(pool_ptr, &std::get<0>(patch.constant), sizeof(u64));
            std::memcpy(pool_ptr + sizeof(u64), &std::get<1>(patch.constant), sizeof(u64));
            iter = constant_info.emplace(patch.constant, pool_ptr).first;
            pool_ptr += align_size;
        }
        code.SetCodePtr(patch.ptr);

        const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());
        DEBUG_ASSERT((offset & 3) == 0);
        code.LDR(patch.Rt, offset / 4);
    }
    patch_info.clear();
    code.SetCodePtr(pool_ptr);
}

void ConstantPool::Clear() {
    constant_info.clear();
    patch_info.clear();
}

} // namespace Dynarmic::BackendA64
47
src/dynarmic/backend/A64/constant_pool.h
Normal file
@ -0,0 +1,47 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <map>

#include "common/common_types.h"

namespace Dynarmic::BackendA64 {

class BlockOfCode;

/// ConstantPool allocates a block of memory from BlockOfCode.
/// It places constants into this block of memory, returning the address
/// of the memory location where the constant is placed. If the constant
/// already exists, its memory location is reused.
class ConstantPool final {
public:
    ConstantPool(BlockOfCode& code);

    void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);

    void PatchPool();

    void Clear();

private:
    static constexpr size_t align_size = 16; // bytes

    std::map<std::tuple<u64, u64>, void*> constant_info;

    BlockOfCode& code;

    struct PatchInfo {
        const void* ptr;
        Arm64Gen::ARM64Reg Rt;
        std::tuple<u64, u64> constant;
    };

    std::vector<PatchInfo> patch_info;
};

} // namespace Dynarmic::BackendA64
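A minimal sketch of the pool in use (illustrative only; X8 and the constant value are arbitrary choices, and the calls go through the BlockOfCode wrappers shown earlier in this commit):

// Emits a placeholder that becomes "LDR X8, <literal>" once the pool is written out.
code.EmitPatchLDR(Arm64Gen::X8, 0x0123456789ABCDEFull);
// ... emit the rest of the block ...
code.PatchConstPool();  // lays down the 16-byte literal slots and patches every pending load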
77
src/dynarmic/backend/A64/devirtualize.h
Normal file
@ -0,0 +1,77 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <cstring>
#include <memory>

#include <mp/traits/function_info.h>

#include "backend/A64/callback.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"

namespace Dynarmic::BackendA64 {

namespace impl {

template <typename FunctionType, FunctionType mfp>
struct ThunkBuilder;

template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
struct ThunkBuilder<R(C::*)(Args...), mfp> {
    static R Thunk(C* this_, Args... args) {
        return (this_->*mfp)(std::forward<Args>(args)...);
    }
};

} // namespace impl

template<auto mfp>
ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) {
    return ArgCallback{&impl::ThunkBuilder<decltype(mfp), mfp>::Thunk, reinterpret_cast<u64>(this_)};
}

template<auto mfp>
ArgCallback DevirtualizeWindows(mp::class_type<decltype(mfp)>* this_) {
    static_assert(sizeof(mfp) == 8);
    return ArgCallback{Common::BitCast<u64>(mfp), reinterpret_cast<u64>(this_)};
}

template<auto mfp>
ArgCallback DevirtualizeAarch64(mp::class_type<decltype(mfp)>* this_) {
    struct MemberFunctionPointer {
        /// For a non-virtual function, this is a simple function pointer.
        /// For a virtual function, it is virtual table offset in bytes.
        u64 ptr;
        /// Twice the required adjustment to `this`, plus 1 if the member function is virtual.
        u64 adj;
    } mfp_struct = Common::BitCast<MemberFunctionPointer>(mfp);

    static_assert(sizeof(MemberFunctionPointer) == 16);
    static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));

    u64 fn_ptr = mfp_struct.ptr;
    u64 this_ptr = reinterpret_cast<u64>(this_) + mfp_struct.adj / 2;
    if (mfp_struct.adj & 1) {
        u64 vtable = Common::BitCastPointee<u64>(this_ptr);
        fn_ptr = Common::BitCastPointee<u64>(vtable + fn_ptr);
    }
    return ArgCallback{fn_ptr, this_ptr};
}

template<auto mfp>
ArgCallback Devirtualize(mp::class_type<decltype(mfp)>* this_) {
#if defined(linux) || defined(__linux) || defined(__linux__)
    return DevirtualizeAarch64<mfp>(this_);
#else
    return DevirtualizeGeneric<mfp>(this_);
#endif
}

} // namespace Dynarmic::BackendA64
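A minimal sketch of devirtualization in use (illustrative only; MyCallbacks and AddTicks are invented stand-ins for the user-callback interfaces this backend targets):

struct MyCallbacks {
    virtual void AddTicks(u64 ticks) = 0;
};

void EmitTickCallback(BlockOfCode& code, MyCallbacks* cb) {
    // Resolves the (possibly virtual) member function and captures `cb` as the bound argument.
    ArgCallback callback = Devirtualize<&MyCallbacks::AddTicks>(cb);
    callback.EmitCall(code);
}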
286
src/dynarmic/backend/A64/emit_a64.cpp
Normal file
@ -0,0 +1,286 @@
|
|||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
|
#include "backend/A64/block_of_code.h"
|
||||||
|
#include "backend/A64/emit_a64.h"
|
||||||
|
#include "backend/A64/hostloc.h"
|
||||||
|
#include "backend/A64/perf_map.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/bit_util.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "common/scope_exit.h"
|
||||||
|
#include "common/variant_util.h"
|
||||||
|
#include "frontend/ir/basic_block.h"
|
||||||
|
#include "frontend/ir/microinstruction.h"
|
||||||
|
#include "frontend/ir/opcodes.h"
|
||||||
|
|
||||||
|
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
|
||||||
|
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
|
||||||
|
: reg_alloc(reg_alloc), block(block) {}
|
||||||
|
|
||||||
|
void EmitContext::EraseInstruction(IR::Inst* inst) {
|
||||||
|
block.Instructions().erase(inst);
|
||||||
|
inst->ClearArgs();
|
||||||
|
}
|
||||||
|
|
||||||
|
EmitA64::EmitA64(BlockOfCode& code)
|
||||||
|
: code(code) {}
|
||||||
|
|
||||||
|
EmitA64::~EmitA64() = default;
|
||||||
|
|
||||||
|
std::optional<typename EmitA64::BlockDescriptor> EmitA64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
|
||||||
|
auto iter = block_descriptors.find(descriptor);
|
||||||
|
if (iter == block_descriptors.end())
|
||||||
|
return std::nullopt;
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitVoid(EmitContext&, IR::Inst*) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitBreakpoint(EmitContext&, IR::Inst*) {
|
||||||
|
code.BRK(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
if (!args[0].IsImmediate()) {
|
||||||
|
ctx.reg_alloc.DefineValue(inst, args[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) {
|
||||||
|
auto iter = block_descriptors.find(target);
|
||||||
|
CodePtr target_code_ptr = iter != block_descriptors.end()
|
||||||
|
? iter->second.entrypoint
|
||||||
|
: code.GetReturnFromRunCodeAddress();
|
||||||
|
|
||||||
|
code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
|
||||||
|
|
||||||
|
code.MOVI2R(loc_desc_reg, target.Value());
|
||||||
|
|
||||||
|
patch_information[target].mov_x0.emplace_back(code.GetCodePtr());
|
||||||
|
EmitPatchMovX0(target_code_ptr);
|
||||||
|
|
||||||
|
code.ADD(code.ABI_SCRATCH1, X28, DecodeReg(index_reg), ArithOption{index_reg, ST_LSL, 3});
|
||||||
|
code.STR(INDEX_UNSIGNED, loc_desc_reg, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_location_descriptors);
|
||||||
|
code.STR(INDEX_UNSIGNED, X0, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_codeptrs);
|
||||||
|
|
||||||
|
code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1);
|
||||||
|
code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1);
|
||||||
|
code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
ASSERT(args[0].IsImmediate());
|
||||||
|
u64 unique_hash_of_target = args[0].GetImmediateU64();
|
||||||
|
|
||||||
|
ctx.reg_alloc.ScratchGpr({HostLoc::X0});
|
||||||
|
Arm64Gen::ARM64Reg loc_desc_reg = ctx.reg_alloc.ScratchGpr();
|
||||||
|
Arm64Gen::ARM64Reg index_reg = ctx.reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
|
PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
|
||||||
|
ASSERT_FALSE("should never happen");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
|
||||||
|
ASSERT_FALSE("should never happen");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
|
||||||
|
ASSERT_FALSE("should never happen");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) {
|
||||||
|
ASSERT_FALSE("should never happen");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
|
||||||
|
ASSERT_FALSE("should never happen");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
Arm64Gen::ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
|
||||||
|
Arm64Gen::ARM64Reg value = ctx.reg_alloc.UseGpr(args[0]);
|
||||||
|
code.CMP(value, ZR);
|
||||||
|
code.MRS(nzcv, FIELD_NZCV);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
if (args[0].IsImmediate()) {
|
||||||
|
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
|
||||||
|
u32 value = 0;
|
||||||
|
value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
|
||||||
|
value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
|
||||||
|
value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
|
||||||
|
value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
|
||||||
|
code.MOVI2R(nzcv, value);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||||
|
} else {
|
||||||
|
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
|
||||||
|
Arm64Gen::ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
|
||||||
|
// TODO: Optimize
|
||||||
|
code.LSR(nzcv, nzcv, 28);
|
||||||
|
code.MOVI2R(scratch, 0b00010000'10000001);
|
||||||
|
code.MUL(nzcv, nzcv, scratch);
|
||||||
|
code.ANDI2R(nzcv, nzcv, 1, scratch);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitAddCycles(size_t cycles) {
|
||||||
|
ASSERT(cycles < std::numeric_limits<u32>::max());
|
||||||
|
code.SUBI2R(X26, X26, static_cast<u32>(cycles));
|
||||||
|
}
|
||||||
|
|
||||||
|
FixupBranch EmitA64::EmitCond(IR::Cond cond) {
|
||||||
|
FixupBranch label;
|
||||||
|
|
||||||
|
const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1;
|
||||||
|
code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_cpsr_nzcv);
|
||||||
|
code._MSR(FIELD_NZCV, cpsr);
|
||||||
|
|
||||||
|
switch (cond) {
|
||||||
|
case IR::Cond::EQ: //z
|
||||||
|
label = code.B(CC_EQ);
|
||||||
|
break;
|
||||||
|
case IR::Cond::NE: //!z
|
||||||
|
label = code.B(CC_NEQ);
|
||||||
|
break;
|
||||||
|
case IR::Cond::CS: //c
|
||||||
|
label = code.B(CC_CS);
|
||||||
|
break;
|
||||||
|
case IR::Cond::CC: //!c
|
||||||
|
label = code.B(CC_CC);
|
||||||
|
break;
|
||||||
|
case IR::Cond::MI: //n
|
||||||
|
label = code.B(CC_MI);
|
||||||
|
break;
|
||||||
|
case IR::Cond::PL: //!n
|
||||||
|
label = code.B(CC_PL);
|
||||||
|
break;
|
||||||
|
case IR::Cond::VS: //v
|
||||||
|
label = code.B(CC_VS);
|
||||||
|
break;
|
||||||
|
case IR::Cond::VC: //!v
|
||||||
|
label = code.B(CC_VC);
|
||||||
|
break;
|
||||||
|
case IR::Cond::HI: //c & !z
|
||||||
|
label = code.B(CC_HI);
|
||||||
|
break;
|
||||||
|
case IR::Cond::LS: //!c | z
|
||||||
|
label = code.B(CC_LS);
|
||||||
|
break;
|
||||||
|
case IR::Cond::GE: // n == v
|
||||||
|
label = code.B(CC_GE);
|
||||||
|
break;
|
||||||
|
case IR::Cond::LT: // n != v
|
||||||
|
label = code.B(CC_LT);
|
||||||
|
break;
|
||||||
|
case IR::Cond::GT: // !z & (n == v)
|
||||||
|
label = code.B(CC_GT);
|
||||||
|
break;
|
||||||
|
case IR::Cond::LE: // z | (n != v)
|
||||||
|
label = code.B(CC_LE);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return label;
|
||||||
|
}
|
||||||
|
|
||||||
|
EmitA64::BlockDescriptor EmitA64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
|
||||||
|
PerfMapRegister(entrypoint, code.GetCodePtr(), LocationDescriptorToFriendlyName(descriptor));
|
||||||
|
Patch(descriptor, entrypoint);
|
||||||
|
BlockDescriptor block_desc{entrypoint, size};
|
||||||
|
|
||||||
|
block_descriptors.emplace(descriptor.Value(), block_desc);
|
||||||
|
return block_desc;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
|
||||||
|
Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
|
||||||
|
using T = std::decay_t<decltype(x)>;
|
||||||
|
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
|
||||||
|
this->EmitTerminalImpl(x, initial_location, is_single_step);
|
||||||
|
} else {
|
||||||
|
ASSERT_MSG(false, "Invalid terminal");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
|
||||||
|
const CodePtr save_code_ptr = code.GetCodePtr();
|
||||||
|
const PatchInformation& patch_info = patch_information[desc];
|
||||||
|
|
||||||
|
for (CodePtr location : patch_info.jg) {
|
||||||
|
code.SetCodePtr(location);
|
||||||
|
EmitPatchJg(desc, bb);
|
||||||
|
code.FlushIcache();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (CodePtr location : patch_info.jmp) {
|
||||||
|
code.SetCodePtr(location);
|
||||||
|
EmitPatchJmp(desc, bb);
|
||||||
|
code.FlushIcache();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (CodePtr location : patch_info.mov_x0) {
|
||||||
|
code.SetCodePtr(location);
|
||||||
|
EmitPatchMovX0(bb);
|
||||||
|
code.FlushIcache();
|
||||||
|
}
|
||||||
|
|
||||||
|
code.SetCodePtr(save_code_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::Unpatch(const IR::LocationDescriptor& desc) {
|
||||||
|
Patch(desc, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::ClearCache() {
|
||||||
|
block_descriptors.clear();
|
||||||
|
patch_information.clear();
|
||||||
|
|
||||||
|
PerfMapClear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
|
||||||
|
code.EnableWriting();
|
||||||
|
SCOPE_EXIT { code.DisableWriting(); };
|
||||||
|
|
||||||
|
for (const auto &descriptor : locations) {
|
||||||
|
auto it = block_descriptors.find(descriptor);
|
||||||
|
if (it == block_descriptors.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (patch_information.count(descriptor)) {
|
||||||
|
Unpatch(descriptor);
|
||||||
|
}
|
||||||
|
block_descriptors.erase(it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Dynarmic::BackendA64
|
124
src/dynarmic/backend/A64/emit_a64.h
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <optional>
|
||||||
|
#include <string>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "backend/A64/reg_alloc.h"
|
||||||
|
#include "backend/A64/emitter/a64_emitter.h"
|
||||||
|
#include "common/bit_util.h"
|
||||||
|
#include "common/fp/rounding_mode.h"
|
||||||
|
#include "frontend/ir/location_descriptor.h"
|
||||||
|
#include "frontend/ir/terminal.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::IR {
|
||||||
|
class Block;
|
||||||
|
class Inst;
|
||||||
|
} // namespace Dynarmic::IR
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
class BlockOfCode;
|
||||||
|
|
||||||
|
using namespace Arm64Gen;
|
||||||
|
|
||||||
|
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
|
||||||
|
|
||||||
|
// Array alias that always sizes itself according to the given type T
|
||||||
|
// relative to the size of a vector register. e.g. T = u32 would result
|
||||||
|
// in a std::array<u32, 4>.
|
||||||
|
template <typename T>
|
||||||
|
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
|
||||||
|
|
||||||
|
struct EmitContext {
|
||||||
|
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
|
||||||
|
|
||||||
|
void EraseInstruction(IR::Inst* inst);
|
||||||
|
|
||||||
|
virtual FP::RoundingMode FPSCR_RMode() const = 0;
|
||||||
|
virtual u32 FPCR() const = 0;
|
||||||
|
virtual bool FPSCR_FTZ() const = 0;
|
||||||
|
virtual bool FPSCR_DN() const = 0;
|
||||||
|
virtual bool AccurateNaN() const { return true; }
|
||||||
|
|
||||||
|
RegAlloc& reg_alloc;
|
||||||
|
IR::Block& block;
|
||||||
|
};
|
||||||
|
|
||||||
|
class EmitA64 {
|
||||||
|
public:
|
||||||
|
struct BlockDescriptor {
|
||||||
|
CodePtr entrypoint; // Entrypoint of emitted code
|
||||||
|
size_t size; // Length in bytes of emitted code
|
||||||
|
};
|
||||||
|
|
||||||
|
EmitA64(BlockOfCode& code);
|
||||||
|
virtual ~EmitA64();
|
||||||
|
|
||||||
|
/// Looks up an emitted host block in the cache.
|
||||||
|
std::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;
|
||||||
|
|
||||||
|
/// Empties the entire cache.
|
||||||
|
virtual void ClearCache();
|
||||||
|
|
||||||
|
/// Invalidates a selection of basic blocks.
|
||||||
|
void InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// Microinstruction emitters
|
||||||
|
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
|
||||||
|
#define A32OPC(...)
|
||||||
|
#define A64OPC(...)
|
||||||
|
#include "backend/A64/opcodes.inc"
|
||||||
|
#undef OPCODE
|
||||||
|
#undef A32OPC
|
||||||
|
#undef A64OPC
|
||||||
|
|
||||||
|
// Helpers
|
||||||
|
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
|
||||||
|
void EmitAddCycles(size_t cycles);
|
||||||
|
FixupBranch EmitCond(IR::Cond cond);
|
||||||
|
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
|
||||||
|
void PushRSBHelper(Arm64Gen::ARM64Reg loc_desc_reg, Arm64Gen::ARM64Reg index_reg, IR::LocationDescriptor target);
|
||||||
|
|
||||||
|
// Terminal instruction emitters
|
||||||
|
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
|
||||||
|
|
||||||
|
// Patching
|
||||||
|
struct PatchInformation {
|
||||||
|
std::vector<CodePtr> jg;
|
||||||
|
std::vector<CodePtr> jmp;
|
||||||
|
std::vector<CodePtr> mov_x0;
|
||||||
|
};
|
||||||
|
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
|
||||||
|
virtual void Unpatch(const IR::LocationDescriptor& target_desc);
|
||||||
|
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
|
||||||
|
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
|
||||||
|
virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0;
|
||||||
|
|
||||||
|
// State
|
||||||
|
BlockOfCode& code;
|
||||||
|
std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
|
||||||
|
std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Dynarmic::BackendA64
|
1128
src/dynarmic/backend/A64/emit_a64_data_processing.cpp
Normal file
File diff suppressed because it is too large
471
src/dynarmic/backend/A64/emit_a64_floating_point.cpp
Normal file
@ -0,0 +1,471 @@
|
|||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "backend/A64/abi.h"
|
||||||
|
#include "backend/A64/block_of_code.h"
|
||||||
|
#include "backend/A64/emit_a64.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "common/fp/fpcr.h"
|
||||||
|
#include "common/fp/fpsr.h"
|
||||||
|
#include "common/fp/info.h"
|
||||||
|
#include "common/fp/op.h"
|
||||||
|
#include "common/fp/rounding_mode.h"
|
||||||
|
#include "common/fp/util.h"
|
||||||
|
#include "frontend/ir/basic_block.h"
|
||||||
|
#include "frontend/ir/microinstruction.h"
|
||||||
|
#include "frontend/ir/opcodes.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
Arm64Gen::RoundingMode ConvertRoundingModeToA64RoundingMode(FP::RoundingMode rounding_mode) {
|
||||||
|
switch (rounding_mode) {
|
||||||
|
case FP::RoundingMode::ToNearest_TieEven:
|
||||||
|
return RoundingMode::ROUND_N;
|
||||||
|
case FP::RoundingMode::TowardsPlusInfinity:
|
||||||
|
return RoundingMode::ROUND_P;
|
||||||
|
case FP::RoundingMode::TowardsMinusInfinity:
|
||||||
|
return RoundingMode::ROUND_M;
|
||||||
|
case FP::RoundingMode::TowardsZero:
|
||||||
|
return RoundingMode::ROUND_Z;
|
||||||
|
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||||
|
return RoundingMode::ROUND_A;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <size_t fsize, typename Function>
|
||||||
|
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
|
||||||
|
result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
|
||||||
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
(code.fp_emitter.*fn)(result, result);
|
||||||
|
} else {
|
||||||
|
fn(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <size_t fsize, typename Function>
|
||||||
|
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
|
||||||
|
ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(args[1]);
|
||||||
|
result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
|
||||||
|
operand = fsize == 32 ? EncodeRegToSingle(operand) : EncodeRegToDouble(operand);
|
||||||
|
|
||||||
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
(code.fp_emitter.*fn)(result, result, operand);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fn(result, result, operand);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
//void EmitA64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
//
|
||||||
|
// code.pand(result, code.MConst(xword, f16_non_sign_mask));
|
||||||
|
//
|
||||||
|
// ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
//}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
|
||||||
|
code.fp_emitter.FABS(result, result);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
|
||||||
|
code.fp_emitter.FABS(result, result);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
//void EmitA64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
//
|
||||||
|
// code.pxor(result, code.MConst(xword, f16_negative_zero));
|
||||||
|
//
|
||||||
|
// ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
//}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
|
||||||
|
code.fp_emitter.FNEG(result, result);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
|
||||||
|
code.fp_emitter.FNEG(result, result);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
|
||||||
|
}
|
||||||
|
void EmitA64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPTwoOp<32>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPTwoOp<64>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ARM64Reg SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
|
||||||
|
ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
|
||||||
|
// Fpsr's nzcv is copied across integer nzcv
|
||||||
|
code.MRS(nzcv, FIELD_NZCV);
|
||||||
|
return nzcv;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
ARM64Reg reg_a = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[0]));
|
||||||
|
ARM64Reg reg_b = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
bool exc_on_qnan = args[2].GetImmediateU1();
|
||||||
|
|
||||||
|
if (exc_on_qnan) {
|
||||||
|
code.fp_emitter.FCMPE(reg_a, reg_b);
|
||||||
|
} else {
|
||||||
|
code.fp_emitter.FCMP(reg_a, reg_b);
|
||||||
|
}
|
||||||
|
|
||||||
|
ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const ARM64Reg reg_a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[0]));
|
||||||
|
const ARM64Reg reg_b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
bool exc_on_qnan = args[2].GetImmediateU1();
|
||||||
|
|
||||||
|
if (exc_on_qnan) {
|
||||||
|
code.fp_emitter.FCMPE(reg_a, reg_b);
|
||||||
|
} else {
|
||||||
|
code.fp_emitter.FCMP(reg_a, reg_b);
|
||||||
|
}
|
||||||
|
|
||||||
|
ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
|
||||||
|
code.fp_emitter.FCVT(64, 16, result, result);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
code.fp_emitter.FCVT(32, 16, result, result);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
|
||||||
|
code.fp_emitter.FCVT(64, 32, result, result);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
    code.fp_emitter.FCVT(16, 32, result, result);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    code.fp_emitter.FCVT(16, 64, result, result);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    code.fp_emitter.FCVT(32, 64, result, result);
    ctx.reg_alloc.DefineValue(inst, result);
}

template<size_t fsize, bool unsigned_, size_t isize>
static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const size_t fbits = args[1].GetImmediateU8();
    const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    const auto round_imm = ConvertRoundingModeToA64RoundingMode(rounding_mode);

    ASSERT_MSG(fbits == 0, "fixed point conversions are not supported yet");

    ARM64Reg src = ctx.reg_alloc.UseScratchFpr(args[0]);
    ARM64Reg result = ctx.reg_alloc.ScratchGpr();
    src = fsize == 64 ? EncodeRegToDouble(src) : EncodeRegToSingle(src);
    result = isize == 64 ? result : DecodeReg(result);

    if constexpr (unsigned_) {
        code.fp_emitter.FCVTU(result, src, round_imm);
    } else {
        code.fp_emitter.FCVTS(result, src, round_imm);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, false, 32>(code, ctx, inst);
}

void EmitA64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, false, 64>(code, ctx, inst);
}

void EmitA64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, true, 32>(code, ctx, inst);
}

void EmitA64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, true, 64>(code, ctx, inst);
}

void EmitA64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<32, false, 32>(code, ctx, inst);
}

void EmitA64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<32, false, 64>(code, ctx, inst);
}

void EmitA64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<32, true, 32>(code, ctx, inst);
}

void EmitA64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<32, true, 64>(code, ctx, inst);
}

void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
    const size_t fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rounding_mode == ctx.FPSCR_RMode());

    if (fbits != 0) {
        code.fp_emitter.SCVTF(result, from, fbits);
    } else {
        code.fp_emitter.SCVTF(result, from);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
    const size_t fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rounding_mode == ctx.FPSCR_RMode());

    if (fbits != 0) {
        code.fp_emitter.UCVTF(result, from, fbits);
    } else {
        code.fp_emitter.UCVTF(result, from);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
    const size_t fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rounding_mode == ctx.FPSCR_RMode());

    if (fbits != 0) {
        code.fp_emitter.SCVTF(result, from, fbits);
    } else {
        code.fp_emitter.SCVTF(result, from);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
    const size_t fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rounding_mode == ctx.FPSCR_RMode());

    if (fbits != 0) {
        code.fp_emitter.SCVTF(result, from, fbits);
    } else {
        code.fp_emitter.SCVTF(result, from);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
    const size_t fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rounding_mode == ctx.FPSCR_RMode());

    if (fbits != 0) {
        code.fp_emitter.SCVTF(result, from, fbits);
    } else {
        code.fp_emitter.SCVTF(result, from);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
    const size_t fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rounding_mode == ctx.FPSCR_RMode());

    if (fbits != 0) {
        code.fp_emitter.UCVTF(result, from, fbits);
    } else {
        code.fp_emitter.UCVTF(result, from);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
    const size_t fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rounding_mode == ctx.FPSCR_RMode());

    if (fbits != 0) {
        code.fp_emitter.UCVTF(result, from, fbits);
    } else {
        code.fp_emitter.UCVTF(result, from);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
    const size_t fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rounding_mode == ctx.FPSCR_RMode());

    if (fbits != 0) {
        code.fp_emitter.UCVTF(result, from, fbits);
    } else {
        code.fp_emitter.UCVTF(result, from);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

} // namespace Dynarmic::BackendA64
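A quick scalar reference can make the FCVTS/FCVTU calls above easier to read. The sketch below is not part of this commit; it only models what the signed conversion computes when fbits is zero (round according to the selected mode, then saturate to the destination integer range, with NaN mapping to zero). The helper name RefFPToFixedS32 is made up for illustration.

// Plain C++ model of a double -> s32 conversion with an explicit rounding mode,
// mirroring the semantics of the scalar FCVT*S instructions used above (fbits == 0).
#include <cfenv>
#include <cmath>
#include <cstdint>
#include <limits>

static std::int32_t RefFPToFixedS32(double value, int rounding /* FE_TONEAREST, FE_TOWARDZERO, ... */) {
    if (std::isnan(value))
        return 0;  // NaN converts to zero
    const int old_mode = std::fegetround();
    std::fesetround(rounding);
    const double rounded = std::nearbyint(value);  // round without raising inexact traps
    std::fesetround(old_mode);
    // Saturate to the s32 range, as the hardware conversion does.
    if (rounded <= static_cast<double>(std::numeric_limits<std::int32_t>::min()))
        return std::numeric_limits<std::int32_t>::min();
    if (rounded >= static_cast<double>(std::numeric_limits<std::int32_t>::max()))
        return std::numeric_limits<std::int32_t>::max();
    return static_cast<std::int32_t>(rounded);
}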
469  src/dynarmic/backend/A64/emit_a64_packed.cpp  Normal file
@ -0,0 +1,469 @@
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "backend/A64/block_of_code.h"
|
||||||
|
#include "backend/A64/emit_a64.h"
|
||||||
|
#include "frontend/ir/microinstruction.h"
|
||||||
|
#include "frontend/ir/opcodes.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.ADD(B, sum, sum, b);
|
||||||
|
|
||||||
|
if (ge_inst) {
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
|
||||||
|
|
||||||
|
code.fp_emitter.CMHI(B, ge, b, sum);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
if (ge_inst) {
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
|
||||||
|
|
||||||
|
code.fp_emitter.SQADD(B, ge, a, b);
|
||||||
|
code.fp_emitter.CMGE_zero(B, ge, ge);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
code.fp_emitter.ADD(B, a, a, b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.ADD(H, sum, sum, b);
|
||||||
|
|
||||||
|
if (ge_inst) {
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
|
||||||
|
|
||||||
|
code.fp_emitter.CMHI(H, ge, b, sum);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
if (ge_inst) {
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
|
||||||
|
|
||||||
|
code.fp_emitter.SQADD(H, ge, a, b);
|
||||||
|
code.fp_emitter.CMGE_zero(H, ge, ge);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
code.fp_emitter.ADD(H, a, a, b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
if (ge_inst) {
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
|
||||||
|
|
||||||
|
code.fp_emitter.CMHS(B, ge, a, b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
code.fp_emitter.SUB(B, a, a, b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
if (ge_inst) {
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
|
||||||
|
|
||||||
|
code.fp_emitter.SQSUB(B, ge, a, b);
|
||||||
|
code.fp_emitter.CMGE_zero(B, ge, ge);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
code.fp_emitter.SUB(B, a, a, b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
if (ge_inst) {
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
|
||||||
|
|
||||||
|
code.fp_emitter.CMHS(H, ge, a, b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
code.fp_emitter.SUB(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
if (ge_inst) {
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
|
||||||
|
|
||||||
|
code.fp_emitter.SQSUB(H, ge, a, b);
|
||||||
|
code.fp_emitter.CMGE_zero(H, ge, ge);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
code.fp_emitter.SUB(H, a, a, b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UHADD(B, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UHADD(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.SHADD(B, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.SHADD(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UHSUB(B, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.SHSUB(B, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UHSUB(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.SHSUB(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg reg_a_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
    const ARM64Reg reg_b_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[1]));
    const ARM64Reg reg_a_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_b_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
    ARM64Reg reg_sum, reg_diff;

    if (is_signed) {
        code.SXTH(reg_a_lo, reg_a_hi);
        code.SXTH(reg_b_lo, reg_b_hi);
        code.ASR(reg_a_hi, reg_a_hi, 16);
        code.ASR(reg_b_hi, reg_b_hi, 16);
    } else {
        code.UXTH(reg_a_lo, reg_a_hi);
        code.UXTH(reg_b_lo, reg_b_hi);
        code.LSR(reg_a_hi, reg_a_hi, 16);
        code.LSR(reg_b_hi, reg_b_hi, 16);
    }

    if (hi_is_sum) {
        code.SUB(reg_a_lo, reg_a_lo, reg_b_hi);
        code.ADD(reg_a_hi, reg_a_hi, reg_b_lo);
        reg_diff = reg_a_lo;
        reg_sum = reg_a_hi;
    } else {
        code.ADD(reg_a_lo, reg_a_lo, reg_b_hi);
        code.SUB(reg_a_hi, reg_a_hi, reg_b_lo);
        reg_diff = reg_a_hi;
        reg_sum = reg_a_lo;
    }

    if (ge_inst) {
        // The reg_b registers are no longer required.
        const ARM64Reg ge_sum = reg_b_hi;
        const ARM64Reg ge_diff = reg_b_lo;

        if (!is_signed) {
            code.LSL(ge_sum, reg_sum, 15);
            code.ASR(ge_sum, ge_sum, 31);
        } else {
            code.MVN(ge_sum, reg_sum);
            code.ASR(ge_sum, ge_sum, 31);
        }
        code.MVN(ge_diff, reg_diff);
        code.ASR(ge_diff, ge_diff, 31);
        code.ANDI2R(ge_sum, ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
        code.ANDI2R(ge_diff, ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
        code.ORR(ge_sum, ge_sum, ge_diff);

        ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
        ctx.EraseInstruction(ge_inst);
    }

    if (is_halving) {
        code.LSR(reg_a_hi, reg_a_hi, 1);
        code.LSR(reg_a_lo, reg_a_lo, 1);
    }

    // reg_a_lo now contains the low word and reg_a_hi now contains the high word.
    // Merge them.
    code.BFM(reg_a_lo, reg_a_hi, 16, 15);

    ctx.reg_alloc.DefineValue(inst, reg_a_lo);
}
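For readers unfamiliar with the ARM packed add/subtract-with-exchange family, the scalar model below shows what the generated code computes for the unsigned, non-halving, hi_is_sum case (the one EmitPackedAddSubU16 selects). It is a sketch, not code from this commit, and RefPackedAddSubU16 is a hypothetical name; the ge flags here are the per-halfword conditions the GE-mask derivation above encodes as all-ones/all-zeros halfwords.

#include <cstdint>

struct PackedResult {
    std::uint32_t value;
    bool ge_lo;  // GE condition for the low halfword (the difference)
    bool ge_hi;  // GE condition for the high halfword (the sum)
};

// hi_is_sum == true, unsigned, non-halving:
//   low halfword  = a.lo - b.hi   (modulo 2^16)
//   high halfword = a.hi + b.lo   (modulo 2^16)
static PackedResult RefPackedAddSubU16(std::uint32_t a, std::uint32_t b) {
    const std::uint32_t a_lo = a & 0xFFFF, a_hi = a >> 16;
    const std::uint32_t b_lo = b & 0xFFFF, b_hi = b >> 16;

    const std::uint32_t diff = a_lo - b_hi;  // may wrap; only the low 16 bits are kept
    const std::uint32_t sum = a_hi + b_lo;   // may carry into bit 16

    PackedResult r{};
    r.value = (diff & 0xFFFF) | ((sum & 0xFFFF) << 16);
    r.ge_lo = a_lo >= b_hi;    // no borrow on the difference
    r.ge_hi = sum >= 0x10000;  // carry out of the sum
    return r;
}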
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitPackedSubAdd(code, ctx, inst, true, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitPackedSubAdd(code, ctx, inst, true, true, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitPackedSubAdd(code, ctx, inst, false, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitPackedSubAdd(code, ctx, inst, false, true, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitPackedSubAdd(code, ctx, inst, true, false, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitPackedSubAdd(code, ctx, inst, true, true, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitPackedSubAdd(code, ctx, inst, false, false, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitPackedSubAdd(code, ctx, inst, false, true, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UQADD(B, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.SQADD(B, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UQSUB(B, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.SQSUB(B, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UQADD(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.SQADD(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UQSUB(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.SQSUB(H, a, a, b);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
code.fp_emitter.UABD(B, a, a, b);
|
||||||
|
code.fp_emitter.UADDLV(B, a, a);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[2]));
|
||||||
|
|
||||||
|
code.fp_emitter.BSL(ge, b, a);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, ge);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Dynarmic::BackendA64
167  src/dynarmic/backend/A64/emit_a64_saturation.cpp  Normal file
@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#include "backend/A64/block_of_code.h"
|
||||||
|
#include "backend/A64/emit_a64.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/bit_util.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "frontend/ir/basic_block.h"
|
||||||
|
#include "frontend/ir/microinstruction.h"
|
||||||
|
#include "frontend/ir/opcodes.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
enum class Op {
|
||||||
|
Add,
|
||||||
|
Sub,
|
||||||
|
};
|
||||||
|
|
||||||
|
template<Op op, size_t size>
|
||||||
|
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||||
|
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
|
||||||
|
ARM64Reg addend = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
|
||||||
|
|
||||||
|
if constexpr (op == Op::Add) {
|
||||||
|
code.fp_emitter.SQADD(size, result, result, addend);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
code.fp_emitter.SQSUB(size, result, result, addend);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (overflow_inst) {
|
||||||
|
ARM64Reg overflow = ctx.reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
|
code.MRS(overflow, FIELD_FPSR);
|
||||||
|
code.UBFX(overflow, overflow, 27, 1);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||||
|
ctx.EraseInstruction(overflow_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitA64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||||
|
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const size_t N = args[1].GetImmediateU8();
|
||||||
|
ASSERT(N >= 1 && N <= 32);
|
||||||
|
|
||||||
|
if (N == 32) {
|
||||||
|
if (overflow_inst) {
|
||||||
|
const auto no_overflow = IR::Value(false);
|
||||||
|
overflow_inst->ReplaceUsesWith(no_overflow);
|
||||||
|
}
|
||||||
|
ctx.reg_alloc.DefineValue(inst, args[0]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
    const u32 mask = (1u << N) - 1;
    const u32 positive_saturated_value = (1u << (N - 1)) - 1;
    const u32 negative_saturated_value = 1u << (N - 1);
    const u32 sext_negative_saturated_value = Common::SignExtend(N, negative_saturated_value);

    const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg tmp = DecodeReg(ctx.reg_alloc.ScratchGpr());

    // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
    code.ADDI2R(overflow, reg_a, negative_saturated_value, overflow);

    // Put the appropriate saturated value in result
    code.MOVI2R(tmp, positive_saturated_value);
    code.CMP(reg_a, tmp);
    code.MOVI2R(result, sext_negative_saturated_value);
    code.CSEL(result, tmp, result, CC_GT);
|
||||||
|
|
||||||
|
// Do the saturation
|
||||||
|
code.CMPI2R(overflow, mask, tmp);
|
||||||
|
code.CSEL(result, reg_a, result, CC_LS);
|
||||||
|
|
||||||
|
if (overflow_inst) {
|
||||||
|
code.CSET(overflow, CC_HI);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||||
|
ctx.EraseInstruction(overflow_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
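As a readability aid, the following is a plain C++ model of the clamp that the ADDI2R/CMP/CSEL sequence above performs. It is not part of the commit; RefSignedSaturate is a hypothetical name, and it covers 1 <= N <= 31 (the N == 32 case is handled by the early return above).

#include <cstddef>
#include <cstdint>
#include <utility>

// Clamp `value` to the signed N-bit range [-(2^(N-1)), 2^(N-1) - 1] and
// report whether saturation occurred (the Q/overflow output).
static std::pair<std::int32_t, bool> RefSignedSaturate(std::int32_t value, std::size_t N) {
    const std::int32_t max = static_cast<std::int32_t>((1u << (N - 1)) - 1);
    const std::int32_t min = -max - 1;
    if (value > max)
        return {max, true};
    if (value < min)
        return {min, true};
    return {value, false};
}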
|
||||||
|
|
||||||
|
void EmitA64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||||
|
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const size_t N = args[1].GetImmediateU8();
|
||||||
|
ASSERT(N <= 31);
|
||||||
|
|
||||||
|
const u32 saturated_value = (1u << N) - 1;
|
||||||
|
|
||||||
|
const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
|
||||||
|
const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
|
||||||
|
const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
|
||||||
|
|
||||||
|
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
|
||||||
|
code.MOVI2R(result, saturated_value);
|
||||||
|
code.CMP(reg_a, result);
|
||||||
|
code.CSEL(result, WZR, result, CC_LE);
|
||||||
|
code.CSEL(result, reg_a, result, CC_LS);
|
||||||
|
|
||||||
|
if (overflow_inst) {
|
||||||
|
code.CSET(overflow, CC_HI);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||||
|
ctx.EraseInstruction(overflow_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Dynarmic::BackendA64
3897  src/dynarmic/backend/A64/emitter/a64_emitter.cpp  Normal file
File diff suppressed because it is too large
1172  src/dynarmic/backend/A64/emitter/a64_emitter.h  Normal file
File diff suppressed because it is too large
28  src/dynarmic/backend/A64/emitter/arm_common.h  Normal file
@ -0,0 +1,28 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include "common/common_types.h"

namespace Dynarmic::BackendA64 {
enum CCFlags {
    CC_EQ = 0,      // Equal
    CC_NEQ,         // Not equal
    CC_CS,          // Carry Set
    CC_CC,          // Carry Clear
    CC_MI,          // Minus (Negative)
    CC_PL,          // Plus
    CC_VS,          // Overflow
    CC_VC,          // No Overflow
    CC_HI,          // Unsigned higher
    CC_LS,          // Unsigned lower or same
    CC_GE,          // Signed greater than or equal
    CC_LT,          // Signed less than
    CC_GT,          // Signed greater than
    CC_LE,          // Signed less than or equal
    CC_AL,          // Always (unconditional), encoding 14
    CC_HS = CC_CS,  // Alias of CC_CS: Unsigned higher or same
    CC_LO = CC_CC,  // Alias of CC_CC: Unsigned lower
};
const u32 NO_COND = 0xE0000000;
} // namespace Dynarmic::BackendA64
139  src/dynarmic/backend/A64/emitter/code_block.h  Normal file
@ -0,0 +1,139 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <cstddef>
#include <vector>

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif

#include "common/assert.h"
#include "common/common_types.h"

namespace Dynarmic::BackendA64 {
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions
// without having to prefix them with gen-> or something similar. Example
// implementation: class JIT : public CodeBlock<ARMXEmitter> {}
template <class T>
class CodeBlock : public T {
private:
    // A privately used function to set the executable RAM space to something
    // invalid. For debugging usefulness it should be used to set the RAM to a
    // host specific breakpoint instruction.
    virtual void PoisonMemory() = 0;

protected:
    u8* region = nullptr;
    // Size of region we can use.
    size_t region_size = 0;
    // Original size of the region we allocated.
    size_t total_region_size = 0;

    bool m_is_child = false;
    std::vector<CodeBlock*> m_children;

public:
    CodeBlock() = default;
    virtual ~CodeBlock() {
        if (region)
            FreeCodeSpace();
    }
    CodeBlock(const CodeBlock&) = delete;
    CodeBlock& operator=(const CodeBlock&) = delete;
    CodeBlock(CodeBlock&&) = delete;
    CodeBlock& operator=(CodeBlock&&) = delete;

    // Call this before you generate any code.
    void AllocCodeSpace(size_t size) {
        region_size = size;
        total_region_size = size;
#if defined(_WIN32)
        void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
#if defined(__APPLE__)
        void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
#else
        void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif

        if (ptr == MAP_FAILED)
            ptr = nullptr;
#endif
        ASSERT_MSG(ptr != nullptr, "Failed to allocate executable memory");
        region = static_cast<u8*>(ptr);
        T::SetCodePtr(region);
    }

    // Always clear code space with breakpoints, so that if someone accidentally
    // executes uninitialized, it just breaks into the debugger.
    void ClearCodeSpace() {
        PoisonMemory();
        ResetCodePtr();
    }

    // Call this when shutting down. Don't rely on the destructor, even though
    // it'll do the job.
    void FreeCodeSpace() {
        ASSERT(!m_is_child);
#ifdef _WIN32
        ASSERT(VirtualFree(region, 0, MEM_RELEASE));
#else
        ASSERT(munmap(region, total_region_size) == 0);
#endif
        region = nullptr;
        region_size = 0;
        total_region_size = 0;
        for (CodeBlock* child : m_children) {
            child->region = nullptr;
            child->region_size = 0;
            child->total_region_size = 0;
        }
    }

    bool IsInSpace(const u8* ptr) const {
        return ptr >= region && ptr < (region + region_size);
    }
    // Cannot currently be undone. Will write protect the entire code region.
    // Start over if you need to change the code (call FreeCodeSpace(),
    // AllocCodeSpace()).
    void WriteProtect() {
        ASSERT(mprotect(region, region_size, PROT_READ | PROT_EXEC) == 0);
    }
    void ResetCodePtr() {
        T::SetCodePtr(region);
    }
    size_t GetSpaceLeft() const {
        ASSERT(static_cast<size_t>(T::GetCodePtr() - region) < region_size);
        return region_size - (T::GetCodePtr() - region);
    }

    bool IsAlmostFull() const {
        // This should be bigger than the biggest block ever.
        return GetSpaceLeft() < 0x10000;
    }

    bool HasChildren() const {
        return region_size != total_region_size;
    }

    u8* AllocChildCodeSpace(size_t child_size) {
        ASSERT_MSG(child_size < GetSpaceLeft(), "Insufficient space for child allocation.");
        u8* child_region = region + region_size - child_size;
        region_size -= child_size;
        return child_region;
    }

    void AddChildCodeSpace(CodeBlock* child, size_t child_size) {
        u8* child_region = AllocChildCodeSpace(child_size);
        child->m_is_child = true;
        child->region = child_region;
        child->region_size = child_size;
        child->total_region_size = child_size;
        child->ResetCodePtr();
        m_children.emplace_back(child);
    }
};
} // namespace Dynarmic::BackendA64
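The MAP_JIT mapping created by AllocCodeSpace above is the piece that the "enable W^X on Apple silicon" part of this commit relies on: on Apple silicon the same pages cannot be writable and executable at once, so each thread flips the mapping around code emission. The sketch below is not code from this commit; it assumes macOS 11+ and a MAP_JIT region like the one allocated above. pthread_jit_write_protect_np and sys_icache_invalidate are Apple APIs; the helper name and the emit callback are made up for illustration.

#if defined(__APPLE__) && defined(__aarch64__)
#include <cstddef>
#include <libkern/OSCacheControl.h>  // sys_icache_invalidate
#include <pthread.h>                 // pthread_jit_write_protect_np

// Run `emit` while the MAP_JIT region is writable for the current thread, then
// flip it back to executable and invalidate the instruction cache for that range.
template <typename F>
void WithWritableJitRegion(void* start, std::size_t length, F emit) {
    pthread_jit_write_protect_np(0);  // region becomes writable (not executable) on this thread
    emit();
    pthread_jit_write_protect_np(1);  // region becomes executable (not writable) again
    sys_icache_invalidate(start, length);
}
#endif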
39  src/dynarmic/backend/A64/exception_handler.h  Normal file
@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <array>
#include <memory>
#include <functional>

#include "backend/A64/a32_jitstate.h"
#include "common/common_types.h"

namespace Dynarmic::BackendA64 {

class BlockOfCode;

struct A64State {
    std::array<u64, 32> X;
    std::array<std::array<u64, 2>, 16> Q;
};
static_assert(sizeof(A64State) == sizeof(A64State::X) + sizeof(A64State::Q));

class ExceptionHandler final {
public:
    ExceptionHandler();
    ~ExceptionHandler();

    void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);

    bool SupportsFastmem() const;

private:
    struct Impl;
    std::unique_ptr<Impl> impl;
};

} // namespace Dynarmic::BackendA64
25  src/dynarmic/backend/A64/exception_handler_generic.cpp  Normal file
@ -0,0 +1,25 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include "backend/A64/exception_handler.h"

namespace Dynarmic::BackendA64 {

struct ExceptionHandler::Impl final {
};

ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;

void ExceptionHandler::Register(BlockOfCode&, std::function<void(CodePtr)>) {
    // Do nothing
}

bool ExceptionHandler::SupportsFastmem() const {
    return false;
}

} // namespace Dynarmic::BackendA64
166  src/dynarmic/backend/A64/exception_handler_posix.cpp  Normal file
@ -0,0 +1,166 @@
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2019 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Copyright 2008 Dolphin Emulator Project
|
||||||
|
// Licensed under GPLv2+
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <csignal>
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include <sys/ucontext.h>
|
||||||
|
#else
|
||||||
|
#include <ucontext.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "backend/A64/a32_jitstate.h"
|
||||||
|
#include "backend/A64/block_of_code.h"
|
||||||
|
#include "backend/A64/exception_handler.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/cast_util.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct CodeBlockInfo {
|
||||||
|
BlockOfCode* block;
|
||||||
|
std::function<void(CodePtr)> callback;
|
||||||
|
};
|
||||||
|
|
||||||
|
class SigHandler {
|
||||||
|
public:
|
||||||
|
SigHandler();
|
||||||
|
|
||||||
|
~SigHandler();
|
||||||
|
|
||||||
|
void AddCodeBlock(CodeBlockInfo info);
|
||||||
|
|
||||||
|
void RemoveCodeBlock(CodePtr PC);
|
||||||
|
|
||||||
|
private:
|
||||||
|
auto FindCodeBlockInfo(CodePtr PC) {
|
||||||
|
return std::find_if(code_block_infos.begin(), code_block_infos.end(),
|
||||||
|
[&](const CodeBlockInfo& x) { return x.block->GetRegion() <= PC && x.block->GetRegion() + x.block->GetRegionSize() > PC; });
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<CodeBlockInfo> code_block_infos;
|
||||||
|
std::mutex code_block_infos_mutex;
|
||||||
|
|
||||||
|
struct sigaction old_sa_segv;
|
||||||
|
struct sigaction old_sa_bus;
|
||||||
|
|
||||||
|
static void SigAction(int sig, siginfo_t* info, void* raw_context);
|
||||||
|
};
|
||||||
|
|
||||||
|
SigHandler sig_handler;
|
||||||
|
|
||||||
|
SigHandler::SigHandler() {
    // Method below from dolphin.

    const size_t signal_stack_size = std::max<size_t>(SIGSTKSZ, 2 * 1024 * 1024);

    stack_t signal_stack;
    signal_stack.ss_sp = malloc(signal_stack_size);
    signal_stack.ss_size = signal_stack_size;
    signal_stack.ss_flags = 0;
    ASSERT_MSG(sigaltstack(&signal_stack, nullptr) == 0,
               "dynarmic: POSIX SigHandler: init failure at sigaltstack");

    struct sigaction sa;
    sa.sa_handler = nullptr;
    sa.sa_sigaction = &SigHandler::SigAction;
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGSEGV, &sa, &old_sa_segv);
#ifdef __APPLE__
    // Faulting accesses surface as SIGBUS on macOS; SigAction and old_sa_bus
    // already expect it, so install the handler for SIGBUS as well.
    sigaction(SIGBUS, &sa, &old_sa_bus);
#endif
}
|
||||||
|
|
||||||
|
SigHandler::~SigHandler() {
|
||||||
|
// No cleanup required.
|
||||||
|
}
|
||||||
|
|
||||||
|
void SigHandler::AddCodeBlock(CodeBlockInfo cb) {
|
||||||
|
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||||
|
ASSERT(FindCodeBlockInfo(cb.block->GetRegion()) == code_block_infos.end());
|
||||||
|
code_block_infos.push_back(std::move(cb));
|
||||||
|
}
|
||||||
|
|
||||||
|
void SigHandler::RemoveCodeBlock(CodePtr PC) {
|
||||||
|
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||||
|
const auto iter = FindCodeBlockInfo(PC);
|
||||||
|
ASSERT(iter != code_block_infos.end());
|
||||||
|
code_block_infos.erase(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
|
||||||
|
ASSERT(sig == SIGSEGV || sig == SIGBUS);
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> guard(sig_handler.code_block_infos_mutex);
|
||||||
|
#ifdef __APPLE__
|
||||||
|
auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext->__ss.__pc);
|
||||||
|
#else
|
||||||
|
auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext.pc);
|
||||||
|
#endif
|
||||||
|
const auto iter = sig_handler.FindCodeBlockInfo(PC);
|
||||||
|
if (iter != sig_handler.code_block_infos.end()) {
|
||||||
|
iter->callback(PC);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt::print(
|
||||||
|
stderr,
|
||||||
|
"dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
|
||||||
|
PC);
|
||||||
|
|
||||||
|
struct sigaction* retry_sa =
|
||||||
|
sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
|
||||||
|
if (retry_sa->sa_flags & SA_SIGINFO) {
|
||||||
|
retry_sa->sa_sigaction(sig, info, raw_context);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (retry_sa->sa_handler == SIG_DFL) {
|
||||||
|
signal(sig, SIG_DFL);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (retry_sa->sa_handler == SIG_IGN) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
retry_sa->sa_handler(sig);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
struct ExceptionHandler::Impl final {
|
||||||
|
Impl(BlockOfCode& code, std::function<void(CodePtr)> cb) {
|
||||||
|
code_begin = code.GetRegion();
|
||||||
|
sig_handler.AddCodeBlock({&code, std::move(cb)});
|
||||||
|
}
|
||||||
|
|
||||||
|
~Impl() {
|
||||||
|
sig_handler.RemoveCodeBlock(code_begin);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
CodePtr code_begin;
|
||||||
|
};
|
||||||
|
|
||||||
|
ExceptionHandler::ExceptionHandler() = default;
|
||||||
|
|
||||||
|
ExceptionHandler::~ExceptionHandler() = default;
|
||||||
|
|
||||||
|
void ExceptionHandler::Register(BlockOfCode& code, std::function<void(CodePtr)> cb) {
|
||||||
|
if (cb)
|
||||||
|
impl = std::make_unique<Impl>(code, std::move(cb));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ExceptionHandler::SupportsFastmem() const {
|
||||||
|
return static_cast<bool>(impl);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Dynarmic::BackendA64
21  src/dynarmic/backend/A64/hostloc.cpp  Normal file
@ -0,0 +1,21 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include "backend/A64/hostloc.h"

namespace Dynarmic::BackendA64 {

Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc) {
    ASSERT(HostLocIsGPR(loc));
    return static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(Arm64Gen::X0) + static_cast<int>(loc));
}

Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc) {
    ASSERT(HostLocIsFPR(loc));
    return EncodeRegToQuad(static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(loc) - static_cast<int>(HostLoc::Q0)));
}

} // namespace Dynarmic::BackendA64
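A short usage note, not part of the diff: these two helpers are how the register allocator turns an abstract HostLoc into a concrete emitter register. A minimal sketch, assuming the enumerators from hostloc.h below:

// HostLoc::X0..X30 map directly onto the GPR enumerators; HostLoc::Q0..Q31 map onto quad FPRs.
const Arm64Gen::ARM64Reg some_gpr = Dynarmic::BackendA64::HostLocToReg64(Dynarmic::BackendA64::HostLoc::X19);
const Arm64Gen::ARM64Reg some_fpr = Dynarmic::BackendA64::HostLocToFpr(Dynarmic::BackendA64::HostLoc::Q8);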
176  src/dynarmic/backend/A64/hostloc.h  Normal file
@ -0,0 +1,176 @@
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "backend/A64/emitter/a64_emitter.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
enum class HostLoc {
|
||||||
|
// Ordering of the registers is intentional. See also: HostLocToA64.
|
||||||
|
|
||||||
|
// 64bit GPR registers
|
||||||
|
X0,
|
||||||
|
X1,
|
||||||
|
X2,
|
||||||
|
X3,
|
||||||
|
X4,
|
||||||
|
X5,
|
||||||
|
X6,
|
||||||
|
X7,
|
||||||
|
X8,
|
||||||
|
X9,
|
||||||
|
X10,
|
||||||
|
X11,
|
||||||
|
X12,
|
||||||
|
X13,
|
||||||
|
X14,
|
||||||
|
X15,
|
||||||
|
X16,
|
||||||
|
X17,
|
||||||
|
X18,
|
||||||
|
X19,
|
||||||
|
X20,
|
||||||
|
X21,
|
||||||
|
X22,
|
||||||
|
X23,
|
||||||
|
X24,
|
||||||
|
X25,
|
||||||
|
X26,
|
||||||
|
X27,
|
||||||
|
X28,
|
||||||
|
X29,
|
||||||
|
X30,
|
||||||
|
|
||||||
|
SP, // 64bit stack pointer
|
||||||
|
|
||||||
|
// Qword FPR registers
|
||||||
|
Q0,
|
||||||
|
Q1,
|
||||||
|
Q2,
|
||||||
|
Q3,
|
||||||
|
Q4,
|
||||||
|
Q5,
|
||||||
|
Q6,
|
||||||
|
Q7,
|
||||||
|
Q8,
|
||||||
|
Q9,
|
||||||
|
Q10,
|
||||||
|
Q11,
|
||||||
|
Q12,
|
||||||
|
Q13,
|
||||||
|
Q14,
|
||||||
|
Q15,
|
||||||
|
Q16,
|
||||||
|
Q17,
|
||||||
|
Q18,
|
||||||
|
Q19,
|
||||||
|
Q20,
|
||||||
|
Q21,
|
||||||
|
Q22,
|
||||||
|
Q23,
|
||||||
|
Q24,
|
||||||
|
Q25,
|
||||||
|
Q26,
|
||||||
|
Q27,
|
||||||
|
Q28,
|
||||||
|
Q29,
|
||||||
|
Q30,
|
||||||
|
Q31,
|
||||||
|
|
||||||
|
FirstSpill,
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr size_t NonSpillHostLocCount = static_cast<size_t>(HostLoc::FirstSpill);
|
||||||
|
|
||||||
|
inline bool HostLocIsGPR(HostLoc reg) {
|
||||||
|
return reg >= HostLoc::X0 && reg <= HostLoc::X30;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool HostLocIsFPR(HostLoc reg) {
|
||||||
|
return reg >= HostLoc::Q0 && reg <= HostLoc::Q31;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool HostLocIsRegister(HostLoc reg) {
|
||||||
|
return HostLocIsGPR(reg) || HostLocIsFPR(reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline HostLoc HostLocRegIdx(int idx) {
|
||||||
|
ASSERT(idx >= 0 && idx <= 30);
|
||||||
|
return static_cast<HostLoc>(idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline HostLoc HostLocFprIdx(int idx) {
|
||||||
|
ASSERT(idx >= 0 && idx <= 31);
|
||||||
|
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::Q0) + idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline HostLoc HostLocSpill(size_t i) {
|
||||||
|
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool HostLocIsSpill(HostLoc reg) {
|
||||||
|
return reg >= HostLoc::FirstSpill;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline size_t HostLocBitWidth(HostLoc loc) {
|
||||||
|
if (HostLocIsGPR(loc))
|
||||||
|
return 64;
|
||||||
|
if (HostLocIsFPR(loc))
|
||||||
|
return 128;
|
||||||
|
if (HostLocIsSpill(loc))
|
||||||
|
return 128;
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
using HostLocList = std::initializer_list<HostLoc>;
|
||||||
|
|
||||||
|
// X18 may be reserved. (Windows and iOS)
// X26 holds the cycle counter.
// X27 contains an emulated-memory-related pointer.
// X28 is used for holding the JitState.
// X30 is the link register.
// In order of desirability, based first on the ABI:
constexpr HostLocList any_gpr = {
|
||||||
|
HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
|
||||||
|
HostLoc::X24, HostLoc::X25,
|
||||||
|
|
||||||
|
HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12,
|
||||||
|
HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,
|
||||||
|
|
||||||
|
HostLoc::X7, HostLoc::X6, HostLoc::X5, HostLoc::X4, HostLoc::X3,
|
||||||
|
HostLoc::X2, HostLoc::X1, HostLoc::X0,
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr HostLocList any_fpr = {
|
||||||
|
HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13,
|
||||||
|
HostLoc::Q14, HostLoc::Q15,
|
||||||
|
|
||||||
|
HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21,
|
||||||
|
HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27,
|
||||||
|
HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
|
||||||
|
|
||||||
|
HostLoc::Q7, HostLoc::Q6, HostLoc::Q5, HostLoc::Q4, HostLoc::Q3, HostLoc::Q2,
|
||||||
|
HostLoc::Q1, HostLoc::Q0,
|
||||||
|
};
|
||||||
|
|
||||||
|
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
|
||||||
|
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc);
|
||||||
|
|
||||||
|
template <typename JitStateType>
|
||||||
|
size_t SpillToOpArg(HostLoc loc) {
|
||||||
|
ASSERT(HostLocIsSpill(loc));
|
||||||
|
|
||||||
|
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
|
||||||
|
ASSERT_MSG(i < JitStateType::SpillCount,
|
||||||
|
"Spill index greater than number of available spill locations");
|
||||||
|
|
||||||
|
return JitStateType::GetSpillLocationOffsetFromIndex(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Dynarmic::BackendA64
44  src/dynarmic/backend/A64/jitstate_info.h  Normal file
@ -0,0 +1,44 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <cstddef>

namespace Dynarmic::BackendA64 {

struct JitStateInfo {
    template <typename JitStateType>
    JitStateInfo(const JitStateType&)
        : offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
        , offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
        , offsetof_save_host_FPCR(offsetof(JitStateType, save_host_FPCR))
        , offsetof_guest_fpcr(offsetof(JitStateType, guest_fpcr))
        , offsetof_guest_fpsr(offsetof(JitStateType, guest_fpsr))
        , offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
        , rsb_ptr_mask(JitStateType::RSBPtrMask)
        , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
        , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
        , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
        , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
    {}

    const size_t offsetof_cycles_remaining;
    const size_t offsetof_cycles_to_run;
    const size_t offsetof_save_host_FPCR;
    const size_t offsetof_guest_fpcr;
    const size_t offsetof_guest_fpsr;
    const size_t offsetof_rsb_ptr;
    const size_t rsb_ptr_mask;
    const size_t offsetof_rsb_location_descriptors;
    const size_t offsetof_rsb_codeptrs;
    const size_t offsetof_cpsr_nzcv;
    const size_t offsetof_fpsr_exc;
    const size_t offsetof_fpsr_qc;
};

} // namespace Dynarmic::BackendA64
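For context, a sketch of how JitStateInfo is meant to be used: it type-erases the layout of a frontend's jit-state struct so shared emitter code can address fields by runtime offset rather than by template parameter. The state type below is a hypothetical stand-in that simply has the member names the constructor expects:

struct ExampleState {
    u64 cycles_remaining, cycles_to_run;
    u32 save_host_FPCR, guest_fpcr, guest_fpsr;
    u64 rsb_ptr;
    static constexpr u64 RSBPtrMask = 0x7;
    std::array<u64, 8> rsb_location_descriptors, rsb_codeptrs;
    u32 cpsr_nzcv, fpsr_exc, fpsr_qc;
};
// const JitStateInfo jsi{ExampleState{}};
// jsi.offsetof_cycles_remaining == offsetof(ExampleState, cycles_remaining), etc.,
// so the emitter can form [state_ptr + offset] addresses without knowing ExampleState.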
651
src/dynarmic/backend/A64/opcodes.inc
Normal file
@ -0,0 +1,651 @@
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
|
||||||
|
|
||||||
|
OPCODE(Void, Void, )
|
||||||
|
OPCODE(Identity, Opaque, Opaque )
|
||||||
|
OPCODE(Breakpoint, Void, )
|
||||||
|
|
||||||
|
// A32 Context getters/setters
|
||||||
|
A32OPC(SetCheckBit, Void, U1 )
|
||||||
|
A32OPC(GetRegister, U32, A32Reg )
|
||||||
|
A32OPC(GetExtendedRegister32, U32, A32ExtReg )
|
||||||
|
A32OPC(GetExtendedRegister64, U64, A32ExtReg )
|
||||||
|
A32OPC(SetRegister, Void, A32Reg, U32 )
|
||||||
|
A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 )
|
||||||
|
A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 )
|
||||||
|
A32OPC(GetCpsr, U32, )
|
||||||
|
A32OPC(SetCpsr, Void, U32 )
|
||||||
|
A32OPC(SetCpsrNZCVRaw, Void, U32 )
|
||||||
|
A32OPC(SetCpsrNZCV, Void, NZCV )
|
||||||
|
A32OPC(SetCpsrNZCVQ, Void, U32 )
|
||||||
|
A32OPC(GetNFlag, U1, )
|
||||||
|
A32OPC(SetNFlag, Void, U1 )
|
||||||
|
A32OPC(GetZFlag, U1, )
|
||||||
|
A32OPC(SetZFlag, Void, U1 )
|
||||||
|
A32OPC(GetCFlag, U1, )
|
||||||
|
A32OPC(SetCFlag, Void, U1 )
|
||||||
|
A32OPC(GetVFlag, U1, )
|
||||||
|
A32OPC(SetVFlag, Void, U1 )
|
||||||
|
A32OPC(OrQFlag, Void, U1 )
|
||||||
|
A32OPC(GetGEFlags, U32, )
|
||||||
|
A32OPC(SetGEFlags, Void, U32 )
|
||||||
|
A32OPC(SetGEFlagsCompressed, Void, U32 )
|
||||||
|
A32OPC(BXWritePC, Void, U32 )
|
||||||
|
A32OPC(CallSupervisor, Void, U32 )
|
||||||
|
A32OPC(ExceptionRaised, Void, U32, U64 )
|
||||||
|
A32OPC(GetFpscr, U32, )
|
||||||
|
A32OPC(SetFpscr, Void, U32, )
|
||||||
|
A32OPC(GetFpscrNZCV, U32, )
|
||||||
|
A32OPC(SetFpscrNZCV, Void, NZCV )
|
||||||
|
|
||||||
|
// A64 Context getters/setters
|
||||||
|
//A64OPC(SetCheckBit, Void, U1 )
|
||||||
|
//A64OPC(GetCFlag, U1, )
|
||||||
|
//A64OPC(GetNZCVRaw, U32, )
|
||||||
|
//A64OPC(SetNZCVRaw, Void, U32 )
|
||||||
|
//A64OPC(SetNZCV, Void, NZCV )
|
||||||
|
//A64OPC(GetW, U32, A64Reg )
|
||||||
|
//A64OPC(GetX, U64, A64Reg )
|
||||||
|
//A64OPC(GetS, U128, A64Vec )
|
||||||
|
//A64OPC(GetD, U128, A64Vec )
|
||||||
|
//A64OPC(GetQ, U128, A64Vec )
|
||||||
|
//A64OPC(GetSP, U64, )
|
||||||
|
//A64OPC(GetFPCR, U32, )
|
||||||
|
//A64OPC(GetFPSR, U32, )
|
||||||
|
//A64OPC(SetW, Void, A64Reg, U32 )
|
||||||
|
//A64OPC(SetX, Void, A64Reg, U64 )
|
||||||
|
//A64OPC(SetS, Void, A64Vec, U128 )
|
||||||
|
//A64OPC(SetD, Void, A64Vec, U128 )
|
||||||
|
//A64OPC(SetQ, Void, A64Vec, U128 )
|
||||||
|
//A64OPC(SetSP, Void, U64 )
|
||||||
|
//A64OPC(SetFPCR, Void, U32 )
|
||||||
|
//A64OPC(SetFPSR, Void, U32 )
|
||||||
|
//A64OPC(OrQC, Void, U1 )
|
||||||
|
//A64OPC(SetPC, Void, U64 )
|
||||||
|
//A64OPC(CallSupervisor, Void, U32 )
|
||||||
|
//A64OPC(ExceptionRaised, Void, U64, U64 )
|
||||||
|
//A64OPC(DataCacheOperationRaised, Void, U64, U64 )
|
||||||
|
//A64OPC(DataSynchronizationBarrier, Void, )
|
||||||
|
//A64OPC(DataMemoryBarrier, Void, )
|
||||||
|
//A64OPC(InstructionSynchronizationBarrier, Void, )
|
||||||
|
//A64OPC(GetCNTFRQ, U32, )
|
||||||
|
//A64OPC(GetCNTPCT, U64, )
|
||||||
|
//A64OPC(GetCTR, U32, )
|
||||||
|
//A64OPC(GetDCZID, U32, )
|
||||||
|
//A64OPC(GetTPIDR, U64, )
|
||||||
|
//A64OPC(GetTPIDRRO, U64, )
|
||||||
|
//A64OPC(SetTPIDR, Void, U64 )
|
||||||
|
|
||||||
|
// Hints
|
||||||
|
OPCODE(PushRSB, Void, U64 )
|
||||||
|
|
||||||
|
// Pseudo-operation, handled specially at final emit
|
||||||
|
OPCODE(GetCarryFromOp, U1, Opaque )
|
||||||
|
OPCODE(GetOverflowFromOp, U1, Opaque )
|
||||||
|
OPCODE(GetGEFromOp, U32, Opaque )
|
||||||
|
OPCODE(GetNZCVFromOp, NZCV, Opaque )
|
||||||
|
OPCODE(GetUpperFromOp, U128, Opaque )
|
||||||
|
OPCODE(GetLowerFromOp, U128, Opaque )
|
||||||
|
|
||||||
|
OPCODE(NZCVFromPackedFlags, NZCV, U32 )
|
||||||
|
|
||||||
|
// Calculations
|
||||||
|
OPCODE(Pack2x32To1x64, U64, U32, U32 )
|
||||||
|
//OPCODE(Pack2x64To1x128, U128, U64, U64 )
|
||||||
|
OPCODE(LeastSignificantWord, U32, U64 )
|
||||||
|
OPCODE(MostSignificantWord, U32, U64 )
|
||||||
|
OPCODE(LeastSignificantHalf, U16, U32 )
|
||||||
|
OPCODE(LeastSignificantByte, U8, U32 )
|
||||||
|
OPCODE(MostSignificantBit, U1, U32 )
|
||||||
|
OPCODE(IsZero32, U1, U32 )
|
||||||
|
OPCODE(IsZero64, U1, U64 )
|
||||||
|
OPCODE(TestBit, U1, U64, U8 )
|
||||||
|
OPCODE(ConditionalSelect32, U32, Cond, U32, U32 )
|
||||||
|
OPCODE(ConditionalSelect64, U64, Cond, U64, U64 )
|
||||||
|
OPCODE(ConditionalSelectNZCV, NZCV, Cond, NZCV, NZCV )
|
||||||
|
OPCODE(LogicalShiftLeft32, U32, U32, U8, U1 )
|
||||||
|
OPCODE(LogicalShiftLeft64, U64, U64, U8 )
|
||||||
|
OPCODE(LogicalShiftRight32, U32, U32, U8, U1 )
|
||||||
|
OPCODE(LogicalShiftRight64, U64, U64, U8 )
|
||||||
|
OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 )
|
||||||
|
//OPCODE(ArithmeticShiftRight64, U64, U64, U8 )
|
||||||
|
OPCODE(RotateRight32, U32, U32, U8, U1 )
|
||||||
|
OPCODE(RotateRight64, U64, U64, U8 )
|
||||||
|
OPCODE(RotateRightExtended, U32, U32, U1 )
|
||||||
|
OPCODE(Add32, U32, U32, U32, U1 )
|
||||||
|
OPCODE(Add64, U64, U64, U64, U1 )
|
||||||
|
OPCODE(Sub32, U32, U32, U32, U1 )
|
||||||
|
OPCODE(Sub64, U64, U64, U64, U1 )
|
||||||
|
OPCODE(Mul32, U32, U32, U32 )
|
||||||
|
OPCODE(Mul64, U64, U64, U64 )
|
||||||
|
//OPCODE(SignedMultiplyHigh64, U64, U64, U64 )
|
||||||
|
//OPCODE(UnsignedMultiplyHigh64, U64, U64, U64 )
|
||||||
|
OPCODE(UnsignedDiv32, U32, U32, U32 )
|
||||||
|
OPCODE(UnsignedDiv64, U64, U64, U64 )
|
||||||
|
OPCODE(SignedDiv32, U32, U32, U32 )
|
||||||
|
OPCODE(SignedDiv64, U64, U64, U64 )
|
||||||
|
OPCODE(And32, U32, U32, U32 )
|
||||||
|
OPCODE(And64, U64, U64, U64 )
|
||||||
|
OPCODE(Eor32, U32, U32, U32 )
|
||||||
|
OPCODE(Eor64, U64, U64, U64 )
|
||||||
|
OPCODE(Or32, U32, U32, U32 )
|
||||||
|
OPCODE(Or64, U64, U64, U64 )
|
||||||
|
OPCODE(Not32, U32, U32 )
|
||||||
|
OPCODE(Not64, U64, U64 )
|
||||||
|
OPCODE(SignExtendByteToWord, U32, U8 )
|
||||||
|
OPCODE(SignExtendHalfToWord, U32, U16 )
|
||||||
|
OPCODE(SignExtendByteToLong, U64, U8 )
|
||||||
|
OPCODE(SignExtendHalfToLong, U64, U16 )
|
||||||
|
OPCODE(SignExtendWordToLong, U64, U32 )
|
||||||
|
OPCODE(ZeroExtendByteToWord, U32, U8 )
|
||||||
|
OPCODE(ZeroExtendHalfToWord, U32, U16 )
|
||||||
|
OPCODE(ZeroExtendByteToLong, U64, U8 )
|
||||||
|
OPCODE(ZeroExtendHalfToLong, U64, U16 )
|
||||||
|
OPCODE(ZeroExtendWordToLong, U64, U32 )
|
||||||
|
//OPCODE(ZeroExtendLongToQuad, U128, U64 )
|
||||||
|
//OPCODE(ByteReverseDual, U64, U64 )
|
||||||
|
OPCODE(ByteReverseWord, U32, U32 )
|
||||||
|
OPCODE(ByteReverseHalf, U16, U16 )
|
||||||
|
OPCODE(CountLeadingZeros32, U32, U32 )
|
||||||
|
OPCODE(CountLeadingZeros64, U64, U64 )
|
||||||
|
//OPCODE(ExtractRegister32, U32, U32, U32, U8 )
|
||||||
|
//OPCODE(ExtractRegister64, U64, U64, U64, U8 )
|
||||||
|
//OPCODE(MaxSigned32, U32, U32, U32 )
|
||||||
|
//OPCODE(MaxSigned64, U64, U64, U64 )
|
||||||
|
//OPCODE(MaxUnsigned32, U32, U32, U32 )
|
||||||
|
//OPCODE(MaxUnsigned64, U64, U64, U64 )
|
||||||
|
//OPCODE(MinSigned32, U32, U32, U32 )
|
||||||
|
//OPCODE(MinSigned64, U64, U64, U64 )
|
||||||
|
//OPCODE(MinUnsigned32, U32, U32, U32 )
|
||||||
|
//OPCODE(MinUnsigned64, U64, U64, U64 )
|
||||||
|
|
||||||
|
// Saturated instructions
|
||||||
|
OPCODE(SignedSaturatedAdd8, U8, U8, U8 )
|
||||||
|
OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
|
||||||
|
OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
|
||||||
|
OPCODE(SignedSaturatedAdd64, U64, U64, U64 )
|
||||||
|
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 )
|
||||||
|
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 )
|
||||||
|
OPCODE(SignedSaturatedSub8, U8, U8, U8 )
|
||||||
|
OPCODE(SignedSaturatedSub16, U16, U16, U16 )
|
||||||
|
OPCODE(SignedSaturatedSub32, U32, U32, U32 )
|
||||||
|
OPCODE(SignedSaturatedSub64, U64, U64, U64 )
|
||||||
|
OPCODE(SignedSaturation, U32, U32, U8 )
|
||||||
|
//OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
|
||||||
|
//OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
|
||||||
|
//OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )
|
||||||
|
//OPCODE(UnsignedSaturatedAdd64, U64, U64, U64 )
|
||||||
|
//OPCODE(UnsignedSaturatedSub8, U8, U8, U8 )
|
||||||
|
//OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
|
||||||
|
//OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
|
||||||
|
//OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
|
||||||
|
OPCODE(UnsignedSaturation, U32, U32, U8 )
|
||||||
|
|
||||||
|
// Packed instructions
|
||||||
|
OPCODE(PackedAddU8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedAddS8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSubU8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSubS8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedAddU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedAddS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSubU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSubS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedAddSubU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedAddSubS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSubAddU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSubAddS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingAddU8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingAddS8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingSubU8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingSubS8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingAddU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingAddS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingSubU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingSubS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingAddSubU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingAddSubS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingSubAddU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedHalvingSubAddS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSaturatedSubU8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSaturatedSubS8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSaturatedAddU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSaturatedAddS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSaturatedSubU16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSaturatedSubS16, U32, U32, U32 )
|
||||||
|
OPCODE(PackedAbsDiffSumS8, U32, U32, U32 )
|
||||||
|
OPCODE(PackedSelect, U32, U32, U32, U32 )
|
||||||
|
|
||||||
|
// CRC instructions
|
||||||
|
//OPCODE(CRC32Castagnoli8, U32, U32, U32 )
|
||||||
|
//OPCODE(CRC32Castagnoli16, U32, U32, U32 )
|
||||||
|
//OPCODE(CRC32Castagnoli32, U32, U32, U32 )
|
||||||
|
//OPCODE(CRC32Castagnoli64, U32, U32, U64 )
|
||||||
|
//OPCODE(CRC32ISO8, U32, U32, U32 )
|
||||||
|
//OPCODE(CRC32ISO16, U32, U32, U32 )
|
||||||
|
//OPCODE(CRC32ISO32, U32, U32, U32 )
|
||||||
|
//OPCODE(CRC32ISO64, U32, U32, U64 )
|
||||||
|
|
||||||
|
// AES instructions
|
||||||
|
//OPCODE(AESDecryptSingleRound, U128, U128 )
|
||||||
|
//OPCODE(AESEncryptSingleRound, U128, U128 )
|
||||||
|
//OPCODE(AESInverseMixColumns, U128, U128 )
|
||||||
|
//OPCODE(AESMixColumns, U128, U128 )
|
||||||
|
|
||||||
|
// SM4 instructions
|
||||||
|
//OPCODE(SM4AccessSubstitutionBox, U8, U8 )
|
||||||
|
|
||||||
|
// Vector instructions
|
||||||
|
//OPCODE(VectorGetElement8, U8, U128, U8 )
|
||||||
|
//OPCODE(VectorGetElement16, U16, U128, U8 )
|
||||||
|
//OPCODE(VectorGetElement32, U32, U128, U8 )
|
||||||
|
//OPCODE(VectorGetElement64, U64, U128, U8 )
|
||||||
|
//OPCODE(VectorSetElement8, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(VectorSetElement16, U128, U128, U8, U16 )
|
||||||
|
//OPCODE(VectorSetElement32, U128, U128, U8, U32 )
|
||||||
|
//OPCODE(VectorSetElement64, U128, U128, U8, U64 )
|
||||||
|
//OPCODE(VectorAbs8, U128, U128 )
|
||||||
|
//OPCODE(VectorAbs16, U128, U128 )
|
||||||
|
//OPCODE(VectorAbs32, U128, U128 )
|
||||||
|
//OPCODE(VectorAbs64, U128, U128 )
|
||||||
|
//OPCODE(VectorAdd8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorAdd16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorAdd32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorAdd64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorAnd, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorArithmeticVShift8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorArithmeticVShift16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorArithmeticVShift32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorArithmeticVShift64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorBroadcastLower8, U128, U8 )
|
||||||
|
//OPCODE(VectorBroadcastLower16, U128, U16 )
|
||||||
|
//OPCODE(VectorBroadcastLower32, U128, U32 )
|
||||||
|
//OPCODE(VectorBroadcast8, U128, U8 )
|
||||||
|
//OPCODE(VectorBroadcast16, U128, U16 )
|
||||||
|
//OPCODE(VectorBroadcast32, U128, U32 )
|
||||||
|
//OPCODE(VectorBroadcast64, U128, U64 )
|
||||||
|
//OPCODE(VectorCountLeadingZeros8, U128, U128 )
|
||||||
|
//OPCODE(VectorCountLeadingZeros16, U128, U128 )
|
||||||
|
//OPCODE(VectorCountLeadingZeros32, U128, U128 )
|
||||||
|
//OPCODE(VectorDeinterleaveEven8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorDeinterleaveEven16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorDeinterleaveEven32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorDeinterleaveEven64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorDeinterleaveOdd8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorDeinterleaveOdd16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorDeinterleaveOdd32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorDeinterleaveOdd64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorEor, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorEqual8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorEqual16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorEqual32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorEqual64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorEqual128, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorExtract, U128, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorExtractLower, U128, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorGreaterS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorGreaterS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorGreaterS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorGreaterS64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingAddS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingAddS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingAddS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingAddU8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingAddU16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingAddU32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingSubS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingSubS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingSubS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingSubU8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingSubU16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorHalvingSubU32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorInterleaveLower8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorInterleaveLower16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorInterleaveLower32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorInterleaveLower64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorInterleaveUpper8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorInterleaveUpper16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorInterleaveUpper32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorInterleaveUpper64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorLogicalShiftLeft8, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorLogicalShiftLeft16, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorLogicalShiftLeft32, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorLogicalShiftLeft64, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorLogicalShiftRight8, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorLogicalShiftRight16, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorLogicalShiftRight32, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorLogicalShiftRight64, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorLogicalVShift8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorLogicalVShift16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorLogicalVShift32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorLogicalVShift64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMaxS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMaxS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMaxS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMaxS64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMaxU8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMaxU16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMaxU32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMaxU64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMinS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMinS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMinS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMinS64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMinU8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMinU16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMinU32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMinU64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMultiply8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMultiply16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMultiply32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorMultiply64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorNarrow16, U128, U128 )
|
||||||
|
//OPCODE(VectorNarrow32, U128, U128 )
|
||||||
|
//OPCODE(VectorNarrow64, U128, U128 )
|
||||||
|
//OPCODE(VectorNot, U128, U128 )
|
||||||
|
//OPCODE(VectorOr, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddLower8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddLower16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddLower32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddSignedWiden8, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddSignedWiden16, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddSignedWiden32, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddUnsignedWiden8, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddUnsignedWiden16, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAddUnsignedWiden32, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAdd8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAdd16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAdd32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedAdd64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMaxS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMaxS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMaxS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMaxU8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMaxU16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMaxU32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMinS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMinS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMinS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMinU8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMinU16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPairedMinU32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPolynomialMultiply8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorPopulationCount, U128, U128 )
|
||||||
|
//OPCODE(VectorReverseBits, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingHalvingAddU8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingHalvingAddU16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingHalvingAddU32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingShiftLeftS8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingShiftLeftS16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingShiftLeftS32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingShiftLeftS64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingShiftLeftU8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorShuffleWords, U128, U128, U8 )
|
||||||
|
//OPCODE(VectorSignExtend8, U128, U128 )
|
||||||
|
//OPCODE(VectorSignExtend16, U128, U128 )
|
||||||
|
//OPCODE(VectorSignExtend32, U128, U128 )
|
||||||
|
//OPCODE(VectorSignExtend64, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedAbsoluteDifference8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedAbsoluteDifference16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedAbsoluteDifference32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedMultiply16, Void, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedMultiply32, Void, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedAbs8, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedAbs16, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedAbs32, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedAbs64, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNeg8, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNeg16, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNeg32, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedNeg64, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedShiftLeft8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedShiftLeft16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedShiftLeft32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedShiftLeft64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSub8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSub16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSub32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorSub64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque )
|
||||||
|
//OPCODE(VectorTableLookup, U128, U128, Table, U128 )
|
||||||
|
//OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedMultiply16, Void, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedMultiply32, Void, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedRecipEstimate, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedRecipSqrtEstimate, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedAccumulateSigned8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedAccumulateSigned16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedAccumulateSigned32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedAccumulateSigned64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedShiftLeft8, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedShiftLeft16, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedShiftLeft32, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorUnsignedSaturatedShiftLeft64, U128, U128, U128 )
|
||||||
|
//OPCODE(VectorZeroExtend8, U128, U128 )
|
||||||
|
//OPCODE(VectorZeroExtend16, U128, U128 )
|
||||||
|
//OPCODE(VectorZeroExtend32, U128, U128 )
|
||||||
|
//OPCODE(VectorZeroExtend64, U128, U128 )
|
||||||
|
//OPCODE(VectorZeroUpper, U128, U128 )
|
||||||
|
//OPCODE(ZeroVector, U128, )
|
||||||
|
|
||||||
|
// Floating-point operations
|
||||||
|
//OPCODE(FPAbs16, U16, U16 )
|
||||||
|
OPCODE(FPAbs32, U32, U32 )
|
||||||
|
OPCODE(FPAbs64, U64, U64 )
|
||||||
|
OPCODE(FPAdd32, U32, U32, U32 )
|
||||||
|
OPCODE(FPAdd64, U64, U64, U64 )
|
||||||
|
OPCODE(FPCompare32, NZCV, U32, U32, U1 )
|
||||||
|
OPCODE(FPCompare64, NZCV, U64, U64, U1 )
|
||||||
|
OPCODE(FPDiv32, U32, U32, U32 )
|
||||||
|
OPCODE(FPDiv64, U64, U64, U64 )
|
||||||
|
//OPCODE(FPMax32, U32, U32, U32 )
|
||||||
|
//OPCODE(FPMax64, U64, U64, U64 )
|
||||||
|
//OPCODE(FPMaxNumeric32, U32, U32, U32 )
|
||||||
|
//OPCODE(FPMaxNumeric64, U64, U64, U64 )
|
||||||
|
//OPCODE(FPMin32, U32, U32, U32 )
|
||||||
|
//OPCODE(FPMin64, U64, U64, U64 )
|
||||||
|
//OPCODE(FPMinNumeric32, U32, U32, U32 )
|
||||||
|
//OPCODE(FPMinNumeric64, U64, U64, U64 )
|
||||||
|
OPCODE(FPMul32, U32, U32, U32 )
|
||||||
|
OPCODE(FPMul64, U64, U64, U64 )
|
||||||
|
//OPCODE(FPMulAdd16, U16, U16, U16, U16 )
|
||||||
|
//OPCODE(FPMulAdd32, U32, U32, U32, U32 )
|
||||||
|
//OPCODE(FPMulAdd64, U64, U64, U64, U64 )
|
||||||
|
//OPCODE(FPMulX32, U32, U32, U32 )
|
||||||
|
//OPCODE(FPMulX64, U64, U64, U64 )
|
||||||
|
//OPCODE(FPNeg16, U16, U16 )
|
||||||
|
OPCODE(FPNeg32, U32, U32 )
|
||||||
|
OPCODE(FPNeg64, U64, U64 )
|
||||||
|
//OPCODE(FPRecipEstimate16, U16, U16 )
|
||||||
|
//OPCODE(FPRecipEstimate32, U32, U32 )
|
||||||
|
//OPCODE(FPRecipEstimate64, U64, U64 )
|
||||||
|
//OPCODE(FPRecipExponent16, U16, U16 )
|
||||||
|
//OPCODE(FPRecipExponent32, U32, U32 )
|
||||||
|
//OPCODE(FPRecipExponent64, U64, U64 )
|
||||||
|
//OPCODE(FPRecipStepFused16, U16, U16, U16 )
|
||||||
|
//OPCODE(FPRecipStepFused32, U32, U32, U32 )
|
||||||
|
//OPCODE(FPRecipStepFused64, U64, U64, U64 )
|
||||||
|
//OPCODE(FPRoundInt16, U16, U16, U8, U1 )
|
||||||
|
//OPCODE(FPRoundInt32, U32, U32, U8, U1 )
|
||||||
|
//OPCODE(FPRoundInt64, U64, U64, U8, U1 )
|
||||||
|
//OPCODE(FPRSqrtEstimate16, U16, U16 )
|
||||||
|
//OPCODE(FPRSqrtEstimate32, U32, U32 )
|
||||||
|
//OPCODE(FPRSqrtEstimate64, U64, U64 )
|
||||||
|
//OPCODE(FPRSqrtStepFused16, U16, U16, U16 )
|
||||||
|
//OPCODE(FPRSqrtStepFused32, U32, U32, U32 )
|
||||||
|
//OPCODE(FPRSqrtStepFused64, U64, U64, U64 )
|
||||||
|
OPCODE(FPSqrt32, U32, U32 )
|
||||||
|
OPCODE(FPSqrt64, U64, U64 )
|
||||||
|
OPCODE(FPSub32, U32, U32, U32 )
|
||||||
|
OPCODE(FPSub64, U64, U64, U64 )
|
||||||
|
|
||||||
|
// Floating-point conversions
|
||||||
|
OPCODE(FPHalfToDouble, U64, U16, U8 )
|
||||||
|
OPCODE(FPHalfToSingle, U32, U16, U8 )
|
||||||
|
OPCODE(FPSingleToDouble, U64, U32, U8 )
|
||||||
|
OPCODE(FPSingleToHalf, U16, U32, U8 )
|
||||||
|
OPCODE(FPDoubleToHalf, U16, U64, U8 )
|
||||||
|
OPCODE(FPDoubleToSingle, U32, U64, U8 )
|
||||||
|
OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 )
|
||||||
|
OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
|
||||||
|
OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
|
||||||
|
OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
|
||||||
|
//OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 )
|
||||||
|
//OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 )
|
||||||
|
//OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 )
|
||||||
|
//OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 )
|
||||||
|
OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
|
||||||
|
OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
|
||||||
|
OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
|
||||||
|
OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 )
|
||||||
|
OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 )
|
||||||
|
OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 )
|
||||||
|
OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 )
|
||||||
|
OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 )
|
||||||
|
OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 )
|
||||||
|
OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 )
|
||||||
|
OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 )
|
||||||
|
OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 )
|
||||||
|
|
||||||
|
// Floating-point vector instructions
|
||||||
|
//OPCODE(FPVectorAbs16, U128, U128 )
|
||||||
|
//OPCODE(FPVectorAbs32, U128, U128 )
|
||||||
|
//OPCODE(FPVectorAbs64, U128, U128 )
|
||||||
|
//OPCODE(FPVectorAdd32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorAdd64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorDiv32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorDiv64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorEqual32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorEqual64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorGreater32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorGreater64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorGreaterEqual32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorGreaterEqual64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMax32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMax64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMin32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMin64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMul32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMul64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMulAdd16, U128, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMulX32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorMulX64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorNeg16, U128, U128 )
|
||||||
|
//OPCODE(FPVectorNeg32, U128, U128 )
|
||||||
|
//OPCODE(FPVectorNeg64, U128, U128 )
|
||||||
|
//OPCODE(FPVectorPairedAdd32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorPairedAdd64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorPairedAddLower32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorPairedAddLower64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRecipEstimate16, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRecipEstimate32, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRecipEstimate64, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRecipStepFused16, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRecipStepFused32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRecipStepFused64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
|
||||||
|
//OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
|
||||||
|
//OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
|
||||||
|
//OPCODE(FPVectorRSqrtEstimate16, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRSqrtEstimate64, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRSqrtStepFused16, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorSqrt32, U128, U128 )
|
||||||
|
//OPCODE(FPVectorSqrt64, U128, U128 )
|
||||||
|
//OPCODE(FPVectorSub32, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorSub64, U128, U128, U128 )
|
||||||
|
//OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorToUnsignedFixed16, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 )
|
||||||
|
//OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 )
|
||||||
|
|
||||||
|
// A32 Memory access
|
||||||
|
A32OPC(ClearExclusive, Void, )
|
||||||
|
A32OPC(SetExclusive, Void, U32, U8 )
|
||||||
|
A32OPC(ReadMemory8, U8, U32 )
|
||||||
|
A32OPC(ReadMemory16, U16, U32 )
|
||||||
|
A32OPC(ReadMemory32, U32, U32 )
|
||||||
|
A32OPC(ReadMemory64, U64, U32 )
|
||||||
|
A32OPC(WriteMemory8, Void, U32, U8 )
|
||||||
|
A32OPC(WriteMemory16, Void, U32, U16 )
|
||||||
|
A32OPC(WriteMemory32, Void, U32, U32 )
|
||||||
|
A32OPC(WriteMemory64, Void, U32, U64 )
|
||||||
|
A32OPC(ExclusiveWriteMemory8, U32, U32, U8 )
|
||||||
|
A32OPC(ExclusiveWriteMemory16, U32, U32, U16 )
|
||||||
|
A32OPC(ExclusiveWriteMemory32, U32, U32, U32 )
|
||||||
|
A32OPC(ExclusiveWriteMemory64, U32, U32, U64 )
|
||||||
|
|
||||||
|
// A64 Memory access
|
||||||
|
//A64OPC(ClearExclusive, Void, )
|
||||||
|
//A64OPC(SetExclusive, Void, U64, U8 )
|
||||||
|
//A64OPC(ReadMemory8, U8, U64 )
|
||||||
|
//A64OPC(ReadMemory16, U16, U64 )
|
||||||
|
//A64OPC(ReadMemory32, U32, U64 )
|
||||||
|
//A64OPC(ReadMemory64, U64, U64 )
|
||||||
|
//A64OPC(ReadMemory128, U128, U64 )
|
||||||
|
//A64OPC(WriteMemory8, Void, U64, U8 )
|
||||||
|
//A64OPC(WriteMemory16, Void, U64, U16 )
|
||||||
|
//A64OPC(WriteMemory32, Void, U64, U32 )
|
||||||
|
//A64OPC(WriteMemory64, Void, U64, U64 )
|
||||||
|
//A64OPC(WriteMemory128, Void, U64, U128 )
|
||||||
|
//A64OPC(ExclusiveWriteMemory8, U32, U64, U8 )
|
||||||
|
//A64OPC(ExclusiveWriteMemory16, U32, U64, U16 )
|
||||||
|
//A64OPC(ExclusiveWriteMemory32, U32, U64, U32 )
|
||||||
|
//A64OPC(ExclusiveWriteMemory64, U32, U64, U64 )
|
||||||
|
//A64OPC(ExclusiveWriteMemory128, U32, U64, U128 )
|
||||||
|
|
||||||
|
// Coprocessor
|
||||||
|
A32OPC(CoprocInternalOperation, Void, CoprocInfo )
|
||||||
|
A32OPC(CoprocSendOneWord, Void, CoprocInfo, U32 )
|
||||||
|
A32OPC(CoprocSendTwoWords, Void, CoprocInfo, U32, U32 )
|
||||||
|
A32OPC(CoprocGetOneWord, U32, CoprocInfo )
|
||||||
|
A32OPC(CoprocGetTwoWords, U64, CoprocInfo )
|
||||||
|
A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 )
|
||||||
|
A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )
|
89
src/dynarmic/backend/A64/perf_map.cpp
Normal file
@ -0,0 +1,89 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include <cstddef>
#include <string>

#ifdef __linux__

#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <sys/types.h>
#include <unistd.h>

#include <fmt/format.h>

#include "common/common_types.h"

namespace Dynarmic::BackendA64 {

namespace {
std::mutex mutex;
std::FILE* file = nullptr;

void OpenFile() {
    const char* perf_dir = std::getenv("PERF_BUILDID_DIR");
    if (!perf_dir) {
        file = nullptr;
        return;
    }

    const pid_t pid = getpid();
    const std::string filename = fmt::format("{:s}/perf-{:d}.map", perf_dir, pid);

    file = std::fopen(filename.c_str(), "w");
    if (!file) {
        return;
    }

    std::setvbuf(file, nullptr, _IONBF, 0);
}
} // anonymous namespace

namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) {
    std::lock_guard guard{mutex};

    if (!file) {
        OpenFile();
        if (!file) {
            return;
        }
    }

    const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
    std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
}
} // namespace detail

void PerfMapClear() {
    std::lock_guard guard{mutex};

    if (!file) {
        return;
    }

    std::fclose(file);
    file = nullptr;
    OpenFile();
}

} // namespace Dynarmic::BackendA64

#else

namespace Dynarmic::BackendA64 {

namespace detail {
void PerfMapRegister(const void*, const void*, const std::string&) {}
} // namespace detail

void PerfMapClear() {}

} // namespace Dynarmic::BackendA64

#endif
27
src/dynarmic/backend/A64/perf_map.h
Normal file
@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <cstddef>
#include <string>

#include "common/cast_util.h"

namespace Dynarmic::BackendA64 {

namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name);
} // namespace detail

template<typename T>
void PerfMapRegister(T start, const void* end, const std::string& friendly_name) {
    detail::PerfMapRegister(Common::BitCast<const void*>(start), end, friendly_name);
}

void PerfMapClear();

} // namespace Dynarmic::BackendA64
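A short usage sketch for the perf-map helpers above, assuming the caller knows the start pointer and size of a freshly emitted block (the helper name and label are illustrative):

void RegisterEmittedBlock(const void* entry, size_t size) {
    // Publishes [entry, entry + size) under a friendly name so Linux perf can
    // attribute samples that land inside JIT-generated code. No-op off Linux.
    PerfMapRegister(entry, static_cast<const u8*>(entry) + size, "dynarmic_a64_block");
}
// PerfMapClear() closes and reopens the map, e.g. after the code cache is invalidated.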
650
src/dynarmic/backend/A64/reg_alloc.cpp
Normal file
@ -0,0 +1,650 @@
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <numeric>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include <fmt/ostream.h>
|
||||||
|
|
||||||
|
#include "backend/A64/abi.h"
|
||||||
|
#include "backend/A64/reg_alloc.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::BackendA64 {
|
||||||
|
|
||||||
|
static u64 ImmediateToU64(const IR::Value& imm) {
|
||||||
|
switch (imm.GetType()) {
|
||||||
|
case IR::Type::U1:
|
||||||
|
return u64(imm.GetU1());
|
||||||
|
case IR::Type::U8:
|
||||||
|
return u64(imm.GetU8());
|
||||||
|
case IR::Type::U16:
|
||||||
|
return u64(imm.GetU16());
|
||||||
|
case IR::Type::U32:
|
||||||
|
return u64(imm.GetU32());
|
||||||
|
case IR::Type::U64:
|
||||||
|
return u64(imm.GetU64());
|
||||||
|
default:
|
||||||
|
ASSERT_FALSE("This should never happen.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool CanExchange(HostLoc a, HostLoc b) {
|
||||||
|
return HostLocIsGPR(a) && HostLocIsGPR(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Minimum number of bits required to represent a type
|
||||||
|
static size_t GetBitWidth(IR::Type type) {
|
||||||
|
switch (type) {
|
||||||
|
case IR::Type::A32Reg:
|
||||||
|
case IR::Type::A32ExtReg:
|
||||||
|
case IR::Type::A64Reg:
|
||||||
|
case IR::Type::A64Vec:
|
||||||
|
case IR::Type::CoprocInfo:
|
||||||
|
case IR::Type::Cond:
|
||||||
|
case IR::Type::Void:
|
||||||
|
case IR::Type::Table:
|
||||||
|
ASSERT_FALSE("Type {} cannot be represented at runtime", type);
|
||||||
|
return 0;
|
||||||
|
case IR::Type::Opaque:
|
||||||
|
ASSERT_FALSE("Not a concrete type");
|
||||||
|
return 0;
|
||||||
|
case IR::Type::U1:
|
||||||
|
return 8;
|
||||||
|
case IR::Type::U8:
|
||||||
|
return 8;
|
||||||
|
case IR::Type::U16:
|
||||||
|
return 16;
|
||||||
|
case IR::Type::U32:
|
||||||
|
return 32;
|
||||||
|
case IR::Type::U64:
|
||||||
|
return 64;
|
||||||
|
case IR::Type::U128:
|
||||||
|
return 128;
|
||||||
|
case IR::Type::NZCVFlags:
|
||||||
|
return 32; // TODO: Update to 16 when flags optimization is done
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool IsValuelessType(IR::Type type) {
|
||||||
|
switch (type) {
|
||||||
|
case IR::Type::Table:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HostLocInfo::IsLocked() const {
|
||||||
|
return is_being_used_count > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HostLocInfo::IsEmpty() const {
|
||||||
|
return is_being_used_count == 0 && values.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HostLocInfo::IsLastUse() const {
|
||||||
|
return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostLocInfo::ReadLock() {
|
||||||
|
ASSERT(!is_scratch);
|
||||||
|
is_being_used_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostLocInfo::WriteLock() {
|
||||||
|
ASSERT(is_being_used_count == 0);
|
||||||
|
is_being_used_count++;
|
||||||
|
is_scratch = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostLocInfo::AddArgReference() {
|
||||||
|
current_references++;
|
||||||
|
ASSERT(accumulated_uses + current_references <= total_uses);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostLocInfo::ReleaseOne() {
|
||||||
|
is_being_used_count--;
|
||||||
|
is_scratch = false;
|
||||||
|
|
||||||
|
if (current_references == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
accumulated_uses++;
|
||||||
|
current_references--;
|
||||||
|
|
||||||
|
if (current_references == 0)
|
||||||
|
ReleaseAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostLocInfo::ReleaseAll() {
|
||||||
|
accumulated_uses += current_references;
|
||||||
|
current_references = 0;
|
||||||
|
|
||||||
|
ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));
|
||||||
|
|
||||||
|
if (total_uses == accumulated_uses) {
|
||||||
|
values.clear();
|
||||||
|
accumulated_uses = 0;
|
||||||
|
total_uses = 0;
|
||||||
|
max_bit_width = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
is_being_used_count = 0;
|
||||||
|
is_scratch = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
|
||||||
|
return std::find(values.begin(), values.end(), inst) != values.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t HostLocInfo::GetMaxBitWidth() const {
|
||||||
|
return max_bit_width;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostLocInfo::AddValue(IR::Inst* inst) {
|
||||||
|
values.push_back(inst);
|
||||||
|
total_uses += inst->UseCount();
|
||||||
|
max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
|
||||||
|
}
|
||||||
|
|
||||||
|
IR::Type Argument::GetType() const {
|
||||||
|
return value.GetType();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Argument::IsImmediate() const {
|
||||||
|
return value.IsImmediate();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Argument::IsVoid() const {
|
||||||
|
return GetType() == IR::Type::Void;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Argument::FitsInImmediateU32() const {
|
||||||
|
if (!IsImmediate())
|
||||||
|
return false;
|
||||||
|
u64 imm = ImmediateToU64(value);
|
||||||
|
return imm < 0x100000000;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Argument::FitsInImmediateS32() const {
|
||||||
|
if (!IsImmediate())
|
||||||
|
return false;
|
||||||
|
s64 imm = static_cast<s64>(ImmediateToU64(value));
|
||||||
|
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Argument::GetImmediateU1() const {
|
||||||
|
return value.GetU1();
|
||||||
|
}
|
||||||
|
|
||||||
|
u8 Argument::GetImmediateU8() const {
|
||||||
|
u64 imm = ImmediateToU64(value);
|
||||||
|
ASSERT(imm < 0x100);
|
||||||
|
return u8(imm);
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 Argument::GetImmediateU16() const {
|
||||||
|
u64 imm = ImmediateToU64(value);
|
||||||
|
ASSERT(imm < 0x10000);
|
||||||
|
return u16(imm);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 Argument::GetImmediateU32() const {
|
||||||
|
u64 imm = ImmediateToU64(value);
|
||||||
|
ASSERT(imm < 0x100000000);
|
||||||
|
return u32(imm);
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 Argument::GetImmediateS32() const {
|
||||||
|
ASSERT(FitsInImmediateS32());
|
||||||
|
u64 imm = ImmediateToU64(value);
|
||||||
|
return imm;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 Argument::GetImmediateU64() const {
|
||||||
|
return ImmediateToU64(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
IR::Cond Argument::GetImmediateCond() const {
|
||||||
|
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
|
||||||
|
return value.GetCond();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Argument::IsInGpr() const {
|
||||||
|
if (IsImmediate())
|
||||||
|
return false;
|
||||||
|
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Argument::IsInFpr() const {
|
||||||
|
if (IsImmediate())
|
||||||
|
return false;
|
||||||
|
return HostLocIsFPR(*reg_alloc.ValueLocation(value.GetInst()));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Argument::IsInMemory() const {
|
||||||
|
if (IsImmediate())
|
||||||
|
return false;
|
||||||
|
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
|
||||||
|
}
|
||||||
|
|
||||||
|
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
|
||||||
|
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
|
||||||
|
for (size_t i = 0; i < inst->NumArgs(); i++) {
|
||||||
|
const IR::Value& arg = inst->GetArg(i);
|
||||||
|
ret[i].value = arg;
|
||||||
|
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
|
||||||
|
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already be defined");
|
||||||
|
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
Arm64Gen::ARM64Reg RegAlloc::UseGpr(Argument& arg) {
|
||||||
|
ASSERT(!arg.allocated);
|
||||||
|
arg.allocated = true;
|
||||||
|
return HostLocToReg64(UseImpl(arg.value, any_gpr));
|
||||||
|
}
|
||||||
|
|
||||||
|
Arm64Gen::ARM64Reg RegAlloc::UseFpr(Argument& arg) {
|
||||||
|
ASSERT(!arg.allocated);
|
||||||
|
arg.allocated = true;
|
||||||
|
return HostLocToFpr(UseImpl(arg.value, any_fpr));
|
||||||
|
}
|
||||||
|
|
||||||
|
//OpArg RegAlloc::UseOpArg(Argument& arg) {
|
||||||
|
// return UseGpr(arg);
|
||||||
|
//}
|
||||||
|
|
||||||
|
void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
|
||||||
|
ASSERT(!arg.allocated);
|
||||||
|
arg.allocated = true;
|
||||||
|
UseImpl(arg.value, {host_loc});
|
||||||
|
}
|
||||||
|
|
||||||
|
Arm64Gen::ARM64Reg RegAlloc::UseScratchGpr(Argument& arg) {
|
||||||
|
ASSERT(!arg.allocated);
|
||||||
|
arg.allocated = true;
|
||||||
|
return HostLocToReg64(UseScratchImpl(arg.value, any_gpr));
|
||||||
|
}
|
||||||
|
|
||||||
|
Arm64Gen::ARM64Reg RegAlloc::UseScratchFpr(Argument& arg) {
|
||||||
|
ASSERT(!arg.allocated);
|
||||||
|
arg.allocated = true;
|
||||||
|
return HostLocToFpr(UseScratchImpl(arg.value, any_fpr));
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
|
||||||
|
ASSERT(!arg.allocated);
|
||||||
|
arg.allocated = true;
|
||||||
|
UseScratchImpl(arg.value, {host_loc});
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg) {
|
||||||
|
ASSERT(IsVector(reg) || IsGPR(reg));
|
||||||
|
HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
|
||||||
|
DefineValueImpl(inst, hostloc);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
|
||||||
|
ASSERT(!arg.allocated);
|
||||||
|
arg.allocated = true;
|
||||||
|
DefineValueImpl(inst, arg.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::Release(const Arm64Gen::ARM64Reg& reg) {
|
||||||
|
ASSERT(IsVector(reg) || IsGPR(reg));
|
||||||
|
const HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
|
||||||
|
LocInfo(hostloc).ReleaseOne();
|
||||||
|
}
|
||||||
|
|
||||||
|
Arm64Gen::ARM64Reg RegAlloc::ScratchGpr(HostLocList desired_locations) {
|
||||||
|
return HostLocToReg64(ScratchImpl(desired_locations));
|
||||||
|
}
|
||||||
|
|
||||||
|
Arm64Gen::ARM64Reg RegAlloc::ScratchFpr(HostLocList desired_locations) {
|
||||||
|
return HostLocToFpr(ScratchImpl(desired_locations));
|
||||||
|
}
|
||||||
|
|
||||||
|
HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
|
||||||
|
if (use_value.IsImmediate()) {
|
||||||
|
return LoadImmediate(use_value, ScratchImpl(desired_locations));
|
||||||
|
}
|
||||||
|
|
||||||
|
const IR::Inst* use_inst = use_value.GetInst();
|
||||||
|
const HostLoc current_location = *ValueLocation(use_inst);
|
||||||
|
const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();
|
||||||
|
|
||||||
|
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
|
||||||
|
if (can_use_current_location) {
|
||||||
|
LocInfo(current_location).ReadLock();
|
||||||
|
return current_location;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LocInfo(current_location).IsLocked()) {
|
||||||
|
return UseScratchImpl(use_value, desired_locations);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HostLoc destination_location = SelectARegister(desired_locations);
|
||||||
|
if (max_bit_width > HostLocBitWidth(destination_location)) {
|
||||||
|
return UseScratchImpl(use_value, desired_locations);
|
||||||
|
} else if (CanExchange(destination_location, current_location)) {
|
||||||
|
Exchange(destination_location, current_location);
|
||||||
|
} else {
|
||||||
|
MoveOutOfTheWay(destination_location);
|
||||||
|
Move(destination_location, current_location);
|
||||||
|
}
|
||||||
|
LocInfo(destination_location).ReadLock();
|
||||||
|
return destination_location;
|
||||||
|
}
|
||||||
|
|
||||||
|
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
|
||||||
|
if (use_value.IsImmediate()) {
|
||||||
|
return LoadImmediate(use_value, ScratchImpl(desired_locations));
|
||||||
|
}
|
||||||
|
|
||||||
|
const IR::Inst* use_inst = use_value.GetInst();
|
||||||
|
const HostLoc current_location = *ValueLocation(use_inst);
|
||||||
|
const size_t bit_width = GetBitWidth(use_inst->GetType());
|
||||||
|
|
||||||
|
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
|
||||||
|
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
|
||||||
|
if (!LocInfo(current_location).IsLastUse()) {
|
||||||
|
MoveOutOfTheWay(current_location);
|
||||||
|
}
|
||||||
|
LocInfo(current_location).WriteLock();
|
||||||
|
return current_location;
|
||||||
|
}
|
||||||
|
|
||||||
|
const HostLoc destination_location = SelectARegister(desired_locations);
|
||||||
|
MoveOutOfTheWay(destination_location);
|
||||||
|
CopyToScratch(bit_width, destination_location, current_location);
|
||||||
|
LocInfo(destination_location).WriteLock();
|
||||||
|
return destination_location;
|
||||||
|
}
|
||||||
|
|
||||||
|
HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
|
||||||
|
HostLoc location = SelectARegister(desired_locations);
|
||||||
|
MoveOutOfTheWay(location);
|
||||||
|
LocInfo(location).WriteLock();
|
||||||
|
return location;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0,
|
||||||
|
std::optional<Argument::copyable_reference> arg1,
|
||||||
|
std::optional<Argument::copyable_reference> arg2,
|
||||||
|
std::optional<Argument::copyable_reference> arg3,
|
||||||
|
std::optional<Argument::copyable_reference> arg4,
|
||||||
|
std::optional<Argument::copyable_reference> arg5,
|
||||||
|
std::optional<Argument::copyable_reference> arg6,
|
||||||
|
std::optional<Argument::copyable_reference> arg7) {
|
||||||
|
constexpr size_t args_count = 8;
|
||||||
|
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8 };
|
||||||
|
const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7};
|
||||||
|
|
||||||
|
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
|
||||||
|
std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
|
||||||
|
|
||||||
|
for (auto hostloc : args_hostloc)
|
||||||
|
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < args_count; i++) {
|
||||||
|
if (args[i]) {
|
||||||
|
UseScratch(*args[i], args_hostloc[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < args_count; i++) {
|
||||||
|
if (!args[i]) {
|
||||||
|
// TODO: Force spill
|
||||||
|
ScratchGpr({args_hostloc[i]});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (HostLoc caller_saved : other_caller_save) {
|
||||||
|
ScratchImpl({caller_saved});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result_def) {
|
||||||
|
DefineValueImpl(result_def, ABI_RETURN);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::EndOfAllocScope() {
|
||||||
|
for (auto& iter : hostloc_info) {
|
||||||
|
iter.ReleaseAll();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::AssertNoMoreUses() {
|
||||||
|
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
|
||||||
|
}
|
||||||
|
|
||||||
|
HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
|
||||||
|
std::vector<HostLoc> candidates = desired_locations;
|
||||||
|
|
||||||
|
// Find all locations that have not been allocated.
|
||||||
|
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
|
||||||
|
return !this->LocInfo(loc).IsLocked();
|
||||||
|
});
|
||||||
|
candidates.erase(allocated_locs, candidates.end());
|
||||||
|
ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
|
||||||
|
|
||||||
|
// Selects the best location out of the available locations.
|
||||||
|
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
|
||||||
|
|
||||||
|
std::partition(candidates.begin(), candidates.end(), [this](auto loc){
|
||||||
|
return this->LocInfo(loc).IsEmpty();
|
||||||
|
});
|
||||||
|
|
||||||
|
return candidates.front();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
|
||||||
|
for (size_t i = 0; i < hostloc_info.size(); i++)
|
||||||
|
if (hostloc_info[i].ContainsValue(value))
|
||||||
|
return static_cast<HostLoc>(i);
|
||||||
|
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
|
||||||
|
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||||
|
LocInfo(host_loc).AddValue(def_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
|
||||||
|
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||||
|
|
||||||
|
if (use_inst.IsImmediate()) {
|
||||||
|
HostLoc location = ScratchImpl(any_gpr);
|
||||||
|
DefineValueImpl(def_inst, location);
|
||||||
|
LoadImmediate(use_inst, location);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
|
||||||
|
HostLoc location = *ValueLocation(use_inst.GetInst());
|
||||||
|
DefineValueImpl(def_inst, location);
|
||||||
|
}
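
// Materializes an IR immediate into the given host location: GPR immediates
// are synthesized with MOVI2R, while FPR immediates use FMOV for zero and are
// otherwise loaded from a literal pool via EmitPatchLDR.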
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
    ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");

    if (HostLocIsGPR(host_loc)) {
        Arm64Gen::ARM64Reg reg = HostLocToReg64(host_loc);
        u64 imm_value = ImmediateToU64(imm);
        code.MOVI2R(reg, imm_value);
        return host_loc;
    }

    if (HostLocIsFPR(host_loc)) {
        Arm64Gen::ARM64Reg reg = Arm64Gen::EncodeRegToDouble(HostLocToFpr(host_loc));
        u64 imm_value = ImmediateToU64(imm);
        if (imm_value == 0)
            code.fp_emitter.FMOV(reg, 0);
        else {
            code.EmitPatchLDR(reg, imm_value);
        }
        return host_loc;
    }

    UNREACHABLE();
}
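
// Move transfers ownership of whatever currently lives in `from` over to `to`
// (the HostLocInfo is moved with std::exchange), whereas CopyToScratch below
// duplicates the value without releasing the source.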
void RegAlloc::Move(HostLoc to, HostLoc from) {
    const size_t bit_width = LocInfo(from).GetMaxBitWidth();

    ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
    ASSERT(bit_width <= HostLocBitWidth(to));

    if (LocInfo(from).IsEmpty()) {
        return;
    }

    EmitMove(bit_width, to, from);

    LocInfo(to) = std::exchange(LocInfo(from), {});
}

void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
    ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());

    EmitMove(bit_width, to, from);
}

void RegAlloc::Exchange(HostLoc a, HostLoc b) {
    ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
    ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
    ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));

    if (LocInfo(a).IsEmpty()) {
        Move(a, b);
        return;
    }

    if (LocInfo(b).IsEmpty()) {
        Move(b, a);
        return;
    }

    EmitExchange(a, b);

    std::swap(LocInfo(a), LocInfo(b));
}

void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
    ASSERT(!LocInfo(reg).IsLocked());
    if (!LocInfo(reg).IsEmpty()) {
        SpillRegister(reg);
    }
}
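
// Spilling: an occupied, unlocked register is moved into the first free spill
// slot. Spill slots start at HostLoc::FirstSpill and are searched linearly.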
void RegAlloc::SpillRegister(HostLoc loc) {
    ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
    ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
    ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");

    HostLoc new_loc = FindFreeSpill();
    Move(new_loc, loc);
}

HostLoc RegAlloc::FindFreeSpill() const {
    for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
        HostLoc loc = static_cast<HostLoc>(i);
        if (LocInfo(loc).IsEmpty())
            return loc;
    }

    ASSERT_FALSE("All spill locations are full");
}

HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
    ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
    return hostloc_info[static_cast<size_t>(loc)];
}

const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
    ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
    return hostloc_info[static_cast<size_t>(loc)];
}
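
// EmitMove covers every supported source/destination pairing (GPR<->GPR,
// GPR<->FPR, register<->spill). Spill slots are addressed as unsigned immediate
// offsets from X28, which this backend keeps reserved (note the asserts above),
// and 128-bit FPR-to-FPR moves are not implemented yet.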
void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
    if (HostLocIsFPR(to) && HostLocIsFPR(from)) {
        // bit_width == 128
        //mov(HostLocToFpr(to), HostLocToFpr(from));

        ASSERT_FALSE("Unimplemented");
    } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.MOV(HostLocToReg64(to), HostLocToReg64(from));
        } else {
            code.MOV(DecodeReg(HostLocToReg64(to)), DecodeReg(HostLocToReg64(from)));
        }
    } else if (HostLocIsFPR(to) && HostLocIsGPR(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.fp_emitter.FMOV(EncodeRegToDouble(HostLocToFpr(to)), HostLocToReg64(from));
        } else {
            code.fp_emitter.FMOV(EncodeRegToSingle(HostLocToFpr(to)), DecodeReg(HostLocToReg64(from)));
        }
    } else if (HostLocIsGPR(to) && HostLocIsFPR(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.fp_emitter.FMOV(HostLocToReg64(to), EncodeRegToDouble(HostLocToFpr(from)));
        } else {
            code.fp_emitter.FMOV(DecodeReg(HostLocToReg64(to)), EncodeRegToSingle(HostLocToFpr(from)));
        }
    } else if (HostLocIsFPR(to) && HostLocIsSpill(from)) {
        s32 spill_addr = spill_to_addr(from);
        // ASSERT(spill_addr.getBit() >= bit_width);
        code.fp_emitter.LDR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(to), Arm64Gen::X28, spill_addr);
    } else if (HostLocIsSpill(to) && HostLocIsFPR(from)) {
        s32 spill_addr = spill_to_addr(to);
        // ASSERT(spill_addr.getBit() >= bit_width);
        code.fp_emitter.STR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(from), Arm64Gen::X28, spill_addr);
    } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.LDR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(to), Arm64Gen::X28, spill_to_addr(from));
        } else {
            code.LDR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(to)), Arm64Gen::X28, spill_to_addr(from));
        }
    } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.STR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(from), Arm64Gen::X28, spill_to_addr(to));
        } else {
            code.STR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(from)), Arm64Gen::X28, spill_to_addr(to));
        }
    } else {
        ASSERT_FALSE("Invalid RegAlloc::EmitMove");
    }
}
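
// EmitExchange swaps two GPRs with the classic three-EOR trick so that no
// scratch register is needed; note that the sequence assumes a != b, since
// EORing a register with itself would zero it. Exchanging FPRs is deliberately
// unsupported.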
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
    if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
        // Is this the best way to do it?
        code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
        code.EOR(HostLocToReg64(b), HostLocToReg64(a), HostLocToReg64(b));
        code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
    } else if (HostLocIsFPR(a) && HostLocIsFPR(b)) {
        ASSERT_FALSE("Check your code: Exchanging XMM registers is unnecessary");
    } else {
        ASSERT_FALSE("Invalid RegAlloc::EmitExchange");
    }
}

} // namespace Dynarmic::BackendA64

167  src/dynarmic/backend/A64/reg_alloc.h  Normal file
@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#pragma once

#include <array>
#include <functional>
#include <optional>
#include <utility>
#include <vector>

#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
//#include "backend/A64/oparg.h"
#include "common/common_types.h"
#include "frontend/ir/cond.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/value.h"

namespace Dynarmic::BackendA64 {

class RegAlloc;
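
// Bookkeeping for a single host location (GPR, FPR or spill slot): which IR
// values currently live there, how many times the current instruction has it
// locked, and how many block-scope references remain before it can be released.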
struct HostLocInfo {
public:
    bool IsLocked() const;
    bool IsEmpty() const;
    bool IsLastUse() const;

    void ReadLock();
    void WriteLock();
    void AddArgReference();
    void ReleaseOne();
    void ReleaseAll();

    bool ContainsValue(const IR::Inst* inst) const;
    size_t GetMaxBitWidth() const;

    void AddValue(IR::Inst* inst);

private:
    // Current instruction state
    size_t is_being_used_count = 0;
    bool is_scratch = false;

    // Block state
    size_t current_references = 0;
    size_t accumulated_uses = 0;
    size_t total_uses = 0;

    // Value state
    std::vector<IR::Inst*> values;
    size_t max_bit_width = 0;
};

struct Argument {
public:
    using copyable_reference = std::reference_wrapper<Argument>;

    IR::Type GetType() const;
    bool IsImmediate() const;
    bool IsVoid() const;

    bool FitsInImmediateU32() const;
    bool FitsInImmediateS32() const;

    bool GetImmediateU1() const;
    u8 GetImmediateU8() const;
    u16 GetImmediateU16() const;
    u32 GetImmediateU32() const;
    u64 GetImmediateS32() const;
    u64 GetImmediateU64() const;
    IR::Cond GetImmediateCond() const;

    /// Is this value currently in a GPR?
    bool IsInGpr() const;
    /// Is this value currently in a FPR?
    bool IsInFpr() const;
    /// Is this value currently in memory?
    bool IsInMemory() const;

private:
    friend class RegAlloc;
    explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}

    bool allocated = false;
    RegAlloc& reg_alloc;
    IR::Value value;
};
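
// Typical emitter usage of this interface (illustrative sketch; EmitA64SomeOp
// and its operand layout are assumptions, not part of this header):
//
//     void EmitA64SomeOp(RegAlloc& reg_alloc, IR::Inst* inst) {
//         auto args = reg_alloc.GetArgumentInfo(inst);
//         Arm64Gen::ARM64Reg operand = reg_alloc.UseGpr(args[0]);
//         Arm64Gen::ARM64Reg result = reg_alloc.UseScratchGpr(args[1]);
//         // ... emit instructions combining operand into result ...
//         reg_alloc.DefineValue(inst, result);
//     }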
class RegAlloc final {
public:
    using ArgumentInfo = std::array<Argument, IR::max_arg_count>;

    explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<u64(HostLoc)> spill_to_addr)
            : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}

    ArgumentInfo GetArgumentInfo(IR::Inst* inst);

    Arm64Gen::ARM64Reg UseGpr(Argument& arg);
    Arm64Gen::ARM64Reg UseFpr(Argument& arg);
    //OpArg UseOpArg(Argument& arg);
    void Use(Argument& arg, HostLoc host_loc);

    Arm64Gen::ARM64Reg UseScratchGpr(Argument& arg);
    Arm64Gen::ARM64Reg UseScratchFpr(Argument& arg);
    void UseScratch(Argument& arg, HostLoc host_loc);

    void DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg);
    void DefineValue(IR::Inst* inst, Argument& arg);

    void Release(const Arm64Gen::ARM64Reg& reg);

    Arm64Gen::ARM64Reg ScratchGpr(HostLocList desired_locations = any_gpr);
    Arm64Gen::ARM64Reg ScratchFpr(HostLocList desired_locations = any_fpr);

    void HostCall(IR::Inst* result_def = nullptr, std::optional<Argument::copyable_reference> arg0 = {},
                  std::optional<Argument::copyable_reference> arg1 = {},
                  std::optional<Argument::copyable_reference> arg2 = {},
                  std::optional<Argument::copyable_reference> arg3 = {},
                  std::optional<Argument::copyable_reference> arg4 = {},
                  std::optional<Argument::copyable_reference> arg5 = {},
                  std::optional<Argument::copyable_reference> arg6 = {},
                  std::optional<Argument::copyable_reference> arg7 = {});

    // TODO: Values in host flags

    void EndOfAllocScope();

    void AssertNoMoreUses();

private:
    friend struct Argument;

    HostLoc SelectARegister(HostLocList desired_locations) const;
    std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;

    HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
    HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
    HostLoc ScratchImpl(HostLocList desired_locations);
    void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
    void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);

    HostLoc LoadImmediate(IR::Value imm, HostLoc reg);
    void Move(HostLoc to, HostLoc from);
    void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
    void Exchange(HostLoc a, HostLoc b);
    void MoveOutOfTheWay(HostLoc reg);

    void SpillRegister(HostLoc loc);
    HostLoc FindFreeSpill() const;

    std::vector<HostLocInfo> hostloc_info;
    HostLocInfo& LocInfo(HostLoc loc);
    const HostLocInfo& LocInfo(HostLoc loc) const;

    BlockOfCode& code;
    std::function<u64(HostLoc)> spill_to_addr;
    void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
    void EmitExchange(HostLoc a, HostLoc b);
};

} // namespace Dynarmic::BackendA64

@ -44,4 +44,9 @@ u8 RecipEstimate(u64 a);
 */
u8 RecipSqrtEstimate(u64 a);

template <typename T>
constexpr bool IsPow2(T imm) {
    return imm > 0 && (imm & (imm - 1)) == 0;
}
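
// e.g. IsPow2(1) and IsPow2(64) are true; IsPow2(0) and IsPow2(12) are false.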

} // namespace Dynarmic::Common