clang-format

SachinVin 2022-05-30 21:20:47 +05:30
parent d459cb6f59
commit 1f302f397f
37 changed files with 746 additions and 813 deletions
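The hunks below are the result of running clang-format over the backend sources. As a quick orientation, here is a small, self-contained C++ sketch (not taken from the commit) written in the style the hunks converge on: template<...> with no space before '<', "} else {" cuddled onto one line, and a single space after each comma.

#include <cstdio>

// Hypothetical snippet, formatted the way the diff below formats code.
template<typename T>
static T Select(bool cond, T a, T b) {
    if (cond) {
        return a;
    } else {
        return b;
    }
}

int main() {
    // Prints "1": the true branch selects the first argument.
    std::printf("%d\n", Select<int>(true, 1, 2));
    return 0;
}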

View File

@ -4,21 +4,21 @@
* General Public License version 2 or any later version.
*/
+#include "dynarmic/backend/A64/a32_emit_a64.h"
#include <iterator>
#include <unordered_map>
#include <unordered_set>
#include <utility>
+#include <dynarmic/interface/A32/coprocessor.h>
#include <fmt/format.h>
#include <fmt/ostream.h>
#include <mcl/assert.hpp>
#include <mcl/bit_cast.hpp>
-#include <mcl/stdint.hpp>
#include <mcl/scope_exit.hpp>
+#include <mcl/stdint.hpp>
-#include <dynarmic/interface/A32/coprocessor.h>
-#include "dynarmic/backend/A64/a32_emit_a64.h"
#include "dynarmic/backend/A64/a32_jitstate.h"
#include "dynarmic/backend/A64/abi.h"
#include "dynarmic/backend/A64/block_of_code.h"
@ -56,7 +56,8 @@ static size_t MJitStateExtReg(A32::ExtReg reg) {
ASSERT_FALSE("Should never happen.");
}
-A32EmitContext::A32EmitContext(RegAlloc& reg_alloc, IR::Block& block) : EmitContext(reg_alloc, block) {}
+A32EmitContext::A32EmitContext(RegAlloc& reg_alloc, IR::Block& block)
+: EmitContext(reg_alloc, block) {}
A32::LocationDescriptor A32EmitContext::Location() const {
return A32::LocationDescriptor{block.Location()};
@ -92,7 +93,7 @@ std::ptrdiff_t A32EmitContext::GetInstOffset(IR::Inst* inst) const {
A32EmitA64::A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface)
: EmitA64(code), config(std::move(config)), jit_interface(jit_interface) {
-exception_handler.Register(code, [this](CodePtr PC){FastmemCallback(PC);});
+exception_handler.Register(code, [this](CodePtr PC) { FastmemCallback(PC); });
GenMemoryAccessors();
GenTerminalHandlers();
code.PreludeComplete();
@ -121,7 +122,6 @@ A32EmitA64::BlockDescriptor A32EmitA64::Emit(IR::Block& block) {
// Call the relevant Emit* member function.
switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \
case IR::Opcode::name: \
A32EmitA64::Emit##name(ctx, inst); \
@ -343,7 +343,7 @@ void A32EmitA64::GenTerminalHandlers() {
code.BR(code.ABI_SCRATCH1);
code.SetJumpTarget(fast_dispatch_cache_miss);
-code.STR(INDEX_UNSIGNED, location_descriptor_reg, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, location_descriptor) );
+code.STR(INDEX_UNSIGNED, location_descriptor_reg, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, location_descriptor));
code.LookupBlock();
code.STR(INDEX_UNSIGNED, code.ABI_RETURN, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, code_ptr));
code.BR(code.ABI_RETURN);
@ -359,7 +359,6 @@ void A32EmitA64::GenTerminalHandlers() {
}
}
void A32EmitA64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
A32::Reg reg = inst->GetArg(0).GetA32RegRef();
@ -418,8 +417,7 @@ void A32EmitA64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins
if (args[1].IsInFpr()) {
ARM64Reg to_store = ctx.reg_alloc.UseFpr(args[1]);
code.fp_emitter.STR(64, INDEX_UNSIGNED, to_store, X28, MJitStateExtReg(reg));
-}
-else {
+} else {
ARM64Reg to_store = ctx.reg_alloc.UseGpr(args[1]);
code.STR(INDEX_UNSIGNED, to_store, X28, MJitStateExtReg(reg));
}
@ -813,7 +811,7 @@ void A32EmitA64::DoNotFastmem(const DoNotFastmemMarker& marker) {
InvalidateBasicBlocks({std::get<0>(marker)});
}
-template <typename T>
+template<typename T>
void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn) {
constexpr size_t bit_size = mcl::bitsizeof<T>;
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -858,7 +856,6 @@ void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr c
code.MOV(result, code.ABI_RETURN);
};
if (ShouldFastmem(do_not_fastmem_marker)) {
const CodePtr patch_location = code.GetCodePtr();
switch (bit_size) {
@ -882,7 +879,7 @@ void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr c
fastmem_patch_info.emplace(
patch_location,
FastmemPatchInfo{
-[this, patch_location, page_table_lookup, callback_fn, result, do_not_fastmem_marker]{
+[this, patch_location, page_table_lookup, callback_fn, result, do_not_fastmem_marker] {
CodePtr save_code_ptr = code.GetCodePtr();
code.SetCodePtr(patch_location);
FixupBranch thunk = code.B();
@ -904,8 +901,7 @@ void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr c
code.SwitchToNearCode();
DoNotFastmem(do_not_fastmem_marker);
-}
-});
+}});
ctx.reg_alloc.DefineValue(inst, result);
return;
@ -957,7 +953,8 @@ void A32EmitA64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr
code.STRH(DecodeReg(value), addr, vaddr);
break;
case 32:
-code.STR(DecodeReg(value), addr, vaddr);;
+code.STR(DecodeReg(value), addr, vaddr);
+;
break;
case 64:
code.STR(value, addr, vaddr);
@ -994,7 +991,7 @@ void A32EmitA64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr
fastmem_patch_info.emplace(
patch_location,
FastmemPatchInfo{
-[this, patch_location, page_table_lookup, callback_fn, do_not_fastmem_marker]{
+[this, patch_location, page_table_lookup, callback_fn, do_not_fastmem_marker] {
CodePtr save_code_ptr = code.GetCodePtr();
code.SetCodePtr(patch_location);
FixupBranch thunk = code.B();
@ -1015,8 +1012,7 @@ void A32EmitA64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr
code.SwitchToNearCode();
DoNotFastmem(do_not_fastmem_marker);
-}
-});
+}});
return;
}
@ -1062,7 +1058,7 @@ void A32EmitA64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
WriteMemory<u64>(ctx, inst, write_memory_64);
}
-template <typename T, void (A32::UserCallbacks::*fn)(A32::VAddr, T)>
+template<typename T, void (A32::UserCallbacks::*fn)(A32::VAddr, T)>
static void ExclusiveWrite(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config) {
auto args = reg_alloc.GetArgumentInfo(inst);
reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
@ -1112,8 +1108,7 @@ static void EmitCoprocessorException() {
ASSERT_FALSE("Should raise coproc exception here");
}
-static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback,
-IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
+static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
reg_alloc.HostCall(inst, {}, {}, arg0, arg1);
code.MOVP2R(code.ABI_PARAM1, jit_interface);
@ -1306,7 +1301,7 @@ void A32EmitA64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
code.LDR(INDEX_UNSIGNED, DecodeReg(reg_result), reg_tmp, 0);
code.MOVP2R(reg_tmp, source_ptrs[0]);
code.LDR(INDEX_UNSIGNED, DecodeReg(reg_tmp), reg_tmp, 0);
-code.ORR(reg_result, reg_tmp, reg_result, ArithOption{ reg_result , ST_LSL, 32});
+code.ORR(reg_result, reg_tmp, reg_result, ArithOption{reg_result, ST_LSL, 32});
ctx.reg_alloc.DefineValue(inst, reg_result);
@ -1331,7 +1326,6 @@ void A32EmitA64::EmitA32CoprocLoadWords(A32EmitContext& ctx, IR::Inst* inst) {
option = coproc_info[5];
}
std::shared_ptr<A32::Coprocessor> coproc = config.coprocessors[coproc_num];
if (!coproc) {
EmitCoprocessorException();
@ -1376,7 +1370,6 @@ void A32EmitA64::EmitA32CoprocStoreWords(A32EmitContext& ctx, IR::Inst* inst) {
CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]);
}
std::string A32EmitA64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const {
const A32::LocationDescriptor descriptor{ir_descriptor};
return fmt::format("a32_{}{:08X}_{}_fpcr{:08X}", descriptor.TFlag() ? "t" : "a", descriptor.PC(), descriptor.EFlag() ? "be" : "le",
@ -1407,7 +1400,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri
}
void A32EmitA64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
-auto get_upper = [](const IR::LocationDescriptor &desc) -> u32 {
+auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 {
return static_cast<u32>(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32);
};
@ -1532,10 +1525,10 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDesc
void A32EmitA64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.GetCodePtr();
-auto long_branch_gt = [this](CodePtr ptr){
+auto long_branch_gt = [this](CodePtr ptr) {
const s64 distance = reinterpret_cast<s64>(ptr) - reinterpret_cast<s64>(code.GetCodePtr());
-if((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) {
+if ((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) {
code.B(CC_GT, ptr);
return;
}
@ -1558,10 +1551,10 @@ void A32EmitA64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
void A32EmitA64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.GetCodePtr();
-auto long_branch_gt = [this](CodePtr ptr){
+auto long_branch_gt = [this](CodePtr ptr) {
const s64 distance = reinterpret_cast<s64>(ptr) - reinterpret_cast<s64>(code.GetCodePtr());
-if((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) {
+if ((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) {
code.B(CC_EQ, ptr);
return;
}

View File

@ -17,9 +17,9 @@
#include "dynarmic/backend/A64/block_range_information.h" #include "dynarmic/backend/A64/block_range_information.h"
#include "dynarmic/backend/A64/emit_a64.h" #include "dynarmic/backend/A64/emit_a64.h"
#include "dynarmic/backend/A64/exception_handler.h" #include "dynarmic/backend/A64/exception_handler.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/a32.h"
#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/config.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/ir/terminal.h" #include "dynarmic/ir/terminal.h"
namespace Dynarmic::BackendA64 { namespace Dynarmic::BackendA64 {

View File

@ -7,13 +7,12 @@
#include <memory>
#include <boost/icl/interval_set.hpp>
-#include <fmt/format.h>
-#include <mcl/assert.hpp>
-#include <mcl/stdint.hpp>
-#include <mcl/scope_exit.hpp>
#include <dynarmic/interface/A32/a32.h>
#include <dynarmic/interface/A32/context.h>
+#include <fmt/format.h>
+#include <mcl/assert.hpp>
+#include <mcl/scope_exit.hpp>
+#include <mcl/stdint.hpp>
#include "dynarmic/backend/A64/a32_emit_a64.h"
#include "dynarmic/backend/A64/a32_jitstate.h"
@ -21,12 +20,12 @@
#include "dynarmic/backend/A64/callback.h" #include "dynarmic/backend/A64/callback.h"
#include "dynarmic/backend/A64/devirtualize.h" #include "dynarmic/backend/A64/devirtualize.h"
#include "dynarmic/backend/A64/jitstate_info.h" #include "dynarmic/backend/A64/jitstate_info.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/common/llvm_disassemble.h" #include "dynarmic/common/llvm_disassemble.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h" #include "dynarmic/ir/location_descriptor.h"
#include "dynarmic/ir/opt/passes.h" #include "dynarmic/ir/opt/passes.h"
#include "dynarmic/common/atomic.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
@ -46,8 +45,7 @@ struct Jit::Impl {
: block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
, emitter(block_of_code, config, jit)
, config(std::move(config))
-, jit_interface(jit)
-{}
+, jit_interface(jit) {}
A32JitState jit_state;
BlockOfCode block_of_code;
@ -61,7 +59,7 @@ struct Jit::Impl {
bool invalidate_entire_cache = false;
HaltReason Execute() {
-const CodePtr current_codeptr = [this]{
+const CodePtr current_codeptr = [this] {
// RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
@ -174,7 +172,8 @@ private:
}
};
-Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, std::move(config))) {}
+Jit::Jit(UserConfig config)
+: impl(std::make_unique<Impl>(this, std::move(config))) {}
Jit::~Jit() = default;
@ -263,10 +262,15 @@ struct Context::Impl {
size_t invalid_cache_generation;
};
-Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
+Context::Context()
+: impl(std::make_unique<Context::Impl>()) {
+impl->jit_state.ResetRSB();
+}
Context::~Context() = default;
-Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
-Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
+Context::Context(const Context& ctx)
+: impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
+Context::Context(Context&& ctx) noexcept
+: impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) {
*impl = *ctx.impl;
return *this;

View File

@ -4,11 +4,12 @@
* General Public License version 2 or any later version.
*/
+#include "dynarmic/backend/A64/a32_jitstate.h"
#include <mcl/assert.hpp>
#include <mcl/bit_cast.hpp>
#include <mcl/stdint.hpp>
-#include "dynarmic/backend/A64/a32_jitstate.h"
#include "dynarmic/backend/A64/block_of_code.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"

View File

@ -7,6 +7,7 @@
#pragma once
#include <array>
#include <mcl/stdint.hpp>
namespace Dynarmic::BackendA64 {
@ -14,8 +15,8 @@ namespace Dynarmic::BackendA64 {
class BlockOfCode;
#ifdef _MSC_VER
-#pragma warning(push)
+# pragma warning(push)
-#pragma warning(disable:4324) // Structure was padded due to alignment specifier
+# pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif
struct A32JitState {
@ -102,7 +103,7 @@ struct A32JitState {
};
#ifdef _MSC_VER
-#pragma warning(pop)
+# pragma warning(pop)
#endif
using CodePtr = const void*;

View File

@ -14,18 +14,18 @@
// 20th Sep 2018: This code was modified for Dynarmic.
+#include "dynarmic/backend/A64/abi.h"
#include <algorithm>
#include <vector>
#include <mcl/stdint.hpp>
-#include "dynarmic/backend/A64/abi.h"
namespace Dynarmic::BackendA64 {
template<typename RegisterArrayT>
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
-u32 gprs = 0 , fprs = 0;
+u32 gprs = 0, fprs = 0;
for (HostLoc reg : regs) {
if (HostLocIsGPR(reg)) {
@ -83,4 +83,4 @@ void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc e
ABI_PopRegistersAndAdjustStack(code, regs);
}
-} // namespace Dynarmic::BackendX64
+} // namespace Dynarmic::BackendA64

View File

@ -107,4 +107,4 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
-} // namespace Dynarmic::BackendX64
+} // namespace Dynarmic::BackendA64

View File

@ -4,6 +4,8 @@
* General Public License version 2 or any later version.
*/
+#include "dynarmic/backend/A64/block_of_code.h"
#include <array>
#include <cstring>
#include <limits>
@ -12,18 +14,17 @@
#include "dynarmic/backend/A64/a32_jitstate.h" #include "dynarmic/backend/A64/a32_jitstate.h"
#include "dynarmic/backend/A64/abi.h" #include "dynarmic/backend/A64/abi.h"
#include "dynarmic/interface/halt_reason.h"
#include "dynarmic/backend/A64/block_of_code.h"
#include "dynarmic/backend/A64/perf_map.h" #include "dynarmic/backend/A64/perf_map.h"
#include "dynarmic/interface/halt_reason.h"
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> # include <windows.h>
#else #else
#include <sys/mman.h> # include <sys/mman.h>
#endif #endif
#ifdef __APPLE__ #ifdef __APPLE__
#include <pthread.h> # include <pthread.h>
#endif #endif
namespace Dynarmic::BackendA64 { namespace Dynarmic::BackendA64 {
@ -54,18 +55,18 @@ constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory([[maybe_unused]] const void* base, [[maybe_unused]] size_t size, bool is_executable) {
-#if defined(_WIN32)
+# if defined(_WIN32)
DWORD oldProtect = 0;
VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
-#elif defined(__APPLE__)
+# elif defined(__APPLE__)
pthread_jit_write_protect_np(is_executable);
-#else
+# else
static const size_t pageSize = sysconf(_SC_PAGESIZE);
const size_t iaddr = reinterpret_cast<size_t>(base);
const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
-#endif
+# endif
}
#endif
@ -154,7 +155,7 @@ void BlockOfCode::ForceReturnFromRunCode(bool fpscr_already_exited) {
}
void BlockOfCode::GenRunCode() {
-const u8* loop, *enter_fpscr_then_loop;
+const u8 *loop, *enter_fpscr_then_loop;
std::vector<Arm64Gen::FixupBranch> return_to_caller_fpscr_already_exited;
AlignCode16();

View File

@ -14,8 +14,8 @@
#include "dynarmic/backend/A64/callback.h" #include "dynarmic/backend/A64/callback.h"
#include "dynarmic/backend/A64/constant_pool.h" #include "dynarmic/backend/A64/constant_pool.h"
#include "dynarmic/backend/A64/jitstate_info.h"
#include "dynarmic/backend/A64/emitter/a64_emitter.h" #include "dynarmic/backend/A64/emitter/a64_emitter.h"
#include "dynarmic/backend/A64/jitstate_info.h"
#include "dynarmic/interface/halt_reason.h" #include "dynarmic/interface/halt_reason.h"
namespace Dynarmic::BackendA64 { namespace Dynarmic::BackendA64 {
@ -34,7 +34,6 @@ public:
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
BlockOfCode(const BlockOfCode&) = delete;
/// Call when external emitters have finished emitting their preludes.
void PreludeComplete();

View File

@ -4,35 +4,33 @@
* General Public License version 2 or any later version.
*/
+#include "dynarmic/backend/A64/block_range_information.h"
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include <mcl/stdint.hpp>
-#include <mcl/stdint.hpp>
-#include "dynarmic/backend/A64/block_range_information.h"
namespace Dynarmic::BackendA64 {
-template <typename ProgramCounterType>
+template<typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
}
-template <typename ProgramCounterType>
+template<typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
block_ranges.clear();
}
-template <typename ProgramCounterType>
+template<typename ProgramCounterType>
std::unordered_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
std::unordered_set<IR::LocationDescriptor> erase_locations;
for (auto invalidate_interval : ranges) {
auto pair = block_ranges.equal_range(invalidate_interval);
for (auto it = pair.first; it != pair.second; ++it) {
-for (const auto &descriptor : it->second) {
+for (const auto& descriptor : it->second) {
erase_locations.insert(descriptor);
}
}

View File

@ -15,7 +15,7 @@
namespace Dynarmic::BackendA64 {
-template <typename ProgramCounterType>
+template<typename ProgramCounterType>
class BlockRangeInformation {
public:
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);

View File

@ -5,6 +5,7 @@
*/
#include "dynarmic/backend/A64/callback.h"
#include "dynarmic/backend/A64/block_of_code.h"
namespace Dynarmic::BackendA64 {
@ -38,4 +39,4 @@ void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<voi
code.QuickCallFunction(fn);
}
-} // namespace Dynarmic::BackendX64
+} // namespace Dynarmic::BackendA64

View File

@ -23,16 +23,19 @@ class Callback {
public:
virtual ~Callback();
-virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const = 0;
+virtual void EmitCall(
+BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const = 0;
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const = 0;
};
class SimpleCallback final : public Callback {
public:
-template <typename Function>
+template<typename Function>
-SimpleCallback(Function fn) : fn(reinterpret_cast<void (*)()>(fn)) {}
+SimpleCallback(Function fn)
+: fn(reinterpret_cast<void (*)()>(fn)) {}
-void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
+void EmitCall(
+BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;
private:
@ -41,10 +44,12 @@ private:
class ArgCallback final : public Callback {
public:
-template <typename Function>
+template<typename Function>
-ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}
+ArgCallback(Function fn, u64 arg)
+: fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}
-void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
+void EmitCall(
+BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;
private:

View File

@ -4,22 +4,24 @@
* General Public License version 2 or any later version.
*/
+#include "dynarmic/backend/A64/constant_pool.h"
#include <cstring>
#include <mcl/assert.hpp>
#include "dynarmic/backend/A64/block_of_code.h"
-#include "dynarmic/backend/A64/constant_pool.h"
namespace Dynarmic::BackendA64 {
-ConstantPool::ConstantPool(BlockOfCode& code) : code(code) {}
+ConstantPool::ConstantPool(BlockOfCode& code)
+: code(code) {}
void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
const auto constant = std::make_tuple(lower, upper);
auto iter = constant_info.find(constant);
if (iter == constant_info.end()) {
-struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
+struct PatchInfo p = {code.GetCodePtr(), Rt, constant};
patch_info.emplace_back(p);
code.BRK(0);
return;
@ -29,7 +31,7 @@ void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
if (!(offset >= -0x40000 && offset <= 0x3FFFF)) {
constant_info.erase(constant);
-struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
+struct PatchInfo p = {code.GetCodePtr(), Rt, constant};
patch_info.emplace_back(p);
code.BRK(0x42);
return;

View File

@ -9,10 +9,10 @@
#include <cstring>
#include <memory>
-#include <mcl/type_traits/function_info.hpp>
-#include <mcl/stdint.hpp>
#include <mcl/assert.hpp>
#include <mcl/bit_cast.hpp>
+#include <mcl/stdint.hpp>
+#include <mcl/type_traits/function_info.hpp>
#include "dynarmic/backend/A64/callback.h"
@ -20,11 +20,11 @@ namespace Dynarmic::BackendA64 {
namespace impl {
-template <typename FunctionType, FunctionType mfp>
+template<typename FunctionType, FunctionType mfp>
struct ThunkBuilder;
-template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
+template<typename C, typename R, typename... Args, R (C::*mfp)(Args...)>
-struct ThunkBuilder<R(C::*)(Args...), mfp> {
+struct ThunkBuilder<R (C::*)(Args...), mfp> {
static R Thunk(C* this_, Args... args) {
return (this_->*mfp)(std::forward<Args>(args)...);
}
@ -33,7 +33,7 @@ struct ThunkBuilder<R(C::*)(Args...), mfp> {
} // namespace impl
template<auto mfp>
-ArgCallback DevirtualizeGeneric(mcl::class_type<decltype(mfp)> * this_) {
+ArgCallback DevirtualizeGeneric(mcl::class_type<decltype(mfp)>* this_) {
return ArgCallback{&impl::ThunkBuilder<decltype(mfp), mfp>::Thunk, reinterpret_cast<u64>(this_)};
}

View File

@ -4,16 +4,17 @@
* General Public License version 2 or any later version.
*/
+#include "dynarmic/backend/A64/emit_a64.h"
#include <unordered_map>
#include <unordered_set>
#include <mcl/assert.hpp>
#include <mcl/bit/bit_field.hpp>
-#include <mcl/stdint.hpp>
#include <mcl/scope_exit.hpp>
+#include <mcl/stdint.hpp>
#include "dynarmic/backend/A64/block_of_code.h"
-#include "dynarmic/backend/A64/emit_a64.h"
#include "dynarmic/backend/A64/hostloc.h"
#include "dynarmic/backend/A64/perf_map.h"
#include "dynarmic/backend/A64/reg_alloc.h"
@ -278,7 +279,7 @@ void EmitA64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescrip
code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); };
-for (const auto &descriptor : locations) {
+for (const auto& descriptor : locations) {
auto it = block_descriptors.find(descriptor);
if (it == block_descriptors.end()) {
continue;

View File

@ -38,7 +38,7 @@ using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
-template <typename T>
+template<typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T>>;
struct EmitContext {
@ -124,4 +124,4 @@ protected:
std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
};
-} // namespace Dynarmic::BackendX64
+} // namespace Dynarmic::BackendA64

View File

@ -8,8 +8,8 @@
#include <mcl/stdint.hpp>
#include "dynarmic/backend/A64/block_of_code.h"
-#include "dynarmic/backend/A64/reg_alloc.h"
#include "dynarmic/backend/A64/emit_a64.h"
+#include "dynarmic/backend/A64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
@ -82,7 +82,7 @@ void EmitA64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
// TODO: Flag optimization
-code.LSR(result,result, 31);
+code.LSR(result, result, 31);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -128,7 +128,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
switch (args[0].GetImmediateCond()) {
case IR::Cond::EQ: //z
-code.CSEL(else_, else_, then_ , CC_EQ);
+code.CSEL(else_, else_, then_, CC_EQ);
break;
case IR::Cond::NE: //!z
code.CSEL(else_, else_, then_, CC_NEQ);
@ -137,7 +137,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.CSEL(else_, else_, then_, CC_CS);
break;
case IR::Cond::CC: //!c
-code.CSEL(else_, else_, then_ , CC_CC);
+code.CSEL(else_, else_, then_, CC_CC);
break;
case IR::Cond::MI: //n
code.CSEL(else_, else_, then_, CC_MI);
@ -344,7 +344,7 @@ void EmitA64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
} else if (shift < 32) {
code.LSR(carry, result, shift - 1);
code.ANDI2R(carry, carry, 1);
-code.LSR(result,result, shift);
+code.LSR(result, result, shift);
} else if (shift == 32) {
code.UBFX(carry, result, 31, 1);
code.MOV(result, WZR);
@ -706,7 +706,7 @@ static void EmitAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit
code.CMP(DecodeReg(op_arg), DecodeReg(op_arg));
code.ADCS(result, result, op_arg);
} else {
-code.ADDS(result,result, op_arg);
+code.ADDS(result, result, op_arg);
}
} else {
code.CMPI2R(DecodeReg(carry), 1);
@ -782,11 +782,11 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit
code.SUBS(result, result, op_arg);
} else {
code.ADDSI2R(DecodeReg(op_arg), DecodeReg(op_arg), 0); // Clear carry
-code.SBCS(result,result, op_arg);
+code.SBCS(result, result, op_arg);
}
} else {
code.CMPI2R(DecodeReg(carry), 0x1);
-code.SBCS(result,result, op_arg);
+code.SBCS(result, result, op_arg);
}
}
@ -839,7 +839,6 @@ void EmitA64::EmitMul64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -880,7 +879,6 @@ void EmitA64::EmitSignedDiv64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitAnd32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -905,8 +903,7 @@ void EmitA64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) {
if (args[1].IsImmediate()) {
u32 op_arg = args[1].GetImmediateU32();
code.ANDI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr());
-}
-else {
+} else {
Arm64Gen::ARM64Reg op_arg = ctx.reg_alloc.UseGpr(args[1]);
code.AND(result, result, op_arg);
}
@ -938,8 +935,7 @@ void EmitA64::EmitEor64(EmitContext& ctx, IR::Inst* inst) {
if (args[1].IsImmediate()) {
u32 op_arg = args[1].GetImmediateU32();
code.EORI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr());
-}
-else {
+} else {
Arm64Gen::ARM64Reg op_arg = ctx.reg_alloc.UseGpr(args[1]);
code.EOR(result, result, op_arg);
}
@ -957,7 +953,7 @@ void EmitA64::EmitOr32(EmitContext& ctx, IR::Inst* inst) {
code.ORRI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr());
} else {
Arm64Gen::ARM64Reg op_arg = DecodeReg(ctx.reg_alloc.UseGpr(args[1]));
-code.ORR(result, result , op_arg);
+code.ORR(result, result, op_arg);
}
ctx.reg_alloc.DefineValue(inst, result);
@ -971,8 +967,7 @@ void EmitA64::EmitOr64(EmitContext& ctx, IR::Inst* inst) {
if (args[1].IsImmediate()) {
u32 op_arg = args[1].GetImmediateU32();
code.ORRI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr());
-}
-else {
+} else {
Arm64Gen::ARM64Reg op_arg = ctx.reg_alloc.UseGpr(args[1]);
code.ORR(result, result, op_arg);
}
@ -1001,8 +996,7 @@ void EmitA64::EmitNot64(EmitContext& ctx, IR::Inst* inst) {
if (args[0].IsImmediate()) {
result = ctx.reg_alloc.ScratchGpr();
code.MOVI2R(result, u32(~args[0].GetImmediateU32()));
-}
-else {
+} else {
result = ctx.reg_alloc.UseScratchGpr(args[0]);
code.MVN(result, result);
}

View File

@ -46,7 +46,7 @@ Arm64Gen::RoundingMode ConvertRoundingModeToA64RoundingMode(FP::RoundingMode rou
}
}
-template <size_t fsize, typename Function>
+template<size_t fsize, typename Function>
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -61,7 +61,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
ctx.reg_alloc.DefineValue(inst, result);
}
-template <size_t fsize, typename Function>
+template<size_t fsize, typename Function>
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -72,8 +72,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.fp_emitter.*fn)(result, result, operand);
-}
-else {
+} else {
fn(result, result, operand);
}
@ -136,27 +135,27 @@ void EmitA64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
}
void EmitA64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) {
-FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
+FPThreeOp<32, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}
void EmitA64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) {
-FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
+FPThreeOp<64, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}
void EmitA64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) {
-FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
+FPThreeOp<32, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}
void EmitA64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
-FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
+FPThreeOp<64, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}
void EmitA64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
-FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
+FPThreeOp<32, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}
void EmitA64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
-FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
+FPThreeOp<64, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}
void EmitA64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
FPTwoOp<32>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
@ -167,11 +166,11 @@ void EmitA64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) {
}
void EmitA64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) {
-FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
+FPThreeOp<32, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
void EmitA64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
-FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
+FPThreeOp<64, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
static ARM64Reg SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
@ -276,13 +275,11 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if constexpr (unsigned_) {
code.fp_emitter.FCVTU(result, src, round_imm);
-}
-else {
+} else {
code.fp_emitter.FCVTS(result, src, round_imm);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
@ -328,8 +325,7 @@ void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
-}
-else {
+} else {
code.fp_emitter.SCVTF(result, from);
}
@ -347,8 +343,7 @@ void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
-}
-else {
+} else {
code.fp_emitter.UCVTF(result, from);
}
@ -366,8 +361,7 @@ void EmitA64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
-}
-else {
+} else {
code.fp_emitter.SCVTF(result, from);
}
@ -385,8 +379,7 @@ void EmitA64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
-}
-else {
+} else {
code.fp_emitter.SCVTF(result, from);
}
@ -404,8 +397,7 @@ void EmitA64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
-}
-else {
+} else {
code.fp_emitter.SCVTF(result, from);
}
@ -423,8 +415,7 @@ void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
-}
-else {
+} else {
code.fp_emitter.UCVTF(result, from);
}
@ -434,7 +425,6 @@ void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
@ -443,8 +433,7 @@ void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
-}
-else {
+} else {
code.fp_emitter.UCVTF(result, from);
}
@ -454,7 +443,6 @@ void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
@ -463,8 +451,7 @@ void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
-}
-else {
+} else {
code.fp_emitter.UCVTF(result, from);
}

View File

@ -37,8 +37,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
if constexpr (op == Op::Add) {
code.fp_emitter.SQADD(size, result, result, addend);
-}
-else {
+} else {
code.fp_emitter.SQSUB(size, result, result, addend);
}

View File

@ -2,6 +2,8 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
+#include "a64_emitter.h"
#include <algorithm>
#include <array>
#include <cinttypes>
@ -9,11 +11,10 @@
#include <vector>
#include <mcl/assert.hpp>
-#include <mcl/bit_cast.hpp>
#include <mcl/bit/bit_count.hpp>
#include <mcl/bit/bit_field.hpp>
+#include <mcl/bit_cast.hpp>
-#include "a64_emitter.h"
#include "dynarmic/common/math_util.h"
#ifdef _WIN32
@ -70,8 +71,7 @@ bool IsImmArithmetic(uint64_t input, u32* val, bool* shift) {
}
// For AND/TST/ORR/EOR etc
-bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned int* imm_s,
-unsigned int* imm_r) {
+bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned int* imm_s, unsigned int* imm_r) {
bool negate = false;
// Logical immediates are encoded using parameters n, imm_s and imm_r using
@ -211,8 +211,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
// Ensure that the index to the multipliers array is within bounds.
-DEBUG_ASSERT((multiplier_idx >= 0) &&
-(static_cast<size_t>(multiplier_idx) < multipliers.size()));
+DEBUG_ASSERT((multiplier_idx >= 0) && (static_cast<size_t>(multiplier_idx) < multipliers.size()));
uint64_t multiplier = multipliers[multiplier_idx];
uint64_t candidate = (b - a) * multiplier;
@ -284,8 +283,7 @@ bool FPImm8FromFloat(float value, u8* imm_out) {
if ((exponent >> 7) == ((exponent >> 6) & 1))
return false;
-const u8 imm8 = static_cast<u8>((sign << 7) | ((!(exponent >> 7)) << 6) |
-((exponent & 3) << 4) | mantissa4);
+const u8 imm8 = static_cast<u8>((sign << 7) | ((!(exponent >> 7)) << 6) | ((exponent & 3) << 4) | mantissa4);
const float new_float = FPImm8ToFloat(imm8);
if (new_float == value)
*imm_out = imm8;
@ -384,7 +382,8 @@ void ARM64XEmitter::FlushIcacheSection(const u8* start, const u8* end) {
static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
size_t isize, dsize;
-__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
+__asm__ volatile("mrs %0, ctr_el0"
+: "=r"(ctr_el0));
isize = 4 << ((ctr_el0 >> 0) & 0xf);
dsize = 4 << ((ctr_el0 >> 16) & 0xf);
@ -396,15 +395,30 @@ void ARM64XEmitter::FlushIcacheSection(const u8* start, const u8* end) {
for (; addr < reinterpret_cast<u64>(end); addr += dsize) for (; addr < reinterpret_cast<u64>(end); addr += dsize)
// use "civac" instead of "cvau", as this is the suggested workaround for // use "civac" instead of "cvau", as this is the suggested workaround for
// Cortex-A53 errata 819472, 826319, 827319 and 824069. // Cortex-A53 errata 819472, 826319, 827319 and 824069.
__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory"); __asm__ volatile("dc civac, %0"
__asm__ volatile("dsb ish" : : : "memory"); :
: "r"(addr)
: "memory");
__asm__ volatile("dsb ish"
:
:
: "memory");
addr = reinterpret_cast<u64>(start) & ~static_cast<u64>(isize - 1); addr = reinterpret_cast<u64>(start) & ~static_cast<u64>(isize - 1);
for (; addr < reinterpret_cast<u64>(end); addr += isize) for (; addr < reinterpret_cast<u64>(end); addr += isize)
__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory"); __asm__ volatile("ic ivau, %0"
:
: "r"(addr)
: "memory");
__asm__ volatile("dsb ish" : : : "memory"); __asm__ volatile("dsb ish"
__asm__ volatile("isb" : : : "memory"); :
:
: "memory");
__asm__ volatile("isb"
:
:
: "memory");
#endif #endif
} }
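Editorial aside (not part of this commit): the reformatted inline-asm lines above are the standard AArch64 instruction-cache maintenance sequence. A minimal standalone sketch of the same idea, assuming GCC/Clang inline assembly on an AArch64 target, is shown below; FlushICacheRange and its locals are illustrative names, not dynarmic API.

#include <cstddef>
#include <cstdint>

// Sketch only: clean the data cache, then invalidate the instruction cache
// over [start, end), mirroring the civac/dsb/ivau/dsb/isb sequence above.
// AArch64 with GCC/Clang inline asm only.
static void FlushICacheRange(const std::uint8_t* start, const std::uint8_t* end) {
    std::uint64_t ctr_el0;
    __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
    const std::size_t dsize = 4u << ((ctr_el0 >> 16) & 0xF);  // DminLine, in bytes
    const std::size_t isize = 4u << ((ctr_el0 >> 0) & 0xF);   // IminLine, in bytes

    std::uint64_t addr = reinterpret_cast<std::uint64_t>(start) & ~static_cast<std::uint64_t>(dsize - 1);
    for (; addr < reinterpret_cast<std::uint64_t>(end); addr += dsize)
        __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");  // clean+invalidate by VA
    __asm__ volatile("dsb ish" : : : "memory");

    addr = reinterpret_cast<std::uint64_t>(start) & ~static_cast<std::uint64_t>(isize - 1);
    for (; addr < reinterpret_cast<std::uint64_t>(end); addr += isize)
        __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");   // invalidate icache by VA
    __asm__ volatile("dsb ish" : : : "memory");
    __asm__ volatile("isb" : : : "memory");
}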
@ -535,8 +549,7 @@ void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr
"{}: Received too large distance: {:X}", __func__, distance); "{}: Received too large distance: {:X}", __func__, distance);
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | ((static_cast<u32>(distance) << 5) & 0xFFFFE0) | Rt);
((static_cast<u32>(distance) << 5) & 0xFFFFE0) | Rt);
} }
void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr) { void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr) {
@ -552,8 +565,7 @@ void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const voi
"{}: Received too large distance: {:X}", __func__, distance); "{}: Received too large distance: {:X}", __func__, distance);
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | (bits << 19) | Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | (bits << 19) | ((static_cast<u32>(distance) << 5) & 0x7FFE0) | Rt);
((static_cast<u32>(distance) << 5) & 0x7FFE0) | Rt);
} }
void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr) { void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr) {
@ -579,35 +591,29 @@ void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm) {
ASSERT_MSG(!(imm & ~0xFFFF), "{}: Exception instruction too large immediate: {}", __func__, ASSERT_MSG(!(imm & ~0xFFFF), "{}: Exception instruction too large immediate: {}", __func__,
imm); imm);
Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | ExcEnc[instenc][2]);
ExcEnc[instenc][2]);
} }
void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt) { void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt) {
Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt); Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt);
} }
void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) {
ARM64Reg Rm, ArithOption Option) {
bool b64Bit = Is64Bit(Rd); bool b64Bit = Is64Bit(Rd);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) | Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) | (Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? (1 << 21) : 0) | (Rm << 16) | Option.GetData() | (Rn << 5) | Rd);
(Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? (1 << 21) : 0) | (Rm << 16) |
Option.GetData() | (Rn << 5) | Rd);
} }
void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rm) {
bool b64Bit = Is64Bit(Rd); bool b64Bit = Is64Bit(Rd);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0xD0 << 21) | (Rm << 16) | (Rn << 5) | Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0xD0 << 21) | (Rm << 16) | (Rn << 5) | Rd);
Rd);
} }
void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) { void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) {
@ -617,31 +623,26 @@ void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 n
ASSERT_MSG(!(nzcv & ~0xF), "{}: Flags out of range: {}", __func__, nzcv); ASSERT_MSG(!(nzcv & ~0xF), "{}: Flags out of range: {}", __func__, nzcv);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (imm << 16) | (cond << 12) | Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (imm << 16) | (cond << 12) | (1 << 11) | (Rn << 5) | nzcv);
(1 << 11) | (Rn << 5) | nzcv);
} }
void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond) {
CCFlags cond) {
bool b64Bit = Is64Bit(Rm); bool b64Bit = Is64Bit(Rm);
ASSERT_MSG(!(nzcv & ~0xF), "{}: Flags out of range: {}", __func__, nzcv); ASSERT_MSG(!(nzcv & ~0xF), "{}: Flags out of range: {}", __func__, nzcv);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (Rm << 16) | (cond << 12) | Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (Rm << 16) | (cond << 12) | (Rn << 5) | nzcv);
(Rn << 5) | nzcv);
} }
void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) {
CCFlags cond) {
bool b64Bit = Is64Bit(Rd); bool b64Bit = Is64Bit(Rd);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | (0xD4 << 21) | (Rm << 16) | Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | (0xD4 << 21) | (Rm << 16) | (cond << 12) | (CondSelectEnc[instenc][1] << 10) | (Rn << 5) | Rd);
(cond << 12) | (CondSelectEnc[instenc][1] << 10) | (Rn << 5) | Rd);
} }
void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn) { void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn) {
@ -649,8 +650,7 @@ void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn) {
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (0x2D6 << 21) | (Data1SrcEnc[instenc][0] << 16) | Write32((b64Bit << 31) | (0x2D6 << 21) | (Data1SrcEnc[instenc][0] << 16) | (Data1SrcEnc[instenc][1] << 10) | (Rn << 5) | Rd);
(Data1SrcEnc[instenc][1] << 10) | (Rn << 5) | Rd);
} }
void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
@ -659,31 +659,26 @@ void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, AR
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (0x0D6 << 21) | (Rm << 16) | (Data2SrcEnc[instenc] << 10) | (Rn << 5) | Write32((b64Bit << 31) | (0x0D6 << 21) | (Rm << 16) | (Data2SrcEnc[instenc] << 10) | (Rn << 5) | Rd);
Rd);
} }
void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
ARM64Reg Ra) {
bool b64Bit = Is64Bit(Rd); bool b64Bit = Is64Bit(Rd);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Ra = DecodeReg(Ra); Ra = DecodeReg(Ra);
Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | (Rm << 16) | Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | (Rm << 16) | (Data3SrcEnc[instenc][1] << 15) | (Ra << 10) | (Rn << 5) | Rd);
(Data3SrcEnc[instenc][1] << 15) | (Ra << 10) | (Rn << 5) | Rd);
} }
void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
ArithOption Shift) {
bool b64Bit = Is64Bit(Rd); bool b64Bit = Is64Bit(Rd);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) | Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) | (LogicalEnc[instenc][1] << 21) | Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd);
(LogicalEnc[instenc][1] << 21) | Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd);
} }
void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, s32 imm) { void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, s32 imm) {
@ -698,20 +693,15 @@ void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, s32 imm) {
Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (MaskImm19(imm) << 5) | Rt); Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (MaskImm19(imm) << 5) | Rt);
} }
void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt) {
ARM64Reg Rt) {
Rs = DecodeReg(Rs); Rs = DecodeReg(Rs);
Rt2 = DecodeReg(Rt2); Rt2 = DecodeReg(Rt2);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | (LoadStoreExcEnc[instenc][1] << 23) | (LoadStoreExcEnc[instenc][2] << 22) | (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | (LoadStoreExcEnc[instenc][4] << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
(LoadStoreExcEnc[instenc][1] << 23) | (LoadStoreExcEnc[instenc][2] << 22) |
(LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | (LoadStoreExcEnc[instenc][4] << 15) |
(Rt2 << 10) | (Rn << 5) | Rt);
} }
void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) {
u32 imm) {
bool b64Bit = Is64Bit(Rt); bool b64Bit = Is64Bit(Rt);
bool b128Bit = IsQuad(Rt); bool b128Bit = IsQuad(Rt);
bool bVec = IsVector(Rt); bool bVec = IsVector(Rt);
@ -749,8 +739,7 @@ void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | Rt);
Rt);
} }
void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size) { void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size) {
@ -786,36 +775,30 @@ void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | (immr << 16) | Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | (immr << 16) | (imms << 10) | (Rn << 5) | Rd);
(imms << 10) | (Rn << 5) | Rd);
} }
void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
ArithOption Rm) {
ASSERT_MSG(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "Shifted registers are not supported used Indexed registers"); ASSERT_MSG(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "Shifted registers are not supported used Indexed registers");
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg()); ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | Rm.GetData() | Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
(1 << 11) | (Rn << 5) | Rt);
} }
void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd) {
ARM64Reg Rd) {
bool b64Bit = Is64Bit(Rd); bool b64Bit = Is64Bit(Rd);
ASSERT_MSG(!(imm & ~0xFFF), "{}: immediate too large: {:X}", __func__, imm); ASSERT_MSG(!(imm & ~0xFFF), "{}: immediate too large: {:X}", __func__, imm);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | (imm << 10) | (Rn << 5) | Rd);
(imm << 10) | (Rn << 5) | Rd);
} }
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n) {
int n) {
// Sometimes Rd is fixed to SP, but can still be 32bit or 64bit. // Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
// Use Rn to determine bitness here. // Use Rn to determine bitness here.
bool b64Bit = Is64Bit(Rn); bool b64Bit = Is64Bit(Rn);
@ -823,12 +806,10 @@ void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 i
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | (immr << 16) | (imms << 10) | Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | (immr << 16) | (imms << 10) | (Rn << 5) | Rd);
(Rn << 5) | Rd);
} }
void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
ARM64Reg Rn, s32 imm) {
bool b64Bit = Is64Bit(Rt); bool b64Bit = Is64Bit(Rt);
u32 type_encode = 0; u32 type_encode = 0;
@ -858,8 +839,7 @@ void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64R
Rt2 = DecodeReg(Rt2); Rt2 = DecodeReg(Rt2);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((op << 30) | (0b101 << 27) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) | Write32((op << 30) | (0b101 << 27) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
(Rt2 << 10) | (Rn << 5) | Rt);
} }
void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm) { void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm) {
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
@ -877,7 +857,7 @@ void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64
// FixupBranch branching // FixupBranch branching
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) { void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) {
if(!target) if (!target)
target = m_code; target = m_code;
bool Not = false; bool Not = false;
u32 inst = 0; u32 inst = 0;
@ -909,8 +889,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) {
ASSERT_MSG(IsInRangeImm14(distance), "{}({}): Received too large distance: {:X}", ASSERT_MSG(IsInRangeImm14(distance), "{}({}): Received too large distance: {:X}",
__func__, branch.type, distance); __func__, branch.type, distance);
ARM64Reg reg = DecodeReg(branch.reg); ARM64Reg reg = DecodeReg(branch.reg);
inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
} break; } break;
case 5: // B (unconditional) case 5: // B (unconditional)
ASSERT_MSG(IsInRangeImm26(distance), "{}({}): Received too large distance: {:X}", ASSERT_MSG(IsInRangeImm26(distance), "{}({}): Received too large distance: {:X}",
@ -1778,8 +1757,7 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) {
return; return;
} }
if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) || if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) || (!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max())) {
(!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max())) {
// Max unsigned value (or if signed, -1) // Max unsigned value (or if signed, -1)
// Set to ~ZR // Set to ~ZR
ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP; ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP;
@ -1943,8 +1921,7 @@ void ARM64XEmitter::ABI_PopRegisters(u32 registers) {
} }
// Float Emitter // Float Emitter
void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
ARM64Reg Rn, s32 imm) {
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
u32 encoded_size = 0; u32 encoded_size = 0;
@ -1986,35 +1963,29 @@ void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type,
encoded_imm |= 3; encoded_imm |= 3;
} }
Write32((encoded_size << 30) | (0xF << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) | Write32((encoded_size << 30) | (0xF << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) | (size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
(size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
} }
void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rn, ARM64Reg Rm) {
ASSERT_MSG(!IsQuad(Rd), "{} only supports double and single registers!", __func__); ASSERT_MSG(!IsQuad(Rd), "{} only supports double and single registers!", __func__);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (Rm << 16) | Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (Rm << 16) | (opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rm) {
ASSERT_MSG(!IsSingle(Rd), "{} doesn't support singles!", __func__); ASSERT_MSG(!IsSingle(Rd), "{} doesn't support singles!", __func__);
bool quad = IsQuad(Rd); bool quad = IsQuad(Rd);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((quad << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (Rm << 16) | Write32((quad << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
(opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rm) {
ASSERT_MSG(!IsQuad(Rd), "{} doesn't support quads!", __func__); ASSERT_MSG(!IsQuad(Rd), "{} doesn't support quads!", __func__);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
@ -2038,74 +2009,60 @@ void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64R
break; break;
} }
Write32((U << 29) | (0b1011110001 << 21) | (esize << 22) | (Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
Write32((U << 29) | (0b1011110001 << 21) | (esize << 22) | (Rm << 16) |
(opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn) { void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn) {
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((Q << 30) | (op << 29) | (0b111 << 25) | (imm5 << 16) | (imm4 << 11) | (1 << 10) | Write32((Q << 30) | (op << 29) | (0b111 << 25) | (imm5 << 16) | (imm4 << 11) | (1 << 10) | (Rn << 5) | Rd);
(Rn << 5) | Rd);
} }
void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
ARM64Reg Rn) {
ASSERT_MSG(!IsSingle(Rd), "{} doesn't support singles!", __func__); ASSERT_MSG(!IsSingle(Rd), "{} doesn't support singles!", __func__);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((Q << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (opcode << 12) | (1 << 11) | Write32((Q << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
(Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn) {
ARM64Reg Rt, ARM64Reg Rn) {
ASSERT_MSG(!IsSingle(Rt), "{} doesn't support singles!", __func__); ASSERT_MSG(!IsSingle(Rt), "{} doesn't support singles!", __func__);
bool quad = IsQuad(Rt); bool quad = IsQuad(Rt);
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((quad << 30) | (0b1101 << 24) | (L << 22) | (R << 21) | (opcode << 13) | (S << 12) | Write32((quad << 30) | (0b1101 << 24) | (L << 22) | (R << 21) | (opcode << 13) | (S << 12) | (size << 10) | (Rn << 5) | Rt);
(size << 10) | (Rn << 5) | Rt);
} }
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
ASSERT_MSG(!IsSingle(Rt), "{} doesn't support singles!", __func__); ASSERT_MSG(!IsSingle(Rt), "{} doesn't support singles!", __func__);
bool quad = IsQuad(Rt); bool quad = IsQuad(Rt);
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (Rm << 16) | (opcode << 13) | Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (Rm << 16) | (opcode << 13) | (S << 12) | (size << 10) | (Rn << 5) | Rt);
(S << 12) | (size << 10) | (Rn << 5) | Rt);
} }
void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
ARM64Reg Rn) {
ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__); ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) | Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) | (Rn << 5) | Rd);
(Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
ARM64Reg Rd, ARM64Reg Rn) {
ASSERT_MSG(Rn <= SP, "{} only supports GPR as source!", __func__); ASSERT_MSG(Rn <= SP, "{} only supports GPR as source!", __func__);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | (opcode << 16) | Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | (opcode << 16) | (Rn << 5) | Rd);
(Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign) {
bool sign) {
DEBUG_ASSERT_MSG(IsScalar(Rn), "fcvts: Rn must be floating point"); DEBUG_ASSERT_MSG(IsScalar(Rn), "fcvts: Rn must be floating point");
if (IsGPR(Rd)) { if (IsGPR(Rd)) {
// Use the encoding that transfers the result to a GPR. // Use the encoding that transfers the result to a GPR.
@ -2160,8 +2117,7 @@ void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, Roundin
sz |= 2; sz |= 2;
break; break;
} }
Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) | Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) | (Rn << 5) | Rd);
(Rn << 5) | Rd);
} }
} }
@ -2173,13 +2129,11 @@ void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
EmitConvertScalarToInt(Rd, Rn, round, true); EmitConvertScalarToInt(Rd, Rn, round, true);
} }
void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn) {
u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn) {
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) | Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) | (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
(rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm) { void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm) {
@ -2189,12 +2143,10 @@ void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Re
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (op << 14) | Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (op << 14) | (1 << 13) | (Rn << 5) | opcode2);
(1 << 13) | (Rn << 5) | opcode2);
} }
void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rm) {
ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__); ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__);
bool is_double = IsDouble(Rd); bool is_double = IsDouble(Rd);
@ -2202,8 +2154,7 @@ void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (cond << 12) | Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (cond << 12) | (3 << 10) | (Rn << 5) | Rd);
(3 << 10) | (Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
@ -2223,8 +2174,7 @@ void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn,
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | (1 << 11) | Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | (1 << 11) | (Rn << 5) | Rd);
(Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8) { void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8) {
@ -2234,32 +2184,26 @@ void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64R
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | (imm8 << 13) | Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | (imm8 << 13) | (1 << 12) | (imm5 << 5) | Rd);
(1 << 12) | (imm5 << 5) | Rd);
} }
void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
ARM64Reg Rn) {
ASSERT_MSG(immh, "{} bad encoding! Can't have zero immh", __func__); ASSERT_MSG(immh, "{} bad encoding! Can't have zero immh", __func__);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((Q << 30) | (U << 29) | (0xF << 24) | (immh << 19) | (immb << 16) | (opcode << 11) | Write32((Q << 30) | (U << 29) | (0xF << 24) | (immh << 19) | (immb << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
(1 << 10) | (Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
ARM64Reg Rn) {
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((2 << 30) | (U << 29) | (0x3E << 23) | (immh << 19) | (immb << 16) | (opcode << 11) | Write32((2 << 30) | (U << 29) | (0x3E << 23) | (immh << 19) | (immb << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
(1 << 10) | (Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn) {
ARM64Reg Rn) {
bool quad = IsQuad(Rt); bool quad = IsQuad(Rt);
u32 encoded_size = 0; u32 encoded_size = 0;
@ -2273,12 +2217,10 @@ void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opc
Rt = DecodeReg(Rt); Rt = DecodeReg(Rt);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | (encoded_size << 10) | Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | (encoded_size << 10) | (Rn << 5) | Rt);
(Rn << 5) | Rt);
} }
void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
bool quad = IsQuad(Rt); bool quad = IsQuad(Rt);
u32 encoded_size = 0; u32 encoded_size = 0;
@ -2293,31 +2235,26 @@ void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((quad << 30) | (0b11001 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) | Write32((quad << 30) | (0b11001 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) | (encoded_size << 10) | (Rn << 5) | Rt);
(encoded_size << 10) | (Rn << 5) | Rt);
} }
void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
ARM64Reg Rn) {
ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__); ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) | Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) | (Rn << 5) | Rd);
(Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
bool quad = IsQuad(Rd); bool quad = IsQuad(Rd);
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm); Rm = DecodeReg(Rm);
Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) | (Rm << 16) | Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) | (Rm << 16) | (opcode << 12) | (H << 11) | (Rn << 5) | Rd);
(opcode << 12) | (H << 11) | (Rn << 5) | Rd);
} }
void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm) { void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
@ -2328,8 +2265,7 @@ void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM
Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt); Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
} }
void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
u32 type_encode = 0; u32 type_encode = 0;
u32 opc = 0; u32 opc = 0;
@ -2366,12 +2302,10 @@ void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type,
Rt2 = DecodeReg(Rt2); Rt2 = DecodeReg(Rt2);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
Write32((opc << 30) | (0b1011 << 26) | (type_encode << 23) | (load << 22) | Write32((opc << 30) | (0b1011 << 26) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
} }
void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
ArithOption Rm) {
ASSERT_MSG(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, ASSERT_MSG(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG,
"{} must contain an extended reg as Rm!", __func__); "{} must contain an extended reg as Rm!", __func__);
@ -2402,8 +2336,7 @@ void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg()); ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
Write32((encoded_size << 30) | (encoded_op << 22) | (0b111100001 << 21) | (decoded_Rm << 16) | Write32((encoded_size << 30) | (encoded_op << 22) | (0b111100001 << 21) | (decoded_Rm << 16) | Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
} }
void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh) { void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh) {
@ -2416,8 +2349,7 @@ void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd
} v; } v;
v.hex = abcdefgh; v.hex = abcdefgh;
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Write32((Q << 30) | (op << 29) | (0xF << 24) | (v.in.abc << 16) | (cmode << 12) | (o2 << 11) | Write32((Q << 30) | (op << 29) | (0xF << 24) | (v.in.abc << 16) | (cmode << 12) | (o2 << 11) | (1 << 10) | (v.in.defgh << 5) | Rd);
(1 << 10) | (v.in.defgh << 5) | Rd);
} }
void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) { void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
@ -2675,8 +2607,7 @@ void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn) {
opcode = 0b0010; opcode = 0b0010;
EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn); EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);
} }
void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rm) {
ASSERT_MSG(!(count == 0 || count > 4), "{} must have a count of 1 to 4 registers!", __func__); ASSERT_MSG(!(count == 0 || count > 4), "{} must have a count of 1 to 4 registers!", __func__);
ASSERT_MSG(type == INDEX_POST, "{} only supports post indexing!", __func__); ASSERT_MSG(type == INDEX_POST, "{} only supports post indexing!", __func__);
@ -2704,8 +2635,7 @@ void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn) {
opcode = 0b0010; opcode = 0b0010;
EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn); EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn);
} }
void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
ARM64Reg Rm) {
ASSERT_MSG(!(count == 0 || count > 4), "{} must have a count of 1 to 4 registers!", __func__); ASSERT_MSG(!(count == 0 || count > 4), "{} must have a count of 1 to 4 registers!", __func__);
ASSERT_MSG(type == INDEX_POST, "{} only supports post indexing!", __func__); ASSERT_MSG(type == INDEX_POST, "{} only supports post indexing!", __func__);
@ -2756,12 +2686,10 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top) {
} }
// Loadstore paired // Loadstore paired
void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
s32 imm) {
EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm); EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm);
} }
void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
s32 imm) {
EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm); EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm);
} }
@ -2825,8 +2753,7 @@ void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg R
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3); EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
} }
void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
ARM64Reg Ra, int opcode) {
int type = isDouble ? 1 : 0; int type = isDouble ? 1 : 0;
Rd = DecodeReg(Rd); Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn); Rn = DecodeReg(Rn);
@ -2834,8 +2761,7 @@ void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg R
Ra = DecodeReg(Ra); Ra = DecodeReg(Ra);
int o1 = opcode >> 1; int o1 = opcode >> 1;
int o0 = opcode & 1; int o0 = opcode & 1;
m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (Rm << 16) | (o0 << 15) | m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (Rm << 16) | (o0 << 15) | (Ra << 10) | (Rn << 5) | Rd);
(Ra << 10) | (Rn << 5) | Rd);
} }
// Scalar three same // Scalar three same
@ -3706,8 +3632,7 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
} }
} }
void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative, void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative, bool flags) {
bool flags) {
switch ((negative << 1) | static_cast<unsigned int>(flags)) { switch ((negative << 1) | static_cast<unsigned int>(flags)) {
case 0: case 0:
ADD(Rd, Rn, static_cast<u32>(imm), shift); ADD(Rd, Rn, static_cast<u32>(imm), shift);
@ -3724,8 +3649,7 @@ void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift,
} }
} }
void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, ARM64Reg scratch) {
ARM64Reg scratch) {
bool has_scratch = scratch != INVALID_REG; bool has_scratch = scratch != INVALID_REG;
u64 imm_neg = Is64Bit(Rd) ? ~imm + 1 : (~imm + 1) & 0xFFFFFFFFuLL; u64 imm_neg = Is64Bit(Rd) ? ~imm + 1 : (~imm + 1) & 0xFFFFFFFFuLL;
bool neg_neg = negative ? false : true; bool neg_neg = negative ? false : true;


@ -265,7 +265,11 @@ constexpr ARM64Reg EncodeRegToQuad(ARM64Reg reg) {
return static_cast<ARM64Reg>(reg | 0xC0); return static_cast<ARM64Reg>(reg | 0xC0);
} }
enum OpType { TYPE_IMM = 0, TYPE_REG, TYPE_IMMSREG, TYPE_RSR, TYPE_MEM }; enum OpType { TYPE_IMM = 0,
TYPE_REG,
TYPE_IMMSREG,
TYPE_RSR,
TYPE_MEM };
enum ShiftType { enum ShiftType {
ST_LSL = 0, ST_LSL = 0,
@ -474,8 +478,7 @@ private:
void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn); void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn);
void EncodeExceptionInst(u32 instenc, u32 imm); void EncodeExceptionInst(u32 instenc, u32 imm);
void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt); void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt);
void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
ArithOption Option);
void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond); void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond); void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
@ -494,8 +497,7 @@ private:
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm); void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd); void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n); void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
ARM64Reg Rn, s32 imm);
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm); void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm); void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
@ -503,7 +505,8 @@ protected:
void Write32(u32 value); void Write32(u32 value);
public: public:
ARM64XEmitter() : m_code(nullptr), m_lastCacheFlushEnd(nullptr) { ARM64XEmitter()
: m_code(nullptr), m_lastCacheFlushEnd(nullptr) {
} }
ARM64XEmitter(u8* code_ptr) { ARM64XEmitter(u8* code_ptr) {
@ -831,7 +834,7 @@ public:
// Wrapper around MOVZ+MOVK // Wrapper around MOVZ+MOVK
void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true); void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
bool MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2); bool MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2);
template <class P> template<class P>
void MOVP2R(ARM64Reg Rd, P* ptr) { void MOVP2R(ARM64Reg Rd, P* ptr) {
ASSERT_MSG(Is64Bit(Rd), "Can't store pointers in 32-bit registers"); ASSERT_MSG(Is64Bit(Rd), "Can't store pointers in 32-bit registers");
MOVI2R(Rd, (uintptr_t)ptr); MOVI2R(Rd, (uintptr_t)ptr);
@ -848,8 +851,7 @@ public:
void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, ARM64Reg scratch);
ARM64Reg scratch);
void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
@ -872,14 +874,14 @@ public:
// Unfortunately, calling operator() directly is undefined behavior in C++ // Unfortunately, calling operator() directly is undefined behavior in C++
// (this method might be a thunk in the case of multi-inheritance) so we // (this method might be a thunk in the case of multi-inheritance) so we
// have to go through a trampoline function. // have to go through a trampoline function.
template <typename T, typename... Args> template<typename T, typename... Args>
static T CallLambdaTrampoline(const std::function<T(Args...)>* f, Args... args) { static T CallLambdaTrampoline(const std::function<T(Args...)>* f, Args... args) {
return (*f)(args...); return (*f)(args...);
} }
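Editorial aside (not part of this commit): the comment above is the interesting part of this hunk. Calling a std::function's operator() through a member pointer from generated code is problematic (the method may be a thunk), so generated code calls a plain function that forwards to the std::function. A small self-contained sketch of that pattern, using hypothetical names (CallTrampoline, fn) outside the emitter and assuming only the C++ standard library:

#include <cstdio>
#include <functional>

// Sketch only: a plain template function whose address can be handed to
// generated code; it simply forwards to the std::function object.
template<typename T, typename... Args>
static T CallTrampoline(const std::function<T(Args...)>* f, Args... args) {
    return (*f)(args...);
}

int main() {
    int base = 40;
    std::function<int(int)> fn = [base](int x) { return base + x; };
    auto* trampoline = &CallTrampoline<int, int>;  // ordinary function pointer, safe to embed in JIT code
    std::printf("%d\n", trampoline(&fn, 2));       // prints 42; 'fn' must outlive any generated call site
    return 0;
}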
// This function expects you to have set up the state. // This function expects you to have set up the state.
// Overwrites X0 and X30 // Overwrites X0 and X30
template <typename T, typename... Args> template<typename T, typename... Args>
ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f) { ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f) {
auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>; auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
MOVI2R(X30, (uintptr_t)trampoline); MOVI2R(X30, (uintptr_t)trampoline);
@ -889,7 +891,7 @@ public:
// Plain function call // Plain function call
void QuickCallFunction(const void* func, ARM64Reg scratchreg = X16); void QuickCallFunction(const void* func, ARM64Reg scratchreg = X16);
template <typename T> template<typename T>
void QuickCallFunction(T func, ARM64Reg scratchreg = X16) { void QuickCallFunction(T func, ARM64Reg scratchreg = X16) {
QuickCallFunction((const void*)func, scratchreg); QuickCallFunction((const void*)func, scratchreg);
} }
@ -897,7 +899,8 @@ public:
class ARM64FloatEmitter { class ARM64FloatEmitter {
public: public:
ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) { ARM64FloatEmitter(ARM64XEmitter* emit)
: m_emit(emit) {
} }
void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm); void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
@ -1110,22 +1113,17 @@ private:
} }
// Emitting functions // Emitting functions
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
s32 imm); void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
ARM64Reg Rm);
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn); void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
ARM64Reg Rn); void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt,
ARM64Reg Rn, ARM64Reg Rm);
void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);
int scale, ARM64Reg Rd, ARM64Reg Rn);
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm); void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@ -1133,19 +1131,14 @@ private:
void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn); void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
ARM64Reg Rm);
void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
ARM64Reg Rm);
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm); void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign); void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
int opcode); void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
ARM64Reg Rn, s32 imm);
void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn,
ArithOption Rm);
void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh); void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh);
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper); void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);


@ -8,9 +8,9 @@
#include <vector> #include <vector>
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> # include <windows.h>
#else #else
#include <sys/mman.h> # include <sys/mman.h>
#endif #endif
#include <mcl/assert.hpp> #include <mcl/assert.hpp>
@ -21,7 +21,7 @@ namespace Dynarmic::BackendA64 {
// You get memory management for free, plus, you can use all emitter functions // You get memory management for free, plus, you can use all emitter functions
// without having to prefix them with gen-> or something similar. Example // without having to prefix them with gen-> or something similar. Example
// implementation: class JIT : public CodeBlock<ARMXEmitter> {} // implementation: class JIT : public CodeBlock<ARMXEmitter> {}
template <class T> template<class T>
class CodeBlock : public T { class CodeBlock : public T {
private: private:
// A privately used function to set the executable RAM space to something // A privately used function to set the executable RAM space to something
@ -57,11 +57,11 @@ public:
#if defined(_WIN32) #if defined(_WIN32)
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else #else
#if defined(__APPLE__) # if defined(__APPLE__)
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0); void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
#else # else
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0); void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif # endif
if (ptr == MAP_FAILED) if (ptr == MAP_FAILED)
ptr = nullptr; ptr = nullptr;
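Editorial aside (not part of this commit): the CodeBlock<T> comment earlier in this file ("you can use all emitter functions without having to prefix them with gen-> or something similar") is easiest to see with a toy instantiation. The sketch below is illustrative only: StubEmitter and CodeBlockSketch are hypothetical stand-ins for ARM64XEmitter and the real CodeBlock, and plain heap memory replaces the executable mapping.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical stand-in for ARM64XEmitter: just tracks a write cursor.
struct StubEmitter {
    std::uint8_t* m_code = nullptr;
    void SetCodePtr(std::uint8_t* ptr) { m_code = ptr; }
    void NOP() {
        const std::uint32_t inst = 0xD503201F;  // AArch64 NOP encoding
        std::memcpy(m_code, &inst, sizeof(inst));
        m_code += sizeof(inst);
    }
};

// Mirrors the shape of the CodeBlock<T> template in the diff: deriving from the
// emitter is what lets callers write NOP() instead of gen->NOP().
template<class T>
class CodeBlockSketch : public T {
    std::uint8_t* region = nullptr;
public:
    void AllocCodeSpace(std::size_t size) {
        region = new std::uint8_t[size];  // the real class maps RWX memory via mmap/VirtualAlloc
        this->SetCodePtr(region);
    }
    ~CodeBlockSketch() { delete[] region; }
};

class JIT : public CodeBlockSketch<StubEmitter> {};

int main() {
    JIT jit;
    jit.AllocCodeSpace(64);
    jit.NOP();  // emitter method called directly, no prefix needed
    return 0;
}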


@ -7,8 +7,8 @@
#pragma once #pragma once
#include <array> #include <array>
#include <memory>
#include <functional> #include <functional>
#include <memory>
#include <mcl/stdint.hpp> #include <mcl/stdint.hpp>
@ -32,6 +32,7 @@ public:
void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr); void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);
bool SupportsFastmem() const; bool SupportsFastmem() const;
private: private:
struct Impl; struct Impl;
std::unique_ptr<Impl> impl; std::unique_ptr<Impl> impl;


@ -8,14 +8,13 @@
// Licensed under GPLv2+ // Licensed under GPLv2+
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <csignal>
#include <mutex> #include <mutex>
#include <vector> #include <vector>
#include <csignal>
#ifdef __APPLE__ #ifdef __APPLE__
#include <sys/ucontext.h> # include <sys/ucontext.h>
#else #else
#include <ucontext.h> # include <ucontext.h>
#endif #endif
#include <mcl/assert.hpp> #include <mcl/assert.hpp>
@ -117,8 +116,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
"dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n", "dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
PC); PC);
struct sigaction* retry_sa = struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
if (retry_sa->sa_flags & SA_SIGINFO) { if (retry_sa->sa_flags & SA_SIGINFO) {
retry_sa->sa_sigaction(sig, info, raw_context); retry_sa->sa_sigaction(sig, info, raw_context);
return; return;


@ -18,4 +18,4 @@ Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc) {
return EncodeRegToQuad(static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(loc) - static_cast<int>(HostLoc::Q0))); return EncodeRegToQuad(static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(loc) - static_cast<int>(HostLoc::Q0)));
} }
} // namespace Dynarmic::BackendX64 } // namespace Dynarmic::BackendA64
View File
@ -138,32 +138,76 @@ using HostLocList = std::initializer_list<HostLoc>;
// X30 is the link register. // X30 is the link register.
// In order of desirability based first on ABI // In order of desirability based first on ABI
constexpr HostLocList any_gpr = { constexpr HostLocList any_gpr = {
HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23, HostLoc::X19,
HostLoc::X24, HostLoc::X25, HostLoc::X20,
HostLoc::X21,
HostLoc::X22,
HostLoc::X23,
HostLoc::X24,
HostLoc::X25,
HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12, HostLoc::X8,
HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17, HostLoc::X9,
HostLoc::X10,
HostLoc::X11,
HostLoc::X12,
HostLoc::X13,
HostLoc::X14,
HostLoc::X15,
HostLoc::X16,
HostLoc::X17,
HostLoc::X7, HostLoc::X6, HostLoc::X5, HostLoc::X4, HostLoc::X3, HostLoc::X7,
HostLoc::X2, HostLoc::X1, HostLoc::X0, HostLoc::X6,
HostLoc::X5,
HostLoc::X4,
HostLoc::X3,
HostLoc::X2,
HostLoc::X1,
HostLoc::X0,
}; };
constexpr HostLocList any_fpr = { constexpr HostLocList any_fpr = {
HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13, HostLoc::Q8,
HostLoc::Q14, HostLoc::Q15, HostLoc::Q9,
HostLoc::Q10,
HostLoc::Q11,
HostLoc::Q12,
HostLoc::Q13,
HostLoc::Q14,
HostLoc::Q15,
HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21, HostLoc::Q16,
HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27, HostLoc::Q17,
HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31, HostLoc::Q18,
HostLoc::Q19,
HostLoc::Q20,
HostLoc::Q21,
HostLoc::Q22,
HostLoc::Q23,
HostLoc::Q24,
HostLoc::Q25,
HostLoc::Q26,
HostLoc::Q27,
HostLoc::Q28,
HostLoc::Q29,
HostLoc::Q30,
HostLoc::Q31,
HostLoc::Q7, HostLoc::Q6, HostLoc::Q5, HostLoc::Q4, HostLoc::Q3, HostLoc::Q2, HostLoc::Q7,
HostLoc::Q1, HostLoc::Q0, HostLoc::Q6,
HostLoc::Q5,
HostLoc::Q4,
HostLoc::Q3,
HostLoc::Q2,
HostLoc::Q1,
HostLoc::Q0,
}; };
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc); Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc); Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc);
template <typename JitStateType> template<typename JitStateType>
size_t SpillToOpArg(HostLoc loc) { size_t SpillToOpArg(HostLoc loc) {
ASSERT(HostLocIsSpill(loc)); ASSERT(HostLocIsSpill(loc));
View File
@ -11,7 +11,7 @@
namespace Dynarmic::BackendA64 { namespace Dynarmic::BackendA64 {
struct JitStateInfo { struct JitStateInfo {
template <typename JitStateType> template<typename JitStateType>
JitStateInfo(const JitStateType&) JitStateInfo(const JitStateType&)
: offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining)) : offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
, offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run)) , offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
@ -25,8 +25,7 @@ struct JitStateInfo {
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv)) , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
, offsetof_halt_reason(offsetof(JitStateType, halt_reason)) , offsetof_halt_reason(offsetof(JitStateType, halt_reason)) {}
{}
const size_t offsetof_cycles_remaining; const size_t offsetof_cycles_remaining;
const size_t offsetof_cycles_to_run; const size_t offsetof_cycles_to_run;
View File
@ -9,14 +9,14 @@
#ifdef __linux__ #ifdef __linux__
#include <cstdio> # include <cstdio>
#include <cstdlib> # include <cstdlib>
#include <mutex> # include <mutex>
#include <sys/types.h>
#include <unistd.h>
#include <fmt/format.h> # include <fmt/format.h>
#include <mcl/stdint.hpp> # include <mcl/stdint.hpp>
# include <sys/types.h>
# include <unistd.h>
namespace Dynarmic::BackendA64 { namespace Dynarmic::BackendA64 {
@ -71,7 +71,7 @@ void PerfMapClear() {
OpenFile(); OpenFile();
} }
} // namespace Dynarmic::BackendX64 } // namespace Dynarmic::BackendA64
#else #else
@ -83,6 +83,6 @@ void PerfMapRegister(const void*, const void*, const std::string&) {}
void PerfMapClear() {} void PerfMapClear() {}
} // namespace Dynarmic::BackendX64 } // namespace Dynarmic::BackendA64
#endif #endif
View File
@ -24,4 +24,4 @@ void PerfMapRegister(T start, const void* end, const std::string& friendly_name)
void PerfMapClear(); void PerfMapClear();
} // namespace Dynarmic::BackendX64 } // namespace Dynarmic::BackendA64
View File
@ -4,6 +4,8 @@
* General Public License version 2 or any later version. * General Public License version 2 or any later version.
*/ */
#include "dynarmic/backend/A64/reg_alloc.h"
#include <algorithm> #include <algorithm>
#include <numeric> #include <numeric>
#include <utility> #include <utility>
@ -12,7 +14,6 @@
#include <mcl/assert.hpp> #include <mcl/assert.hpp>
#include "dynarmic/backend/A64/abi.h" #include "dynarmic/backend/A64/abi.h"
#include "dynarmic/backend/A64/reg_alloc.h"
namespace Dynarmic::BackendA64 { namespace Dynarmic::BackendA64 {
@ -379,16 +380,9 @@ HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
return location; return location;
} }
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0, void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3, std::optional<Argument::copyable_reference> arg4, std::optional<Argument::copyable_reference> arg5, std::optional<Argument::copyable_reference> arg6, std::optional<Argument::copyable_reference> arg7) {
std::optional<Argument::copyable_reference> arg1,
std::optional<Argument::copyable_reference> arg2,
std::optional<Argument::copyable_reference> arg3,
std::optional<Argument::copyable_reference> arg4,
std::optional<Argument::copyable_reference> arg5,
std::optional<Argument::copyable_reference> arg6,
std::optional<Argument::copyable_reference> arg7) {
constexpr size_t args_count = 8; constexpr size_t args_count = 8;
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8 }; constexpr std::array<HostLoc, args_count> args_hostloc = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8};
const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7}; const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7};
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() { static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
@ -436,7 +430,7 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
std::vector<HostLoc> candidates = desired_locations; std::vector<HostLoc> candidates = desired_locations;
// Find all locations that have not been allocated. // Find all locations that have not been allocated.
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){ const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) {
return !this->LocInfo(loc).IsLocked(); return !this->LocInfo(loc).IsLocked();
}); });
candidates.erase(allocated_locs, candidates.end()); candidates.erase(allocated_locs, candidates.end());
@ -445,7 +439,7 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
// Selects the best location out of the available locations. // Selects the best location out of the available locations.
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible. // TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
std::partition(candidates.begin(), candidates.end(), [this](auto loc){ std::partition(candidates.begin(), candidates.end(), [this](auto loc) {
return this->LocInfo(loc).IsEmpty(); return this->LocInfo(loc).IsEmpty();
}); });
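The two std::partition calls above implement the heuristic described in the comments: locked locations are discarded first, then locations that currently hold no value are moved to the front so they are preferred. A generic sketch of the same idea, with LocState standing in for the real LocInfo bookkeeping:

#include <algorithm>
#include <vector>

struct LocState {
    bool locked;
    bool empty;
};

// Returns the index of the chosen location, or -1 if every candidate is locked.
static int SelectLocation(std::vector<int> candidates, const std::vector<LocState>& state) {
    // Drop every location that is currently locked.
    const auto locked_begin = std::partition(candidates.begin(), candidates.end(),
                                             [&](int loc) { return !state[loc].locked; });
    candidates.erase(locked_begin, candidates.end());

    // Prefer locations that hold no value; std::partition moves them to the front.
    std::partition(candidates.begin(), candidates.end(),
                   [&](int loc) { return state[loc].empty; });

    return candidates.empty() ? -1 : candidates.front();
}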
View File
@ -8,9 +8,9 @@
#include <array> #include <array>
#include <functional> #include <functional>
#include <optional>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <optional>
#include <mcl/stdint.hpp> #include <mcl/stdint.hpp>
@ -84,7 +84,8 @@ public:
private: private:
friend class RegAlloc; friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {} explicit Argument(RegAlloc& reg_alloc)
: reg_alloc(reg_alloc) {}
bool allocated = false; bool allocated = false;
RegAlloc& reg_alloc; RegAlloc& reg_alloc;
@ -117,14 +118,7 @@ public:
Arm64Gen::ARM64Reg ScratchGpr(HostLocList desired_locations = any_gpr); Arm64Gen::ARM64Reg ScratchGpr(HostLocList desired_locations = any_gpr);
Arm64Gen::ARM64Reg ScratchFpr(HostLocList desired_locations = any_fpr); Arm64Gen::ARM64Reg ScratchFpr(HostLocList desired_locations = any_fpr);
void HostCall(IR::Inst* result_def = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, void HostCall(IR::Inst* result_def = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}, std::optional<Argument::copyable_reference> arg2 = {}, std::optional<Argument::copyable_reference> arg3 = {}, std::optional<Argument::copyable_reference> arg4 = {}, std::optional<Argument::copyable_reference> arg5 = {}, std::optional<Argument::copyable_reference> arg6 = {}, std::optional<Argument::copyable_reference> arg7 = {});
std::optional<Argument::copyable_reference> arg1 = {},
std::optional<Argument::copyable_reference> arg2 = {},
std::optional<Argument::copyable_reference> arg3 = {},
std::optional<Argument::copyable_reference> arg4 = {},
std::optional<Argument::copyable_reference> arg5 = {},
std::optional<Argument::copyable_reference> arg6 = {},
std::optional<Argument::copyable_reference> arg7 = {});
// TODO: Values in host flags // TODO: Values in host flags
View File
@ -44,8 +44,8 @@ u8 RecipEstimate(u64 a);
*/ */
u8 RecipSqrtEstimate(u64 a); u8 RecipSqrtEstimate(u64 a);
template <typename T> template<typename T>
constexpr bool IsPow2(T imm){ constexpr bool IsPow2(T imm) {
return imm > 0 && (imm & (imm - 1)) == 0; return imm > 0 && (imm & (imm - 1)) == 0;
} }