diff --git a/src/dynarmic/backend/A64/a32_emit_a64.cpp b/src/dynarmic/backend/A64/a32_emit_a64.cpp index c71ff418..e47d531b 100644 --- a/src/dynarmic/backend/A64/a32_emit_a64.cpp +++ b/src/dynarmic/backend/A64/a32_emit_a64.cpp @@ -4,21 +4,21 @@ * General Public License version 2 or any later version. */ +#include "dynarmic/backend/A64/a32_emit_a64.h" + #include #include #include #include +#include #include #include #include #include -#include #include +#include -#include - -#include "dynarmic/backend/A64/a32_emit_a64.h" #include "dynarmic/backend/A64/a32_jitstate.h" #include "dynarmic/backend/A64/abi.h" #include "dynarmic/backend/A64/block_of_code.h" @@ -41,7 +41,7 @@ namespace Dynarmic::BackendA64 { // Note that unlike the x64 backend these only returns ONLY the offset to register and not the address! static size_t MJitStateReg(A32::Reg reg) { - return offsetof(A32JitState, Reg) + sizeof(u32) * static_cast(reg); + return offsetof(A32JitState, Reg) + sizeof(u32) * static_cast(reg); } static size_t MJitStateExtReg(A32::ExtReg reg) { @@ -56,7 +56,8 @@ static size_t MJitStateExtReg(A32::ExtReg reg) { ASSERT_FALSE("Should never happen."); } -A32EmitContext::A32EmitContext(RegAlloc& reg_alloc, IR::Block& block) : EmitContext(reg_alloc, block) {} +A32EmitContext::A32EmitContext(RegAlloc& reg_alloc, IR::Block& block) + : EmitContext(reg_alloc, block) {} A32::LocationDescriptor A32EmitContext::Location() const { return A32::LocationDescriptor{block.Location()}; @@ -91,8 +92,8 @@ std::ptrdiff_t A32EmitContext::GetInstOffset(IR::Inst* inst) const { } A32EmitA64::A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface) - : EmitA64(code), config(std::move(config)), jit_interface(jit_interface) { - exception_handler.Register(code, [this](CodePtr PC){FastmemCallback(PC);}); + : EmitA64(code), config(std::move(config)), jit_interface(jit_interface) { + exception_handler.Register(code, [this](CodePtr PC) { FastmemCallback(PC); }); GenMemoryAccessors(); GenTerminalHandlers(); code.PreludeComplete(); @@ -109,7 +110,7 @@ A32EmitA64::BlockDescriptor A32EmitA64::Emit(IR::Block& block) { }; RegAlloc reg_alloc{code, A32JitState::SpillCount, SpillToOpArg}; - A32EmitContext ctx{reg_alloc, block}; + A32EmitContext ctx{reg_alloc, block}; const u8* entrypoint = code.AlignCode16(); @@ -121,15 +122,14 @@ A32EmitA64::BlockDescriptor A32EmitA64::Emit(IR::Block& block) { // Call the relevant Emit* member function. switch (inst->GetOpcode()) { - -#define OPCODE(name, type, ...) \ - case IR::Opcode::name: \ - A32EmitA64::Emit##name(ctx, inst); \ - break; -#define A32OPC(name, type, ...) \ - case IR::Opcode::A32##name: \ - A32EmitA64::EmitA32##name(ctx, inst); \ - break; +#define OPCODE(name, type, ...) \ + case IR::Opcode::name: \ + A32EmitA64::Emit##name(ctx, inst); \ + break; +#define A32OPC(name, type, ...) \ + case IR::Opcode::A32##name: \ + A32EmitA64::EmitA32##name(ctx, inst); \ + break; #define A64OPC(...) 
#include "dynarmic/backend/A64/opcodes.inc" #undef OPCODE @@ -184,7 +184,7 @@ void A32EmitA64::EmitCondPrelude(const A32EmitContext& ctx) { FixupBranch pass = EmitCond(ctx.block.GetCondition()); EmitAddCycles(ctx.block.ConditionFailedCycleCount()); - EmitTerminal(IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, ctx.block.Location(), ctx.IsSingleStep()); + EmitTerminal(IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, ctx.block.Location(), ctx.IsSingleStep()); code.SetJumpTarget(pass); } @@ -343,7 +343,7 @@ void A32EmitA64::GenTerminalHandlers() { code.BR(code.ABI_SCRATCH1); code.SetJumpTarget(fast_dispatch_cache_miss); - code.STR(INDEX_UNSIGNED, location_descriptor_reg, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, location_descriptor) ); + code.STR(INDEX_UNSIGNED, location_descriptor_reg, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, location_descriptor)); code.LookupBlock(); code.STR(INDEX_UNSIGNED, code.ABI_RETURN, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, code_ptr)); code.BR(code.ABI_RETURN); @@ -359,7 +359,6 @@ void A32EmitA64::GenTerminalHandlers() { } } - void A32EmitA64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) { A32::Reg reg = inst->GetArg(0).GetA32RegRef(); @@ -418,8 +417,7 @@ void A32EmitA64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins if (args[1].IsInFpr()) { ARM64Reg to_store = ctx.reg_alloc.UseFpr(args[1]); code.fp_emitter.STR(64, INDEX_UNSIGNED, to_store, X28, MJitStateExtReg(reg)); - } - else { + } else { ARM64Reg to_store = ctx.reg_alloc.UseGpr(args[1]); code.STR(INDEX_UNSIGNED, to_store, X28, MJitStateExtReg(reg)); } @@ -442,18 +440,18 @@ static void SetCpsrImpl(u32 value, A32JitState* jit_state) { void A32EmitA64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - + // TODO:Inline ctx.reg_alloc.HostCall(nullptr, args[0]); // Use an unused HostCall register ARM64Reg host_fpsr = X9; - + if (config.always_little_endian) { code.ANDI2R(code.ABI_PARAM1, code.ABI_PARAM1, 0xFFFFFDFF, ctx.reg_alloc.ScratchGpr()); } - // Since this is one of the only places where the ~sticky~ + // Since this is one of the only places where the ~sticky~ // guest's Q flag can be cleared it is also a great place to clear the host's Q flag code.MRS(host_fpsr, FIELD_FPSR); code.ANDI2R(host_fpsr, host_fpsr, ~(1 << 27)); @@ -496,7 +494,7 @@ void A32EmitA64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) { code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, cpsr_nzcv)); } - // Since this is one of the only places where the ~sticky~ + // Since this is one of the only places where the ~sticky~ // guest's Q flag can be cleared it is also a great place to clear the host's Q flag. // TODO : possibly a better job at explaining. code.MRS(host_fpsr, FIELD_FPSR); @@ -676,7 +674,7 @@ void A32EmitA64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { // cpsr.T = false; // } // We rely on the fact we disallow EFlag from changing within a block. - + if (arg.IsImmediate()) { const ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr()); u32 new_pc = arg.GetImmediateU32(); @@ -697,7 +695,7 @@ void A32EmitA64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { code.ADD(new_upper, new_upper, mask); code.STR(INDEX_UNSIGNED, new_upper, X28, offsetof(A32JitState, upper_location_descriptor)); code.LSL(mask, mask, 1); - code.SUBI2R(mask, mask, 4); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC + code.SUBI2R(mask, mask, 4); // mask = pc & 1 ? 
0xFFFFFFFE : 0xFFFFFFFC code.AND(new_pc, new_pc, mask); code.STR(INDEX_UNSIGNED, new_pc, X28, MJitStateReg(A32::Reg::PC)); } @@ -813,7 +811,7 @@ void A32EmitA64::DoNotFastmem(const DoNotFastmemMarker& marker) { InvalidateBasicBlocks({std::get<0>(marker)}); } -template +template void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn) { constexpr size_t bit_size = mcl::bitsizeof; auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -836,21 +834,21 @@ void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr c FixupBranch abort = code.CBZ(result); code.ANDI2R(vaddr, vaddr, 4095); switch (bit_size) { - case 8: - code.LDRB(DecodeReg(result), result, vaddr); - break; - case 16: - code.LDRH(DecodeReg(result), result, vaddr); - break; - case 32: - code.LDR(DecodeReg(result), result, vaddr); - break; - case 64: - code.LDR(result, result, vaddr); - break; - default: - ASSERT_FALSE("Invalid bit_size"); - break; + case 8: + code.LDRB(DecodeReg(result), result, vaddr); + break; + case 16: + code.LDRH(DecodeReg(result), result, vaddr); + break; + case 32: + code.LDR(DecodeReg(result), result, vaddr); + break; + case 64: + code.LDR(result, result, vaddr); + break; + default: + ASSERT_FALSE("Invalid bit_size"); + break; } end = code.B(); code.SetJumpTarget(abort); @@ -858,54 +856,52 @@ void A32EmitA64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr c code.MOV(result, code.ABI_RETURN); }; - if (ShouldFastmem(do_not_fastmem_marker)) { const CodePtr patch_location = code.GetCodePtr(); switch (bit_size) { - case 8: - code.LDRB(DecodeReg(result), X27, vaddr); - break; - case 16: - code.LDRH(DecodeReg(result), X27, vaddr); - break; - case 32: - code.LDR(DecodeReg(result), X27, vaddr); - break; - case 64: - code.LDR(result, X27, vaddr); - break; - default: - ASSERT_FALSE("Invalid bit_size"); - break; + case 8: + code.LDRB(DecodeReg(result), X27, vaddr); + break; + case 16: + code.LDRH(DecodeReg(result), X27, vaddr); + break; + case 32: + code.LDR(DecodeReg(result), X27, vaddr); + break; + case 64: + code.LDR(result, X27, vaddr); + break; + default: + ASSERT_FALSE("Invalid bit_size"); + break; } fastmem_patch_info.emplace( - patch_location, - FastmemPatchInfo{ - [this, patch_location, page_table_lookup, callback_fn, result, do_not_fastmem_marker]{ - CodePtr save_code_ptr = code.GetCodePtr(); - code.SetCodePtr(patch_location); - FixupBranch thunk = code.B(); - u8* end_ptr = code.GetWritableCodePtr(); - code.FlushIcacheSection(reinterpret_cast(patch_location), end_ptr); - code.SetCodePtr(save_code_ptr); - code.SwitchToFarCode(); - code.SetJumpTarget(thunk); - if (config.page_table) { - FixupBranch end{}; - page_table_lookup(end); - code.SetJumpTarget(end, end_ptr); - } else { - code.BL(callback_fn); - code.MOV(result, code.ABI_RETURN); - } - code.B(end_ptr); - code.FlushIcache(); - code.SwitchToNearCode(); + patch_location, + FastmemPatchInfo{ + [this, patch_location, page_table_lookup, callback_fn, result, do_not_fastmem_marker] { + CodePtr save_code_ptr = code.GetCodePtr(); + code.SetCodePtr(patch_location); + FixupBranch thunk = code.B(); + u8* end_ptr = code.GetWritableCodePtr(); + code.FlushIcacheSection(reinterpret_cast(patch_location), end_ptr); + code.SetCodePtr(save_code_ptr); + code.SwitchToFarCode(); + code.SetJumpTarget(thunk); + if (config.page_table) { + FixupBranch end{}; + page_table_lookup(end); + code.SetJumpTarget(end, end_ptr); + } else { + code.BL(callback_fn); + code.MOV(result, code.ABI_RETURN); + } + code.B(end_ptr); + 
code.FlushIcache(); + code.SwitchToNearCode(); - DoNotFastmem(do_not_fastmem_marker); - } - }); + DoNotFastmem(do_not_fastmem_marker); + }}); ctx.reg_alloc.DefineValue(inst, result); return; @@ -950,21 +946,22 @@ void A32EmitA64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr FixupBranch abort = code.CBZ(addr); code.ANDI2R(vaddr, vaddr, 4095); switch (bit_size) { - case 8: - code.STRB(DecodeReg(value), addr, vaddr); - break; - case 16: - code.STRH(DecodeReg(value), addr, vaddr); - break; - case 32: - code.STR(DecodeReg(value), addr, vaddr);; - break; - case 64: - code.STR(value, addr, vaddr); - break; - default: - ASSERT_FALSE("Invalid bit_size"); - break; + case 8: + code.STRB(DecodeReg(value), addr, vaddr); + break; + case 16: + code.STRH(DecodeReg(value), addr, vaddr); + break; + case 32: + code.STR(DecodeReg(value), addr, vaddr); + ; + break; + case 64: + code.STR(value, addr, vaddr); + break; + default: + ASSERT_FALSE("Invalid bit_size"); + break; } end = code.B(); code.SetJumpTarget(abort); @@ -974,49 +971,48 @@ void A32EmitA64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr if (ShouldFastmem(do_not_fastmem_marker)) { const CodePtr patch_location = code.GetCodePtr(); switch (bit_size) { - case 8: - code.STRB(DecodeReg(value), X27, vaddr); - break; - case 16: - code.STRH(DecodeReg(value), X27, vaddr); - break; - case 32: - code.STR(DecodeReg(value), X27, vaddr); - break; - case 64: - code.STR(value, X27, vaddr); - break; - default: - ASSERT_FALSE("Invalid bit_size"); - break; + case 8: + code.STRB(DecodeReg(value), X27, vaddr); + break; + case 16: + code.STRH(DecodeReg(value), X27, vaddr); + break; + case 32: + code.STR(DecodeReg(value), X27, vaddr); + break; + case 64: + code.STR(value, X27, vaddr); + break; + default: + ASSERT_FALSE("Invalid bit_size"); + break; } fastmem_patch_info.emplace( - patch_location, - FastmemPatchInfo{ - [this, patch_location, page_table_lookup, callback_fn, do_not_fastmem_marker]{ - CodePtr save_code_ptr = code.GetCodePtr(); - code.SetCodePtr(patch_location); - FixupBranch thunk = code.B(); - u8* end_ptr = code.GetWritableCodePtr(); - code.FlushIcacheSection(reinterpret_cast(patch_location), end_ptr); - code.SetCodePtr(save_code_ptr); - code.SwitchToFarCode(); - code.SetJumpTarget(thunk); - if (config.page_table) { - FixupBranch end{}; - page_table_lookup(end); - code.SetJumpTarget(end, end_ptr); - } else { - code.BL(callback_fn); - } - code.B(end_ptr); - code.FlushIcache(); - code.SwitchToNearCode(); + patch_location, + FastmemPatchInfo{ + [this, patch_location, page_table_lookup, callback_fn, do_not_fastmem_marker] { + CodePtr save_code_ptr = code.GetCodePtr(); + code.SetCodePtr(patch_location); + FixupBranch thunk = code.B(); + u8* end_ptr = code.GetWritableCodePtr(); + code.FlushIcacheSection(reinterpret_cast(patch_location), end_ptr); + code.SetCodePtr(save_code_ptr); + code.SwitchToFarCode(); + code.SetJumpTarget(thunk); + if (config.page_table) { + FixupBranch end{}; + page_table_lookup(end); + code.SetJumpTarget(end, end_ptr); + } else { + code.BL(callback_fn); + } + code.B(end_ptr); + code.FlushIcache(); + code.SwitchToNearCode(); - DoNotFastmem(do_not_fastmem_marker); - } - }); + DoNotFastmem(do_not_fastmem_marker); + }}); return; } @@ -1062,7 +1058,7 @@ void A32EmitA64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { WriteMemory(ctx, inst, write_memory_64); } -template +template static void ExclusiveWrite(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config) { auto 
args = reg_alloc.GetArgumentInfo(inst); reg_alloc.HostCall(nullptr, {}, args[0], args[1]); @@ -1086,7 +1082,7 @@ static void ExclusiveWrite(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* ins code.MOVI2R(passed, 0); for (FixupBranch e : end) { - code.SetJumpTarget(e); + code.SetJumpTarget(e); } reg_alloc.DefineValue(inst, passed); @@ -1112,8 +1108,7 @@ static void EmitCoprocessorException() { ASSERT_FALSE("Should raise coproc exception here"); } -static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, - IR::Inst* inst = nullptr, std::optional arg0 = {}, std::optional arg1 = {}) { +static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional arg0 = {}, std::optional arg1 = {}) { reg_alloc.HostCall(inst, {}, {}, arg0, arg1); code.MOVP2R(code.ABI_PARAM1, jit_interface); @@ -1306,7 +1301,7 @@ void A32EmitA64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) { code.LDR(INDEX_UNSIGNED, DecodeReg(reg_result), reg_tmp, 0); code.MOVP2R(reg_tmp, source_ptrs[0]); code.LDR(INDEX_UNSIGNED, DecodeReg(reg_tmp), reg_tmp, 0); - code.ORR(reg_result, reg_tmp, reg_result, ArithOption{ reg_result , ST_LSL, 32}); + code.ORR(reg_result, reg_tmp, reg_result, ArithOption{reg_result, ST_LSL, 32}); ctx.reg_alloc.DefineValue(inst, reg_result); @@ -1331,7 +1326,6 @@ void A32EmitA64::EmitA32CoprocLoadWords(A32EmitContext& ctx, IR::Inst* inst) { option = coproc_info[5]; } - std::shared_ptr coproc = config.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -1376,7 +1370,6 @@ void A32EmitA64::EmitA32CoprocStoreWords(A32EmitContext& ctx, IR::Inst* inst) { CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]); } - std::string A32EmitA64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const { const A32::LocationDescriptor descriptor{ir_descriptor}; return fmt::format("a32_{}{:08X}_{}_fpcr{:08X}", descriptor.TFlag() ? "t" : "a", descriptor.PC(), descriptor.EFlag() ? 
"be" : "le", @@ -1399,7 +1392,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_PARAM2), X28, MJitStateReg(A32::Reg::PC)); code.SwitchFpscrOnExit(); Devirtualize<&A32::UserCallbacks::InterpreterFallback>(config.callbacks).EmitCall(code); - code.ReturnFromRunCode(true); // TODO: Check cycles + code.ReturnFromRunCode(true); // TODO: Check cycles } void A32EmitA64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) { @@ -1407,7 +1400,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri } void A32EmitA64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) { - auto get_upper = [](const IR::LocationDescriptor &desc) -> u32 { + auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 { return static_cast(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32); }; @@ -1432,7 +1425,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc code.ReturnFromRunCode(); return; } - + if (config.enable_cycle_counting) { code.CMP(X26, ZR); @@ -1456,7 +1449,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc FixupBranch dest = code.B(); - code.SwitchToFarCode(); + code.SwitchToFarCode(); code.AlignCode16(); code.SetJumpTarget(dest); code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), A32::LocationDescriptor{terminal.next}.PC()); @@ -1472,7 +1465,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) { EmitSetUpperLocationDescriptor(terminal.next, initial_location); - + if (!config.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), A32::LocationDescriptor{terminal.next}.PC()); code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, MJitStateReg(A32::Reg::PC)); @@ -1532,10 +1525,10 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDesc void A32EmitA64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { const CodePtr patch_location = code.GetCodePtr(); - auto long_branch_gt = [this](CodePtr ptr){ + auto long_branch_gt = [this](CodePtr ptr) { const s64 distance = reinterpret_cast(ptr) - reinterpret_cast(code.GetCodePtr()); - if((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) { + if ((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) { code.B(CC_GT, ptr); return; } @@ -1558,10 +1551,10 @@ void A32EmitA64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr void A32EmitA64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { const CodePtr patch_location = code.GetCodePtr(); - auto long_branch_gt = [this](CodePtr ptr){ + auto long_branch_gt = [this](CodePtr ptr) { const s64 distance = reinterpret_cast(ptr) - reinterpret_cast(code.GetCodePtr()); - if((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) { + if ((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) { code.B(CC_EQ, ptr); return; } @@ -1612,4 +1605,4 @@ void A32EmitA64::Unpatch(const IR::LocationDescriptor& location) { } } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/a32_emit_a64.h b/src/dynarmic/backend/A64/a32_emit_a64.h index f605a997..2780c8f0 100644 --- 
a/src/dynarmic/backend/A64/a32_emit_a64.h +++ b/src/dynarmic/backend/A64/a32_emit_a64.h @@ -17,9 +17,9 @@ #include "dynarmic/backend/A64/block_range_information.h" #include "dynarmic/backend/A64/emit_a64.h" #include "dynarmic/backend/A64/exception_handler.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/config.h" -#include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/ir/terminal.h" namespace Dynarmic::BackendA64 { @@ -137,4 +137,4 @@ protected: void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) override; }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/a32_interface.cpp b/src/dynarmic/backend/A64/a32_interface.cpp index bfa511ec..621b3a0a 100644 --- a/src/dynarmic/backend/A64/a32_interface.cpp +++ b/src/dynarmic/backend/A64/a32_interface.cpp @@ -7,13 +7,12 @@ #include #include -#include -#include -#include -#include - #include #include +#include +#include +#include +#include #include "dynarmic/backend/A64/a32_emit_a64.h" #include "dynarmic/backend/A64/a32_jitstate.h" @@ -21,12 +20,12 @@ #include "dynarmic/backend/A64/callback.h" #include "dynarmic/backend/A64/devirtualize.h" #include "dynarmic/backend/A64/jitstate_info.h" +#include "dynarmic/common/atomic.h" #include "dynarmic/common/llvm_disassemble.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" #include "dynarmic/ir/opt/passes.h" -#include "dynarmic/common/atomic.h" namespace Dynarmic::A32 { @@ -46,8 +45,7 @@ struct Jit::Impl { : block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}) , emitter(block_of_code, config, jit) , config(std::move(config)) - , jit_interface(jit) - {} + , jit_interface(jit) {} A32JitState jit_state; BlockOfCode block_of_code; @@ -61,7 +59,7 @@ struct Jit::Impl { bool invalidate_entire_cache = false; HaltReason Execute() { - const CodePtr current_codeptr = [this]{ + const CodePtr current_codeptr = [this] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask; if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { @@ -91,7 +89,7 @@ struct Jit::Impl { reinterpret_cast(pos) < reinterpret_cast(block.entrypoint) + block.size; pos += 1) { fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast(pos), *pos); fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast(pos))); - result += Common::DisassembleAArch64(*pos, reinterpret_cast(pos)); + result += Common::DisassembleAArch64(*pos, reinterpret_cast(pos)); } #endif return result; @@ -174,7 +172,8 @@ private: } }; -Jit::Jit(UserConfig config) : impl(std::make_unique(this, std::move(config))) {} +Jit::Jit(UserConfig config) + : impl(std::make_unique(this, std::move(config))) {} Jit::~Jit() = default; @@ -263,10 +262,15 @@ struct Context::Impl { size_t invalid_cache_generation; }; -Context::Context() : impl(std::make_unique()) { impl->jit_state.ResetRSB(); } +Context::Context() + : impl(std::make_unique()) { + impl->jit_state.ResetRSB(); +} Context::~Context() = default; -Context::Context(const Context& ctx) : impl(std::make_unique(*ctx.impl)) {} -Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {} +Context::Context(const Context& ctx) + : impl(std::make_unique(*ctx.impl)) {} +Context::Context(Context&& ctx) noexcept + : impl(std::move(ctx.impl)) {} Context& 
Context::operator=(const Context& ctx) { *impl = *ctx.impl; return *this; @@ -326,4 +330,4 @@ std::vector Jit::Disassemble() const { return result; } -} // namespace Dynarmic::A32 +} // namespace Dynarmic::A32 diff --git a/src/dynarmic/backend/A64/a32_jitstate.cpp b/src/dynarmic/backend/A64/a32_jitstate.cpp index 6228d4fa..eca172a7 100644 --- a/src/dynarmic/backend/A64/a32_jitstate.cpp +++ b/src/dynarmic/backend/A64/a32_jitstate.cpp @@ -4,11 +4,12 @@ * General Public License version 2 or any later version. */ +#include "dynarmic/backend/A64/a32_jitstate.h" + #include #include #include -#include "dynarmic/backend/A64/a32_jitstate.h" #include "dynarmic/backend/A64/block_of_code.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" @@ -89,7 +90,7 @@ void A32JitState::SetCpsr(u32 cpsr) { upper_location_descriptor |= mcl::bit::get_bit<9>(cpsr) ? 2 : 0; upper_location_descriptor |= mcl::bit::get_bit<5>(cpsr) ? 1 : 0; // IT state - upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000; + upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000; upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000; // Other flags @@ -170,4 +171,4 @@ void A32JitState::SetFpscr(u32 FPSCR) { guest_fpsr |= FPSCR & 0x9F; } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/a32_jitstate.h b/src/dynarmic/backend/A64/a32_jitstate.h index 953594c6..e94a560b 100644 --- a/src/dynarmic/backend/A64/a32_jitstate.h +++ b/src/dynarmic/backend/A64/a32_jitstate.h @@ -7,6 +7,7 @@ #pragma once #include + #include namespace Dynarmic::BackendA64 { @@ -14,8 +15,8 @@ namespace Dynarmic::BackendA64 { class BlockOfCode; #ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable:4324) // Structure was padded due to alignment specifier +# pragma warning(push) +# pragma warning(disable : 4324) // Structure was padded due to alignment specifier #endif struct A32JitState { @@ -23,7 +24,7 @@ struct A32JitState { A32JitState() { ResetRSB(); } - std::array Reg{}; // Current register file. + std::array Reg{}; // Current register file. // TODO: Mode-specific register sets unimplemented. u32 upper_location_descriptor = 0; @@ -35,10 +36,10 @@ struct A32JitState { u32 Cpsr() const; void SetCpsr(u32 cpsr); - alignas(u64) std::array ExtReg{}; // Extension registers. + alignas(u64) std::array ExtReg{}; // Extension registers. static constexpr size_t SpillCount = 64; - std::array Spill{}; // Spill. + std::array Spill{}; // Spill. static size_t GetSpillLocationOffsetFromIndex(size_t i) { return static_cast(offsetof(A32JitState, Spill) + i * sizeof(u64)); } @@ -57,7 +58,7 @@ struct A32JitState { u32 exclusive_state = 0; u32 exclusive_address = 0; - static constexpr size_t RSBSize = 8; // MUST be a power of 2. + static constexpr size_t RSBSize = 8; // MUST be a power of 2. 
static constexpr size_t RSBPtrMask = RSBSize - 1; u32 rsb_ptr = 0; std::array rsb_location_descriptors; @@ -65,7 +66,7 @@ struct A32JitState { void ResetRSB(); u32 fpsr_exc = 0; - u32 fpsr_qc = 0; // Dummy value + u32 fpsr_qc = 0; // Dummy value u32 fpsr_nzcv = 0; u32 Fpscr() const; void SetFpscr(u32 FPSCR); @@ -102,9 +103,9 @@ struct A32JitState { }; #ifdef _MSC_VER -#pragma warning(pop) +# pragma warning(pop) #endif using CodePtr = const void*; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/abi.cpp b/src/dynarmic/backend/A64/abi.cpp index 18fd1272..116249c2 100644 --- a/src/dynarmic/backend/A64/abi.cpp +++ b/src/dynarmic/backend/A64/abi.cpp @@ -14,18 +14,18 @@ // 20th Sep 2018: This code was modified for Dynarmic. +#include "dynarmic/backend/A64/abi.h" + #include #include -#include - -#include "dynarmic/backend/A64/abi.h" +#include namespace Dynarmic::BackendA64 { template void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) { - u32 gprs = 0 , fprs = 0; + u32 gprs = 0, fprs = 0; for (HostLoc reg : regs) { if (HostLocIsGPR(reg)) { @@ -83,4 +83,4 @@ void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc e ABI_PopRegistersAndAdjustStack(code, regs); } -} // namespace Dynarmic::BackendX64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/abi.h b/src/dynarmic/backend/A64/abi.h index 9504cf77..13968b46 100644 --- a/src/dynarmic/backend/A64/abi.h +++ b/src/dynarmic/backend/A64/abi.h @@ -23,7 +23,7 @@ constexpr HostLoc ABI_PARAM6 = HostLoc::X5; constexpr HostLoc ABI_PARAM7 = HostLoc::X6; constexpr HostLoc ABI_PARAM8 = HostLoc::X7; -constexpr std::array ABI_ALL_CALLER_SAVE = { +constexpr std::array ABI_ALL_CALLER_SAVE = { HostLoc::X0, HostLoc::X1, HostLoc::X2, @@ -52,7 +52,7 @@ constexpr std::array ABI_ALL_CALLER_SAVE = { HostLoc::Q5, HostLoc::Q6, HostLoc::Q7, - + HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, @@ -95,7 +95,7 @@ constexpr std::array ABI_ALL_CALLEE_SAVE = { HostLoc::Q15, }; -constexpr size_t ABI_SHADOW_SPACE = 0; // bytes +constexpr size_t ABI_SHADOW_SPACE = 0; // bytes static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 63, "Invalid total number of registers"); @@ -107,4 +107,4 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code); void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception); void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception); -} // namespace Dynarmic::BackendX64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/block_of_code.cpp b/src/dynarmic/backend/A64/block_of_code.cpp index 6c7ae039..bd5f1c0e 100644 --- a/src/dynarmic/backend/A64/block_of_code.cpp +++ b/src/dynarmic/backend/A64/block_of_code.cpp @@ -4,6 +4,8 @@ * General Public License version 2 or any later version. 
*/ +#include "dynarmic/backend/A64/block_of_code.h" + #include #include #include @@ -12,23 +14,22 @@ #include "dynarmic/backend/A64/a32_jitstate.h" #include "dynarmic/backend/A64/abi.h" -#include "dynarmic/interface/halt_reason.h" -#include "dynarmic/backend/A64/block_of_code.h" #include "dynarmic/backend/A64/perf_map.h" +#include "dynarmic/interface/halt_reason.h" #ifdef _WIN32 - #include +# include #else - #include +# include #endif #ifdef __APPLE__ -#include +# include #endif namespace Dynarmic::BackendA64 { -const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0; +const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0; const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1; const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0; @@ -43,9 +44,9 @@ const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7; const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30; const std::array BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, - BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4, - BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6, - BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8}; + BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4, + BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6, + BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8}; namespace { @@ -54,22 +55,22 @@ constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024; #ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT void ProtectMemory([[maybe_unused]] const void* base, [[maybe_unused]] size_t size, bool is_executable) { -#if defined(_WIN32) +# if defined(_WIN32) DWORD oldProtect = 0; VirtualProtect(const_cast(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect); -#elif defined(__APPLE__) +# elif defined(__APPLE__) pthread_jit_write_protect_np(is_executable); -#else +# else static const size_t pageSize = sysconf(_SC_PAGESIZE); const size_t iaddr = reinterpret_cast(base); const size_t roundAddr = iaddr & ~(pageSize - static_cast(1)); const int mode = is_executable ? 
(PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE); mprotect(reinterpret_cast(roundAddr), size + (iaddr - roundAddr), mode); -#endif +# endif } #endif -} // anonymous namespace +} // anonymous namespace BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi) : fp_emitter(this) @@ -154,7 +155,7 @@ void BlockOfCode::ForceReturnFromRunCode(bool fpscr_already_exited) { } void BlockOfCode::GenRunCode() { - const u8* loop, *enter_fpscr_then_loop; + const u8 *loop, *enter_fpscr_then_loop; std::vector return_to_caller_fpscr_already_exited; AlignCode16(); @@ -168,7 +169,7 @@ void BlockOfCode::GenRunCode() { MOV(Arm64Gen::X28, ABI_PARAM1); MOVI2R(Arm64Gen::X27, cb.value_in_X27); - MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register + MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register cb.GetTicksRemaining->EmitCall(*this); STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); @@ -250,11 +251,11 @@ void BlockOfCode::GenRunCode() { void BlockOfCode::SwitchFpscrOnEntry() { MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR); STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR); - + LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr); _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1); LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr); - _MSR(Arm64Gen::FIELD_FPSR, ABI_SCRATCH1); + _MSR(Arm64Gen::FIELD_FPSR, ABI_SCRATCH1); } void BlockOfCode::SwitchFpscrOnExit() { @@ -321,13 +322,13 @@ std::size_t BlockOfCode::GetRegionSize() const { return total_region_size; } -void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) { +void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) { ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG"); void* ret = GetWritableCodePtr(); region_size += alloc_size; SetCodePtr(GetCodePtr() + alloc_size); - memset(ret, 0, alloc_size); + memset(ret, 0, alloc_size); return ret; } @@ -353,4 +354,4 @@ void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) { //#endif //} -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/block_of_code.h b/src/dynarmic/backend/A64/block_of_code.h index af6b7935..d7f698c7 100644 --- a/src/dynarmic/backend/A64/block_of_code.h +++ b/src/dynarmic/backend/A64/block_of_code.h @@ -14,8 +14,8 @@ #include "dynarmic/backend/A64/callback.h" #include "dynarmic/backend/A64/constant_pool.h" -#include "dynarmic/backend/A64/jitstate_info.h" #include "dynarmic/backend/A64/emitter/a64_emitter.h" +#include "dynarmic/backend/A64/jitstate_info.h" #include "dynarmic/interface/halt_reason.h" namespace Dynarmic::BackendA64 { @@ -34,7 +34,6 @@ public: BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi); BlockOfCode(const BlockOfCode&) = delete; - /// Call when external emitters have finished emitting their preludes. void PreludeComplete(); @@ -146,4 +145,4 @@ private: //Xbyak::util::Cpu cpu_info; }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/block_range_information.cpp b/src/dynarmic/backend/A64/block_range_information.cpp index e314a8a7..10e656fc 100644 --- a/src/dynarmic/backend/A64/block_range_information.cpp +++ b/src/dynarmic/backend/A64/block_range_information.cpp @@ -4,35 +4,33 @@ * General Public License version 2 or any later version. 
*/ +#include "dynarmic/backend/A64/block_range_information.h" + #include #include #include #include -#include - -#include "dynarmic/backend/A64/block_range_information.h" - namespace Dynarmic::BackendA64 { -template +template void BlockRangeInformation::AddRange(boost::icl::discrete_interval range, IR::LocationDescriptor location) { block_ranges.add(std::make_pair(range, std::set{location})); } -template +template void BlockRangeInformation::ClearCache() { block_ranges.clear(); } -template +template std::unordered_set BlockRangeInformation::InvalidateRanges(const boost::icl::interval_set& ranges) { std::unordered_set erase_locations; for (auto invalidate_interval : ranges) { auto pair = block_ranges.equal_range(invalidate_interval); for (auto it = pair.first; it != pair.second; ++it) { - for (const auto &descriptor : it->second) { + for (const auto& descriptor : it->second) { erase_locations.insert(descriptor); } } @@ -44,4 +42,4 @@ std::unordered_set BlockRangeInformation; template class BlockRangeInformation; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/block_range_information.h b/src/dynarmic/backend/A64/block_range_information.h index 5e773bf5..9cd6bce7 100644 --- a/src/dynarmic/backend/A64/block_range_information.h +++ b/src/dynarmic/backend/A64/block_range_information.h @@ -15,7 +15,7 @@ namespace Dynarmic::BackendA64 { -template +template class BlockRangeInformation { public: void AddRange(boost::icl::discrete_interval range, IR::LocationDescriptor location); @@ -26,4 +26,4 @@ private: boost::icl::interval_map> block_ranges; }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/callback.cpp b/src/dynarmic/backend/A64/callback.cpp index d94855cf..8b4f768e 100644 --- a/src/dynarmic/backend/A64/callback.cpp +++ b/src/dynarmic/backend/A64/callback.cpp @@ -5,6 +5,7 @@ */ #include "dynarmic/backend/A64/callback.h" + #include "dynarmic/backend/A64/block_of_code.h" namespace Dynarmic::BackendA64 { @@ -38,4 +39,4 @@ void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function fn = [](RegList) {}) const = 0; + virtual void EmitCall( + BlockOfCode& code, std::function fn = [](RegList) {}) const = 0; virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function fn) const = 0; }; class SimpleCallback final : public Callback { public: - template - SimpleCallback(Function fn) : fn(reinterpret_cast(fn)) {} + template + SimpleCallback(Function fn) + : fn(reinterpret_cast(fn)) {} - void EmitCall(BlockOfCode& code, std::function fn = [](RegList) {}) const override; + void EmitCall( + BlockOfCode& code, std::function fn = [](RegList) {}) const override; void EmitCallWithReturnPointer(BlockOfCode& code, std::function fn) const override; private: @@ -41,10 +44,12 @@ private: class ArgCallback final : public Callback { public: - template - ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast(fn)), arg(arg) {} + template + ArgCallback(Function fn, u64 arg) + : fn(reinterpret_cast(fn)), arg(arg) {} - void EmitCall(BlockOfCode& code, std::function fn = [](RegList) {}) const override; + void EmitCall( + BlockOfCode& code, std::function fn = [](RegList) {}) const override; void EmitCallWithReturnPointer(BlockOfCode& code, std::function fn) const override; private: @@ -52,4 +57,4 @@ private: u64 arg; }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/constant_pool.cpp 
b/src/dynarmic/backend/A64/constant_pool.cpp index fc36073d..2b33a65b 100644 --- a/src/dynarmic/backend/A64/constant_pool.cpp +++ b/src/dynarmic/backend/A64/constant_pool.cpp @@ -4,22 +4,24 @@ * General Public License version 2 or any later version. */ +#include "dynarmic/backend/A64/constant_pool.h" + #include #include #include "dynarmic/backend/A64/block_of_code.h" -#include "dynarmic/backend/A64/constant_pool.h" namespace Dynarmic::BackendA64 { -ConstantPool::ConstantPool(BlockOfCode& code) : code(code) {} +ConstantPool::ConstantPool(BlockOfCode& code) + : code(code) {} void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) { const auto constant = std::make_tuple(lower, upper); auto iter = constant_info.find(constant); if (iter == constant_info.end()) { - struct PatchInfo p = { code.GetCodePtr(), Rt, constant }; + struct PatchInfo p = {code.GetCodePtr(), Rt, constant}; patch_info.emplace_back(p); code.BRK(0); return; @@ -29,7 +31,7 @@ void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) { if (!(offset >= -0x40000 && offset <= 0x3FFFF)) { constant_info.erase(constant); - struct PatchInfo p = { code.GetCodePtr(), Rt, constant }; + struct PatchInfo p = {code.GetCodePtr(), Rt, constant}; patch_info.emplace_back(p); code.BRK(0x42); return; @@ -58,9 +60,9 @@ void ConstantPool::PatchPool() { code.SetCodePtr(pool_ptr); } -void ConstantPool::Clear() { +void ConstantPool::Clear() { constant_info.clear(); patch_info.clear(); } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/constant_pool.h b/src/dynarmic/backend/A64/constant_pool.h index 4c303555..80e54cb7 100644 --- a/src/dynarmic/backend/A64/constant_pool.h +++ b/src/dynarmic/backend/A64/constant_pool.h @@ -31,7 +31,7 @@ public: void Clear(); private: - static constexpr size_t align_size = 16; // bytes + static constexpr size_t align_size = 16; // bytes std::map, void*> constant_info; @@ -46,4 +46,4 @@ private: std::vector patch_info; }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/devirtualize.h b/src/dynarmic/backend/A64/devirtualize.h index 49345dc0..5b8f4806 100644 --- a/src/dynarmic/backend/A64/devirtualize.h +++ b/src/dynarmic/backend/A64/devirtualize.h @@ -9,10 +9,10 @@ #include #include -#include -#include #include #include +#include +#include #include "dynarmic/backend/A64/callback.h" @@ -20,20 +20,20 @@ namespace Dynarmic::BackendA64 { namespace impl { -template +template struct ThunkBuilder; -template -struct ThunkBuilder { +template +struct ThunkBuilder { static R Thunk(C* this_, Args... args) { return (this_->*mfp)(std::forward(args)...); } }; -} // namespace impl +} // namespace impl template -ArgCallback DevirtualizeGeneric(mcl::class_type * this_) { +ArgCallback DevirtualizeGeneric(mcl::class_type* this_) { return ArgCallback{&impl::ThunkBuilder::Thunk, reinterpret_cast(this_)}; } @@ -74,4 +74,4 @@ ArgCallback Devirtualize(mcl::class_type* this_) { #endif } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/emit_a64.cpp b/src/dynarmic/backend/A64/emit_a64.cpp index 9ff9138e..b017f9ff 100644 --- a/src/dynarmic/backend/A64/emit_a64.cpp +++ b/src/dynarmic/backend/A64/emit_a64.cpp @@ -4,16 +4,17 @@ * General Public License version 2 or any later version. 
*/ +#include "dynarmic/backend/A64/emit_a64.h" + #include #include #include #include -#include #include +#include #include "dynarmic/backend/A64/block_of_code.h" -#include "dynarmic/backend/A64/emit_a64.h" #include "dynarmic/backend/A64/hostloc.h" #include "dynarmic/backend/A64/perf_map.h" #include "dynarmic/backend/A64/reg_alloc.h" @@ -28,7 +29,7 @@ namespace Dynarmic::BackendA64 { EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block) - : reg_alloc(reg_alloc), block(block) {} + : reg_alloc(reg_alloc), block(block) {} void EmitContext::EraseInstruction(IR::Inst* inst) { block.Instructions().erase(inst); @@ -36,7 +37,7 @@ void EmitContext::EraseInstruction(IR::Inst* inst) { } EmitA64::EmitA64(BlockOfCode& code) - : code(code) {} + : code(code) {} EmitA64::~EmitA64() = default; @@ -64,8 +65,8 @@ void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) { void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) { auto iter = block_descriptors.find(target); CodePtr target_code_ptr = iter != block_descriptors.end() - ? iter->second.entrypoint - : code.GetReturnFromRunCodeAddress(); + ? iter->second.entrypoint + : code.GetReturnFromRunCodeAddress(); code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr); @@ -80,7 +81,7 @@ void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::Locat code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1); code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1); - code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr); + code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr); } void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { @@ -162,28 +163,28 @@ FixupBranch EmitA64::EmitCond(IR::Cond cond) { code._MSR(FIELD_NZCV, cpsr); switch (cond) { - case IR::Cond::EQ: //z + case IR::Cond::EQ: //z label = code.B(CC_EQ); break; - case IR::Cond::NE: //!z + case IR::Cond::NE: //!z label = code.B(CC_NEQ); break; - case IR::Cond::CS: //c + case IR::Cond::CS: //c label = code.B(CC_CS); break; - case IR::Cond::CC: //!c + case IR::Cond::CC: //!c label = code.B(CC_CC); break; - case IR::Cond::MI: //n + case IR::Cond::MI: //n label = code.B(CC_MI); break; - case IR::Cond::PL: //!n + case IR::Cond::PL: //!n label = code.B(CC_PL); break; - case IR::Cond::VS: //v + case IR::Cond::VS: //v label = code.B(CC_VS); break; - case IR::Cond::VC: //!v + case IR::Cond::VC: //!v label = code.B(CC_VC); break; case IR::Cond::HI: //c & !z @@ -203,7 +204,7 @@ FixupBranch EmitA64::EmitCond(IR::Cond cond) { break; case IR::Cond::LE: // z | (n != v) label = code.B(CC_LE); - break; + break; default: ASSERT_MSG(false, "Unknown cond {}", static_cast(cond)); break; @@ -278,7 +279,7 @@ void EmitA64::InvalidateBasicBlocks(const std::unordered_set; // Array alias that always sizes itself according to the given type T // relative to the size of a vector register. e.g. T = u32 would result // in a std::array. 
-template +template using VectorArray = std::array>; struct EmitContext { @@ -124,4 +124,4 @@ protected: std::unordered_map patch_information; }; -} // namespace Dynarmic::BackendX64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/emit_a64_data_processing.cpp b/src/dynarmic/backend/A64/emit_a64_data_processing.cpp index 83f762ff..aef70053 100644 --- a/src/dynarmic/backend/A64/emit_a64_data_processing.cpp +++ b/src/dynarmic/backend/A64/emit_a64_data_processing.cpp @@ -8,8 +8,8 @@ #include #include "dynarmic/backend/A64/block_of_code.h" -#include "dynarmic/backend/A64/reg_alloc.h" #include "dynarmic/backend/A64/emit_a64.h" +#include "dynarmic/backend/A64/reg_alloc.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -61,7 +61,7 @@ void EmitA64::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) { code.UBFX(carry, result, 31, 1); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); - } + } code.LSR(result, result, 32); @@ -82,7 +82,7 @@ void EmitA64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); // TODO: Flag optimization - code.LSR(result,result, 31); + code.LSR(result, result, 31); ctx.reg_alloc.DefineValue(inst, result); } @@ -127,46 +127,46 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* code._MSR(FIELD_NZCV, nzcv); switch (args[0].GetImmediateCond()) { - case IR::Cond::EQ: //z - code.CSEL(else_, else_, then_ , CC_EQ); + case IR::Cond::EQ: //z + code.CSEL(else_, else_, then_, CC_EQ); break; - case IR::Cond::NE: //!z + case IR::Cond::NE: //!z code.CSEL(else_, else_, then_, CC_NEQ); break; - case IR::Cond::CS: //c + case IR::Cond::CS: //c code.CSEL(else_, else_, then_, CC_CS); break; - case IR::Cond::CC: //!c - code.CSEL(else_, else_, then_ , CC_CC); + case IR::Cond::CC: //!c + code.CSEL(else_, else_, then_, CC_CC); break; - case IR::Cond::MI: //n + case IR::Cond::MI: //n code.CSEL(else_, else_, then_, CC_MI); break; - case IR::Cond::PL: //!n + case IR::Cond::PL: //!n code.CSEL(else_, else_, then_, CC_PL); break; - case IR::Cond::VS: //v + case IR::Cond::VS: //v code.CSEL(else_, else_, then_, CC_VS); break; - case IR::Cond::VC: //!v + case IR::Cond::VC: //!v code.CSEL(else_, else_, then_, CC_VC); break; - case IR::Cond::HI: //c & !z + case IR::Cond::HI: //c & !z code.CSEL(else_, else_, then_, CC_HI); break; - case IR::Cond::LS: //!c | z + case IR::Cond::LS: //!c | z code.CSEL(else_, else_, then_, CC_LS); break; - case IR::Cond::GE: // n == v + case IR::Cond::GE: // n == v code.CSEL(else_, else_, then_, CC_GE); break; - case IR::Cond::LT: // n != v + case IR::Cond::LT: // n != v code.CSEL(else_, else_, then_, CC_LT); break; - case IR::Cond::GT: // !z & (n == v) + case IR::Cond::GT: // !z & (n == v) code.CSEL(else_, else_, then_, CC_GT); break; - case IR::Cond::LE: // z | (n != v) + case IR::Cond::LE: // z | (n != v) code.CSEL(else_, else_, then_, CC_LE); break; case IR::Cond::AL: @@ -218,7 +218,7 @@ void EmitA64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { Arm64Gen::ARM64Reg result = ctx.reg_alloc.UseScratchGpr(operand_arg); code.ANDI2R(shift, shift, 0xFF); - code.LSLV(result, result, shift); + code.LSLV(result, result, shift); code.CMPI2R(shift, 32); code.CSEL(result, WZR, DecodeReg(result), CC_GE); ctx.reg_alloc.DefineValue(inst, DecodeReg(result)); @@ -239,7 +239,7 @@ void 
EmitA64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { code.MOV(carry, WZR); } else { code.ANDI2R(carry, result, 1); - code.MOV(result, WZR); + code.MOV(result, WZR); } ctx.reg_alloc.DefineValue(carry_inst, carry); @@ -257,7 +257,7 @@ void EmitA64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { end = code.B(CC_EQ); code.CMPI2R(shift, 32); - code.SUBI2R(shift, shift, 1); // Subtract 1 to get the bit that is shiftedout, into the MSB. + code.SUBI2R(shift, shift, 1); // Subtract 1 to get the bit that is shiftedout, into the MSB. code.LSLV(result, result, shift); code.UBFX(carry, result, 31, 1); code.LSL(result, result, 1); @@ -344,7 +344,7 @@ void EmitA64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { } else if (shift < 32) { code.LSR(carry, result, shift - 1); code.ANDI2R(carry, carry, 1); - code.LSR(result,result, shift); + code.LSR(result, result, shift); } else if (shift == 32) { code.UBFX(carry, result, 31, 1); code.MOV(result, WZR); @@ -369,15 +369,15 @@ void EmitA64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { end = code.B(CC_EQ); code.CMPI2R(shift, 32); - code.SUBI2R(shift, shift, 1); // Subtract 1 to get the bit that is shifted out to the carry. + code.SUBI2R(shift, shift, 1); // Subtract 1 to get the bit that is shifted out to the carry. code.LSRV(result, result, shift); code.ANDI2R(carry, result, 1); code.LSR(result, result, 1); code.CSEL(result, result, WZR, CC_LT); code.CSEL(carry, carry, WZR, CC_LE); - - code.SetJumpTarget(end); + + code.SetJumpTarget(end); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); @@ -490,8 +490,8 @@ void EmitA64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.ANDI2R(carry, result, 1); code.ASR(result, result, 1); // } - - code.SetJumpTarget(end); + + code.SetJumpTarget(end); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); @@ -582,7 +582,7 @@ void EmitA64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { // TODO: Optimize - std::vector end; + std::vector end; FixupBranch zero_1F; code.ANDSI2R(shift, shift, u32(0xFF)); @@ -649,8 +649,8 @@ void EmitA64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) { // Set carry to the LSB and perform ROR. code.BFI(result, carry, 0, 1); - code.ROR(result, result, 1); - + code.ROR(result, result, 1); + if (carry_inst) { code.ANDI2R(carry, temp, 1); @@ -684,7 +684,7 @@ static void EmitAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit result = bitsize == 64 ? result : DecodeReg(result); - if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { + if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { Arm64Gen::ARM64Reg op_arg = ctx.reg_alloc.UseGpr(args[1]); @@ -706,7 +706,7 @@ static void EmitAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit code.CMP(DecodeReg(op_arg), DecodeReg(op_arg)); code.ADCS(result, result, op_arg); } else { - code.ADDS(result,result, op_arg); + code.ADDS(result, result, op_arg); } } else { code.CMPI2R(DecodeReg(carry), 1); @@ -759,7 +759,7 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit result = bitsize == 64 ? 
result : DecodeReg(result); - if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { + if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { u32 op_arg = args[1].GetImmediateU32(); @@ -767,7 +767,7 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit } else { Arm64Gen::ARM64Reg op_arg = ctx.reg_alloc.UseGpr(args[1]); - code.ADDSI2R(op_arg, op_arg, 0); // Clear carry + code.ADDSI2R(op_arg, op_arg, 0); // Clear carry code.SBCS(result, result, op_arg); } } else { @@ -781,12 +781,12 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit if (carry_in.GetImmediateU1()) { code.SUBS(result, result, op_arg); } else { - code.ADDSI2R(DecodeReg(op_arg), DecodeReg(op_arg), 0); // Clear carry - code.SBCS(result,result, op_arg); + code.ADDSI2R(DecodeReg(op_arg), DecodeReg(op_arg), 0); // Clear carry + code.SBCS(result, result, op_arg); } } else { code.CMPI2R(DecodeReg(carry), 0x1); - code.SBCS(result,result, op_arg); + code.SBCS(result, result, op_arg); } } @@ -822,7 +822,7 @@ void EmitA64::EmitMul32(EmitContext& ctx, IR::Inst* inst) { ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); ARM64Reg op_arg = DecodeReg(ctx.reg_alloc.UseGpr(args[1])); - + code.MUL(result, result, op_arg); ctx.reg_alloc.DefineValue(inst, result); @@ -839,7 +839,6 @@ void EmitA64::EmitMul64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } - void EmitA64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -880,7 +879,6 @@ void EmitA64::EmitSignedDiv64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } - void EmitA64::EmitAnd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -890,7 +888,7 @@ void EmitA64::EmitAnd32(EmitContext& ctx, IR::Inst* inst) { u32 op_arg = args[1].GetImmediateU32(); code.ANDI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr()); } else { - Arm64Gen::ARM64Reg op_arg = DecodeReg(ctx.reg_alloc.UseGpr(args[1])); + Arm64Gen::ARM64Reg op_arg = DecodeReg(ctx.reg_alloc.UseGpr(args[1])); code.AND(result, result, op_arg); } @@ -905,8 +903,7 @@ void EmitA64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) { if (args[1].IsImmediate()) { u32 op_arg = args[1].GetImmediateU32(); code.ANDI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr()); - } - else { + } else { Arm64Gen::ARM64Reg op_arg = ctx.reg_alloc.UseGpr(args[1]); code.AND(result, result, op_arg); } @@ -938,8 +935,7 @@ void EmitA64::EmitEor64(EmitContext& ctx, IR::Inst* inst) { if (args[1].IsImmediate()) { u32 op_arg = args[1].GetImmediateU32(); code.EORI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr()); - } - else { + } else { Arm64Gen::ARM64Reg op_arg = ctx.reg_alloc.UseGpr(args[1]); code.EOR(result, result, op_arg); } @@ -957,7 +953,7 @@ void EmitA64::EmitOr32(EmitContext& ctx, IR::Inst* inst) { code.ORRI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr()); } else { Arm64Gen::ARM64Reg op_arg = DecodeReg(ctx.reg_alloc.UseGpr(args[1])); - code.ORR(result, result , op_arg); + code.ORR(result, result, op_arg); } ctx.reg_alloc.DefineValue(inst, result); @@ -971,8 +967,7 @@ void EmitA64::EmitOr64(EmitContext& ctx, IR::Inst* inst) { if (args[1].IsImmediate()) { u32 op_arg = args[1].GetImmediateU32(); code.ORRI2R(result, result, op_arg, ctx.reg_alloc.ScratchGpr()); - } - else { + } else { Arm64Gen::ARM64Reg op_arg = ctx.reg_alloc.UseGpr(args[1]); 
code.ORR(result, result, op_arg); } @@ -1001,8 +996,7 @@ void EmitA64::EmitNot64(EmitContext& ctx, IR::Inst* inst) { if (args[0].IsImmediate()) { result = ctx.reg_alloc.ScratchGpr(); code.MOVI2R(result, u32(~args[0].GetImmediateU32())); - } - else { + } else { result = ctx.reg_alloc.UseScratchGpr(args[0]); code.MVN(result, result); } @@ -1116,15 +1110,15 @@ void EmitA64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr()); code.CLZ(result, source); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } void EmitA64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ARM64Reg source = ctx.reg_alloc.UseGpr(args[0]); - ARM64Reg result = ctx.reg_alloc.ScratchGpr(); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ARM64Reg source = ctx.reg_alloc.UseGpr(args[0]); + ARM64Reg result = ctx.reg_alloc.ScratchGpr(); - code.CLZ(result, source); - ctx.reg_alloc.DefineValue(inst, result); + code.CLZ(result, source); + ctx.reg_alloc.DefineValue(inst, result); } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/emit_a64_floating_point.cpp b/src/dynarmic/backend/A64/emit_a64_floating_point.cpp index d4f5b044..bc09ee4b 100644 --- a/src/dynarmic/backend/A64/emit_a64_floating_point.cpp +++ b/src/dynarmic/backend/A64/emit_a64_floating_point.cpp @@ -46,7 +46,7 @@ Arm64Gen::RoundingMode ConvertRoundingModeToA64RoundingMode(FP::RoundingMode rou } } -template +template void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -61,7 +61,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { ctx.reg_alloc.DefineValue(inst, result); } -template +template void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -72,14 +72,13 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) if constexpr (std::is_member_function_pointer_v) { (code.fp_emitter.*fn)(result, result, operand); - } - else { + } else { fn(result, result, operand); } ctx.reg_alloc.DefineValue(inst, result); } -} // anonymous namespace +} // anonymous namespace //void EmitA64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) { // auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -136,27 +135,27 @@ void EmitA64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { } void EmitA64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD); + FPThreeOp<32, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD); } void EmitA64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD); + FPThreeOp<64, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD); } void EmitA64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV); + FPThreeOp<32, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, 
&Arm64Gen::ARM64FloatEmitter::FDIV); } void EmitA64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV); + FPThreeOp<64, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV); } void EmitA64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL); + FPThreeOp<32, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL); } void EmitA64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL); + FPThreeOp<64, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL); } void EmitA64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) { FPTwoOp<32>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT); @@ -167,16 +166,16 @@ void EmitA64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) { } void EmitA64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB); + FPThreeOp<32, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB); } void EmitA64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB); + FPThreeOp<64, void (Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB); } static ARM64Reg SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) { ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr(); - // Fpsr's nzcv is copied across integer nzcv + // Fpsr's nzcv is copied across integer nzcv code.MRS(nzcv, FIELD_NZCV); return nzcv; } @@ -276,13 +275,11 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { if constexpr (unsigned_) { code.fp_emitter.FCVTU(result, src, round_imm); - } - else { + } else { code.fp_emitter.FCVTS(result, src, round_imm); } ctx.reg_alloc.DefineValue(inst, result); - } void EmitA64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) { @@ -328,8 +325,7 @@ void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { code.fp_emitter.SCVTF(result, from, fbits); - } - else { + } else { code.fp_emitter.SCVTF(result, from); } @@ -338,7 +334,7 @@ void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) { void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - + const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0])); const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr()); const size_t fbits = args[1].GetImmediateU8(); @@ -347,8 +343,7 @@ void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { code.fp_emitter.UCVTF(result, from, fbits); - } - else { + } else { code.fp_emitter.UCVTF(result, from); } @@ -366,8 +361,7 @@ void EmitA64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { 
code.fp_emitter.SCVTF(result, from, fbits); - } - else { + } else { code.fp_emitter.SCVTF(result, from); } @@ -385,8 +379,7 @@ void EmitA64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { code.fp_emitter.SCVTF(result, from, fbits); - } - else { + } else { code.fp_emitter.SCVTF(result, from); } @@ -404,8 +397,7 @@ void EmitA64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { code.fp_emitter.SCVTF(result, from, fbits); - } - else { + } else { code.fp_emitter.SCVTF(result, from); } @@ -423,8 +415,7 @@ void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { code.fp_emitter.UCVTF(result, from, fbits); - } - else { + } else { code.fp_emitter.UCVTF(result, from); } @@ -434,7 +425,6 @@ void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) { void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]); const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr()); const size_t fbits = args[1].GetImmediateU8(); @@ -443,8 +433,7 @@ void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { code.fp_emitter.UCVTF(result, from, fbits); - } - else { + } else { code.fp_emitter.UCVTF(result, from); } @@ -454,7 +443,6 @@ void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) { void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]); const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr()); const size_t fbits = args[1].GetImmediateU8(); @@ -463,11 +451,10 @@ void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { code.fp_emitter.UCVTF(result, from, fbits); - } - else { + } else { code.fp_emitter.UCVTF(result, from); } ctx.reg_alloc.DefineValue(inst, result); } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/emit_a64_packed.cpp b/src/dynarmic/backend/A64/emit_a64_packed.cpp index 948d04c3..393f25ac 100644 --- a/src/dynarmic/backend/A64/emit_a64_packed.cpp +++ b/src/dynarmic/backend/A64/emit_a64_packed.cpp @@ -468,4 +468,4 @@ void EmitA64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, ge); } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/emit_a64_saturation.cpp b/src/dynarmic/backend/A64/emit_a64_saturation.cpp index fb844c18..a4d8f363 100644 --- a/src/dynarmic/backend/A64/emit_a64_saturation.cpp +++ b/src/dynarmic/backend/A64/emit_a64_saturation.cpp @@ -37,8 +37,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) if constexpr (op == Op::Add) { code.fp_emitter.SQADD(size, result, result, addend); - } - else { + } else { code.fp_emitter.SQSUB(size, result, result, addend); } @@ -54,7 +53,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) ctx.reg_alloc.DefineValue(inst, result); } -} // anonymous namespace +} // anonymous namespace void EmitA64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) { EmitSignedSaturatedOp(code, ctx, inst); @@ -166,4 +165,4 @@ void EmitA64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -} // namespace Dynarmic::BackendA64 +} // namespace 
Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/emitter/a64_emitter.cpp b/src/dynarmic/backend/A64/emitter/a64_emitter.cpp index 3a3b3dd6..7a68e653 100644 --- a/src/dynarmic/backend/A64/emitter/a64_emitter.cpp +++ b/src/dynarmic/backend/A64/emitter/a64_emitter.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include "a64_emitter.h" + #include #include #include @@ -9,11 +11,10 @@ #include #include -#include #include #include +#include -#include "a64_emitter.h" #include "dynarmic/common/math_util.h" #ifdef _WIN32 @@ -70,8 +71,7 @@ bool IsImmArithmetic(uint64_t input, u32* val, bool* shift) { } // For AND/TST/ORR/EOR etc -bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned int* imm_s, - unsigned int* imm_r) { +bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned int* imm_s, unsigned int* imm_r) { bool negate = false; // Logical immediates are encoded using parameters n, imm_s and imm_r using @@ -211,8 +211,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57; // Ensure that the index to the multipliers array is within bounds. - DEBUG_ASSERT((multiplier_idx >= 0) && - (static_cast(multiplier_idx) < multipliers.size())); + DEBUG_ASSERT((multiplier_idx >= 0) && (static_cast(multiplier_idx) < multipliers.size())); uint64_t multiplier = multipliers[multiplier_idx]; uint64_t candidate = (b - a) * multiplier; @@ -284,8 +283,7 @@ bool FPImm8FromFloat(float value, u8* imm_out) { if ((exponent >> 7) == ((exponent >> 6) & 1)) return false; - const u8 imm8 = static_cast((sign << 7) | ((!(exponent >> 7)) << 6) | - ((exponent & 3) << 4) | mantissa4); + const u8 imm8 = static_cast((sign << 7) | ((!(exponent >> 7)) << 6) | ((exponent & 3) << 4) | mantissa4); const float new_float = FPImm8ToFloat(imm8); if (new_float == value) *imm_out = imm8; @@ -319,7 +317,7 @@ static constexpr u32 MaskImm26(s64 distance) { return distance & 0x3FFFFFF; } -} // Anonymous namespace +} // Anonymous namespace void ARM64XEmitter::SetCodePtrUnsafe(u8* ptr) { m_code = ptr; @@ -384,7 +382,8 @@ void ARM64XEmitter::FlushIcacheSection(const u8* start, const u8* end) { static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff; size_t isize, dsize; - __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0)); + __asm__ volatile("mrs %0, ctr_el0" + : "=r"(ctr_el0)); isize = 4 << ((ctr_el0 >> 0) & 0xf); dsize = 4 << ((ctr_el0 >> 16) & 0xf); @@ -396,130 +395,145 @@ void ARM64XEmitter::FlushIcacheSection(const u8* start, const u8* end) { for (; addr < reinterpret_cast(end); addr += dsize) // use "civac" instead of "cvau", as this is the suggested workaround for // Cortex-A53 errata 819472, 826319, 827319 and 824069. 
- __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory"); - __asm__ volatile("dsb ish" : : : "memory"); + __asm__ volatile("dc civac, %0" + : + : "r"(addr) + : "memory"); + __asm__ volatile("dsb ish" + : + : + : "memory"); addr = reinterpret_cast(start) & ~static_cast(isize - 1); for (; addr < reinterpret_cast(end); addr += isize) - __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory"); + __asm__ volatile("ic ivau, %0" + : + : "r"(addr) + : "memory"); - __asm__ volatile("dsb ish" : : : "memory"); - __asm__ volatile("isb" : : : "memory"); + __asm__ volatile("dsb ish" + : + : + : "memory"); + __asm__ volatile("isb" + : + : + : "memory"); #endif } // Exception generation static const u32 ExcEnc[][3] = { - {0, 0, 1}, // SVC - {0, 0, 2}, // HVC - {0, 0, 3}, // SMC - {1, 0, 0}, // BRK - {2, 0, 0}, // HLT - {5, 0, 1}, // DCPS1 - {5, 0, 2}, // DCPS2 - {5, 0, 3}, // DCPS3 + {0, 0, 1}, // SVC + {0, 0, 2}, // HVC + {0, 0, 3}, // SMC + {1, 0, 0}, // BRK + {2, 0, 0}, // HLT + {5, 0, 1}, // DCPS1 + {5, 0, 2}, // DCPS2 + {5, 0, 3}, // DCPS3 }; // Arithmetic generation static const u32 ArithEnc[] = { - 0x058, // ADD - 0x258, // SUB + 0x058, // ADD + 0x258, // SUB }; // Conditional Select static const u32 CondSelectEnc[][2] = { - {0, 0}, // CSEL - {0, 1}, // CSINC - {1, 0}, // CSINV - {1, 1}, // CSNEG + {0, 0}, // CSEL + {0, 1}, // CSINC + {1, 0}, // CSINV + {1, 1}, // CSNEG }; // Data-Processing (1 source) static const u32 Data1SrcEnc[][2] = { - {0, 0}, // RBIT - {0, 1}, // REV16 - {0, 2}, // REV32 - {0, 3}, // REV64 - {0, 4}, // CLZ - {0, 5}, // CLS + {0, 0}, // RBIT + {0, 1}, // REV16 + {0, 2}, // REV32 + {0, 3}, // REV64 + {0, 4}, // CLZ + {0, 5}, // CLS }; // Data-Processing (2 source) static const u32 Data2SrcEnc[] = { - 0x02, // UDIV - 0x03, // SDIV - 0x08, // LSLV - 0x09, // LSRV - 0x0A, // ASRV - 0x0B, // RORV - 0x10, // CRC32B - 0x11, // CRC32H - 0x12, // CRC32W - 0x14, // CRC32CB - 0x15, // CRC32CH - 0x16, // CRC32CW - 0x13, // CRC32X (64bit Only) - 0x17, // XRC32CX (64bit Only) + 0x02, // UDIV + 0x03, // SDIV + 0x08, // LSLV + 0x09, // LSRV + 0x0A, // ASRV + 0x0B, // RORV + 0x10, // CRC32B + 0x11, // CRC32H + 0x12, // CRC32W + 0x14, // CRC32CB + 0x15, // CRC32CH + 0x16, // CRC32CW + 0x13, // CRC32X (64bit Only) + 0x17, // XRC32CX (64bit Only) }; // Data-Processing (3 source) static const u32 Data3SrcEnc[][2] = { - {0, 0}, // MADD - {0, 1}, // MSUB - {1, 0}, // SMADDL (64Bit Only) - {1, 1}, // SMSUBL (64Bit Only) - {2, 0}, // SMULH (64Bit Only) - {5, 0}, // UMADDL (64Bit Only) - {5, 1}, // UMSUBL (64Bit Only) - {6, 0}, // UMULH (64Bit Only) + {0, 0}, // MADD + {0, 1}, // MSUB + {1, 0}, // SMADDL (64Bit Only) + {1, 1}, // SMSUBL (64Bit Only) + {2, 0}, // SMULH (64Bit Only) + {5, 0}, // UMADDL (64Bit Only) + {5, 1}, // UMSUBL (64Bit Only) + {6, 0}, // UMULH (64Bit Only) }; // Logical (shifted register) static const u32 LogicalEnc[][2] = { - {0, 0}, // AND - {0, 1}, // BIC - {1, 0}, // OOR - {1, 1}, // ORN - {2, 0}, // EOR - {2, 1}, // EON - {3, 0}, // ANDS - {3, 1}, // BICS + {0, 0}, // AND + {0, 1}, // BIC + {1, 0}, // OOR + {1, 1}, // ORN + {2, 0}, // EOR + {2, 1}, // EON + {3, 0}, // ANDS + {3, 1}, // BICS }; // Load/Store Exclusive static const u32 LoadStoreExcEnc[][5] = { - {0, 0, 0, 0, 0}, // STXRB - {0, 0, 0, 0, 1}, // STLXRB - {0, 0, 1, 0, 0}, // LDXRB - {0, 0, 1, 0, 1}, // LDAXRB - {0, 1, 0, 0, 1}, // STLRB - {0, 1, 1, 0, 1}, // LDARB - {1, 0, 0, 0, 0}, // STXRH - {1, 0, 0, 0, 1}, // STLXRH - {1, 0, 1, 0, 0}, // LDXRH - {1, 0, 1, 0, 1}, // LDAXRH - {1, 1, 0, 0, 1}, // STLRH - {1, 1, 
1, 0, 1}, // LDARH - {2, 0, 0, 0, 0}, // STXR - {3, 0, 0, 0, 0}, // (64bit) STXR - {2, 0, 0, 0, 1}, // STLXR - {3, 0, 0, 0, 1}, // (64bit) STLXR - {2, 0, 0, 1, 0}, // STXP - {3, 0, 0, 1, 0}, // (64bit) STXP - {2, 0, 0, 1, 1}, // STLXP - {3, 0, 0, 1, 1}, // (64bit) STLXP - {2, 0, 1, 0, 0}, // LDXR - {3, 0, 1, 0, 0}, // (64bit) LDXR - {2, 0, 1, 0, 1}, // LDAXR - {3, 0, 1, 0, 1}, // (64bit) LDAXR - {2, 0, 1, 1, 0}, // LDXP - {3, 0, 1, 1, 0}, // (64bit) LDXP - {2, 0, 1, 1, 1}, // LDAXP - {3, 0, 1, 1, 1}, // (64bit) LDAXP - {2, 1, 0, 0, 1}, // STLR - {3, 1, 0, 0, 1}, // (64bit) STLR - {2, 1, 1, 0, 1}, // LDAR - {3, 1, 1, 0, 1}, // (64bit) LDAR + {0, 0, 0, 0, 0}, // STXRB + {0, 0, 0, 0, 1}, // STLXRB + {0, 0, 1, 0, 0}, // LDXRB + {0, 0, 1, 0, 1}, // LDAXRB + {0, 1, 0, 0, 1}, // STLRB + {0, 1, 1, 0, 1}, // LDARB + {1, 0, 0, 0, 0}, // STXRH + {1, 0, 0, 0, 1}, // STLXRH + {1, 0, 1, 0, 0}, // LDXRH + {1, 0, 1, 0, 1}, // LDAXRH + {1, 1, 0, 0, 1}, // STLRH + {1, 1, 1, 0, 1}, // LDARH + {2, 0, 0, 0, 0}, // STXR + {3, 0, 0, 0, 0}, // (64bit) STXR + {2, 0, 0, 0, 1}, // STLXR + {3, 0, 0, 0, 1}, // (64bit) STLXR + {2, 0, 0, 1, 0}, // STXP + {3, 0, 0, 1, 0}, // (64bit) STXP + {2, 0, 0, 1, 1}, // STLXP + {3, 0, 0, 1, 1}, // (64bit) STLXP + {2, 0, 1, 0, 0}, // LDXR + {3, 0, 1, 0, 0}, // (64bit) LDXR + {2, 0, 1, 0, 1}, // LDAXR + {3, 0, 1, 0, 1}, // (64bit) LDAXR + {2, 0, 1, 1, 0}, // LDXP + {3, 0, 1, 1, 0}, // (64bit) LDXP + {2, 0, 1, 1, 1}, // LDAXP + {3, 0, 1, 1, 1}, // (64bit) LDAXP + {2, 1, 0, 0, 1}, // STLR + {3, 1, 0, 0, 1}, // (64bit) STLR + {2, 1, 1, 0, 1}, // LDAR + {3, 1, 1, 0, 1}, // (64bit) LDAR }; void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr) { @@ -535,8 +549,7 @@ void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr "{}: Received too large distance: {:X}", __func__, distance); Rt = DecodeReg(Rt); - Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | - ((static_cast(distance) << 5) & 0xFFFFE0) | Rt); + Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | ((static_cast(distance) << 5) & 0xFFFFE0) | Rt); } void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr) { @@ -552,8 +565,7 @@ void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const voi "{}: Received too large distance: {:X}", __func__, distance); Rt = DecodeReg(Rt); - Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | (bits << 19) | - ((static_cast(distance) << 5) & 0x7FFE0) | Rt); + Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | (bits << 19) | ((static_cast(distance) << 5) & 0x7FFE0) | Rt); } void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr) { @@ -579,35 +591,29 @@ void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm) { ASSERT_MSG(!(imm & ~0xFFFF), "{}: Exception instruction too large immediate: {}", __func__, imm); - Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | - ExcEnc[instenc][2]); + Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | ExcEnc[instenc][2]); } void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt) { Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt); } -void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, - ARM64Reg Rm, ArithOption Option) { +void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption 
Option) { bool b64Bit = Is64Bit(Rd); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) | - (Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? (1 << 21) : 0) | (Rm << 16) | - Option.GetData() | (Rn << 5) | Rd); + Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) | (Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? (1 << 21) : 0) | (Rm << 16) | Option.GetData() | (Rn << 5) | Rd); } -void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, - ARM64Reg Rm) { +void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { bool b64Bit = Is64Bit(Rd); Rd = DecodeReg(Rd); Rm = DecodeReg(Rm); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0xD0 << 21) | (Rm << 16) | (Rn << 5) | - Rd); + Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0xD0 << 21) | (Rm << 16) | (Rn << 5) | Rd); } void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) { @@ -617,31 +623,26 @@ void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 n ASSERT_MSG(!(nzcv & ~0xF), "{}: Flags out of range: {}", __func__, nzcv); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (imm << 16) | (cond << 12) | - (1 << 11) | (Rn << 5) | nzcv); + Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (imm << 16) | (cond << 12) | (1 << 11) | (Rn << 5) | nzcv); } -void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, - CCFlags cond) { +void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond) { bool b64Bit = Is64Bit(Rm); ASSERT_MSG(!(nzcv & ~0xF), "{}: Flags out of range: {}", __func__, nzcv); Rm = DecodeReg(Rm); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (Rm << 16) | (cond << 12) | - (Rn << 5) | nzcv); + Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (Rm << 16) | (cond << 12) | (Rn << 5) | nzcv); } -void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, - CCFlags cond) { +void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) { bool b64Bit = Is64Bit(Rd); Rd = DecodeReg(Rd); Rm = DecodeReg(Rm); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | (0xD4 << 21) | (Rm << 16) | - (cond << 12) | (CondSelectEnc[instenc][1] << 10) | (Rn << 5) | Rd); + Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | (0xD4 << 21) | (Rm << 16) | (cond << 12) | (CondSelectEnc[instenc][1] << 10) | (Rn << 5) | Rd); } void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn) { @@ -649,8 +650,7 @@ void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn) { Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (0x2D6 << 21) | (Data1SrcEnc[instenc][0] << 16) | - (Data1SrcEnc[instenc][1] << 10) | (Rn << 5) | Rd); + Write32((b64Bit << 31) | (0x2D6 << 21) | (Data1SrcEnc[instenc][0] << 16) | (Data1SrcEnc[instenc][1] << 10) | (Rn << 5) | Rd); } void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { @@ -659,31 +659,26 @@ void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, AR Rd = DecodeReg(Rd); Rm = DecodeReg(Rm); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (0x0D6 << 21) | (Rm << 
16) | (Data2SrcEnc[instenc] << 10) | (Rn << 5) | - Rd); + Write32((b64Bit << 31) | (0x0D6 << 21) | (Rm << 16) | (Data2SrcEnc[instenc] << 10) | (Rn << 5) | Rd); } -void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, - ARM64Reg Ra) { +void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) { bool b64Bit = Is64Bit(Rd); Rd = DecodeReg(Rd); Rm = DecodeReg(Rm); Rn = DecodeReg(Rn); Ra = DecodeReg(Ra); - Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | (Rm << 16) | - (Data3SrcEnc[instenc][1] << 15) | (Ra << 10) | (Rn << 5) | Rd); + Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | (Rm << 16) | (Data3SrcEnc[instenc][1] << 15) | (Ra << 10) | (Rn << 5) | Rd); } -void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, - ArithOption Shift) { +void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) { bool b64Bit = Is64Bit(Rd); Rd = DecodeReg(Rd); Rm = DecodeReg(Rm); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) | - (LogicalEnc[instenc][1] << 21) | Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd); + Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) | (LogicalEnc[instenc][1] << 21) | Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd); } void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, s32 imm) { @@ -693,25 +688,20 @@ void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, s32 imm) { ASSERT_MSG(IsInRangeImm19(imm), "{}: offset too large {}", __func__, imm); Rt = DecodeReg(Rt); - if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set + if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set bitop |= 0x1; Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (MaskImm19(imm) << 5) | Rt); } -void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, - ARM64Reg Rt) { +void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt) { Rs = DecodeReg(Rs); Rt2 = DecodeReg(Rt2); Rn = DecodeReg(Rn); Rt = DecodeReg(Rt); - Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | - (LoadStoreExcEnc[instenc][1] << 23) | (LoadStoreExcEnc[instenc][2] << 22) | - (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | (LoadStoreExcEnc[instenc][4] << 15) | - (Rt2 << 10) | (Rn << 5) | Rt); + Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | (LoadStoreExcEnc[instenc][1] << 23) | (LoadStoreExcEnc[instenc][2] << 22) | (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | (LoadStoreExcEnc[instenc][4] << 15) | (Rt2 << 10) | (Rn << 5) | Rt); } -void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, - u32 imm) { +void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) { bool b64Bit = Is64Bit(Rt); bool b128Bit = IsQuad(Rt); bool bVec = IsVector(Rt); @@ -749,8 +739,7 @@ void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM Rt = DecodeReg(Rt); Rn = DecodeReg(Rn); - Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | - Rt); + Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | Rt); } void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size) { @@ 
-786,36 +775,30 @@ void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | (immr << 16) | - (imms << 10) | (Rn << 5) | Rd); + Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | (immr << 16) | (imms << 10) | (Rn << 5) | Rd); } -void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, - ArithOption Rm) { +void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) { ASSERT_MSG(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "Shifted registers are not supported used Indexed registers"); Rt = DecodeReg(Rt); Rn = DecodeReg(Rn); ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg()); - Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | Rm.GetData() | - (1 << 11) | (Rn << 5) | Rt); + Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | Rm.GetData() | (1 << 11) | (Rn << 5) | Rt); } -void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, - ARM64Reg Rd) { +void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd) { bool b64Bit = Is64Bit(Rd); ASSERT_MSG(!(imm & ~0xFFF), "{}: immediate too large: {:X}", __func__, imm); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | - (imm << 10) | (Rn << 5) | Rd); + Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | (imm << 10) | (Rn << 5) | Rd); } -void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, - int n) { +void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n) { // Sometimes Rd is fixed to SP, but can still be 32bit or 64bit. // Use Rn to determine bitness here. 
bool b64Bit = Is64Bit(Rn); @@ -823,12 +806,10 @@ void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 i Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | (immr << 16) | (imms << 10) | - (Rn << 5) | Rd); + Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | (immr << 16) | (imms << 10) | (Rn << 5) | Rd); } -void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, - ARM64Reg Rn, s32 imm) { +void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) { bool b64Bit = Is64Bit(Rt); u32 type_encode = 0; @@ -858,8 +839,7 @@ void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64R Rt2 = DecodeReg(Rt2); Rn = DecodeReg(Rn); - Write32((op << 30) | (0b101 << 27) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) | - (Rt2 << 10) | (Rn << 5) | Rt); + Write32((op << 30) | (0b101 << 27) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt); } void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm) { Rd = DecodeReg(Rd); @@ -877,7 +857,7 @@ void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64 // FixupBranch branching void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) { - if(!target) + if (!target) target = m_code; bool Not = false; u32 inst = 0; @@ -885,10 +865,10 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) { distance >>= 2; switch (branch.type) { - case 1: // CBNZ + case 1: // CBNZ Not = true; [[fallthrough]]; - case 0: // CBZ + case 0: // CBZ { ASSERT_MSG(IsInRangeImm19(distance), "{}({}): Received too large distance: {:X}", __func__, branch.type, distance); @@ -896,28 +876,27 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) { ARM64Reg reg = DecodeReg(branch.reg); inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (MaskImm19(distance) << 5) | reg; } break; - case 2: // B (conditional) + case 2: // B (conditional) ASSERT_MSG(IsInRangeImm19(distance), "{}({}): Received too large distance: {:X}", __func__, branch.type, distance); inst = (0x2A << 25) | (MaskImm19(distance) << 5) | branch.cond; break; - case 4: // TBNZ + case 4: // TBNZ Not = true; [[fallthrough]]; - case 3: // TBZ + case 3: // TBZ { ASSERT_MSG(IsInRangeImm14(distance), "{}({}): Received too large distance: {:X}", __func__, branch.type, distance); ARM64Reg reg = DecodeReg(branch.reg); - inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | - ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg; + inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg; } break; - case 5: // B (unconditional) + case 5: // B (unconditional) ASSERT_MSG(IsInRangeImm26(distance), "{}({}): Received too large distance: {:X}", __func__, branch.type, distance); inst = (0x5 << 26) | MaskImm26(distance); break; - case 6: // BL (unconditional) + case 6: // BL (unconditional) ASSERT_MSG(IsInRangeImm26(distance), "{}({}): Received too large distance: {:X}", __func__, branch.type, distance); inst = (0x25 << 26) | MaskImm26(distance); @@ -1021,7 +1000,7 @@ void ARM64XEmitter::BL(const void* ptr) { void ARM64XEmitter::QuickCallFunction(const void* func, ARM64Reg scratchreg) { s64 distance = reinterpret_cast(func) - reinterpret_cast(m_code); - distance >>= 2; // Can only branch to opcode-aligned 
(4) addresses + distance >>= 2; // Can only branch to opcode-aligned (4) addresses if (!IsInRangeImm26(distance)) { // WARN_LOG( "Distance too far in function call (%p to %p)! Using scratch.", // m_code, func); @@ -1778,8 +1757,7 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) { return; } - if ((Is64Bit(Rd) && imm == std::numeric_limits::max()) || - (!Is64Bit(Rd) && imm == std::numeric_limits::max())) { + if ((Is64Bit(Rd) && imm == std::numeric_limits::max()) || (!Is64Bit(Rd) && imm == std::numeric_limits::max())) { // Max unsigned value (or if signed, -1) // Set to ~ZR ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP; @@ -1890,8 +1868,8 @@ void ARM64XEmitter::ABI_PushRegisters(u32 registers) { } else { STP(INDEX_PRE, gpr[0], gpr[1], SP, -stack_size); it += 2; - } - + } + // Fast store for all other registers, this is always an even number. for (int i = 0; i < (num_regs - 1) / 2; i++) { STP(INDEX_SIGNED, gpr[it], gpr[it + 1], SP, 16 * (i + 1)); @@ -1943,8 +1921,7 @@ void ARM64XEmitter::ABI_PopRegisters(u32 registers) { } // Float Emitter -void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, - ARM64Reg Rn, s32 imm) { +void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) { Rt = DecodeReg(Rt); Rn = DecodeReg(Rn); u32 encoded_size = 0; @@ -1986,35 +1963,29 @@ void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, encoded_imm |= 3; } - Write32((encoded_size << 30) | (0xF << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) | - (size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt); + Write32((encoded_size << 30) | (0xF << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) | (size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt); } -void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, - ARM64Reg Rn, ARM64Reg Rm) { +void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ASSERT_MSG(!IsQuad(Rd), "{} only supports double and single registers!", __func__); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (Rm << 16) | - (opcode << 12) | (1 << 11) | (Rn << 5) | Rd); + Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (Rm << 16) | (opcode << 12) | (1 << 11) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, - ARM64Reg Rm) { +void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ASSERT_MSG(!IsSingle(Rd), "{} doesn't support singles!", __func__); bool quad = IsQuad(Rd); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((quad << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (Rm << 16) | - (opcode << 11) | (1 << 10) | (Rn << 5) | Rd); + Write32((quad << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, - ARM64Reg Rm) { +void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ASSERT_MSG(!IsQuad(Rd), "{} doesn't support quads!", __func__); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); @@ -2038,74 +2009,60 @@ void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 
size, u32 opcode, ARM64R break; } - - Write32((U << 29) | (0b1011110001 << 21) | (esize << 22) | (Rm << 16) | - (opcode << 11) | (1 << 10) | (Rn << 5) | Rd); + Write32((U << 29) | (0b1011110001 << 21) | (esize << 22) | (Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd); } void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn) { Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((Q << 30) | (op << 29) | (0b111 << 25) | (imm5 << 16) | (imm4 << 11) | (1 << 10) | - (Rn << 5) | Rd); + Write32((Q << 30) | (op << 29) | (0b111 << 25) | (imm5 << 16) | (imm4 << 11) | (1 << 10) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, - ARM64Reg Rn) { +void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) { ASSERT_MSG(!IsSingle(Rd), "{} doesn't support singles!", __func__); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((Q << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (opcode << 12) | (1 << 11) | - (Rn << 5) | Rd); + Write32((Q << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (opcode << 12) | (1 << 11) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, - ARM64Reg Rt, ARM64Reg Rn) { +void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn) { ASSERT_MSG(!IsSingle(Rt), "{} doesn't support singles!", __func__); bool quad = IsQuad(Rt); Rt = DecodeReg(Rt); Rn = DecodeReg(Rn); - Write32((quad << 30) | (0b1101 << 24) | (L << 22) | (R << 21) | (opcode << 13) | (S << 12) | - (size << 10) | (Rn << 5) | Rt); + Write32((quad << 30) | (0b1101 << 24) | (L << 22) | (R << 21) | (opcode << 13) | (S << 12) | (size << 10) | (Rn << 5) | Rt); } -void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, - ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) { +void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) { ASSERT_MSG(!IsSingle(Rt), "{} doesn't support singles!", __func__); bool quad = IsQuad(Rt); Rt = DecodeReg(Rt); Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (Rm << 16) | (opcode << 13) | - (S << 12) | (size << 10) | (Rn << 5) | Rt); + Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (Rm << 16) | (opcode << 13) | (S << 12) | (size << 10) | (Rn << 5) | Rt); } -void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, - ARM64Reg Rn) { +void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) { ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) | - (Rn << 5) | Rd); + Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, - ARM64Reg Rd, ARM64Reg Rn) { +void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) { ASSERT_MSG(Rn <= SP, "{} only supports GPR as source!", __func__); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | (opcode << 16) | - (Rn << 5) | 
Rd); + Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | (opcode << 16) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, - bool sign) { +void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign) { DEBUG_ASSERT_MSG(IsScalar(Rn), "fcvts: Rn must be floating point"); if (IsGPR(Rd)) { // Use the encoding that transfers the result to a GPR. @@ -2160,8 +2117,7 @@ void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, Roundin sz |= 2; break; } - Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) | - (Rn << 5) | Rd); + Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) | (Rn << 5) | Rd); } } @@ -2173,13 +2129,11 @@ void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) { EmitConvertScalarToInt(Rd, Rn, round, true); } -void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, - u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn) { +void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn) { Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) | - (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd); + Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) | (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd); } void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm) { @@ -2189,12 +2143,10 @@ void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Re Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (op << 14) | - (1 << 13) | (Rn << 5) | opcode2); + Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (op << 14) | (1 << 13) | (Rn << 5) | opcode2); } -void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, - ARM64Reg Rm) { +void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__); bool is_double = IsDouble(Rd); @@ -2202,8 +2154,7 @@ void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (cond << 12) | - (3 << 10) | (Rn << 5) | Rd); + Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (cond << 12) | (3 << 10) | (Rn << 5) | Rd); } void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { @@ -2223,8 +2174,7 @@ void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | (1 << 11) | - (Rn << 5) | Rd); + Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | (1 << 11) | (Rn << 5) | Rd); } void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8) { @@ -2234,32 +2184,26 @@ void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64R Rd = DecodeReg(Rd); - Write32((M << 
31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | (imm8 << 13) | - (1 << 12) | (imm5 << 5) | Rd); + Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | (imm8 << 13) | (1 << 12) | (imm5 << 5) | Rd); } -void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, - ARM64Reg Rn) { +void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) { ASSERT_MSG(immh, "{} bad encoding! Can't have zero immh", __func__); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((Q << 30) | (U << 29) | (0xF << 24) | (immh << 19) | (immb << 16) | (opcode << 11) | - (1 << 10) | (Rn << 5) | Rd); + Write32((Q << 30) | (U << 29) | (0xF << 24) | (immh << 19) | (immb << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, - ARM64Reg Rn) { +void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) { Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((2 << 30) | (U << 29) | (0x3E << 23) | (immh << 19) | (immb << 16) | (opcode << 11) | - (1 << 10) | (Rn << 5) | Rd); + Write32((2 << 30) | (U << 29) | (0x3E << 23) | (immh << 19) | (immb << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, - ARM64Reg Rn) { +void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn) { bool quad = IsQuad(Rt); u32 encoded_size = 0; @@ -2273,12 +2217,10 @@ void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opc Rt = DecodeReg(Rt); Rn = DecodeReg(Rn); - Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | (encoded_size << 10) | - (Rn << 5) | Rt); + Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | (encoded_size << 10) | (Rn << 5) | Rt); } -void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, - ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) { +void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) { bool quad = IsQuad(Rt); u32 encoded_size = 0; @@ -2293,31 +2235,26 @@ void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((quad << 30) | (0b11001 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) | - (encoded_size << 10) | (Rn << 5) | Rt); + Write32((quad << 30) | (0b11001 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) | (encoded_size << 10) | (Rn << 5) | Rt); } -void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, - ARM64Reg Rn) { +void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) { ASSERT_MSG(!IsQuad(Rd), "{} doesn't support vector!", __func__); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) | - (Rn << 5) | Rd); + Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) | (Rn << 5) | Rd); } -void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, - ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { +void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { bool quad = IsQuad(Rd); Rd = 
DecodeReg(Rd); Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); - Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) | (Rm << 16) | - (opcode << 12) | (H << 11) | (Rn << 5) | Rd); + Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) | (Rm << 16) | (opcode << 12) | (H << 11) | (Rn << 5) | Rd); } void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm) { @@ -2328,8 +2265,7 @@ void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt); } -void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, - ARM64Reg Rt2, ARM64Reg Rn, s32 imm) { +void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) { u32 type_encode = 0; u32 opc = 0; @@ -2366,12 +2302,10 @@ void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, Rt2 = DecodeReg(Rt2); Rn = DecodeReg(Rn); - Write32((opc << 30) | (0b1011 << 26) | (type_encode << 23) | (load << 22) | - ((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt); + Write32((opc << 30) | (0b1011 << 26) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt); } -void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, - ArithOption Rm) { +void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) { ASSERT_MSG(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "{} must contain an extended reg as Rm!", __func__); @@ -2402,8 +2336,7 @@ void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64 Rn = DecodeReg(Rn); ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg()); - Write32((encoded_size << 30) | (encoded_op << 22) | (0b111100001 << 21) | (decoded_Rm << 16) | - Rm.GetData() | (1 << 11) | (Rn << 5) | Rt); + Write32((encoded_size << 30) | (encoded_op << 22) | (0b111100001 << 21) | (decoded_Rm << 16) | Rm.GetData() | (1 << 11) | (Rn << 5) | Rt); } void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh) { @@ -2416,8 +2349,7 @@ void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd } v; v.hex = abcdefgh; Rd = DecodeReg(Rd); - Write32((Q << 30) | (op << 29) | (0xF << 24) | (v.in.abc << 16) | (cmode << 12) | (o2 << 11) | - (1 << 10) | (v.in.defgh << 5) | Rd); + Write32((Q << 30) | (op << 29) | (0xF << 24) | (v.in.abc << 16) | (cmode << 12) | (o2 << 11) | (1 << 10) | (v.in.defgh << 5) | Rd); } void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) { @@ -2675,8 +2607,7 @@ void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn) { opcode = 0b0010; EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn); } -void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, - ARM64Reg Rm) { +void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) { ASSERT_MSG(!(count == 0 || count > 4), "{} must have a count of 1 to 4 registers!", __func__); ASSERT_MSG(type == INDEX_POST, "{} only supports post indexing!", __func__); @@ -2704,8 +2635,7 @@ void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn) { opcode = 0b0010; EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn); } -void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, 
ARM64Reg Rt, ARM64Reg Rn, - ARM64Reg Rm) { +void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) { ASSERT_MSG(!(count == 0 || count > 4), "{} must have a count of 1 to 4 registers!", __func__); ASSERT_MSG(type == INDEX_POST, "{} only supports post indexing!", __func__); @@ -2751,17 +2681,15 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top) { } Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((sf << 31) | (encoded_size << 22) | (0x1e2 << 20) | (rmode << 19) | (opcode << 16) | (Rn << 5) | Rd); + Write32((sf << 31) | (encoded_size << 22) | (0x1e2 << 20) | (rmode << 19) | (opcode << 16) | (Rn << 5) | Rd); } } // Loadstore paired -void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, - s32 imm) { +void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) { EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm); } -void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, - s32 imm) { +void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) { EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm); } @@ -2825,8 +2753,7 @@ void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg R EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3); } -void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, - ARM64Reg Ra, int opcode) { +void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) { int type = isDouble ? 1 : 0; Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); @@ -2834,8 +2761,7 @@ void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg R Ra = DecodeReg(Ra); int o1 = opcode >> 1; int o0 = opcode & 1; - m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (Rm << 16) | (o0 << 15) | - (Ra << 10) | (Rn << 5) | Rd); + m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (Rm << 16) | (o0 << 15) | (Ra << 10) | (Rn << 5) | Rd); } // Scalar three same @@ -3461,7 +3387,7 @@ void ARM64FloatEmitter::MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift) { default: break; } - } else // 64 + } else // 64 { ASSERT_MSG(shift == 0, "{}(size64) doesn't support shift!", __func__); @@ -3706,8 +3632,7 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) } } -void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative, - bool flags) { +void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative, bool flags) { switch ((negative << 1) | static_cast(flags)) { case 0: ADD(Rd, Rn, static_cast(imm), shift); @@ -3724,8 +3649,7 @@ void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, } } -void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, - ARM64Reg scratch) { +void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, ARM64Reg scratch) { bool has_scratch = scratch != INVALID_REG; u64 imm_neg = Is64Bit(Rd) ? ~imm + 1 : (~imm + 1) & 0xFFFFFFFFuLL; bool neg_neg = negative ? 
false : true; @@ -3900,4 +3824,4 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch) { DUP(32, Rd, Rd, 0); } -} // namespace Dynarmic::BackendA64::Arm64Gen +} // namespace Dynarmic::BackendA64::Arm64Gen diff --git a/src/dynarmic/backend/A64/emitter/a64_emitter.h b/src/dynarmic/backend/A64/emitter/a64_emitter.h index 5e71d079..53347c4a 100644 --- a/src/dynarmic/backend/A64/emitter/a64_emitter.h +++ b/src/dynarmic/backend/A64/emitter/a64_emitter.h @@ -265,7 +265,11 @@ constexpr ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return static_cast(reg | 0xC0); } -enum OpType { TYPE_IMM = 0, TYPE_REG, TYPE_IMMSREG, TYPE_RSR, TYPE_MEM }; +enum OpType { TYPE_IMM = 0, + TYPE_REG, + TYPE_IMMSREG, + TYPE_RSR, + TYPE_MEM }; enum ShiftType { ST_LSL = 0, @@ -474,8 +478,7 @@ private: void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn); void EncodeExceptionInst(u32 instenc, u32 imm); void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt); - void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, - ArithOption Option); + void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond); void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond); @@ -494,8 +497,7 @@ private: void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm); void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd); void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n); - void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, - ARM64Reg Rn, s32 imm); + void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm); void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm); void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm); @@ -503,7 +505,8 @@ protected: void Write32(u32 value); public: - ARM64XEmitter() : m_code(nullptr), m_lastCacheFlushEnd(nullptr) { + ARM64XEmitter() + : m_code(nullptr), m_lastCacheFlushEnd(nullptr) { } ARM64XEmitter(u8* code_ptr) { @@ -831,7 +834,7 @@ public: // Wrapper around MOVZ+MOVK void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true); bool MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2); - template + template void MOVP2R(ARM64Reg Rd, P* ptr) { ASSERT_MSG(Is64Bit(Rd), "Can't store pointers in 32-bit registers"); MOVI2R(Rd, (uintptr_t)ptr); @@ -848,8 +851,7 @@ public: void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); - void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, - ARM64Reg scratch); + void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, ARM64Reg scratch); void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG); @@ -872,14 +874,14 @@ public: // Unfortunately, calling operator() directly is undefined behavior in C++ // (this method might be a thunk in the case of multi-inheritance) so we // have to go through a 
trampoline function. - template + template static T CallLambdaTrampoline(const std::function* f, Args... args) { return (*f)(args...); } // This function expects you to have set up the state. // Overwrites X0 and X30 - template + template ARM64Reg ABI_SetupLambda(const std::function* f) { auto trampoline = &ARM64XEmitter::CallLambdaTrampoline; MOVI2R(X30, (uintptr_t)trampoline); @@ -889,7 +891,7 @@ public: // Plain function call void QuickCallFunction(const void* func, ARM64Reg scratchreg = X16); - template + template void QuickCallFunction(T func, ARM64Reg scratchreg = X16) { QuickCallFunction((const void*)func, scratchreg); } @@ -897,7 +899,8 @@ public: class ARM64FloatEmitter { public: - ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) { + ARM64FloatEmitter(ARM64XEmitter* emit) + : m_emit(emit) { } void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm); @@ -935,7 +938,7 @@ public: void FABS(ARM64Reg Rd, ARM64Reg Rn); void FNEG(ARM64Reg Rd, ARM64Reg Rn); void FSQRT(ARM64Reg Rd, ARM64Reg Rn); - void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP + void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP // Scalar - 2 Source void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); @@ -959,7 +962,7 @@ public: void UQADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void SQSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void UQSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); - + // Scalar floating point immediate void FMOV(ARM64Reg Rd, uint8_t imm8); @@ -1110,22 +1113,17 @@ private: } // Emitting functions - void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, - s32 imm); - void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, - ARM64Reg Rm); + void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm); + void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn); void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); - void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, - ARM64Reg Rn); - void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, - ARM64Reg Rn, ARM64Reg Rm); + void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn); + void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm); void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); - void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, - int scale, ARM64Reg Rd, ARM64Reg Rn); + void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn); void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm); void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); @@ -1133,19 
+1131,14 @@ private: void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn); - void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, - ARM64Reg Rm); + void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm); void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); - void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, - ARM64Reg Rm); + void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm); void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign); - void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, - int opcode); - void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, - ARM64Reg Rn, s32 imm); - void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, - ArithOption Rm); + void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode); + void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm); + void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm); void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh); void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper); @@ -1170,4 +1163,4 @@ private: } }; -} // namespace Dynarmic::BackendA64::Arm64Gen +} // namespace Dynarmic::BackendA64::Arm64Gen diff --git a/src/dynarmic/backend/A64/emitter/arm_common.h b/src/dynarmic/backend/A64/emitter/arm_common.h index 9562a7a0..309a7dae 100644 --- a/src/dynarmic/backend/A64/emitter/arm_common.h +++ b/src/dynarmic/backend/A64/emitter/arm_common.h @@ -6,23 +6,23 @@ namespace Dynarmic::BackendA64 { enum CCFlags { - CC_EQ = 0, // Equal - CC_NEQ, // Not equal - CC_CS, // Carry Set - CC_CC, // Carry Clear - CC_MI, // Minus (Negative) - CC_PL, // Plus - CC_VS, // Overflow - CC_VC, // No Overflow - CC_HI, // Unsigned higher - CC_LS, // Unsigned lower or same - CC_GE, // Signed greater than or equal - CC_LT, // Signed less than - CC_GT, // Signed greater than - CC_LE, // Signed less than or equal - CC_AL, // Always (unconditional) 14 - CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same - CC_LO = CC_CC, // Alias of CC_CC Unsigned lower + CC_EQ = 0, // Equal + CC_NEQ, // Not equal + CC_CS, // Carry Set + CC_CC, // Carry Clear + CC_MI, // Minus (Negative) + CC_PL, // Plus + CC_VS, // Overflow + CC_VC, // No Overflow + CC_HI, // Unsigned higher + CC_LS, // Unsigned lower or same + CC_GE, // Signed greater than or equal + CC_LT, // Signed less than + CC_GT, // Signed greater than + CC_LE, // Signed less than or equal + CC_AL, // Always (unconditional) 14 + CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same + CC_LO = CC_CC, // Alias of CC_CC Unsigned lower }; const u32 NO_COND = 0xE0000000; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/emitter/code_block.h b/src/dynarmic/backend/A64/emitter/code_block.h index 665d0d4e..738b3357 100644 
--- a/src/dynarmic/backend/A64/emitter/code_block.h +++ b/src/dynarmic/backend/A64/emitter/code_block.h @@ -8,9 +8,9 @@ #include #ifdef _WIN32 -#include +# include #else -#include +# include #endif #include @@ -21,7 +21,7 @@ namespace Dynarmic::BackendA64 { // You get memory management for free, plus, you can use all emitter functions // without having to prefix them with gen-> or something similar. Example // implementation: class JIT : public CodeBlock {} -template +template class CodeBlock : public T { private: // A privately used function to set the executable RAM space to something @@ -57,11 +57,11 @@ public: #if defined(_WIN32) void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); #else -#if defined(__APPLE__) +# if defined(__APPLE__) void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0); -#else +# else void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0); -#endif +# endif if (ptr == MAP_FAILED) ptr = nullptr; @@ -137,4 +137,4 @@ public: m_children.emplace_back(child); } }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/exception_handler.h b/src/dynarmic/backend/A64/exception_handler.h index 1032b1e8..d0a8b1d8 100644 --- a/src/dynarmic/backend/A64/exception_handler.h +++ b/src/dynarmic/backend/A64/exception_handler.h @@ -7,8 +7,8 @@ #pragma once #include -#include #include +#include #include @@ -32,9 +32,10 @@ public: void Register(BlockOfCode& code, std::function segv_callback = nullptr); bool SupportsFastmem() const; + private: struct Impl; std::unique_ptr impl; }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/exception_handler_generic.cpp b/src/dynarmic/backend/A64/exception_handler_generic.cpp index 713431e2..433eddb9 100644 --- a/src/dynarmic/backend/A64/exception_handler_generic.cpp +++ b/src/dynarmic/backend/A64/exception_handler_generic.cpp @@ -22,4 +22,4 @@ bool ExceptionHandler::SupportsFastmem() const { return false; } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/exception_handler_posix.cpp b/src/dynarmic/backend/A64/exception_handler_posix.cpp index a64fdc94..2c9bd903 100644 --- a/src/dynarmic/backend/A64/exception_handler_posix.cpp +++ b/src/dynarmic/backend/A64/exception_handler_posix.cpp @@ -8,14 +8,13 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include #include #include - -#include #ifdef __APPLE__ -#include +# include #else -#include +# include #endif #include @@ -117,8 +116,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { "dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n", PC); - struct sigaction* retry_sa = - sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus; + struct sigaction* retry_sa = sig == SIGSEGV ? 
&sig_handler.old_sa_segv : &sig_handler.old_sa_bus; if (retry_sa->sa_flags & SA_SIGINFO) { retry_sa->sa_sigaction(sig, info, raw_context); return; @@ -133,7 +131,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { retry_sa->sa_handler(sig); } -} // anonymous namespace +} // anonymous namespace struct ExceptionHandler::Impl final { Impl(BlockOfCode& code, std::function cb) { @@ -162,4 +160,4 @@ bool ExceptionHandler::SupportsFastmem() const { return static_cast(impl); } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/hostloc.cpp b/src/dynarmic/backend/A64/hostloc.cpp index b4d4facf..6c81f0a5 100644 --- a/src/dynarmic/backend/A64/hostloc.cpp +++ b/src/dynarmic/backend/A64/hostloc.cpp @@ -18,4 +18,4 @@ Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc) { return EncodeRegToQuad(static_cast(static_cast(loc) - static_cast(HostLoc::Q0))); } -} // namespace Dynarmic::BackendX64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/hostloc.h b/src/dynarmic/backend/A64/hostloc.h index 14ca64e0..0f138908 100644 --- a/src/dynarmic/backend/A64/hostloc.h +++ b/src/dynarmic/backend/A64/hostloc.h @@ -48,7 +48,7 @@ enum class HostLoc { X29, X30, - SP, // 64bit stack pointer + SP, // 64bit stack pointer // Qword FPR registers Q0, @@ -138,32 +138,76 @@ using HostLocList = std::initializer_list; // X30 is the link register. // In order of desireablity based first on ABI constexpr HostLocList any_gpr = { - HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23, - HostLoc::X24, HostLoc::X25, + HostLoc::X19, + HostLoc::X20, + HostLoc::X21, + HostLoc::X22, + HostLoc::X23, + HostLoc::X24, + HostLoc::X25, - HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12, - HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17, + HostLoc::X8, + HostLoc::X9, + HostLoc::X10, + HostLoc::X11, + HostLoc::X12, + HostLoc::X13, + HostLoc::X14, + HostLoc::X15, + HostLoc::X16, + HostLoc::X17, - HostLoc::X7, HostLoc::X6, HostLoc::X5, HostLoc::X4, HostLoc::X3, - HostLoc::X2, HostLoc::X1, HostLoc::X0, + HostLoc::X7, + HostLoc::X6, + HostLoc::X5, + HostLoc::X4, + HostLoc::X3, + HostLoc::X2, + HostLoc::X1, + HostLoc::X0, }; constexpr HostLocList any_fpr = { - HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13, - HostLoc::Q14, HostLoc::Q15, + HostLoc::Q8, + HostLoc::Q9, + HostLoc::Q10, + HostLoc::Q11, + HostLoc::Q12, + HostLoc::Q13, + HostLoc::Q14, + HostLoc::Q15, - HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21, - HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27, - HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31, + HostLoc::Q16, + HostLoc::Q17, + HostLoc::Q18, + HostLoc::Q19, + HostLoc::Q20, + HostLoc::Q21, + HostLoc::Q22, + HostLoc::Q23, + HostLoc::Q24, + HostLoc::Q25, + HostLoc::Q26, + HostLoc::Q27, + HostLoc::Q28, + HostLoc::Q29, + HostLoc::Q30, + HostLoc::Q31, - HostLoc::Q7, HostLoc::Q6, HostLoc::Q5, HostLoc::Q4, HostLoc::Q3, HostLoc::Q2, - HostLoc::Q1, HostLoc::Q0, + HostLoc::Q7, + HostLoc::Q6, + HostLoc::Q5, + HostLoc::Q4, + HostLoc::Q3, + HostLoc::Q2, + HostLoc::Q1, + HostLoc::Q0, }; Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc); Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc); -template +template size_t SpillToOpArg(HostLoc loc) { ASSERT(HostLocIsSpill(loc)); @@ -174,4 +218,4 @@ size_t SpillToOpArg(HostLoc loc) { return JitStateType::GetSpillLocationOffsetFromIndex(i); } -} // 
namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/jitstate_info.h b/src/dynarmic/backend/A64/jitstate_info.h index 1df3a5bb..ca1d8211 100644 --- a/src/dynarmic/backend/A64/jitstate_info.h +++ b/src/dynarmic/backend/A64/jitstate_info.h @@ -11,22 +11,21 @@ namespace Dynarmic::BackendA64 { struct JitStateInfo { - template + template JitStateInfo(const JitStateType&) - : offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining)) - , offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run)) - , offsetof_save_host_FPCR(offsetof(JitStateType, save_host_FPCR)) - , offsetof_guest_fpcr(offsetof(JitStateType, guest_fpcr)) - , offsetof_guest_fpsr(offsetof(JitStateType, guest_fpsr)) - , offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr)) - , rsb_ptr_mask(JitStateType::RSBPtrMask) - , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors)) - , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) - , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv)) - , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) - , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) - , offsetof_halt_reason(offsetof(JitStateType, halt_reason)) - {} + : offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining)) + , offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run)) + , offsetof_save_host_FPCR(offsetof(JitStateType, save_host_FPCR)) + , offsetof_guest_fpcr(offsetof(JitStateType, guest_fpcr)) + , offsetof_guest_fpsr(offsetof(JitStateType, guest_fpsr)) + , offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr)) + , rsb_ptr_mask(JitStateType::RSBPtrMask) + , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors)) + , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) + , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv)) + , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) + , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) + , offsetof_halt_reason(offsetof(JitStateType, halt_reason)) {} const size_t offsetof_cycles_remaining; const size_t offsetof_cycles_to_run; @@ -43,4 +42,4 @@ struct JitStateInfo { const size_t offsetof_halt_reason; }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/perf_map.cpp b/src/dynarmic/backend/A64/perf_map.cpp index 5f84781c..7ebb7a53 100644 --- a/src/dynarmic/backend/A64/perf_map.cpp +++ b/src/dynarmic/backend/A64/perf_map.cpp @@ -9,14 +9,14 @@ #ifdef __linux__ -#include -#include -#include -#include -#include +# include +# include +# include -#include -#include +# include +# include +# include +# include namespace Dynarmic::BackendA64 { @@ -41,7 +41,7 @@ void OpenFile() { std::setvbuf(file, nullptr, _IONBF, 0); } -} // anonymous namespace +} // anonymous namespace namespace detail { void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) { @@ -57,7 +57,7 @@ void PerfMapRegister(const void* start, const void* end, const std::string& frie const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast(start), reinterpret_cast(end) - reinterpret_cast(start), friendly_name); std::fwrite(line.data(), sizeof *line.data(), line.size(), file); } -} // namespace detail +} // namespace detail void PerfMapClear() { std::lock_guard guard{mutex}; @@ -71,7 +71,7 @@ void PerfMapClear() { OpenFile(); } -} // namespace Dynarmic::BackendX64 +} // namespace Dynarmic::BackendA64 #else @@ -79,10 +79,10 @@ namespace Dynarmic::BackendA64 { namespace 
detail { void PerfMapRegister(const void*, const void*, const std::string&) {} -} // namespace detail +} // namespace detail void PerfMapClear() {} -} // namespace Dynarmic::BackendX64 +} // namespace Dynarmic::BackendA64 #endif diff --git a/src/dynarmic/backend/A64/perf_map.h b/src/dynarmic/backend/A64/perf_map.h index 7c5caee1..2229c5e2 100644 --- a/src/dynarmic/backend/A64/perf_map.h +++ b/src/dynarmic/backend/A64/perf_map.h @@ -15,7 +15,7 @@ namespace Dynarmic::BackendA64 { namespace detail { void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name); -} // namespace detail +} // namespace detail template void PerfMapRegister(T start, const void* end, const std::string& friendly_name) { @@ -24,4 +24,4 @@ void PerfMapRegister(T start, const void* end, const std::string& friendly_name) void PerfMapClear(); -} // namespace Dynarmic::BackendX64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/reg_alloc.cpp b/src/dynarmic/backend/A64/reg_alloc.cpp index 2382b19e..4a80884e 100644 --- a/src/dynarmic/backend/A64/reg_alloc.cpp +++ b/src/dynarmic/backend/A64/reg_alloc.cpp @@ -4,6 +4,8 @@ * General Public License version 2 or any later version. */ +#include "dynarmic/backend/A64/reg_alloc.h" + #include #include #include @@ -12,7 +14,6 @@ #include #include "dynarmic/backend/A64/abi.h" -#include "dynarmic/backend/A64/reg_alloc.h" namespace Dynarmic::BackendA64 { @@ -67,7 +68,7 @@ static size_t GetBitWidth(IR::Type type) { case IR::Type::U128: return 128; case IR::Type::NZCVFlags: - return 32; // TODO: Update to 16 when flags optimization is done + return 32; // TODO: Update to 16 when flags optimization is done } UNREACHABLE(); return 0; @@ -379,16 +380,9 @@ HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) { return location; } -void RegAlloc::HostCall(IR::Inst* result_def, std::optional arg0, - std::optional arg1, - std::optional arg2, - std::optional arg3, - std::optional arg4, - std::optional arg5, - std::optional arg6, - std::optional arg7) { +void RegAlloc::HostCall(IR::Inst* result_def, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3, std::optional arg4, std::optional arg5, std::optional arg6, std::optional arg7) { constexpr size_t args_count = 8; - constexpr std::array args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8 }; + constexpr std::array args_hostloc = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8}; const std::array, args_count> args = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7}; static const std::vector other_caller_save = [args_hostloc]() { @@ -417,7 +411,7 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional candidates = desired_locations; + std::vector candidates = desired_locations; // Find all locations that have not been allocated.. - const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){ + const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) { return !this->LocInfo(loc).IsLocked(); }); candidates.erase(allocated_locs, candidates.end()); @@ -445,7 +439,7 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const { // Selects the best location out of the available locations. // TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible. 
- std::partition(candidates.begin(), candidates.end(), [this](auto loc){ + std::partition(candidates.begin(), candidates.end(), [this](auto loc) { return this->LocInfo(loc).IsEmpty(); }); @@ -648,4 +642,4 @@ void RegAlloc::EmitExchange(HostLoc a, HostLoc b) { } } -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/backend/A64/reg_alloc.h b/src/dynarmic/backend/A64/reg_alloc.h index 5c3bab9e..3ae2679e 100644 --- a/src/dynarmic/backend/A64/reg_alloc.h +++ b/src/dynarmic/backend/A64/reg_alloc.h @@ -8,9 +8,9 @@ #include #include +#include #include #include -#include #include @@ -84,7 +84,8 @@ public: private: friend class RegAlloc; - explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {} + explicit Argument(RegAlloc& reg_alloc) + : reg_alloc(reg_alloc) {} bool allocated = false; RegAlloc& reg_alloc; @@ -96,7 +97,7 @@ public: using ArgumentInfo = std::array; explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function spill_to_addr) - : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {} + : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {} ArgumentInfo GetArgumentInfo(IR::Inst* inst); @@ -117,14 +118,7 @@ public: Arm64Gen::ARM64Reg ScratchGpr(HostLocList desired_locations = any_gpr); Arm64Gen::ARM64Reg ScratchFpr(HostLocList desired_locations = any_fpr); - void HostCall(IR::Inst* result_def = nullptr, std::optional arg0 = {}, - std::optional arg1 = {}, - std::optional arg2 = {}, - std::optional arg3 = {}, - std::optional arg4 = {}, - std::optional arg5 = {}, - std::optional arg6 = {}, - std::optional arg7 = {}); + void HostCall(IR::Inst* result_def = nullptr, std::optional arg0 = {}, std::optional arg1 = {}, std::optional arg2 = {}, std::optional arg3 = {}, std::optional arg4 = {}, std::optional arg5 = {}, std::optional arg6 = {}, std::optional arg7 = {}); // TODO: Values in host flags @@ -163,4 +157,4 @@ private: void EmitExchange(HostLoc a, HostLoc b); }; -} // namespace Dynarmic::BackendA64 +} // namespace Dynarmic::BackendA64 diff --git a/src/dynarmic/common/math_util.h b/src/dynarmic/common/math_util.h index 3b278031..d29aeb67 100644 --- a/src/dynarmic/common/math_util.h +++ b/src/dynarmic/common/math_util.h @@ -44,8 +44,8 @@ u8 RecipEstimate(u64 a); */ u8 RecipSqrtEstimate(u64 a); -template -constexpr bool IsPow2(T imm){ +template +constexpr bool IsPow2(T imm) { return imm > 0 && (imm & (imm - 1)) == 0; }
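For reference, the final hunk above only reformats IsPow2 in common/math_util.h; the predicate itself is the standard power-of-two bit trick: a positive integer is a power of two exactly when clearing its lowest set bit, imm & (imm - 1), leaves zero. The standalone sketch below (illustrative only, not part of the patch; the IsPow2Sketch name and the test values are assumptions) shows the same predicate compiling and evaluating at compile time:

    // Standalone sketch of the bit trick used by IsPow2 in math_util.h.
    // Not part of the patch; names and values here are illustrative.
    #include <cassert>
    #include <cstdint>

    template<typename T>
    constexpr bool IsPow2Sketch(T imm) {
        // A power of two has exactly one set bit; subtracting 1 clears that bit
        // and sets every lower bit, so ANDing the two values yields zero.
        return imm > 0 && (imm & (imm - 1)) == 0;
    }

    int main() {
        static_assert(IsPow2Sketch<std::uint32_t>(64), "64 == 2^6");
        static_assert(!IsPow2Sketch<std::uint32_t>(96), "96 has two set bits");
        static_assert(!IsPow2Sketch<std::uint32_t>(0), "zero is rejected by imm > 0");
        assert(IsPow2Sketch<std::uint64_t>(std::uint64_t{1} << 40));
        return 0;
    }

Note that the imm > 0 guard is what rejects zero, which would otherwise satisfy (imm & (imm - 1)) == 0.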