diff --git a/src/backend/A64/a32_emit_a64.cpp b/src/backend/A64/a32_emit_a64.cpp new file mode 100644 index 00000000..52052038 --- /dev/null +++ b/src/backend/A64/a32_emit_a64.cpp @@ -0,0 +1,1414 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include +#include +#include + +#include +#include + +#include + +#include "backend/A64/a32_emit_a64.h" +#include "backend/A64/a32_jitstate.h" +#include "backend/A64/abi.h" +#include "backend/A64/block_of_code.h" +#include "backend/A64/devirtualize.h" +#include "backend/A64/emit_a64.h" +#include "backend/A64/emitter/a64_emitter.h" +#include "backend/A64/perf_map.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/scope_exit.h" +#include "common/variant_util.h" +#include "frontend/A32/location_descriptor.h" +#include "frontend/A32/types.h" +#include "frontend/ir/basic_block.h" +#include "frontend/ir/microinstruction.h" +#include "frontend/ir/opcodes.h" + +// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary. +// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart. + +namespace Dynarmic::BackendA64 { + +// Note that unlike the x64 backend these only returns ONLY the offset to register and not the address! +static size_t MJitStateReg(A32::Reg reg) { + return offsetof(A32JitState, Reg) + sizeof(u32) * static_cast(reg); +} + +//static size_t MJitStateExtReg(A32::ExtReg reg) { +// if (A32::IsSingleExtReg(reg)) { +// size_t index = static_cast(reg) - static_cast(A32::ExtReg::S0); +// return offsetof(A32JitState, ExtReg) + sizeof(u32) * index; +// } +// if (A32::IsDoubleExtReg(reg)) { +// size_t index = static_cast(reg) - static_cast(A32::ExtReg::D0); +// return offsetof(A32JitState, ExtReg) + sizeof(u64) * index; +// } +// ASSERT_MSG(false, "Should never happen."); +//} + +A32EmitContext::A32EmitContext(RegAlloc& reg_alloc, IR::Block& block) : EmitContext(reg_alloc, block) {} + +A32::LocationDescriptor A32EmitContext::Location() const { + return A32::LocationDescriptor{block.Location()}; +} + +FP::RoundingMode A32EmitContext::FPSCR_RMode() const { + return Location().FPSCR().RMode(); +} + +u32 A32EmitContext::FPCR() const { + return Location().FPSCR().Value(); +} + +bool A32EmitContext::FPSCR_FTZ() const { + return Location().FPSCR().FTZ(); +} + +bool A32EmitContext::FPSCR_DN() const { + return Location().FPSCR().DN(); +} + +A32EmitA64::A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface) + : EmitA64(code), config(std::move(config)), jit_interface(jit_interface) { + GenMemoryAccessors(); + GenTerminalHandlers(); + code.PreludeComplete(); + ClearFastDispatchTable(); +} + +A32EmitA64::~A32EmitA64() = default; + +A32EmitA64::BlockDescriptor A32EmitA64::Emit(IR::Block& block) { + code.EnableWriting(); + SCOPE_EXIT { + code.DisableWriting(); + }; + + code.AlignCode16(); + const u8* entrypoint = code.GetCodePtr(); + + // Start emitting. + EmitCondPrelude(block); + + RegAlloc reg_alloc{code, A32JitState::SpillCount, SpillToOpArg}; + A32EmitContext ctx{reg_alloc, block}; + + for (auto iter = block.begin(); iter != block.end(); ++iter) { + IR::Inst* inst = &*iter; + + // Call the relevant Emit* member function. + switch (inst->GetOpcode()) { + +#define OPCODE(name, type, ...) 
\ + case IR::Opcode::name: \ + A32EmitA64::Emit##name(ctx, inst); \ + break; +#define A32OPC(name, type, ...) \ + case IR::Opcode::A32##name: \ + A32EmitA64::EmitA32##name(ctx, inst); \ + break; +#define A64OPC(...) +#include "backend/A64/opcodes.inc" +#undef OPCODE +#undef A32OPC +#undef A64OPC + + default: + ASSERT_MSG(false, "Invalid opcode: {}", inst->GetOpcode()); + break; + } + + reg_alloc.EndOfAllocScope(); + } + + reg_alloc.AssertNoMoreUses(); + + EmitAddCycles(block.CycleCount()); + EmitA64::EmitTerminal(block.GetTerminal(), block.Location()); + code.BRK(0); + code.FlushIcacheSection(entrypoint, code.GetCodePtr()); + + const size_t size = static_cast(code.GetCodePtr() - entrypoint); + + const A32::LocationDescriptor descriptor{block.Location()}; + const A32::LocationDescriptor end_location{block.EndLocation()}; + + const auto range = boost::icl::discrete_interval::closed(descriptor.PC(), end_location.PC() - 1); + block_ranges.AddRange(range, descriptor); + + return RegisterBlock(descriptor, entrypoint, size); +} + +void A32EmitA64::ClearCache() { + EmitA64::ClearCache(); + block_ranges.ClearCache(); + ClearFastDispatchTable(); +} + +void A32EmitA64::InvalidateCacheRanges(const boost::icl::interval_set& ranges) { + InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges)); + ClearFastDispatchTable(); +} + +void A32EmitA64::ClearFastDispatchTable() { + if (config.enable_fast_dispatch) { + fast_dispatch_table.fill({0xFFFFFFFFFFFFFFFFull, nullptr}); + } +} + +void A32EmitA64::GenMemoryAccessors() { + code.AlignCode16(); + read_memory_8 = code.GetCodePtr(); + // Push lr and fp onto the stack + code.ABI_PushRegisters(0x60000000); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + Devirtualize<&A32::UserCallbacks::MemoryRead8>(config.callbacks).EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + code.ABI_PopRegisters(0x60000000); + code.RET(); + PerfMapRegister(read_memory_8, code.GetCodePtr(), "a32_read_memory_8"); + + code.AlignCode16(); + read_memory_16 = code.GetCodePtr(); + // Push lr and fp onto the stack + code.ABI_PushRegisters(0x60000000); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + Devirtualize<&A32::UserCallbacks::MemoryRead16>(config.callbacks).EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + code.ABI_PopRegisters(0x60000000); + code.RET(); + PerfMapRegister(read_memory_16, code.GetCodePtr(), "a32_read_memory_16"); + + code.AlignCode16(); + read_memory_32 = code.GetCodePtr(); + // Push lr and fp onto the stack + code.ABI_PushRegisters(0x60000000); + code.ADD(X29, SP, 0); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + Devirtualize<&A32::UserCallbacks::MemoryRead32>(config.callbacks).EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + code.ABI_PopRegisters(0x60000000); + code.RET(); + PerfMapRegister(read_memory_32, code.GetCodePtr(), "a32_read_memory_32"); + + code.AlignCode16(); + read_memory_64 = code.GetCodePtr(); + // Push lr and fp onto the stack + code.ABI_PushRegisters(0x60000000); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + Devirtualize<&A32::UserCallbacks::MemoryRead64>(config.callbacks).EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + code.ABI_PopRegisters(0x60000000); + code.RET(); + PerfMapRegister(read_memory_64, code.GetCodePtr(), "a32_read_memory_64"); + + code.AlignCode16(); + write_memory_8 = code.GetCodePtr(); + // Push 
lr and fp onto the stack + code.ABI_PushRegisters(0x60000000); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + Devirtualize<&A32::UserCallbacks::MemoryWrite8>(config.callbacks).EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + code.ABI_PopRegisters(0x60000000); + code.RET(); + PerfMapRegister(write_memory_8, code.GetCodePtr(), "a32_write_memory_8"); + + code.AlignCode16(); + write_memory_16 = code.GetCodePtr(); + // Push lr and fp onto the stack + code.ABI_PushRegisters(0x60000000); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + Devirtualize<&A32::UserCallbacks::MemoryWrite16>(config.callbacks).EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + code.ABI_PopRegisters(0x60000000); + code.RET(); + PerfMapRegister(write_memory_16, code.GetCodePtr(), "a32_write_memory_16"); + + code.AlignCode16(); + write_memory_32 = code.GetCodePtr(); + // Push lr and fp onto the stack + code.ABI_PushRegisters(0x60000000); + code.ADD(X29, SP, 0); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + Devirtualize<&A32::UserCallbacks::MemoryWrite32>(config.callbacks).EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + code.ABI_PopRegisters(0x60000000); + code.RET(); + PerfMapRegister(write_memory_32, code.GetCodePtr(), "a32_write_memory_32"); + + code.AlignCode16(); + write_memory_64 = code.GetCodePtr(); + // Push lr and fp onto the stack + code.ABI_PushRegisters(0x60000000); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + Devirtualize<&A32::UserCallbacks::MemoryWrite64>(config.callbacks).EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); + code.ABI_PopRegisters(0x60000000); + code.RET(); + PerfMapRegister(write_memory_64, code.GetCodePtr(), "a32_write_memory_64"); +} + +void A32EmitA64::GenTerminalHandlers() { + const ARM64Reg fast_dispatch_entry_reg = X19; + const ARM64Reg location_descriptor_reg = X20; + + // PC ends up in fast_dispatch_entry_reg, location_descriptor ends up in location_descriptor_reg. + const auto calculate_location_descriptor = [this, fast_dispatch_entry_reg, location_descriptor_reg] { + // This calculation has to match up with IREmitter::PushRSB + // TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et. 
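+ // The loads below assemble the 64-bit location descriptor: FPSCR_mode OR'd with
+ // CPSR_et forms the low word, and the guest PC is shifted into the upper 32 bits.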
+ code.LDR(INDEX_UNSIGNED, DecodeReg(location_descriptor_reg), X28, offsetof(A32JitState, FPSCR_mode)); + code.LDR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_et)); + code.ORR(DecodeReg(location_descriptor_reg), DecodeReg(location_descriptor_reg), DecodeReg(code.ABI_SCRATCH1)); + code.LDR(INDEX_UNSIGNED, DecodeReg(fast_dispatch_entry_reg), X28, MJitStateReg(A32::Reg::PC)); + code.ORR(location_descriptor_reg, location_descriptor_reg, fast_dispatch_entry_reg, ArithOption{fast_dispatch_entry_reg, ST_LSL, 32}); + }; + + FixupBranch fast_dispatch_cache_miss, rsb_cache_miss; + + code.AlignCode16(); + terminal_handler_pop_rsb_hint = code.GetCodePtr(); + calculate_location_descriptor(); + code.LDR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, rsb_ptr)); + code.SUBI2R(code.ABI_SCRATCH1, DecodeReg(code.ABI_SCRATCH1), 1); + code.ANDI2R(code.ABI_SCRATCH1, DecodeReg(code.ABI_SCRATCH1), u32(A32JitState::RSBPtrMask)); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, rsb_ptr)); + code.ADD(code.ABI_SCRATCH1, X28, code.ABI_SCRATCH1, ArithOption{code.ABI_SCRATCH1, ST_LSL, 3}); + + // cmp(location_descriptor_reg, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rsb_ptr * sizeof(u64)]); + code.LDR(INDEX_UNSIGNED, X21, code.ABI_SCRATCH1, offsetof(A32JitState, rsb_location_descriptors)); + code.CMP(location_descriptor_reg, X21); + if (config.enable_fast_dispatch) { + rsb_cache_miss = code.B(CC_NEQ); + } + else { + code.B(CC_NEQ, code.GetReturnFromRunCodeAddress()); + } + code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, code.ABI_SCRATCH1, offsetof(A32JitState, rsb_codeptrs)); + code.BR(code.ABI_SCRATCH1); + PerfMapRegister(terminal_handler_pop_rsb_hint, code.GetCodePtr(), "a32_terminal_handler_pop_rsb_hint"); + + if (config.enable_fast_dispatch) { + code.AlignCode16(); + terminal_handler_fast_dispatch_hint = code.GetCodePtr(); + calculate_location_descriptor(); + code.SetJumpTarget(rsb_cache_miss); + code.MOVI2R(code.ABI_SCRATCH1, reinterpret_cast(fast_dispatch_table.data())); + code.CRC32CW(DecodeReg(fast_dispatch_entry_reg), DecodeReg(fast_dispatch_entry_reg), DecodeReg(code.ABI_SCRATCH1)); + code.ANDI2R(fast_dispatch_entry_reg, fast_dispatch_entry_reg, fast_dispatch_table_mask); + code.ADD(fast_dispatch_entry_reg, fast_dispatch_entry_reg, code.ABI_SCRATCH1); + + // code.cmp(location_descriptor_reg, qword[fast_dispatch_entry_reg + offsetof(FastDispatchEntry, location_descriptor)]); + code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, location_descriptor)); + code.CMP(location_descriptor_reg, code.ABI_SCRATCH1); + fast_dispatch_cache_miss = code.B(CC_NEQ); + code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, code_ptr)); + code.BR(code.ABI_SCRATCH1); + + code.SetJumpTarget(fast_dispatch_cache_miss); + code.STR(INDEX_UNSIGNED, location_descriptor_reg, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, location_descriptor) ); + code.LookupBlock(); + code.STR(INDEX_UNSIGNED, code.ABI_RETURN, fast_dispatch_entry_reg, offsetof(FastDispatchEntry, code_ptr)); + code.BR(code.ABI_RETURN); + PerfMapRegister(terminal_handler_fast_dispatch_hint, code.GetCodePtr(), "a32_terminal_handler_fast_dispatch_hint"); + } +} + + +void A32EmitA64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) { + A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + + Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr()); + 
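+ // X28 points at the guest A32JitState throughout generated code, so guest registers
+ // are read with an unsigned-offset LDR at MJitStateReg(reg).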
code.LDR(INDEX_UNSIGNED, result, X28, MJitStateReg(reg)); + ctx.reg_alloc.DefineValue(inst, result); +} + +//void A32EmitA64::EmitA32GetExtendedRegister32(A32EmitContext& ctx, IR::Inst* inst) { +// A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); +// ASSERT(A32::IsSingleExtReg(reg)); +// +// Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); +// code.movss(result, MJitStateExtReg(reg)); +// ctx.reg_alloc.DefineValue(inst, result); +//} +// +//void A32EmitA64::EmitA32GetExtendedRegister64(A32EmitContext& ctx, IR::Inst* inst) { +// A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); +// ASSERT(A32::IsDoubleExtReg(reg)); +// +// Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); +// code.movsd(result, MJitStateExtReg(reg)); +// ctx.reg_alloc.DefineValue(inst, result); +//} + +void A32EmitA64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (args[1].IsInFpr()) { + Arm64Gen::ARM64Reg to_store = ctx.reg_alloc.UseFpr(args[1]); + code.fp_emitter.STR(sizeof(u32), INDEX_UNSIGNED, to_store, X28, MJitStateReg(reg)); + } else { + Arm64Gen::ARM64Reg to_store = DecodeReg(ctx.reg_alloc.UseGpr(args[1])); + code.STR(INDEX_UNSIGNED, to_store, X28, MJitStateReg(reg)); + } +} + +//void A32EmitA64::EmitA32SetExtendedRegister32(A32EmitContext& ctx, IR::Inst* inst) { +// auto args = ctx.reg_alloc.GetArgumentInfo(inst); +// A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); +// ASSERT(A32::IsSingleExtReg(reg)); +// if (args[1].IsInXmm()) { +// Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); +// code.movss(MJitStateExtReg(reg), to_store); +// } else { +// Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32(); +// code.mov(MJitStateExtReg(reg), to_store); +// } +//} +// +//void A32EmitA64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* inst) { +// auto args = ctx.reg_alloc.GetArgumentInfo(inst); +// A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); +// ASSERT(A32::IsDoubleExtReg(reg)); +// if (args[1].IsInXmm()) { +// Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); +// code.movsd(MJitStateExtReg(reg), to_store); +// } else { +// Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]); +// code.mov(MJitStateExtReg(reg), to_store); +// } +//} + +static u32 GetCpsrImpl(A32JitState* jit_state) { + return jit_state->Cpsr(); +} + +void A32EmitA64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { + //if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { + // Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + // Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + + // // Here we observe that CPSR_et and CPSR_ge are right next to each other in memory, + // // so we load them both at the same time with one 64-bit read. This allows us to + // // extract all of their bits together at once with one pext. 
+ // static_assert(offsetof(A32JitState, CPSR_et) + 4 == offsetof(A32JitState, CPSR_ge)); + // code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, CPSR_et)]); + // code.mov(tmp.cvt64(), 0x80808080'00000003ull); + // code.pext(result.cvt64(), result.cvt64(), tmp.cvt64()); + // code.mov(tmp, 0x000f0220); + // code.pdep(result, result, tmp); + // code.mov(tmp, dword[r15 + offsetof(A32JitState, CPSR_q)]); + // code.shl(tmp, 27); + // code.or_(result, tmp); + // code.or_(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); + // code.or_(result, dword[r15 + offsetof(A32JitState, CPSR_jaifm)]); + + // ctx.reg_alloc.DefineValue(inst, result); + //} else { + ctx.reg_alloc.HostCall(inst); + code.MOV(code.ABI_PARAM1, X28); + code.QuickCallFunction(&GetCpsrImpl); + //} +} + +static void SetCpsrImpl(u32 value, A32JitState* jit_state) { + jit_state->SetCpsr(value); +} + +void A32EmitA64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + //if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { + // Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + // Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + // Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32(); + + // // CPSR_q + // code.bt(cpsr, 27); + // code.setc(code.byte[r15 + offsetof(A32JitState, CPSR_q)]); + + // // CPSR_nzcv + // code.mov(tmp, cpsr); + // code.and_(tmp, 0xF0000000); + // code.mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], tmp); + + // // CPSR_jaifm + // code.mov(tmp, cpsr); + // code.and_(tmp, 0x07F0FDDF); + // code.mov(dword[r15 + offsetof(A32JitState, CPSR_jaifm)], tmp); + + // // CPSR_et and CPSR_ge + // static_assert(offsetof(A32JitState, CPSR_et) + 4 == offsetof(A32JitState, CPSR_ge)); + // code.mov(tmp, 0x000f0220); + // code.pext(cpsr, cpsr, tmp); + // code.mov(tmp.cvt64(), 0x01010101'00000003ull); + // code.pdep(cpsr.cvt64(), cpsr.cvt64(), tmp.cvt64()); + // // We perform SWAR partitioned subtraction here, to negate the GE bytes. + // code.mov(tmp.cvt64(), 0x80808080'00000003ull); + // code.mov(tmp2.cvt64(), tmp.cvt64()); + // code.sub(tmp.cvt64(), cpsr.cvt64()); + // code.xor_(tmp.cvt64(), tmp2.cvt64()); + // code.mov(qword[r15 + offsetof(A32JitState, CPSR_et)], tmp.cvt64()); + //} else { + ctx.reg_alloc.HostCall(nullptr, args[0]); + code.MOV(code.ABI_PARAM2, X28); + code.QuickCallFunction(&SetCpsrImpl); + //} +} + +void A32EmitA64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ARM64Reg a = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); + + code.ANDI2R(a, a, 0xF0000000); + code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_nzcv)); + +} + +void A32EmitA64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = args[0].GetImmediateU32(); + ARM64Reg a = DecodeReg(ctx.reg_alloc.ScratchGpr()); + + code.MOVI2R(a, u32(imm & 0xF0000000)); + code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_nzcv)); + code.MOVI2R(a, u8((imm & 0x08000000) != 0 ? 
1 : 0)); + code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_q)); + } else { + ARM64Reg a = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); + ARM64Reg q = DecodeReg(ctx.reg_alloc.ScratchGpr()); + + code.UBFX(q, a, 27, 1); + code.STR(INDEX_UNSIGNED, q, X28, offsetof(A32JitState, CPSR_q)); + code.ANDI2R(a, a, 0xF0000000); + code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_nzcv)); + } +} + +void A32EmitA64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) { + Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr()); + code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_nzcv)); + code.UBFX(result, result, 31, 1); + ctx.reg_alloc.DefineValue(inst, result); +} + +void A32EmitA64::EmitA32SetNFlag(A32EmitContext& ctx, IR::Inst* inst) { + constexpr size_t flag_bit = 31; + constexpr u32 flag_mask = 1u << flag_bit; + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr()); + + code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv)); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { + code.ORRI2R(nzcv, nzcv, flag_mask); + } else { + code.ANDI2R(nzcv, nzcv, ~flag_mask); + } + } else { + Arm64Gen::ARM64Reg to_store = DecodeReg(ctx.reg_alloc.UseGpr(args[0])); + + code.BFI(nzcv, to_store, flag_bit, 1); + } + code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv)); +} + +void A32EmitA64::EmitA32GetZFlag(A32EmitContext& ctx, IR::Inst* inst) { + Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr()); + code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_nzcv)); + code.UBFX(result, result, 30, 1); + ctx.reg_alloc.DefineValue(inst, result); +} + +void A32EmitA64::EmitA32SetZFlag(A32EmitContext& ctx, IR::Inst* inst) { + constexpr size_t flag_bit = 30; + constexpr u32 flag_mask = 1u << flag_bit; + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr()); + + code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv)); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { + code.ORRI2R(nzcv, nzcv, flag_mask); + } else { + code.ANDI2R(nzcv, nzcv, ~flag_mask); + } + } else { + Arm64Gen::ARM64Reg to_store = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); + + code.BFI(nzcv, to_store, flag_bit, 1); + } + code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv)); +} + +void A32EmitA64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) { + Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr()); + code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_nzcv)); + code.UBFX(result, result, 29, 1); + ctx.reg_alloc.DefineValue(inst, result); +} + +void A32EmitA64::EmitA32SetCFlag(A32EmitContext& ctx, IR::Inst* inst) { + constexpr size_t flag_bit = 29; + constexpr u32 flag_mask = 1u << flag_bit; + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr()); + + code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv)); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { + code.ORRI2R(nzcv, nzcv, flag_mask); + } else { + code.ANDI2R(nzcv, nzcv, ~flag_mask); + } + } else { + Arm64Gen::ARM64Reg to_store = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); + code.BFI(nzcv, to_store, flag_bit, 1); + } + code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv)); +} + +void A32EmitA64::EmitA32GetVFlag(A32EmitContext& ctx, IR::Inst* inst) { + Arm64Gen::ARM64Reg 
result = DecodeReg(ctx.reg_alloc.ScratchGpr()); + code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_nzcv)); + code.UBFX(result, result, 28, 1); + ctx.reg_alloc.DefineValue(inst, result); +} + +void A32EmitA64::EmitA32SetVFlag(A32EmitContext& ctx, IR::Inst* inst) { + constexpr size_t flag_bit = 28; + constexpr u32 flag_mask = 1u << flag_bit; + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr()); + + code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv)); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { + code.ORRI2R(nzcv, nzcv, flag_mask); + } else { + code.ANDI2R(nzcv, nzcv, ~flag_mask); + } + } else { + Arm64Gen::ARM64Reg to_store = DecodeReg(ctx.reg_alloc.UseGpr(args[0])); + + code.BFI(nzcv, to_store, flag_bit, 1); + } + code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv)); +} + +void A32EmitA64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { + ARM64Reg to_store = DecodeReg(ctx.reg_alloc.UseGpr(args[0])); + code.STR(INDEX_UNSIGNED, to_store, X28, offsetof(A32JitState, CPSR_q)); + } + } else { + ARM64Reg to_store = ctx.reg_alloc.UseGpr(args[0]); + + code.LDR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_q)); + code.ORR(code.ABI_SCRATCH1, code.ABI_SCRATCH1, to_store); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_q)); + } +} + +//void A32EmitA64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { +// Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); +// code.movd(result, dword[r15 + offsetof(A32JitState, CPSR_ge)]); +// ctx.reg_alloc.DefineValue(inst, result); +//} +// +//void A32EmitA64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { +// auto args = ctx.reg_alloc.GetArgumentInfo(inst); +// ASSERT(!args[0].IsImmediate()); +// +// if (args[0].IsInXmm()) { +// Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); +// code.movd(dword[r15 + offsetof(A32JitState, CPSR_ge)], to_store); +// } else { +// Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32(); +// code.mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], to_store); +// } +//} + +void A32EmitA64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = args[0].GetImmediateU32(); + u32 ge = 0; + ge |= Common::Bit<19>(imm) ? 0xFF000000 : 0; + ge |= Common::Bit<18>(imm) ? 0x00FF0000 : 0; + ge |= Common::Bit<17>(imm) ? 0x0000FF00 : 0; + ge |= Common::Bit<16>(imm) ? 
0x000000FF : 0; + + code.MOVI2R(code.ABI_SCRATCH1, ge); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_ge)); + } else { + ARM64Reg a = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); + + code.LSR(a, a, 16); + code.ANDI2R(a,a, 0xF); + code.MOVI2R(code.ABI_SCRATCH1, 0x00204081); + code.MUL(a, a, DecodeReg(code.ABI_SCRATCH1)); + code.ANDI2R(a, a, 0x01010101,code.ABI_SCRATCH1); + code.MOVI2R(code.ABI_SCRATCH1, 0xFF); + code.MUL(a, a, DecodeReg(code.ABI_SCRATCH1)); + code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_ge)); + } +} + +void A32EmitA64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& arg = args[0]; + + // Pseudocode: + // if (new_pc & 1) { + // new_pc &= 0xFFFFFFFE; + // cpsr.T = true; + // } else { + // new_pc &= 0xFFFFFFFC; + // cpsr.T = false; + // } + // We rely on the fact we disallow EFlag from changing within a block. + + if (arg.IsImmediate()) { + u32 new_pc = arg.GetImmediateU32(); + u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC; + u32 et = 0; + et |= ctx.Location().EFlag() ? 2 : 0; + et |= Common::Bit<0>(new_pc) ? 1 : 0; + + code.MOVI2R(code.ABI_SCRATCH1, new_pc & mask); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, MJitStateReg(A32::Reg::PC)); + code.MOVI2R(code.ABI_SCRATCH1, et); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_et)); + } else { + if (ctx.Location().EFlag()) { + ARM64Reg new_pc = DecodeReg(ctx.reg_alloc.UseScratchGpr(arg)); + ARM64Reg mask = DecodeReg(ctx.reg_alloc.ScratchGpr()); + ARM64Reg et = DecodeReg(ctx.reg_alloc.ScratchGpr()); + + code.ANDI2R(mask, new_pc, 1); + code.ADDI2R(et, mask, 2); + code.STR(INDEX_UNSIGNED, et, X28, offsetof(A32JitState, CPSR_et)); + code.LSL(mask, mask, 1); + code.SUB(mask, mask, 4); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC + code.AND(new_pc, new_pc, mask); + code.STR(INDEX_UNSIGNED, new_pc, X28, MJitStateReg(A32::Reg::PC)); + } else { + ARM64Reg new_pc = DecodeReg(ctx.reg_alloc.UseScratchGpr(arg)); + ARM64Reg mask = DecodeReg(ctx.reg_alloc.ScratchGpr()); + + code.ANDI2R(mask, new_pc, 1); + code.STR(INDEX_UNSIGNED, mask, X28, offsetof(A32JitState, CPSR_et)); + code.LSL(mask, mask, 1); + code.SUB(mask, mask, 4); // mask = pc & 1 ? 
0xFFFFFFFE : 0xFFFFFFFC + code.AND(new_pc, new_pc, mask); + code.STR(INDEX_UNSIGNED, new_pc, X28, MJitStateReg(A32::Reg::PC)); + } + } +} + +void A32EmitA64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { + ctx.reg_alloc.HostCall(nullptr); + + code.SwitchMxcsrOnExit(); + code.LDR(INDEX_UNSIGNED, code.ABI_PARAM2, X28, offsetof(A32JitState, cycles_to_run)); + code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, offsetof(A32JitState, cycles_remaining)); + code.SUB(code.ABI_PARAM2, code.ABI_PARAM2, code.ABI_SCRATCH1); + + Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks).EmitCall(code); + ctx.reg_alloc.EndOfAllocScope(); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.HostCall(nullptr, {}, args[0]); + Devirtualize<&A32::UserCallbacks::CallSVC>(config.callbacks).EmitCall(code); + Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks).EmitCall(code); + code.STR(INDEX_UNSIGNED, code.ABI_RETURN, X28, offsetof(A32JitState, cycles_to_run)); + code.STR(INDEX_UNSIGNED, code.ABI_RETURN, X28, offsetof(A32JitState, cycles_remaining)); + code.SwitchMxcsrOnEntry(); +} + +void A32EmitA64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) { + ctx.reg_alloc.HostCall(nullptr); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[0].IsImmediate() && args[1].IsImmediate()); + u32 pc = args[0].GetImmediateU32(); + u64 exception = args[1].GetImmediateU64(); + Devirtualize<&A32::UserCallbacks::ExceptionRaised>(config.callbacks).EmitCall(code, [&](RegList param) { + code.MOVI2R(param[0], pc); + code.MOVI2R(param[1], exception); + }); +} + +static u32 GetFpscrImpl(A32JitState* jit_state) { + return jit_state->Fpscr(); +} + +void A32EmitA64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) { + ctx.reg_alloc.HostCall(inst); + code.MOV(code.ABI_PARAM1, X28); + + code.MRS(code.ABI_SCRATCH1, FIELD_FPSR); + code.STR(INDEX_UNSIGNED,code.ABI_SCRATCH1, X28, offsetof(A32JitState, guest_FPSR)); + code.QuickCallFunction(&GetFpscrImpl); +} + +static void SetFpscrImpl(u32 value, A32JitState* jit_state) { + jit_state->SetFpscr(value); +} + +void A32EmitA64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.HostCall(nullptr, args[0]); + code.MOV(code.ABI_PARAM2, X28); + + code.QuickCallFunction(&SetFpscrImpl); + + code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, offsetof(A32JitState, guest_FPSR)); + code._MSR(FIELD_FPSR, code.ABI_SCRATCH1); +} + +void A32EmitA64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { + ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr()); + code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, FPSCR_nzcv)); + ctx.reg_alloc.DefineValue(inst, result); +} + +void A32EmitA64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ARM64Reg value = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0])); + + code.ANDI2R(value, value, 0xF0000000); + + code.STR(INDEX_UNSIGNED, value, X28, offsetof(A32JitState, FPSCR_nzcv)); +} + +void A32EmitA64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) { + code.STR(INDEX_UNSIGNED, WZR, X28, offsetof(A32JitState, exclusive_state)); +} + +void A32EmitA64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[1].IsImmediate()); + Arm64Gen::ARM64Reg address = DecodeReg(ctx.reg_alloc.UseGpr(args[0])); + + code.MOVI2R(code.ABI_SCRATCH1, u8(1)); + code.STR(INDEX_UNSIGNED, 
DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, exclusive_state)); + code.STR(INDEX_UNSIGNED, address, X28, offsetof(A32JitState, exclusive_address)); +} + +template +static void ReadMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) { + constexpr size_t bit_size = Common::BitSize(); + auto args = reg_alloc.GetArgumentInfo(inst); + + if (!config.page_table) { + reg_alloc.HostCall(inst, {}, args[0]); + Devirtualize(config.callbacks).EmitCall(code); + return; + } + + reg_alloc.UseScratch(args[0], ABI_PARAM2); + + Arm64Gen::ARM64Reg result = reg_alloc.ScratchGpr({ABI_RETURN}); + Arm64Gen::ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2); + Arm64Gen::ARM64Reg page_index = reg_alloc.ScratchGpr(); + Arm64Gen::ARM64Reg page_offset = reg_alloc.ScratchGpr(); + + FixupBranch abort, end; + + code.MOVP2R(result, config.page_table); + code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12}); + code.LDR(result, result, ArithOption{page_index, true}); + abort = code.CBZ(result); + code.ANDI2R(DecodeReg(page_offset), DecodeReg(vaddr), 4095); + switch (bit_size) { + case 8: + code.LDRB(DecodeReg(result), result, ArithOption{ page_offset }); + break; + case 16: + code.LDRH(DecodeReg(result), result, ArithOption{ page_offset }); + break; + case 32: + code.LDR(DecodeReg(result), result, ArithOption{ page_offset }); + break; + case 64: + code.LDR(result, result, ArithOption{ page_offset }); + break; + default: + ASSERT_MSG(false, "Invalid bit_size"); + break; + } + end = code.B(); + code.SetJumpTarget(abort); + code.BL(wrapped_fn); + code.SetJumpTarget(end); + + reg_alloc.DefineValue(inst, result); +} + +template +static void WriteMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) { + constexpr size_t bit_size = Common::BitSize(); + auto args = reg_alloc.GetArgumentInfo(inst); + + if (!config.page_table) { + reg_alloc.HostCall(nullptr, {}, args[0], args[1]); + Devirtualize(config.callbacks).EmitCall(code); + return; + } + + reg_alloc.ScratchGpr({ABI_RETURN}); + reg_alloc.UseScratch(args[0], ABI_PARAM2); + reg_alloc.UseScratch(args[1], ABI_PARAM3); + + Arm64Gen::ARM64Reg vaddr = DecodeReg(code.ABI_PARAM2); + Arm64Gen::ARM64Reg value = code.ABI_PARAM3; + Arm64Gen::ARM64Reg page_index = reg_alloc.ScratchGpr(); + Arm64Gen::ARM64Reg page_offset = reg_alloc.ScratchGpr(); + + FixupBranch abort, end; + + code.MOVI2R(code.ABI_SCRATCH1, reinterpret_cast(config.page_table)); + code.MOV(DecodeReg(page_index), vaddr, ArithOption{vaddr, ST_LSR, 12}); + code.LDR(code.ABI_SCRATCH1, code.ABI_SCRATCH1, ArithOption{ page_index, true }); + abort = code.CBZ(code.ABI_SCRATCH1); + code.ANDI2R(DecodeReg(page_offset), DecodeReg(vaddr), 4095); + switch (bit_size) { + case 8: + code.STRB(DecodeReg(value), code.ABI_SCRATCH1, ArithOption{ page_offset }); + break; + case 16: + code.STRH(DecodeReg(value), code.ABI_SCRATCH1, ArithOption{ page_offset }); + break; + case 32: + code.STR(DecodeReg(value), code.ABI_SCRATCH1, ArithOption{ page_offset }); + break; + case 64: + code.STR(value, code.ABI_SCRATCH1, ArithOption{ page_offset }); + break; + default: + ASSERT_MSG(false, "Invalid bit_size"); + break; + } + end = code.B(); + code.SetJumpTarget(abort); + code.BL(wrapped_fn); + code.SetJumpTarget(end); +} + +void A32EmitA64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) { + ReadMemory(code, ctx.reg_alloc, inst, config, read_memory_8); +} + +void 
A32EmitA64::EmitA32ReadMemory16(A32EmitContext& ctx, IR::Inst* inst) { + ReadMemory(code, ctx.reg_alloc, inst, config, read_memory_16); +} + +void A32EmitA64::EmitA32ReadMemory32(A32EmitContext& ctx, IR::Inst* inst) { + ReadMemory(code, ctx.reg_alloc, inst, config, read_memory_32); +} + +void A32EmitA64::EmitA32ReadMemory64(A32EmitContext& ctx, IR::Inst* inst) { + ReadMemory(code, ctx.reg_alloc, inst, config, read_memory_64); +} + +void A32EmitA64::EmitA32WriteMemory8(A32EmitContext& ctx, IR::Inst* inst) { + WriteMemory(code, ctx.reg_alloc, inst, config, write_memory_8); +} + +void A32EmitA64::EmitA32WriteMemory16(A32EmitContext& ctx, IR::Inst* inst) { + WriteMemory(code, ctx.reg_alloc, inst, config, write_memory_16); +} + +void A32EmitA64::EmitA32WriteMemory32(A32EmitContext& ctx, IR::Inst* inst) { + WriteMemory(code, ctx.reg_alloc, inst, config, write_memory_32); +} + +void A32EmitA64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { + WriteMemory(code, ctx.reg_alloc, inst, config, write_memory_64); +} + +template +static void ExclusiveWrite(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, bool prepend_high_word) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (prepend_high_word) { + reg_alloc.HostCall(nullptr, {}, args[0], args[1], args[2]); + } else { + reg_alloc.HostCall(nullptr, {}, args[0], args[1]); + } + Arm64Gen::ARM64Reg passed = DecodeReg(reg_alloc.ScratchGpr()); + Arm64Gen::ARM64Reg tmp = DecodeReg(reg_alloc.ScratchGpr()); + + std::vector end; + + code.MOVI2R(passed, u32(1)); + code.LDR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, exclusive_state)); + end.push_back(code.CBZ(DecodeReg(code.ABI_SCRATCH1))); + code.LDR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, exclusive_address)); + code.EOR(tmp, code.ABI_PARAM2, DecodeReg(code.ABI_SCRATCH1)); + code.TSTI2R(tmp, A32JitState::RESERVATION_GRANULE_MASK, code.ABI_SCRATCH1); + end.push_back(code.B(CC_NEQ)); + code.STR(INDEX_UNSIGNED, WZR, X28, offsetof(A32JitState, exclusive_state)); + if (prepend_high_word) { + code.LSL(code.ABI_PARAM4,code.ABI_PARAM4, 32); + code.ORR(code.ABI_PARAM3, code.ABI_PARAM3, code.ABI_PARAM4); + } + Devirtualize(config.callbacks).EmitCall(code); + code.MOVI2R(passed, 0); + + for (FixupBranch e : end) { + code.SetJumpTarget(e); + } + + reg_alloc.DefineValue(inst, passed); +} + +void A32EmitA64::EmitA32ExclusiveWriteMemory8(A32EmitContext& ctx, IR::Inst* inst) { + ExclusiveWrite(code, ctx.reg_alloc, inst, config, false); +} + +void A32EmitA64::EmitA32ExclusiveWriteMemory16(A32EmitContext& ctx, IR::Inst* inst) { + ExclusiveWrite(code, ctx.reg_alloc, inst, config, false); +} + +void A32EmitA64::EmitA32ExclusiveWriteMemory32(A32EmitContext& ctx, IR::Inst* inst) { + ExclusiveWrite(code, ctx.reg_alloc, inst, config, false); +} + +void A32EmitA64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { + ExclusiveWrite(code, ctx.reg_alloc, inst, config, true); +} + +static void EmitCoprocessorException() { + ASSERT_MSG(false, "Should raise coproc exception here"); +} + +static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, + IR::Inst* inst = nullptr, std::optional arg0 = {}, std::optional arg1 = {}) { + reg_alloc.HostCall(inst, {}, {}, arg0, arg1); + + code.MOVI2R(code.ABI_PARAM1, reinterpret_cast(jit_interface)); + if (callback.user_arg) { + code.MOVI2R(code.ABI_PARAM2, reinterpret_cast(*callback.user_arg)); + } 
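+ // ABI_PARAM1 carries the jit interface and ABI_PARAM2 the coprocessor's user argument
+ // (when present); any value operands were already marshalled by HostCall above.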
+ + code.QuickCallFunction(callback.function); +} + +void A32EmitA64::EmitA32CoprocInternalOperation(A32EmitContext& ctx, IR::Inst* inst) { + auto coproc_info = inst->GetArg(0).GetCoprocInfo(); + + size_t coproc_num = coproc_info[0]; + bool two = coproc_info[1] != 0; + unsigned opc1 = static_cast(coproc_info[2]); + A32::CoprocReg CRd = static_cast(coproc_info[3]); + A32::CoprocReg CRn = static_cast(coproc_info[4]); + A32::CoprocReg CRm = static_cast(coproc_info[5]); + unsigned opc2 = static_cast(coproc_info[6]); + + std::shared_ptr coproc = config.coprocessors[coproc_num]; + if (!coproc) { + EmitCoprocessorException(); + return; + } + + auto action = coproc->CompileInternalOperation(two, opc1, CRd, CRn, CRm, opc2); + if (!action) { + EmitCoprocessorException(); + return; + } + + CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action); +} + +void A32EmitA64::EmitA32CoprocSendOneWord(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto coproc_info = inst->GetArg(0).GetCoprocInfo(); + + size_t coproc_num = coproc_info[0]; + bool two = coproc_info[1] != 0; + unsigned opc1 = static_cast(coproc_info[2]); + A32::CoprocReg CRn = static_cast(coproc_info[3]); + A32::CoprocReg CRm = static_cast(coproc_info[4]); + unsigned opc2 = static_cast(coproc_info[5]); + + std::shared_ptr coproc = config.coprocessors[coproc_num]; + if (!coproc) { + EmitCoprocessorException(); + return; + } + + auto action = coproc->CompileSendOneWord(two, opc1, CRn, CRm, opc2); + switch (action.which()) { + case 0: + EmitCoprocessorException(); + return; + case 1: + CallCoprocCallback(code, ctx.reg_alloc, jit_interface, boost::get(action), nullptr, args[1]); + return; + case 2: { + u32* destination_ptr = boost::get(action); + + ARM64Reg reg_word = DecodeReg(ctx.reg_alloc.UseGpr(args[1])); + ARM64Reg reg_destination_addr = ctx.reg_alloc.ScratchGpr(); + + code.MOVI2R(reg_destination_addr, reinterpret_cast(destination_ptr)); + code.STR(INDEX_UNSIGNED, reg_word, reg_destination_addr, 0); + + return; + } + default: + ASSERT_MSG(false, "Unreachable"); + } +} + +void A32EmitA64::EmitA32CoprocSendTwoWords(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto coproc_info = inst->GetArg(0).GetCoprocInfo(); + + size_t coproc_num = coproc_info[0]; + bool two = coproc_info[1] != 0; + unsigned opc = static_cast(coproc_info[2]); + A32::CoprocReg CRm = static_cast(coproc_info[3]); + + std::shared_ptr coproc = config.coprocessors[coproc_num]; + if (!coproc) { + EmitCoprocessorException(); + return; + } + + auto action = coproc->CompileSendTwoWords(two, opc, CRm); + switch (action.which()) { + case 0: + EmitCoprocessorException(); + return; + case 1: + CallCoprocCallback(code, ctx.reg_alloc, jit_interface, boost::get(action), nullptr, args[1], args[2]); + return; + case 2: { + auto destination_ptrs = boost::get>(action); + + ARM64Reg reg_word1 = DecodeReg(ctx.reg_alloc.UseGpr(args[1])); + ARM64Reg reg_word2 = DecodeReg(ctx.reg_alloc.UseGpr(args[2])); + ARM64Reg reg_destination_addr = ctx.reg_alloc.ScratchGpr(); + + code.MOVI2R(reg_destination_addr, reinterpret_cast(destination_ptrs[0])); + code.STR(INDEX_UNSIGNED, reg_word1, reg_destination_addr, 0); + code.MOVI2R(reg_destination_addr, reinterpret_cast(destination_ptrs[1])); + code.STR(INDEX_UNSIGNED, reg_word2, reg_destination_addr, 0); + + return; + } + default: + ASSERT_MSG(false, "Unreachable"); + } +} + +void A32EmitA64::EmitA32CoprocGetOneWord(A32EmitContext& ctx, IR::Inst* inst) { + auto 
coproc_info = inst->GetArg(0).GetCoprocInfo(); + + size_t coproc_num = coproc_info[0]; + bool two = coproc_info[1] != 0; + unsigned opc1 = static_cast(coproc_info[2]); + A32::CoprocReg CRn = static_cast(coproc_info[3]); + A32::CoprocReg CRm = static_cast(coproc_info[4]); + unsigned opc2 = static_cast(coproc_info[5]); + + std::shared_ptr coproc = config.coprocessors[coproc_num]; + if (!coproc) { + EmitCoprocessorException(); + return; + } + + auto action = coproc->CompileGetOneWord(two, opc1, CRn, CRm, opc2); + switch (action.which()) { + case 0: + EmitCoprocessorException(); + return; + case 1: + CallCoprocCallback(code, ctx.reg_alloc, jit_interface, boost::get(action), inst); + return; + case 2: { + u32* source_ptr = boost::get(action); + + ARM64Reg reg_word = DecodeReg(ctx.reg_alloc.ScratchGpr()); + ARM64Reg reg_source_addr = ctx.reg_alloc.ScratchGpr(); + + code.MOVI2R(reg_source_addr, reinterpret_cast(source_ptr)); + code.LDR(INDEX_UNSIGNED, reg_word, reg_source_addr, 0); + + ctx.reg_alloc.DefineValue(inst, reg_word); + + return; + } + default: + ASSERT_MSG(false, "Unreachable"); + } +} + +void A32EmitA64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) { + auto coproc_info = inst->GetArg(0).GetCoprocInfo(); + + size_t coproc_num = coproc_info[0]; + bool two = coproc_info[1] != 0; + unsigned opc = coproc_info[2]; + A32::CoprocReg CRm = static_cast(coproc_info[3]); + + std::shared_ptr coproc = config.coprocessors[coproc_num]; + if (!coproc) { + EmitCoprocessorException(); + return; + } + + auto action = coproc->CompileGetTwoWords(two, opc, CRm); + switch (action.which()) { + case 0: + EmitCoprocessorException(); + return; + case 1: + CallCoprocCallback(code, ctx.reg_alloc, jit_interface, boost::get(action), inst); + return; + case 2: { + auto source_ptrs = boost::get>(action); + + ARM64Reg reg_result = ctx.reg_alloc.ScratchGpr(); + ARM64Reg reg_destination_addr = ctx.reg_alloc.ScratchGpr(); + ARM64Reg reg_tmp = ctx.reg_alloc.ScratchGpr(); + + code.MOVI2R(reg_destination_addr, reinterpret_cast(source_ptrs[1])); + code.LDR(INDEX_UNSIGNED, DecodeReg(reg_result), reg_destination_addr, 0); + code.MOVI2R(reg_destination_addr, reinterpret_cast(source_ptrs[0])); + code.LDR(INDEX_UNSIGNED, DecodeReg(reg_tmp), reg_destination_addr, 0); + code.ORR(reg_result, reg_tmp, reg_result, ArithOption{ reg_result , ST_LSL, 32}); + + ctx.reg_alloc.DefineValue(inst, reg_result); + + return; + } + default: + ASSERT_MSG(false, "Unreachable"); + } +} + +void A32EmitA64::EmitA32CoprocLoadWords(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto coproc_info = inst->GetArg(0).GetCoprocInfo(); + + size_t coproc_num = coproc_info[0]; + bool two = coproc_info[1] != 0; + bool long_transfer = coproc_info[2] != 0; + A32::CoprocReg CRd = static_cast(coproc_info[3]); + bool has_option = coproc_info[4] != 0; + std::optional option = std::nullopt; + if (has_option) { + option = coproc_info[5]; + } + + + std::shared_ptr coproc = config.coprocessors[coproc_num]; + if (!coproc) { + EmitCoprocessorException(); + return; + } + + auto action = coproc->CompileLoadWords(two, long_transfer, CRd, option); + if (!action) { + EmitCoprocessorException(); + return; + } + + CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]); +} + +void A32EmitA64::EmitA32CoprocStoreWords(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto coproc_info = inst->GetArg(0).GetCoprocInfo(); + + size_t coproc_num = coproc_info[0]; 
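+ // coproc_info packs the coprocessor operand fields from the IR:
+ // [0]=coprocessor number, [1]=two, [2]=opc1, [3]=CRn, [4]=CRm, [5]=opc2.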
+ bool two = coproc_info[1] != 0; + bool long_transfer = coproc_info[2] != 0; + A32::CoprocReg CRd = static_cast(coproc_info[3]); + bool has_option = coproc_info[4] != 0; + std::optional option = std::nullopt; + if (has_option) { + option = coproc_info[5]; + } + + std::shared_ptr coproc = config.coprocessors[coproc_num]; + if (!coproc) { + EmitCoprocessorException(); + return; + } + + auto action = coproc->CompileStoreWords(two, long_transfer, CRd, option); + if (!action) { + EmitCoprocessorException(); + return; + } + + CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]); +} + + +std::string A32EmitA64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const { + const A32::LocationDescriptor descriptor{ir_descriptor}; + return fmt::format("a32_{}{:08X}_{}_fpcr{:08X}", descriptor.TFlag() ? "t" : "a", descriptor.PC(), descriptor.EFlag() ? "be" : "le", + descriptor.FPSCR().Value()); +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) { + ASSERT_MSG(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag(), "Unimplemented"); + ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented"); + ASSERT_MSG(terminal.num_instructions == 1, "Unimplemented"); + + code.MOVI2R(DecodeReg(code.ABI_PARAM2), A32::LocationDescriptor{terminal.next}.PC()); + code.MOVI2R(DecodeReg(code.ABI_PARAM3), 1); + code.STR(INDEX_UNSIGNED,DecodeReg(code.ABI_PARAM2), X28, MJitStateReg(A32::Reg::PC)); + code.SwitchMxcsrOnExit(); + Devirtualize<&A32::UserCallbacks::InterpreterFallback>(config.callbacks).EmitCall(code); + code.ReturnFromRunCode(true); // TODO: Check cycles +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor) { + code.ReturnFromRunCode(); +} + +static u32 CalculateCpsr_et(const IR::LocationDescriptor& arg) { + const A32::LocationDescriptor desc{arg}; + u32 et = 0; + et |= desc.EFlag() ? 2 : 0; + et |= desc.TFlag() ? 
1 : 0; + return et; +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) { + if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { + code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), CalculateCpsr_et(terminal.next)); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_et)); + } + + code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, offsetof(A32JitState, cycles_remaining)); + code.CMP(code.ABI_SCRATCH1, ZR); + + patch_information[terminal.next].jg.emplace_back(code.GetCodePtr()); + if (auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJg(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJg(terminal.next); + } + FixupBranch dest = code.B(); + + code.SwitchToFarCode(); + code.AlignCode16(); + code.SetJumpTarget(dest); + code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), A32::LocationDescriptor{terminal.next}.PC()); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, MJitStateReg(A32::Reg::PC)); + PushRSBHelper(X1, X2, terminal.next); + code.ForceReturnFromRunCode(); + code.FlushIcache(); + code.SwitchToNearCode(); +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) { + if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { + code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), CalculateCpsr_et(terminal.next)); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_et)); + } + + patch_information[terminal.next].jmp.emplace_back(code.GetCodePtr()); + if (auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJmp(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJmp(terminal.next); + } +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor) { + code.B(terminal_handler_pop_rsb_hint); +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor) { + if (config.enable_fast_dispatch) { + code.B(terminal_handler_fast_dispatch_hint); + } else { + code.ReturnFromRunCode(); + } +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) { + FixupBranch pass = EmitCond(terminal.if_); + EmitTerminal(terminal.else_, initial_location); + code.SetJumpTarget(pass); + EmitTerminal(terminal.then_, initial_location); +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::CheckBit, IR::LocationDescriptor) { + ASSERT_MSG(false, "Term::CheckBit should never be emitted by the A32 frontend"); +} + +void A32EmitA64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) { + code.LDRB(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, halt_requested)); + // Conditional branch only gives +/- 1MB of branch distance + FixupBranch zero = code.CBZ(DecodeReg(code.ABI_SCRATCH1)); + code.B(code.GetForceReturnFromRunCodeAddress()); + code.SetJumpTarget(zero); + EmitTerminal(terminal.else_, initial_location); +} + +void A32EmitA64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { + const CodePtr patch_location = code.GetCodePtr(); + + auto long_branch_gt = [this](CodePtr ptr){ + const s64 distance = reinterpret_cast(ptr) - reinterpret_cast(code.GetCodePtr()); + + if((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) { + code.B(CC_GT, ptr); + return; + } + + FixupBranch cc_le = code.B(CC_LE); + code.B(ptr); + code.SetJumpTarget(cc_le); + }; + + if (target_code_ptr) { + 
long_branch_gt(target_code_ptr); + } else { + code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), A32::LocationDescriptor{target_desc}.PC()); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, MJitStateReg(A32::Reg::PC)); + long_branch_gt(code.GetReturnFromRunCodeAddress()); + } + code.EnsurePatchLocationSize(patch_location, 24); +} + +void A32EmitA64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { + const CodePtr patch_location = code.GetCodePtr(); + if (target_code_ptr) { + code.B(target_code_ptr); + } else { + code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), A32::LocationDescriptor{target_desc}.PC()); + code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, MJitStateReg(A32::Reg::PC)); + code.B(code.GetReturnFromRunCodeAddress()); + } + code.EnsurePatchLocationSize(patch_location, 20); +} + +void A32EmitA64::EmitPatchMovX0(CodePtr target_code_ptr) { + if (!target_code_ptr) { + target_code_ptr = code.GetReturnFromRunCodeAddress(); + } + const CodePtr patch_location = code.GetCodePtr(); + code.MOVP2R(X0, target_code_ptr); + code.EnsurePatchLocationSize(patch_location, 16); +} + +} // namespace Dynarmic::BackendA64 diff --git a/src/backend/A64/a32_emit_a64.h b/src/backend/A64/a32_emit_a64.h new file mode 100644 index 00000000..1fb746ee --- /dev/null +++ b/src/backend/A64/a32_emit_a64.h @@ -0,0 +1,107 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +#include + +#include "backend/A64/a32_jitstate.h" +#include "backend/A64/block_range_information.h" +#include "backend/A64/emit_a64.h" +#include "dynarmic/A32/a32.h" +#include "dynarmic/A32/config.h" +#include "frontend/A32/location_descriptor.h" +#include "frontend/ir/terminal.h" + +namespace Dynarmic::BackendA64 { + +class RegAlloc; + +struct A32EmitContext final : public EmitContext { + A32EmitContext(RegAlloc& reg_alloc, IR::Block& block); + A32::LocationDescriptor Location() const; + FP::RoundingMode FPSCR_RMode() const override; + u32 FPCR() const override; + bool FPSCR_FTZ() const override; + bool FPSCR_DN() const override; +}; + +class A32EmitA64 final : public EmitA64 { +public: + A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface); + ~A32EmitA64() override; + + /** + * Emit host machine code for a basic block with intermediate representation `ir`. + * @note ir is modified. 
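+ * @note The emitted block's PC range is recorded in block_ranges so that
+ *       InvalidateCacheRanges can later discard it.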
+ */ + BlockDescriptor Emit(IR::Block& ir); + + void ClearCache() override; + + void InvalidateCacheRanges(const boost::icl::interval_set& ranges); + +protected: + const A32::UserConfig config; + A32::Jit* jit_interface; + BlockRangeInformation block_ranges; + + struct FastDispatchEntry { + u64 location_descriptor; + const void* code_ptr; + }; + static_assert(sizeof(FastDispatchEntry) == 0x10); + static constexpr u64 fast_dispatch_table_mask = 0xFFFF0; + static constexpr size_t fast_dispatch_table_size = 0x10000; + std::array fast_dispatch_table; + void ClearFastDispatchTable(); + + const void* read_memory_8; + const void* read_memory_16; + const void* read_memory_32; + const void* read_memory_64; + const void* write_memory_8; + const void* write_memory_16; + const void* write_memory_32; + const void* write_memory_64; + void GenMemoryAccessors(); + + const void* terminal_handler_pop_rsb_hint; + const void* terminal_handler_fast_dispatch_hint = nullptr; + void GenTerminalHandlers(); + + // Microinstruction emitters +#define OPCODE(...) +#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst); +#define A64OPC(...) +#include "frontend/ir/opcodes.inc" +#undef OPCODE +#undef A32OPC +#undef A64OPC + + // Helpers + std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override; + + // Terminal instruction emitters + void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override; + void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override; + void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) override; + void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) override; + void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) override; + void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location) override; + void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) override; + void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) override; + void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override; + + // Patching + void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; + void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; + void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) override; +}; + +} // namespace Dynarmic::BackendA64 diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc new file mode 100644 index 00000000..f605f139 --- /dev/null +++ b/src/backend/A64/opcodes.inc @@ -0,0 +1,649 @@ +// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... 
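+// a32_emit_a64.cpp defines OPCODE/A32OPC/A64OPC before including this file, so every
+// uncommented entry expands into a dispatch case in A32EmitA64::Emit. Entries that are
+// commented out are not yet implemented by this backend.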
+ +OPCODE(Void, Void, ) +OPCODE(Identity, Opaque, Opaque ) +OPCODE(Breakpoint, Void, ) + +// A32 Context getters/setters +A32OPC(GetRegister, U32, A32Reg ) +//A32OPC(GetExtendedRegister32, U32, A32ExtReg ) +//A32OPC(GetExtendedRegister64, U64, A32ExtReg ) +A32OPC(SetRegister, Void, A32Reg, U32 ) +//A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 ) +//A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 ) +A32OPC(GetCpsr, U32, ) +A32OPC(SetCpsr, Void, U32 ) +A32OPC(SetCpsrNZCV, Void, U32 ) +A32OPC(SetCpsrNZCVQ, Void, U32 ) +A32OPC(GetNFlag, U1, ) +A32OPC(SetNFlag, Void, U1 ) +A32OPC(GetZFlag, U1, ) +A32OPC(SetZFlag, Void, U1 ) +A32OPC(GetCFlag, U1, ) +A32OPC(SetCFlag, Void, U1 ) +A32OPC(GetVFlag, U1, ) +A32OPC(SetVFlag, Void, U1 ) +A32OPC(OrQFlag, Void, U1 ) +//A32OPC(GetGEFlags, U32, ) +//A32OPC(SetGEFlags, Void, U32 ) +A32OPC(SetGEFlagsCompressed, Void, U32 ) +A32OPC(BXWritePC, Void, U32 ) +A32OPC(CallSupervisor, Void, U32 ) +A32OPC(ExceptionRaised, Void, U32, U64 ) +//A32OPC(GetFpscr, U32, ) +//A32OPC(SetFpscr, Void, U32, ) +//A32OPC(GetFpscrNZCV, U32, ) +//A32OPC(SetFpscrNZCV, Void, NZCV ) + +// A64 Context getters/setters +//A64OPC(SetCheckBit, Void, U1 ) +//A64OPC(GetCFlag, U1, ) +//A64OPC(GetNZCVRaw, U32, ) +//A64OPC(SetNZCVRaw, Void, U32 ) +//A64OPC(SetNZCV, Void, NZCV ) +//A64OPC(GetW, U32, A64Reg ) +//A64OPC(GetX, U64, A64Reg ) +//A64OPC(GetS, U128, A64Vec ) +//A64OPC(GetD, U128, A64Vec ) +//A64OPC(GetQ, U128, A64Vec ) +//A64OPC(GetSP, U64, ) +//A64OPC(GetFPCR, U32, ) +//A64OPC(GetFPSR, U32, ) +//A64OPC(SetW, Void, A64Reg, U32 ) +//A64OPC(SetX, Void, A64Reg, U64 ) +//A64OPC(SetS, Void, A64Vec, U128 ) +//A64OPC(SetD, Void, A64Vec, U128 ) +//A64OPC(SetQ, Void, A64Vec, U128 ) +//A64OPC(SetSP, Void, U64 ) +//A64OPC(SetFPCR, Void, U32 ) +//A64OPC(SetFPSR, Void, U32 ) +//A64OPC(OrQC, Void, U1 ) +//A64OPC(SetPC, Void, U64 ) +//A64OPC(CallSupervisor, Void, U32 ) +//A64OPC(ExceptionRaised, Void, U64, U64 ) +//A64OPC(DataCacheOperationRaised, Void, U64, U64 ) +//A64OPC(DataSynchronizationBarrier, Void, ) +//A64OPC(DataMemoryBarrier, Void, ) +//A64OPC(InstructionSynchronizationBarrier, Void, ) +//A64OPC(GetCNTFRQ, U32, ) +//A64OPC(GetCNTPCT, U64, ) +//A64OPC(GetCTR, U32, ) +//A64OPC(GetDCZID, U32, ) +//A64OPC(GetTPIDR, U64, ) +//A64OPC(GetTPIDRRO, U64, ) +//A64OPC(SetTPIDR, Void, U64 ) + +// Hints +OPCODE(PushRSB, Void, U64 ) + +// Pseudo-operation, handled specially at final emit +OPCODE(GetCarryFromOp, U1, Opaque ) +OPCODE(GetOverflowFromOp, U1, Opaque ) +OPCODE(GetGEFromOp, U32, Opaque ) +OPCODE(GetNZCVFromOp, NZCV, Opaque ) +OPCODE(GetUpperFromOp, U128, Opaque ) +OPCODE(GetLowerFromOp, U128, Opaque ) + +OPCODE(NZCVFromPackedFlags, NZCV, U32 ) + +// Calculations +//OPCODE(Pack2x32To1x64, U64, U32, U32 ) +//OPCODE(Pack2x64To1x128, U128, U64, U64 ) +//OPCODE(LeastSignificantWord, U32, U64 ) +//OPCODE(MostSignificantWord, U32, U64 ) +//OPCODE(LeastSignificantHalf, U16, U32 ) +//OPCODE(LeastSignificantByte, U8, U32 ) +//OPCODE(MostSignificantBit, U1, U32 ) +//OPCODE(IsZero32, U1, U32 ) +//OPCODE(IsZero64, U1, U64 ) +//OPCODE(TestBit, U1, U64, U8 ) +//OPCODE(ConditionalSelect32, U32, Cond, U32, U32 ) +//OPCODE(ConditionalSelect64, U64, Cond, U64, U64 ) +//OPCODE(ConditionalSelectNZCV, NZCV, Cond, NZCV, NZCV ) +//OPCODE(LogicalShiftLeft32, U32, U32, U8, U1 ) +//OPCODE(LogicalShiftLeft64, U64, U64, U8 ) +//OPCODE(LogicalShiftRight32, U32, U32, U8, U1 ) +//OPCODE(LogicalShiftRight64, U64, U64, U8 ) +//OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 ) +//OPCODE(ArithmeticShiftRight64, U64, U64, 
U8 ) +//OPCODE(RotateRight32, U32, U32, U8, U1 ) +//OPCODE(RotateRight64, U64, U64, U8 ) +//OPCODE(RotateRightExtended, U32, U32, U1 ) +//OPCODE(Add32, U32, U32, U32, U1 ) +//OPCODE(Add64, U64, U64, U64, U1 ) +//OPCODE(Sub32, U32, U32, U32, U1 ) +//OPCODE(Sub64, U64, U64, U64, U1 ) +//OPCODE(Mul32, U32, U32, U32 ) +//OPCODE(Mul64, U64, U64, U64 ) +//OPCODE(SignedMultiplyHigh64, U64, U64, U64 ) +//OPCODE(UnsignedMultiplyHigh64, U64, U64, U64 ) +//OPCODE(UnsignedDiv32, U32, U32, U32 ) +//OPCODE(UnsignedDiv64, U64, U64, U64 ) +//OPCODE(SignedDiv32, U32, U32, U32 ) +//OPCODE(SignedDiv64, U64, U64, U64 ) +//OPCODE(And32, U32, U32, U32 ) +//OPCODE(And64, U64, U64, U64 ) +//OPCODE(Eor32, U32, U32, U32 ) +//OPCODE(Eor64, U64, U64, U64 ) +//OPCODE(Or32, U32, U32, U32 ) +//OPCODE(Or64, U64, U64, U64 ) +//OPCODE(Not32, U32, U32 ) +//OPCODE(Not64, U64, U64 ) +//OPCODE(SignExtendByteToWord, U32, U8 ) +//OPCODE(SignExtendHalfToWord, U32, U16 ) +//OPCODE(SignExtendByteToLong, U64, U8 ) +//OPCODE(SignExtendHalfToLong, U64, U16 ) +//OPCODE(SignExtendWordToLong, U64, U32 ) +//OPCODE(ZeroExtendByteToWord, U32, U8 ) +//OPCODE(ZeroExtendHalfToWord, U32, U16 ) +//OPCODE(ZeroExtendByteToLong, U64, U8 ) +//OPCODE(ZeroExtendHalfToLong, U64, U16 ) +//OPCODE(ZeroExtendWordToLong, U64, U32 ) +//OPCODE(ZeroExtendLongToQuad, U128, U64 ) +//OPCODE(ByteReverseWord, U32, U32 ) +//OPCODE(ByteReverseHalf, U16, U16 ) +////OPCODE(ByteReverseDual, U64, U64 ) +//OPCODE(CountLeadingZeros32, U32, U32 ) +//OPCODE(CountLeadingZeros64, U64, U64 ) +//OPCODE(ExtractRegister32, U32, U32, U32, U8 ) +//OPCODE(ExtractRegister64, U64, U64, U64, U8 ) +//OPCODE(MaxSigned32, U32, U32, U32 ) +//OPCODE(MaxSigned64, U64, U64, U64 ) +//OPCODE(MaxUnsigned32, U32, U32, U32 ) +//OPCODE(MaxUnsigned64, U64, U64, U64 ) +//OPCODE(MinSigned32, U32, U32, U32 ) +//OPCODE(MinSigned64, U64, U64, U64 ) +//OPCODE(MinUnsigned32, U32, U32, U32 ) +//OPCODE(MinUnsigned64, U64, U64, U64 ) + +// Saturated instructions +//OPCODE(SignedSaturatedAdd8, U8, U8, U8 ) +//OPCODE(SignedSaturatedAdd16, U16, U16, U16 ) +//OPCODE(SignedSaturatedAdd32, U32, U32, U32 ) +//OPCODE(SignedSaturatedAdd64, U64, U64, U64 ) +//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 ) +//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 ) +//OPCODE(SignedSaturatedSub8, U8, U8, U8 ) +//OPCODE(SignedSaturatedSub16, U16, U16, U16 ) +//OPCODE(SignedSaturatedSub32, U32, U32, U32 ) +//OPCODE(SignedSaturatedSub64, U64, U64, U64 ) +//OPCODE(SignedSaturation, U32, U32, U8 ) +//OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 ) +//OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 ) +//OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 ) +//OPCODE(UnsignedSaturatedAdd64, U64, U64, U64 ) +//OPCODE(UnsignedSaturatedSub8, U8, U8, U8 ) +//OPCODE(UnsignedSaturatedSub16, U16, U16, U16 ) +//OPCODE(UnsignedSaturatedSub32, U32, U32, U32 ) +//OPCODE(UnsignedSaturatedSub64, U64, U64, U64 ) +//OPCODE(UnsignedSaturation, U32, U32, U8 ) + +// Packed instructions +//OPCODE(PackedAddU8, U32, U32, U32 ) +//OPCODE(PackedAddS8, U32, U32, U32 ) +//OPCODE(PackedSubU8, U32, U32, U32 ) +//OPCODE(PackedSubS8, U32, U32, U32 ) +//OPCODE(PackedAddU16, U32, U32, U32 ) +//OPCODE(PackedAddS16, U32, U32, U32 ) +//OPCODE(PackedSubU16, U32, U32, U32 ) +//OPCODE(PackedSubS16, U32, U32, U32 ) +//OPCODE(PackedAddSubU16, U32, U32, U32 ) +//OPCODE(PackedAddSubS16, U32, U32, U32 ) +//OPCODE(PackedSubAddU16, U32, U32, U32 ) +//OPCODE(PackedSubAddS16, U32, U32, U32 ) +//OPCODE(PackedHalvingAddU8, U32, U32, U32 ) 
+//OPCODE(PackedHalvingAddS8, U32, U32, U32 ) +//OPCODE(PackedHalvingSubU8, U32, U32, U32 ) +//OPCODE(PackedHalvingSubS8, U32, U32, U32 ) +//OPCODE(PackedHalvingAddU16, U32, U32, U32 ) +//OPCODE(PackedHalvingAddS16, U32, U32, U32 ) +//OPCODE(PackedHalvingSubU16, U32, U32, U32 ) +//OPCODE(PackedHalvingSubS16, U32, U32, U32 ) +//OPCODE(PackedHalvingAddSubU16, U32, U32, U32 ) +//OPCODE(PackedHalvingAddSubS16, U32, U32, U32 ) +//OPCODE(PackedHalvingSubAddU16, U32, U32, U32 ) +//OPCODE(PackedHalvingSubAddS16, U32, U32, U32 ) +//OPCODE(PackedSaturatedAddU8, U32, U32, U32 ) +//OPCODE(PackedSaturatedAddS8, U32, U32, U32 ) +//OPCODE(PackedSaturatedSubU8, U32, U32, U32 ) +//OPCODE(PackedSaturatedSubS8, U32, U32, U32 ) +//OPCODE(PackedSaturatedAddU16, U32, U32, U32 ) +//OPCODE(PackedSaturatedAddS16, U32, U32, U32 ) +//OPCODE(PackedSaturatedSubU16, U32, U32, U32 ) +//OPCODE(PackedSaturatedSubS16, U32, U32, U32 ) +//OPCODE(PackedAbsDiffSumS8, U32, U32, U32 ) +//OPCODE(PackedSelect, U32, U32, U32, U32 ) + +// CRC instructions +//OPCODE(CRC32Castagnoli8, U32, U32, U32 ) +//OPCODE(CRC32Castagnoli16, U32, U32, U32 ) +//OPCODE(CRC32Castagnoli32, U32, U32, U32 ) +//OPCODE(CRC32Castagnoli64, U32, U32, U64 ) +//OPCODE(CRC32ISO8, U32, U32, U32 ) +//OPCODE(CRC32ISO16, U32, U32, U32 ) +//OPCODE(CRC32ISO32, U32, U32, U32 ) +//OPCODE(CRC32ISO64, U32, U32, U64 ) + +// AES instructions +//OPCODE(AESDecryptSingleRound, U128, U128 ) +//OPCODE(AESEncryptSingleRound, U128, U128 ) +//OPCODE(AESInverseMixColumns, U128, U128 ) +//OPCODE(AESMixColumns, U128, U128 ) + +// SM4 instructions +//OPCODE(SM4AccessSubstitutionBox, U8, U8 ) + +// Vector instructions +//OPCODE(VectorGetElement8, U8, U128, U8 ) +//OPCODE(VectorGetElement16, U16, U128, U8 ) +//OPCODE(VectorGetElement32, U32, U128, U8 ) +//OPCODE(VectorGetElement64, U64, U128, U8 ) +//OPCODE(VectorSetElement8, U128, U128, U8, U8 ) +//OPCODE(VectorSetElement16, U128, U128, U8, U16 ) +//OPCODE(VectorSetElement32, U128, U128, U8, U32 ) +//OPCODE(VectorSetElement64, U128, U128, U8, U64 ) +//OPCODE(VectorAbs8, U128, U128 ) +//OPCODE(VectorAbs16, U128, U128 ) +//OPCODE(VectorAbs32, U128, U128 ) +//OPCODE(VectorAbs64, U128, U128 ) +//OPCODE(VectorAdd8, U128, U128, U128 ) +//OPCODE(VectorAdd16, U128, U128, U128 ) +//OPCODE(VectorAdd32, U128, U128, U128 ) +//OPCODE(VectorAdd64, U128, U128, U128 ) +//OPCODE(VectorAnd, U128, U128, U128 ) +//OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 ) +//OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 ) +//OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 ) +//OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 ) +//OPCODE(VectorArithmeticVShift8, U128, U128, U128 ) +//OPCODE(VectorArithmeticVShift16, U128, U128, U128 ) +//OPCODE(VectorArithmeticVShift32, U128, U128, U128 ) +//OPCODE(VectorArithmeticVShift64, U128, U128, U128 ) +//OPCODE(VectorBroadcastLower8, U128, U8 ) +//OPCODE(VectorBroadcastLower16, U128, U16 ) +//OPCODE(VectorBroadcastLower32, U128, U32 ) +//OPCODE(VectorBroadcast8, U128, U8 ) +//OPCODE(VectorBroadcast16, U128, U16 ) +//OPCODE(VectorBroadcast32, U128, U32 ) +//OPCODE(VectorBroadcast64, U128, U64 ) +//OPCODE(VectorCountLeadingZeros8, U128, U128 ) +//OPCODE(VectorCountLeadingZeros16, U128, U128 ) +//OPCODE(VectorCountLeadingZeros32, U128, U128 ) +//OPCODE(VectorDeinterleaveEven8, U128, U128, U128 ) +//OPCODE(VectorDeinterleaveEven16, U128, U128, U128 ) +//OPCODE(VectorDeinterleaveEven32, U128, U128, U128 ) +//OPCODE(VectorDeinterleaveEven64, U128, U128, U128 ) +//OPCODE(VectorDeinterleaveOdd8, U128, U128, U128 ) 
+//OPCODE(VectorDeinterleaveOdd16, U128, U128, U128 ) +//OPCODE(VectorDeinterleaveOdd32, U128, U128, U128 ) +//OPCODE(VectorDeinterleaveOdd64, U128, U128, U128 ) +//OPCODE(VectorEor, U128, U128, U128 ) +//OPCODE(VectorEqual8, U128, U128, U128 ) +//OPCODE(VectorEqual16, U128, U128, U128 ) +//OPCODE(VectorEqual32, U128, U128, U128 ) +//OPCODE(VectorEqual64, U128, U128, U128 ) +//OPCODE(VectorEqual128, U128, U128, U128 ) +//OPCODE(VectorExtract, U128, U128, U128, U8 ) +//OPCODE(VectorExtractLower, U128, U128, U128, U8 ) +//OPCODE(VectorGreaterS8, U128, U128, U128 ) +//OPCODE(VectorGreaterS16, U128, U128, U128 ) +//OPCODE(VectorGreaterS32, U128, U128, U128 ) +//OPCODE(VectorGreaterS64, U128, U128, U128 ) +//OPCODE(VectorHalvingAddS8, U128, U128, U128 ) +//OPCODE(VectorHalvingAddS16, U128, U128, U128 ) +//OPCODE(VectorHalvingAddS32, U128, U128, U128 ) +//OPCODE(VectorHalvingAddU8, U128, U128, U128 ) +//OPCODE(VectorHalvingAddU16, U128, U128, U128 ) +//OPCODE(VectorHalvingAddU32, U128, U128, U128 ) +//OPCODE(VectorHalvingSubS8, U128, U128, U128 ) +//OPCODE(VectorHalvingSubS16, U128, U128, U128 ) +//OPCODE(VectorHalvingSubS32, U128, U128, U128 ) +//OPCODE(VectorHalvingSubU8, U128, U128, U128 ) +//OPCODE(VectorHalvingSubU16, U128, U128, U128 ) +//OPCODE(VectorHalvingSubU32, U128, U128, U128 ) +//OPCODE(VectorInterleaveLower8, U128, U128, U128 ) +//OPCODE(VectorInterleaveLower16, U128, U128, U128 ) +//OPCODE(VectorInterleaveLower32, U128, U128, U128 ) +//OPCODE(VectorInterleaveLower64, U128, U128, U128 ) +//OPCODE(VectorInterleaveUpper8, U128, U128, U128 ) +//OPCODE(VectorInterleaveUpper16, U128, U128, U128 ) +//OPCODE(VectorInterleaveUpper32, U128, U128, U128 ) +//OPCODE(VectorInterleaveUpper64, U128, U128, U128 ) +//OPCODE(VectorLogicalShiftLeft8, U128, U128, U8 ) +//OPCODE(VectorLogicalShiftLeft16, U128, U128, U8 ) +//OPCODE(VectorLogicalShiftLeft32, U128, U128, U8 ) +//OPCODE(VectorLogicalShiftLeft64, U128, U128, U8 ) +//OPCODE(VectorLogicalShiftRight8, U128, U128, U8 ) +//OPCODE(VectorLogicalShiftRight16, U128, U128, U8 ) +//OPCODE(VectorLogicalShiftRight32, U128, U128, U8 ) +//OPCODE(VectorLogicalShiftRight64, U128, U128, U8 ) +//OPCODE(VectorLogicalVShift8, U128, U128, U128 ) +//OPCODE(VectorLogicalVShift16, U128, U128, U128 ) +//OPCODE(VectorLogicalVShift32, U128, U128, U128 ) +//OPCODE(VectorLogicalVShift64, U128, U128, U128 ) +//OPCODE(VectorMaxS8, U128, U128, U128 ) +//OPCODE(VectorMaxS16, U128, U128, U128 ) +//OPCODE(VectorMaxS32, U128, U128, U128 ) +//OPCODE(VectorMaxS64, U128, U128, U128 ) +//OPCODE(VectorMaxU8, U128, U128, U128 ) +//OPCODE(VectorMaxU16, U128, U128, U128 ) +//OPCODE(VectorMaxU32, U128, U128, U128 ) +//OPCODE(VectorMaxU64, U128, U128, U128 ) +//OPCODE(VectorMinS8, U128, U128, U128 ) +//OPCODE(VectorMinS16, U128, U128, U128 ) +//OPCODE(VectorMinS32, U128, U128, U128 ) +//OPCODE(VectorMinS64, U128, U128, U128 ) +//OPCODE(VectorMinU8, U128, U128, U128 ) +//OPCODE(VectorMinU16, U128, U128, U128 ) +//OPCODE(VectorMinU32, U128, U128, U128 ) +//OPCODE(VectorMinU64, U128, U128, U128 ) +//OPCODE(VectorMultiply8, U128, U128, U128 ) +//OPCODE(VectorMultiply16, U128, U128, U128 ) +//OPCODE(VectorMultiply32, U128, U128, U128 ) +//OPCODE(VectorMultiply64, U128, U128, U128 ) +//OPCODE(VectorNarrow16, U128, U128 ) +//OPCODE(VectorNarrow32, U128, U128 ) +//OPCODE(VectorNarrow64, U128, U128 ) +//OPCODE(VectorNot, U128, U128 ) +//OPCODE(VectorOr, U128, U128, U128 ) +//OPCODE(VectorPairedAddLower8, U128, U128, U128 ) +//OPCODE(VectorPairedAddLower16, U128, U128, U128 ) 
+//OPCODE(VectorPairedAddLower32, U128, U128, U128 ) +//OPCODE(VectorPairedAddSignedWiden8, U128, U128 ) +//OPCODE(VectorPairedAddSignedWiden16, U128, U128 ) +//OPCODE(VectorPairedAddSignedWiden32, U128, U128 ) +//OPCODE(VectorPairedAddUnsignedWiden8, U128, U128 ) +//OPCODE(VectorPairedAddUnsignedWiden16, U128, U128 ) +//OPCODE(VectorPairedAddUnsignedWiden32, U128, U128 ) +//OPCODE(VectorPairedAdd8, U128, U128, U128 ) +//OPCODE(VectorPairedAdd16, U128, U128, U128 ) +//OPCODE(VectorPairedAdd32, U128, U128, U128 ) +//OPCODE(VectorPairedAdd64, U128, U128, U128 ) +//OPCODE(VectorPairedMaxS8, U128, U128, U128 ) +//OPCODE(VectorPairedMaxS16, U128, U128, U128 ) +//OPCODE(VectorPairedMaxS32, U128, U128, U128 ) +//OPCODE(VectorPairedMaxU8, U128, U128, U128 ) +//OPCODE(VectorPairedMaxU16, U128, U128, U128 ) +//OPCODE(VectorPairedMaxU32, U128, U128, U128 ) +//OPCODE(VectorPairedMinS8, U128, U128, U128 ) +//OPCODE(VectorPairedMinS16, U128, U128, U128 ) +//OPCODE(VectorPairedMinS32, U128, U128, U128 ) +//OPCODE(VectorPairedMinU8, U128, U128, U128 ) +//OPCODE(VectorPairedMinU16, U128, U128, U128 ) +//OPCODE(VectorPairedMinU32, U128, U128, U128 ) +//OPCODE(VectorPolynomialMultiply8, U128, U128, U128 ) +//OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 ) +//OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 ) +//OPCODE(VectorPopulationCount, U128, U128 ) +//OPCODE(VectorReverseBits, U128, U128 ) +//OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 ) +//OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 ) +//OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 ) +//OPCODE(VectorRoundingHalvingAddU8, U128, U128, U128 ) +//OPCODE(VectorRoundingHalvingAddU16, U128, U128, U128 ) +//OPCODE(VectorRoundingHalvingAddU32, U128, U128, U128 ) +//OPCODE(VectorRoundingShiftLeftS8, U128, U128, U128 ) +//OPCODE(VectorRoundingShiftLeftS16, U128, U128, U128 ) +//OPCODE(VectorRoundingShiftLeftS32, U128, U128, U128 ) +//OPCODE(VectorRoundingShiftLeftS64, U128, U128, U128 ) +//OPCODE(VectorRoundingShiftLeftU8, U128, U128, U128 ) +//OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 ) +//OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 ) +//OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 ) +//OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 ) +//OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 ) +//OPCODE(VectorShuffleWords, U128, U128, U8 ) +//OPCODE(VectorSignExtend8, U128, U128 ) +//OPCODE(VectorSignExtend16, U128, U128 ) +//OPCODE(VectorSignExtend32, U128, U128 ) +//OPCODE(VectorSignExtend64, U128, U128 ) +//OPCODE(VectorSignedAbsoluteDifference8, U128, U128, U128 ) +//OPCODE(VectorSignedAbsoluteDifference16, U128, U128, U128 ) +//OPCODE(VectorSignedAbsoluteDifference32, U128, U128, U128 ) +//OPCODE(VectorSignedMultiply16, Void, U128, U128 ) +//OPCODE(VectorSignedMultiply32, Void, U128, U128 ) +//OPCODE(VectorSignedSaturatedAbs8, U128, U128 ) +//OPCODE(VectorSignedSaturatedAbs16, U128, U128 ) +//OPCODE(VectorSignedSaturatedAbs32, U128, U128 ) +//OPCODE(VectorSignedSaturatedAbs64, U128, U128 ) +//OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 ) +//OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 ) +//OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 ) 
+//OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 ) +//OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 ) +//OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 ) +//OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 ) +//OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 ) +//OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 ) +//OPCODE(VectorSignedSaturatedNeg8, U128, U128 ) +//OPCODE(VectorSignedSaturatedNeg16, U128, U128 ) +//OPCODE(VectorSignedSaturatedNeg32, U128, U128 ) +//OPCODE(VectorSignedSaturatedNeg64, U128, U128 ) +//OPCODE(VectorSignedSaturatedShiftLeft8, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedShiftLeft16, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedShiftLeft32, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedShiftLeft64, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedShiftLeftUnsigned8, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedShiftLeftUnsigned16, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedShiftLeftUnsigned32, U128, U128, U128 ) +//OPCODE(VectorSignedSaturatedShiftLeftUnsigned64, U128, U128, U128 ) +//OPCODE(VectorSub8, U128, U128, U128 ) +//OPCODE(VectorSub16, U128, U128, U128 ) +//OPCODE(VectorSub32, U128, U128, U128 ) +//OPCODE(VectorSub64, U128, U128, U128 ) +//OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque ) +//OPCODE(VectorTableLookup, U128, U128, Table, U128 ) +//OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 ) +//OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 ) +//OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 ) +//OPCODE(VectorUnsignedMultiply16, Void, U128, U128 ) +//OPCODE(VectorUnsignedMultiply32, Void, U128, U128 ) +//OPCODE(VectorUnsignedRecipEstimate, U128, U128 ) +//OPCODE(VectorUnsignedRecipSqrtEstimate, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedAccumulateSigned8, U128, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedAccumulateSigned16, U128, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedAccumulateSigned32, U128, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedAccumulateSigned64, U128, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedShiftLeft8, U128, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedShiftLeft16, U128, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedShiftLeft32, U128, U128, U128 ) +//OPCODE(VectorUnsignedSaturatedShiftLeft64, U128, U128, U128 ) +//OPCODE(VectorZeroExtend8, U128, U128 ) +//OPCODE(VectorZeroExtend16, U128, U128 ) +//OPCODE(VectorZeroExtend32, U128, U128 ) +//OPCODE(VectorZeroExtend64, U128, U128 ) +//OPCODE(VectorZeroUpper, U128, U128 ) +//OPCODE(ZeroVector, U128, ) + +// Floating-point operations +//OPCODE(FPAbs16, U16, U16 ) +//OPCODE(FPAbs32, U32, U32 ) +//OPCODE(FPAbs64, U64, U64 ) +//OPCODE(FPAdd32, U32, U32, U32 ) +//OPCODE(FPAdd64, U64, U64, U64 ) +//OPCODE(FPCompare32, NZCV, U32, U32, U1 ) +//OPCODE(FPCompare64, NZCV, U64, U64, U1 ) +//OPCODE(FPDiv32, U32, U32, U32 ) +//OPCODE(FPDiv64, U64, U64, U64 ) +//OPCODE(FPMax32, U32, U32, U32 ) +//OPCODE(FPMax64, U64, U64, U64 ) +//OPCODE(FPMaxNumeric32, U32, U32, U32 ) +//OPCODE(FPMaxNumeric64, U64, U64, U64 ) +//OPCODE(FPMin32, U32, U32, U32 ) +//OPCODE(FPMin64, U64, U64, U64 ) +//OPCODE(FPMinNumeric32, U32, U32, U32 ) +//OPCODE(FPMinNumeric64, U64, U64, U64 ) +//OPCODE(FPMul32, U32, U32, U32 ) +//OPCODE(FPMul64, 
U64, U64, U64 ) +//OPCODE(FPMulAdd16, U16, U16, U16, U16 ) +//OPCODE(FPMulAdd32, U32, U32, U32, U32 ) +//OPCODE(FPMulAdd64, U64, U64, U64, U64 ) +//OPCODE(FPMulX32, U32, U32, U32 ) +//OPCODE(FPMulX64, U64, U64, U64 ) +//OPCODE(FPNeg16, U16, U16 ) +//OPCODE(FPNeg32, U32, U32 ) +//OPCODE(FPNeg64, U64, U64 ) +//OPCODE(FPRecipEstimate16, U16, U16 ) +//OPCODE(FPRecipEstimate32, U32, U32 ) +//OPCODE(FPRecipEstimate64, U64, U64 ) +//OPCODE(FPRecipExponent16, U16, U16 ) +//OPCODE(FPRecipExponent32, U32, U32 ) +//OPCODE(FPRecipExponent64, U64, U64 ) +//OPCODE(FPRecipStepFused16, U16, U16, U16 ) +//OPCODE(FPRecipStepFused32, U32, U32, U32 ) +//OPCODE(FPRecipStepFused64, U64, U64, U64 ) +//OPCODE(FPRoundInt16, U16, U16, U8, U1 ) +//OPCODE(FPRoundInt32, U32, U32, U8, U1 ) +//OPCODE(FPRoundInt64, U64, U64, U8, U1 ) +//OPCODE(FPRSqrtEstimate16, U16, U16 ) +//OPCODE(FPRSqrtEstimate32, U32, U32 ) +//OPCODE(FPRSqrtEstimate64, U64, U64 ) +//OPCODE(FPRSqrtStepFused16, U16, U16, U16 ) +//OPCODE(FPRSqrtStepFused32, U32, U32, U32 ) +//OPCODE(FPRSqrtStepFused64, U64, U64, U64 ) +//OPCODE(FPSqrt32, U32, U32 ) +//OPCODE(FPSqrt64, U64, U64 ) +//OPCODE(FPSub32, U32, U32, U32 ) +//OPCODE(FPSub64, U64, U64, U64 ) + +// Floating-point conversions +//OPCODE(FPHalfToDouble, U64, U16, U8 ) +//OPCODE(FPHalfToSingle, U32, U16, U8 ) +//OPCODE(FPSingleToDouble, U64, U32, U8 ) +//OPCODE(FPSingleToHalf, U16, U32, U8 ) +//OPCODE(FPDoubleToHalf, U16, U64, U8 ) +//OPCODE(FPDoubleToSingle, U32, U64, U8 ) +//OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 ) +//OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 ) +//OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 ) +//OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 ) +//OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 ) +//OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 ) +//OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 ) +//OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 ) +//OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 ) +//OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 ) +//OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 ) +//OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 ) +//OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 ) +//OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 ) +//OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 ) +//OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 ) +//OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 ) +//OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 ) +//OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 ) +//OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 ) + +// Floating-point vector instructions +//OPCODE(FPVectorAbs16, U128, U128 ) +//OPCODE(FPVectorAbs32, U128, U128 ) +//OPCODE(FPVectorAbs64, U128, U128 ) +//OPCODE(FPVectorAdd32, U128, U128, U128 ) +//OPCODE(FPVectorAdd64, U128, U128, U128 ) +//OPCODE(FPVectorDiv32, U128, U128, U128 ) +//OPCODE(FPVectorDiv64, U128, U128, U128 ) +//OPCODE(FPVectorEqual32, U128, U128, U128 ) +//OPCODE(FPVectorEqual64, U128, U128, U128 ) +//OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 ) +//OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 ) +//OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 ) +//OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 ) +//OPCODE(FPVectorGreater32, U128, U128, U128 ) +//OPCODE(FPVectorGreater64, U128, U128, U128 ) +//OPCODE(FPVectorGreaterEqual32, U128, U128, U128 ) +//OPCODE(FPVectorGreaterEqual64, U128, U128, U128 ) +//OPCODE(FPVectorMax32, U128, U128, U128 ) +//OPCODE(FPVectorMax64, U128, U128, U128 ) +//OPCODE(FPVectorMin32, U128, U128, U128 ) +//OPCODE(FPVectorMin64, U128, U128, U128 ) +//OPCODE(FPVectorMul32, U128, U128, U128 ) 
+//OPCODE(FPVectorMul64, U128, U128, U128 ) +//OPCODE(FPVectorMulAdd16, U128, U128, U128, U128 ) +//OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 ) +//OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 ) +//OPCODE(FPVectorMulX32, U128, U128, U128 ) +//OPCODE(FPVectorMulX64, U128, U128, U128 ) +//OPCODE(FPVectorNeg16, U128, U128 ) +//OPCODE(FPVectorNeg32, U128, U128 ) +//OPCODE(FPVectorNeg64, U128, U128 ) +//OPCODE(FPVectorPairedAdd32, U128, U128, U128 ) +//OPCODE(FPVectorPairedAdd64, U128, U128, U128 ) +//OPCODE(FPVectorPairedAddLower32, U128, U128, U128 ) +//OPCODE(FPVectorPairedAddLower64, U128, U128, U128 ) +//OPCODE(FPVectorRecipEstimate16, U128, U128 ) +//OPCODE(FPVectorRecipEstimate32, U128, U128 ) +//OPCODE(FPVectorRecipEstimate64, U128, U128 ) +//OPCODE(FPVectorRecipStepFused16, U128, U128, U128 ) +//OPCODE(FPVectorRecipStepFused32, U128, U128, U128 ) +//OPCODE(FPVectorRecipStepFused64, U128, U128, U128 ) +//OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 ) +//OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 ) +//OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 ) +//OPCODE(FPVectorRSqrtEstimate16, U128, U128 ) +//OPCODE(FPVectorRSqrtEstimate32, U128, U128 ) +//OPCODE(FPVectorRSqrtEstimate64, U128, U128 ) +//OPCODE(FPVectorRSqrtStepFused16, U128, U128, U128 ) +//OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 ) +//OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 ) +//OPCODE(FPVectorSqrt32, U128, U128 ) +//OPCODE(FPVectorSqrt64, U128, U128 ) +//OPCODE(FPVectorSub32, U128, U128, U128 ) +//OPCODE(FPVectorSub64, U128, U128, U128 ) +//OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8 ) +//OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 ) +//OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 ) +//OPCODE(FPVectorToUnsignedFixed16, U128, U128, U8, U8 ) +//OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 ) +//OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 ) + +// A32 Memory access +A32OPC(ClearExclusive, Void, ) +A32OPC(SetExclusive, Void, U32, U8 ) +A32OPC(ReadMemory8, U8, U32 ) +A32OPC(ReadMemory16, U16, U32 ) +A32OPC(ReadMemory32, U32, U32 ) +A32OPC(ReadMemory64, U64, U32 ) +A32OPC(WriteMemory8, Void, U32, U8 ) +A32OPC(WriteMemory16, Void, U32, U16 ) +A32OPC(WriteMemory32, Void, U32, U32 ) +A32OPC(WriteMemory64, Void, U32, U64 ) +A32OPC(ExclusiveWriteMemory8, U32, U32, U8 ) +A32OPC(ExclusiveWriteMemory16, U32, U32, U16 ) +A32OPC(ExclusiveWriteMemory32, U32, U32, U32 ) +A32OPC(ExclusiveWriteMemory64, U32, U32, U32, U32 ) + +// A64 Memory access +//A64OPC(ClearExclusive, Void, ) +//A64OPC(SetExclusive, Void, U64, U8 ) +//A64OPC(ReadMemory8, U8, U64 ) +//A64OPC(ReadMemory16, U16, U64 ) +//A64OPC(ReadMemory32, U32, U64 ) +//A64OPC(ReadMemory64, U64, U64 ) +//A64OPC(ReadMemory128, U128, U64 ) +//A64OPC(WriteMemory8, Void, U64, U8 ) +//A64OPC(WriteMemory16, Void, U64, U16 ) +//A64OPC(WriteMemory32, Void, U64, U32 ) +//A64OPC(WriteMemory64, Void, U64, U64 ) +//A64OPC(WriteMemory128, Void, U64, U128 ) +//A64OPC(ExclusiveWriteMemory8, U32, U64, U8 ) +//A64OPC(ExclusiveWriteMemory16, U32, U64, U16 ) +//A64OPC(ExclusiveWriteMemory32, U32, U64, U32 ) +//A64OPC(ExclusiveWriteMemory64, U32, U64, U64 ) +//A64OPC(ExclusiveWriteMemory128, U32, U64, U128 ) + +// Coprocessor +//A32OPC(CoprocInternalOperation, Void, CoprocInfo ) +//A32OPC(CoprocSendOneWord, Void, CoprocInfo, U32 ) +//A32OPC(CoprocSendTwoWords, Void, CoprocInfo, U32, U32 ) +//A32OPC(CoprocGetOneWord, U32, CoprocInfo ) +//A32OPC(CoprocGetTwoWords, U64, CoprocInfo ) +//A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 ) 
+//A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )
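For reference, opcodes.inc is an X-macro table: every row gives an opcode name, its return type, and its argument types, and each consumer defines OPCODE/A32OPC/A64OPC to select and transform only the rows it cares about before including the file (the EmitA32##name declarations in the header above are one such expansion). Below is a minimal, self-contained sketch of that convention; the three sample rows are copied from the table, but the inline SAMPLE_OPCODE_LIST macro and the enum/name-table expansions are illustrative stand-ins, not the project's actual definitions.

#include <cstdio>

// Stand-in for a tiny slice of opcodes.inc. The real consumers instead
// #include an opcodes.inc file after defining OPCODE/A32OPC/A64OPC.
#define SAMPLE_OPCODE_LIST                     \
    OPCODE(Void, Void, )                       \
    A32OPC(GetRegister, U32, A32Reg )          \
    A32OPC(SetRegister, Void, A32Reg, U32 )

// Expansion 1: an enum of opcode names.
enum class SampleOpcode {
#define OPCODE(name, type, ...) name,
#define A32OPC(name, type, ...) A32##name,
    SAMPLE_OPCODE_LIST
#undef OPCODE
#undef A32OPC
};

// Expansion 2: a printable name table built from the same rows.
static const char* const sample_opcode_names[] = {
#define OPCODE(name, type, ...) #name,
#define A32OPC(name, type, ...) "A32" #name,
    SAMPLE_OPCODE_LIST
#undef OPCODE
#undef A32OPC
};

int main() {
    for (const char* name : sample_opcode_names)
        std::printf("%s\n", name); // Void, A32GetRegister, A32SetRegister
}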