diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 56149d43..b433391c 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -118,85 +118,96 @@ void EmitX64::EmitBreakpoint(RegAlloc&, IR::Block&, IR::Inst*) { } void EmitX64::EmitIdentity(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - if (!inst->GetArg(0).IsImmediate()) { - reg_alloc.RegisterAddDef(inst, inst->GetArg(0)); + auto args = reg_alloc.GetArgumentInfo(inst); + if (!args[0].IsImmediate()) { + reg_alloc.DefineValue(inst, args[0]); } } void EmitX64::EmitGetRegister(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Arm::Reg reg = inst->GetArg(0).GetRegRef(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, MJitStateReg(reg)); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitGetExtendedRegister32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); ASSERT(Arm::IsSingleExtReg(reg)); - Xbyak::Xmm result = reg_alloc.DefXmm(inst); + Xbyak::Xmm result = reg_alloc.ScratchXmm(); code->movss(result, MJitStateExtReg(reg)); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitGetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); ASSERT(Arm::IsDoubleExtReg(reg)); - Xbyak::Xmm result = reg_alloc.DefXmm(inst); + + Xbyak::Xmm result = reg_alloc.ScratchXmm(); code->movsd(result, MJitStateExtReg(reg)); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetRegister(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); Arm::Reg reg = inst->GetArg(0).GetRegRef(); - IR::Value arg = inst->GetArg(1); - if (arg.IsImmediate()) { - code->mov(MJitStateReg(reg), arg.GetU32()); + if (args[1].IsImmediate()) { + code->mov(MJitStateReg(reg), args[1].GetImmediateU32()); } else { - Xbyak::Reg32 to_store = reg_alloc.UseGpr(arg).cvt32(); + Xbyak::Reg32 to_store = reg_alloc.UseGpr(args[1]).cvt32(); code->mov(MJitStateReg(reg), to_store); } } void EmitX64::EmitSetExtendedRegister32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); ASSERT(Arm::IsSingleExtReg(reg)); - Xbyak::Xmm source = reg_alloc.UseXmm(inst->GetArg(1)); + Xbyak::Xmm source = reg_alloc.UseXmm(args[1]); code->movss(MJitStateExtReg(reg), source); } void EmitX64::EmitSetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); ASSERT(Arm::IsDoubleExtReg(reg)); - Xbyak::Xmm source = reg_alloc.UseXmm(inst->GetArg(1)); + Xbyak::Xmm source = reg_alloc.UseXmm(args[1]); code->movsd(MJitStateExtReg(reg), source); } void EmitX64::EmitGetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, MJitStateCpsr()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 arg = reg_alloc.UseGpr(inst->GetArg(0)).cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 arg = reg_alloc.UseGpr(args[0]).cvt32(); code->mov(MJitStateCpsr(), arg); } void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + 
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, MJitStateCpsr()); code->shr(result, 31); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { constexpr size_t flag_bit = 31; constexpr u32 flag_mask = 1u << flag_bit; - IR::Value arg = inst->GetArg(0); - if (arg.IsImmediate()) { - if (arg.GetU1()) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { code->or_(MJitStateCpsr(), flag_mask); } else { code->and_(MJitStateCpsr(), ~flag_mask); } } else { - Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); code->and_(MJitStateCpsr(), ~flag_mask); @@ -205,24 +216,25 @@ void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, MJitStateCpsr()); code->shr(result, 30); code->and_(result, 1); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { constexpr size_t flag_bit = 30; constexpr u32 flag_mask = 1u << flag_bit; - IR::Value arg = inst->GetArg(0); - if (arg.IsImmediate()) { - if (arg.GetU1()) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { code->or_(MJitStateCpsr(), flag_mask); } else { code->and_(MJitStateCpsr(), ~flag_mask); } } else { - Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); code->and_(MJitStateCpsr(), ~flag_mask); @@ -231,24 +243,25 @@ void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, MJitStateCpsr()); code->shr(result, 29); code->and_(result, 1); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { constexpr size_t flag_bit = 29; constexpr u32 flag_mask = 1u << flag_bit; - IR::Value arg = inst->GetArg(0); - if (arg.IsImmediate()) { - if (arg.GetU1()) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { code->or_(MJitStateCpsr(), flag_mask); } else { code->and_(MJitStateCpsr(), ~flag_mask); } } else { - Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); code->and_(MJitStateCpsr(), ~flag_mask); @@ -257,24 +270,25 @@ void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, MJitStateCpsr()); code->shr(result, 28); code->and_(result, 1); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { constexpr size_t flag_bit = 28; constexpr u32 flag_mask = 1u << flag_bit; - IR::Value arg = inst->GetArg(0); - if 
(arg.IsImmediate()) { - if (arg.GetU1()) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { code->or_(MJitStateCpsr(), flag_mask); } else { code->and_(MJitStateCpsr(), ~flag_mask); } } else { - Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); code->and_(MJitStateCpsr(), ~flag_mask); @@ -285,12 +299,12 @@ void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { constexpr size_t flag_bit = 27; constexpr u32 flag_mask = 1u << flag_bit; - IR::Value arg = inst->GetArg(0); - if (arg.IsImmediate()) { - if (arg.GetU1()) + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) code->or_(MJitStateCpsr(), flag_mask); } else { - Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); code->or_(MJitStateCpsr(), to_store); @@ -298,22 +312,23 @@ void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, MJitStateCpsr()); code->shr(result, 16); code->and_(result, 0xF); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { constexpr size_t flag_bit = 16; constexpr u32 flag_mask = 0xFu << flag_bit; - IR::Value arg = inst->GetArg(0); - if (arg.IsImmediate()) { - u32 imm = (arg.GetU32() << flag_bit) & flag_mask; + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = (args[0].GetImmediateU32() << flag_bit) & flag_mask; code->and_(MJitStateCpsr(), ~flag_mask); code->or_(MJitStateCpsr(), imm); } else { - Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); code->and_(to_store, flag_mask); @@ -323,8 +338,10 @@ void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); + auto& arg = args[0]; + const u32 T_bit = 1 << 5; - auto arg = inst->GetArg(0); // Pseudocode: // if (new_pc & 1) { @@ -336,7 +353,7 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { // } if (arg.IsImmediate()) { - u32 new_pc = arg.GetU32(); + u32 new_pc = arg.GetImmediateU32(); if (Common::Bit<0>(new_pc)) { new_pc &= 0xFFFFFFFE; code->mov(MJitStateReg(Arm::Reg::PC), new_pc); @@ -368,9 +385,8 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitCallSupervisor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto imm32 = inst->GetArg(0); - - reg_alloc.HostCall(nullptr, imm32); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.HostCall(nullptr, args[0]); code->SwitchMxcsrOnExit(); code->CallFunction(cb.CallSVC); @@ -395,9 +411,8 @@ static void SetFpscrImpl(u32 value, JitState* jit_state) { } void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto a = inst->GetArg(0); - - reg_alloc.HostCall(nullptr, a); + auto args = 
reg_alloc.GetArgumentInfo(inst); + reg_alloc.HostCall(nullptr, args[0]); code->mov(code->ABI_PARAM2, code->r15); code->SwitchMxcsrOnExit(); @@ -408,15 +423,16 @@ void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { void EmitX64::EmitGetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { using namespace Xbyak::util; - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, dword[r15 + offsetof(JitState, FPSCR_nzcv)]); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { using namespace Xbyak::util; - Xbyak::Reg32 value = reg_alloc.UseGpr(inst->GetArg(0)).cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 value = reg_alloc.UseGpr(args[0]).cvt32(); code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], value); } @@ -424,8 +440,9 @@ void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 unique_hash_of_target = inst->GetArg(0).GetU64(); + auto args = reg_alloc.GetArgumentInfo(inst); + ASSERT(args[0].IsImmediate()); + u64 unique_hash_of_target = args[0].GetImmediateU64(); auto iter = block_descriptors.find(unique_hash_of_target); CodePtr target_code_ptr = iter != block_descriptors.end() @@ -470,121 +487,123 @@ void EmitX64::EmitGetGEFromOp(RegAlloc&, IR::Block&, IR::Inst*) { } void EmitX64::EmitPack2x32To1x64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg lo; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - lo = result.cvt32(); - } else { - std::tie(lo, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - lo.setBit(32); - Xbyak::Reg64 hi = reg_alloc.UseScratchGpr(inst->GetArg(1)); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 lo = reg_alloc.UseScratchGpr(args[0]); + Xbyak::Reg64 hi = reg_alloc.UseScratchGpr(args[1]); code->shl(hi, 32); - code->mov(result.cvt32(), *lo); // Zero extend to 64-bits - code->or_(result, hi); + code->mov(lo.cvt32(), lo.cvt32()); // Zero extend to 64-bits + code->or_(lo, hi); + + reg_alloc.DefineValue(inst, lo); } void EmitX64::EmitLeastSignificantWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - reg_alloc.RegisterAddDef(inst, inst->GetArg(0)); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitMostSignificantWord(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); - Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); code->shr(result, 32); + reg_alloc.DefineValue(inst, result); + auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); if (carry_inst) { EraseInstruction(block, carry_inst); - - Xbyak::Reg64 carry = reg_alloc.DefGpr(carry_inst); - + Xbyak::Reg64 carry = reg_alloc.ScratchGpr(); code->setc(carry.cvt8()); + reg_alloc.DefineValue(carry_inst, carry); } } void EmitX64::EmitLeastSignificantHalf(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - reg_alloc.RegisterAddDef(inst, inst->GetArg(0)); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, 
args[0]); } void EmitX64::EmitLeastSignificantByte(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - reg_alloc.RegisterAddDef(inst, inst->GetArg(0)); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitMostSignificantBit(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization - code->shr(result, 31); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitIsZero(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization - code->test(result, result); code->sete(result.cvt8()); code->movzx(result, result.cvt8()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitIsZero64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization - code->test(result, result); code->sete(result.cvt8()); code->movzx(result, result.cvt8()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + auto& carry_arg = args[2]; + // TODO: Consider using BMI2 instructions like SHLX when arm-in-host flags is implemented. if (!carry_inst) { - if (!inst->GetArg(2).IsImmediate()) { + if (!carry_arg.IsImmediate()) { inst->GetArg(2).GetInst()->DecrementRemainingUses(); } - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - u8 shift = shift_arg.GetU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { code->shl(result, shift); } else { code->xor_(result, result); } + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SHL instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. 
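// [Editor's illustrative sketch -- not emitted by this patch]
// A plain C++ reference model of the ARM LSL semantics that the shl/cmp/cmovnb
// sequence below reproduces. The helper name arm_lsl is ours, purely for illustration.
// x64's shl masks the count to 5 bits, so `value << (count & 0x1F)` alone would be
// wrong for counts of 32 or more; ARM instead shifts everything out and yields zero.
#include <cstdint>

static std::uint32_t arm_lsl(std::uint32_t value, std::uint8_t count) {
    if (count >= 32)
        return 0;              // ARM: counts 32..255 produce 0
    return value << count;     // counts 0..31 match the plain x64 shl
}

// e.g. arm_lsl(1, 32) == 0, whereas on x64 (1u << (32 & 0x1F)) would still be 1.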
- code->shl(result, shift); + code->shl(result, code->cl); code->xor_(zero, zero); - code->cmp(shift, 32); + code->cmp(code->cl, 32); code->cmovnb(result, zero); + + reg_alloc.DefineValue(inst, result); } } else { EraseInstruction(block, carry_inst); - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); if (shift == 0) { // There is nothing more to do. @@ -600,21 +619,24 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::In code->xor_(result, result); code->and_(carry, 1); } + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); // TODO: Optimize this. code->inLocalLabel(); - code->cmp(shift, 32); + code->cmp(code->cl, 32); code->ja(".Rs_gt32"); code->je(".Rs_eq32"); // if (Rs & 0xFF < 32) { code->bt(carry.cvt32(), 0); // Set the carry flag for correct behaviour in the case when Rs & 0xFF == 0 - code->shl(result, shift); + code->shl(result, code->cl); code->setc(carry.cvt8()); code->jmp(".end"); // } else if (Rs & 0xFF > 32) { @@ -631,6 +653,9 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::In code->L(".end"); code->outLocalLabel(); + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } } } @@ -638,44 +663,49 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::In void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + auto& carry_arg = args[2]; + if (!carry_inst) { - if (!inst->GetArg(2).IsImmediate()) { + if (!carry_arg.IsImmediate()) { inst->GetArg(2).GetInst()->DecrementRemainingUses(); } - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - u8 shift = shift_arg.GetU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { code->shr(result, shift); } else { code->xor_(result, result); } + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SHR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. 
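// [Editor's illustrative sketch -- not emitted by this patch]
// The ARM LSR-by-register carry rules that the carry-producing branch of
// EmitLogicalShiftRight encodes with shr/setc and the .Rs_eq32/.Rs_gt32 labels.
// The helper name arm_lsr_with_carry is ours, purely for illustration.
#include <cstdint>
#include <utility>

static std::pair<std::uint32_t, bool> arm_lsr_with_carry(std::uint32_t value, std::uint8_t count, bool carry_in) {
    if (count == 0)  return {value, carry_in};                        // carry unchanged
    if (count < 32)  return {value >> count, ((value >> (count - 1)) & 1) != 0};
    if (count == 32) return {0, (value >> 31) != 0};                  // carry = old bit 31
    return {0, false};                                                // counts above 32
}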
- code->shr(result, shift); + code->shr(result, code->cl); code->xor_(zero, zero); - code->cmp(shift, 32); + code->cmp(code->cl, 32); code->cmovnb(result, zero); + + reg_alloc.DefineValue(inst, result); } } else { EraseInstruction(block, carry_inst); - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); if (shift == 0) { // There is nothing more to do. @@ -690,23 +720,26 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::I code->xor_(result, result); code->xor_(carry, carry); } + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); // TODO: Optimize this. code->inLocalLabel(); - code->cmp(shift, 32); + code->cmp(code->cl, 32); code->ja(".Rs_gt32"); code->je(".Rs_eq32"); // if (Rs & 0xFF == 0) goto end; - code->test(shift, shift); + code->test(code->cl, code->cl); code->jz(".end"); // if (Rs & 0xFF < 32) { - code->shr(result, shift); + code->shr(result, code->cl); code->setc(carry.cvt8()); code->jmp(".end"); // } else if (Rs & 0xFF > 32) { @@ -723,39 +756,52 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::I code->L(".end"); code->outLocalLabel(); + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } } } void EmitX64::EmitLogicalShiftRight64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; - auto shift_arg = inst->GetArg(1); ASSERT_MSG(shift_arg.IsImmediate(), "variable 64 bit shifts are not implemented"); - u8 shift = shift_arg.GetU8(); - ASSERT_MSG(shift < 64, "shift width clamping is not implemented"); + ASSERT_MSG(shift_arg.GetImmediateU8() < 64, "shift width clamping is not implemented"); + + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(operand_arg); + u8 shift = shift_arg.GetImmediateU8(); code->shr(result.cvt64(), shift); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + auto& carry_arg = args[2]; + if (!carry_inst) { - if (!inst->GetArg(2).IsImmediate()) { + if (!carry_arg.IsImmediate()) { inst->GetArg(2).GetInst()->DecrementRemainingUses(); } - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = 
reg_alloc.UseScratchGpr(operand_arg).cvt32(); code->sar(result, u8(shift < 31 ? shift : 31)); + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg32 shift = reg_alloc.UseScratchGpr(shift_arg, {HostLoc::RCX}).cvt32(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + reg_alloc.UseScratch(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); Xbyak::Reg32 const31 = reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift. @@ -763,20 +809,20 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR // We note that all shift values above 31 have the same behaviour as 31 does, so we saturate `shift` to 31. code->mov(const31, 31); - code->movzx(shift, shift.cvt8()); - code->cmp(shift, u32(31)); - code->cmovg(shift, const31); - code->sar(result, shift.cvt8()); + code->movzx(code->ecx, code->cl); + code->cmp(code->ecx, u32(31)); + code->cmovg(code->ecx, const31); + code->sar(result, code->cl); + + reg_alloc.DefineValue(inst, result); } } else { EraseInstruction(block, carry_inst); - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); if (shift == 0) { // There is nothing more to do. @@ -788,22 +834,25 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR code->bt(result, 31); code->setc(carry); } + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); // TODO: Optimize this. 
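// [Editor's illustrative sketch -- not emitted by this patch]
// Why the register-count ASR path clamps the count to 31 with cmp/cmovg: for a 32-bit
// arithmetic shift right, every count of 31 or more just replicates the sign bit, so
// saturating the count reproduces ARM behaviour despite x64 masking counts to 5 bits.
// The helper name arm_asr is ours; the >> on int32_t is assumed to be arithmetic
// (guaranteed from C++20, and true on the targets this JIT supports).
#include <cstdint>

static std::uint32_t arm_asr(std::uint32_t value, std::uint8_t count) {
    const std::uint8_t effective = count > 31 ? 31 : count;   // the cmovg saturation
    return static_cast<std::uint32_t>(static_cast<std::int32_t>(value) >> effective);
}

// e.g. arm_asr(0x80000000u, 40) == 0xFFFFFFFFu, identical to a shift by 31.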
code->inLocalLabel(); - code->cmp(shift, u32(31)); + code->cmp(code->cl, u32(31)); code->ja(".Rs_gt31"); // if (Rs & 0xFF == 0) goto end; - code->test(shift, shift); + code->test(code->cl, code->cl); code->jz(".end"); // if (Rs & 0xFF <= 31) { - code->sar(result, shift); + code->sar(result, code->cl); code->setc(carry); code->jmp(".end"); // } else if (Rs & 0xFF > 31) { @@ -815,6 +864,9 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR code->L(".end"); code->outLocalLabel(); + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } } } @@ -822,34 +874,39 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + auto& carry_arg = args[2]; + if (!carry_inst) { - if (!inst->GetArg(2).IsImmediate()) { + if (!carry_arg.IsImmediate()) { inst->GetArg(2).GetInst()->DecrementRemainingUses(); } - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); code->ror(result, u8(shift & 0x1F)); + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); // x64 ROR instruction does (shift & 0x1F) for us. - code->ror(result, shift); + code->ror(result, code->cl); + + reg_alloc.DefineValue(inst, result); } } else { EraseInstruction(block, carry_inst); - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); if (shift == 0) { // There is nothing more to do. 
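// [Editor's illustrative sketch -- not emitted by this patch]
// The ARM ROR-by-register carry rules that EmitRotateRight's .zero_1F/.end labels
// implement: only the low byte of Rs matters, a zero byte leaves the carry untouched,
// a non-zero multiple of 32 keeps the value but copies bit 31 into the carry, and any
// other amount rotates and takes bit 31 of the result as carry. The helper name
// arm_ror_with_carry is ours, purely for illustration.
#include <cstdint>
#include <utility>

static std::pair<std::uint32_t, bool> arm_ror_with_carry(std::uint32_t value, std::uint8_t rs, bool carry_in) {
    if (rs == 0) return {value, carry_in};
    const unsigned amount = rs % 32;
    if (amount == 0) return {value, (value >> 31) != 0};
    const std::uint32_t result = (value >> amount) | (value << (32 - amount));
    return {result, (result >> 31) != 0};
}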
@@ -860,23 +917,26 @@ void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* i code->ror(result, shift); code->setc(carry); } + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } else { - Xbyak::Reg8 shift = reg_alloc.UseScratchGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8(); + reg_alloc.UseScratch(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); // TODO: Optimize code->inLocalLabel(); // if (Rs & 0xFF == 0) goto end; - code->test(shift, shift); + code->test(code->cl, code->cl); code->jz(".end"); - code->and_(shift.cvt32(), u32(0x1F)); + code->and_(code->ecx, u32(0x1F)); code->jz(".zero_1F"); // if (Rs & 0x1F != 0) { - code->ror(result, shift); + code->ror(result, code->cl); code->setc(carry); code->jmp(".end"); // } else { @@ -887,6 +947,9 @@ void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* i code->L(".end"); code->outLocalLabel(); + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } } } @@ -894,28 +957,32 @@ void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* i void EmitX64::EmitRotateRightExtended(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = carry_inst - ? reg_alloc.UseDefGpr(inst->GetArg(1), carry_inst).cvt8() - : reg_alloc.UseGpr(inst->GetArg(1)).cvt8(); + auto args = reg_alloc.GetArgumentInfo(inst); + + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(args[1]).cvt8(); code->bt(carry.cvt32(), 0); code->rcr(result, 1); + reg_alloc.DefineValue(inst, result); + if (carry_inst) { EraseInstruction(block, carry_inst); code->setc(carry); + + reg_alloc.DefineValue(carry_inst, carry); } } const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1); -static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, const IR::Value& carry_in, IR::Inst* carry_out) { +static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) { if (carry_in.IsImmediate()) { - return carry_out ? reg_alloc.DefGpr(carry_out).cvt8() : INVALID_REG.cvt8(); + return carry_out ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8(); } else { - return carry_out ? reg_alloc.UseDefGpr(carry_in, carry_out).cvt8() : reg_alloc.UseGpr(carry_in).cvt8(); + return carry_out ? reg_alloc.UseScratchGpr(carry_in).cvt8() : reg_alloc.UseGpr(carry_in).cvt8(); } } @@ -923,20 +990,19 @@ void EmitX64::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - IR::Value carry_in = inst->GetArg(2); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& carry_in = args[2]; - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst); - Xbyak::Reg8 overflow = overflow_inst ? 
reg_alloc.DefGpr(overflow_inst).cvt8() : INVALID_REG.cvt8(); + Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8(); // TODO: Consider using LEA. - if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); if (carry_in.IsImmediate()) { - if (carry_in.GetU1()) { + if (carry_in.GetImmediateU1()) { code->stc(); code->adc(result, op_arg); } else { @@ -947,10 +1013,10 @@ void EmitX64::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->adc(result, op_arg); } } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); if (carry_in.IsImmediate()) { - if (carry_in.GetU1()) { + if (carry_in.GetImmediateU1()) { code->stc(); code->adc(result, *op_arg); } else { @@ -962,48 +1028,50 @@ void EmitX64::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* } } + reg_alloc.DefineValue(inst, result); + if (carry_inst) { EraseInstruction(block, carry_inst); - code->setc(carry); + reg_alloc.DefineValue(carry_inst, carry); } if (overflow_inst) { EraseInstruction(block, overflow_inst); - code->seto(overflow); + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitAdd64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 result = reg_alloc.UseDefGpr(a, inst); - Xbyak::Reg64 op_arg = reg_alloc.UseGpr(b); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + Xbyak::Reg64 op_arg = reg_alloc.UseGpr(args[1]); code->add(result, op_arg); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - IR::Value carry_in = inst->GetArg(2); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& carry_in = args[2]; - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst); - Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt8() : INVALID_REG.cvt8(); + Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8(); // TODO: Consider using LEA. // TODO: Optimize CMP case. // Note that x64 CF is inverse of what the ARM carry flag is here. 
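// [Editor's illustrative sketch -- not emitted by this patch]
// The carry convention the sub/sbb paths rely on: ARM's carry on subtraction means
// "no borrow", the inverse of x64's CF, which is why the emitter uses stc before sbb
// when the incoming carry is a clear immediate and setnc rather than setc to read the
// result back. The helper name arm_sub_with_carry is ours, purely for illustration.
#include <cstdint>
#include <utility>

static std::pair<std::uint32_t, bool> arm_sub_with_carry(std::uint32_t a, std::uint32_t b, bool carry_in) {
    const std::uint64_t wide = std::uint64_t(a) + std::uint64_t(~b) + (carry_in ? 1u : 0u);
    return {static_cast<std::uint32_t>(wide), (wide >> 32) != 0};   // carry set <=> no borrow
}

// e.g. arm_sub_with_carry(5, 3, true) == {2, true}; arm_sub_with_carry(3, 5, true) == {0xFFFFFFFE, false}.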
- if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); if (carry_in.IsImmediate()) { - if (carry_in.GetU1()) { + if (carry_in.GetImmediateU1()) { code->sub(result, op_arg); } else { code->stc(); @@ -1015,10 +1083,10 @@ void EmitX64::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->sbb(result, op_arg); } } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); if (carry_in.IsImmediate()) { - if (carry_in.GetU1()) { + if (carry_in.GetImmediateU1()) { code->sub(result, *op_arg); } else { code->stc(); @@ -1031,242 +1099,203 @@ void EmitX64::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* } } + reg_alloc.DefineValue(inst, result); + if (carry_inst) { EraseInstruction(block, carry_inst); - code->setnc(carry); + reg_alloc.DefineValue(carry_inst, carry); } if (overflow_inst) { EraseInstruction(block, overflow_inst); - code->seto(overflow); + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitSub64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 result = reg_alloc.UseDefGpr(a, inst); - Xbyak::Reg64 op_arg = reg_alloc.UseGpr(b); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + Xbyak::Reg64 op_arg = reg_alloc.UseGpr(args[1]); code->sub(result, op_arg); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitMul(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - if (a.IsImmediate()) - std::swap(a, b); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - if (b.IsImmediate()) { - code->imul(result, result, b.GetU32()); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); + if (args[1].IsImmediate()) { + code->imul(result, result, args[1].GetImmediateU32()); } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); code->imul(result, *op_arg); } + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitMul64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 result = reg_alloc.UseDefGpr(a, inst); - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); code->imul(result, *op_arg); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitAnd(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); - if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); code->and_(result, op_arg); } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); code->and_(result, *op_arg); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitEor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = 
reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); - if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); code->xor_(result, op_arg); } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); code->xor_(result, *op_arg); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitOr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); - if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); code->or_(result, op_arg); } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); code->or_(result, *op_arg); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitNot(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); + auto args = reg_alloc.GetArgumentInfo(inst); - if (a.IsImmediate()) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - - code->mov(result, u32(~a.GetU32())); + Xbyak::Reg32 result; + if (args[0].IsImmediate()) { + result = reg_alloc.ScratchGpr().cvt32(); + code->mov(result, u32(~args[0].GetImmediateU32())); } else { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - + result = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->not_(result); } + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendWordToLong(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(32); - code->movsxd(result.cvt64(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->movsxd(result.cvt64(), result.cvt32()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendHalfToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(16); - code->movsx(result.cvt32(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->movsx(result.cvt32(), result.cvt16()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendByteToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(8); - code->movsx(result.cvt32(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + 
code->movsx(result.cvt32(), result.cvt8()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendWordToLong(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(32); - code->mov(result.cvt32(), *source); // x64 zeros upper 32 bits on a 32-bit move + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendHalfToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(16); - code->movzx(result.cvt32(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->movzx(result.cvt32(), result.cvt16()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendByteToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(8); - code->movzx(result.cvt32(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->movzx(result.cvt32(), result.cvt8()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitByteReverseWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->bswap(result); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitByteReverseHalf(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg16 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt16(); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg16 result = reg_alloc.UseScratchGpr(args[0]).cvt16(); code->rol(result, 8); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitByteReverseDual(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); code->bswap(result); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitCountLeadingZeros(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - + auto args = reg_alloc.GetArgumentInfo(inst); if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) { - Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 source = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->lzcnt(result, source); + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg32 source = 
reg_alloc.UseScratchGpr(a).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 source = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); // The result of a bsr of zero is undefined, but zf is set after it. code->bsr(result, source); @@ -1274,18 +1303,19 @@ void EmitX64::EmitCountLeadingZeros(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i code->cmovz(result, source); code->neg(result); code->add(result, 31); + + reg_alloc.DefineValue(inst, result); } } void EmitX64::EmitSignedSaturatedAdd(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 addend = reg_alloc.UseGpr(b).cvt32(); - Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 addend = reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32(); code->mov(overflow, result); code->shr(overflow, 31); @@ -1294,22 +1324,25 @@ void EmitX64::EmitSignedSaturatedAdd(RegAlloc& reg_alloc, IR::Block& block, IR:: code->add(result, addend); code->cmovo(result, overflow); + reg_alloc.DefineValue(inst, result); + if (overflow_inst) { EraseInstruction(block, overflow_inst); code->seto(overflow.cvt8()); + + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitSignedSaturatedSub(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subend = reg_alloc.UseGpr(b).cvt32(); - Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subend = reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32(); code->mov(overflow, result); code->shr(overflow, 31); @@ -1318,25 +1351,29 @@ void EmitX64::EmitSignedSaturatedSub(RegAlloc& reg_alloc, IR::Block& block, IR:: code->sub(result, subend); code->cmovo(result, overflow); + reg_alloc.DefineValue(inst, result); + if (overflow_inst) { EraseInstruction(block, overflow_inst); code->seto(overflow.cvt8()); + + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitUnsignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - size_t N = inst->GetArg(1).GetU8(); + auto args = reg_alloc.GetArgumentInfo(inst); + size_t N = args[1].GetImmediateU8(); ASSERT(N <= 31); u32 saturated_value = (1u << N) - 1; - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 reg_a = reg_alloc.UseGpr(a).cvt32(); - Xbyak::Reg32 overflow = overflow_inst ? 
reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32(); // Pseudocode: result = clamp(reg_a, 0, saturated_value); code->xor_(overflow, overflow); @@ -1345,22 +1382,26 @@ void EmitX64::EmitUnsignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR:: code->cmovle(result, overflow); code->cmovbe(result, reg_a); + reg_alloc.DefineValue(inst, result); + if (overflow_inst) { EraseInstruction(block, overflow_inst); code->seta(overflow.cvt8()); + + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - size_t N = inst->GetArg(1).GetU8(); + auto args = reg_alloc.GetArgumentInfo(inst); + size_t N = args[1].GetImmediateU8(); ASSERT(N >= 1 && N <= 32); if (N == 32) { - reg_alloc.RegisterAddDef(inst, a); + reg_alloc.DefineValue(inst, args[0]); if (overflow_inst) { auto no_overflow = IR::Value(false); overflow_inst->ReplaceUsesWith(no_overflow); @@ -1373,9 +1414,9 @@ void EmitX64::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::In u32 negative_saturated_value = 1u << (N - 1); u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 reg_a = reg_alloc.UseGpr(a).cvt32(); - Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32(); // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value. 
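// [Editor's illustrative sketch -- not emitted by this patch]
// A reference model of the SSAT-style clamp this function evaluates branchlessly with
// cmovg/cmovbe. Values are clamped to the signed N-bit range, and the boolean reports
// whether clamping happened. The helper name signed_saturate is ours; it assumes
// 1 <= N <= 31, since the emitter already special-cases N == 32 above.
#include <cstdint>
#include <utility>

static std::pair<std::int32_t, bool> signed_saturate(std::int32_t value, unsigned N) {
    const std::int32_t max =  (std::int32_t{1} << (N - 1)) - 1;   // positive_saturated_value
    const std::int32_t min = -(std::int32_t{1} << (N - 1));       // sign-extended negative bound
    if (value > max) return {max, true};
    if (value < min) return {min, true};
    return {value, false};
}

// e.g. signed_saturate(0x1234, 8) == {127, true}; signed_saturate(-5, 8) == {-5, false}.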
@@ -1391,10 +1432,14 @@ void EmitX64::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::In code->cmp(overflow, mask); code->cmovbe(result, reg_a); + reg_alloc.DefineValue(inst, result); + if (overflow_inst) { EraseInstruction(block, overflow_inst); code->seta(overflow.cvt8()); + + reg_alloc.DefineValue(overflow_inst, overflow); } } @@ -1435,330 +1480,269 @@ static void ExtractAndDuplicateMostSignificantBitFromPackedWords(BlockOfCode* co } void EmitX64::EmitPackedAddU8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); - Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(a).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseScratchGpr(b).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 reg_ge, tmp; + code->paddb(xmm_a, xmm_b); if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - tmp = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 reg_ge = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Xmm tmp = reg_alloc.ScratchXmm(); - code->mov(reg_ge, reg_a); - code->and_(reg_ge, reg_b); + code->movdqa(tmp, xmm_a); + code->pminub(tmp, xmm_b); + code->pcmpeqb(tmp, xmm_b); + code->movd(reg_ge, tmp); + code->not_(reg_ge); + + ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } - // SWAR Arithmetic - code->mov(result, reg_a); - code->xor_(result, reg_b); - code->and_(result, 0x80808080); - code->and_(reg_a, 0x7F7F7F7F); - code->and_(reg_b, 0x7F7F7F7F); - code->add(reg_a, reg_b); - if (ge_inst) { - code->mov(tmp, result); - code->and_(tmp, reg_a); - code->or_(reg_ge, tmp); - } - code->xor_(result, reg_a); - if (ge_inst) { - ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge, tmp); - } + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedAddS8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - } - - code->movd(xmm_a, reg_a); - code->movd(xmm_b, reg_b); - if (ge_inst) { Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm(); + reg_ge = reg_alloc.ScratchGpr().cvt32(); + code->movdqa(saturated_sum, xmm_a); code->paddsb(saturated_sum, xmm_b); code->movd(reg_ge, saturated_sum); } + code->paddb(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { code->not_(reg_ge); ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - 
IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); - Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(a).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseScratchGpr(b).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 reg_ge, tmp; + code->paddw(xmm_a, xmm_b); if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); + Xbyak::Reg32 reg_ge = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Xmm tmp = reg_alloc.ScratchXmm(); - code->mov(reg_ge, reg_a); - code->and_(reg_ge, reg_b); + code->movdqa(tmp, xmm_a); + code->pminuw(tmp, xmm_b); + code->pcmpeqw(tmp, xmm_b); + code->movd(reg_ge, tmp); + code->not_(reg_ge); + + ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } - // SWAR Arithmetic - code->mov(result, reg_a); - code->xor_(result, reg_b); - code->and_(result, 0x80008000); - code->and_(reg_a, 0x7FFF7FFF); - code->and_(reg_b, 0x7FFF7FFF); - code->add(reg_a, reg_b); - if (ge_inst) { - tmp = reg_alloc.ScratchGpr().cvt32(); - code->mov(tmp, result); - code->and_(tmp, reg_a); - code->or_(reg_ge, tmp); - } - code->xor_(result, reg_a); - if (ge_inst) { - ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); - } + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedAddS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - } - - code->movd(xmm_a, reg_a); - code->movd(xmm_b, reg_b); - if (ge_inst) { + reg_ge = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm(); + code->movdqa(saturated_sum, xmm_a); code->paddsw(saturated_sum, xmm_b); code->movd(reg_ge, saturated_sum); } + code->paddw(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { code->not_(reg_ge); ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSubU8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_ge; - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - xmm_ge = reg_alloc.ScratchXmm(); - } + Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); + reg_ge = reg_alloc.ScratchGpr().cvt32(); - code->movd(xmm_a, reg_a); - code->movd(xmm_b, reg_b); 
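// [Editor's illustrative sketch -- not emitted by this patch]
// The per-lane GE rule that the pminub/pcmpeqb (and pminuw/pcmpeqw) sequences evaluate
// in parallel for the unsigned packed adds: a lane's GE bit is simply "this lane
// carried out", and a carry happened exactly when the truncated sum is below one of
// the operands. The helper name uadd8_lane_ge is ours, purely for illustration.
#include <cstdint>

static bool uadd8_lane_ge(std::uint8_t a, std::uint8_t b) {
    const std::uint8_t sum = static_cast<std::uint8_t>(a + b);   // truncated, like paddb
    return sum < b;                                              // wrapped => carry => GE = 1
}

// The SSE version tests the opposite predicate, sum >= b, as pcmpeq(pminu(sum, b), b),
// then flips it with not_, which is the same test lane by lane.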
- if (ge_inst) { code->movdqa(xmm_ge, xmm_a); code->pmaxub(xmm_ge, xmm_b); code->pcmpeqb(xmm_ge, xmm_a); code->movd(reg_ge, xmm_ge); } + code->psubb(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); if (ge_inst) { ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSubS8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - } - code->movd(xmm_b, reg_b); - code->movd(xmm_a, reg_a); - if (ge_inst) { Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); + reg_ge = reg_alloc.ScratchGpr().cvt32(); + code->movdqa(xmm_ge, xmm_a); code->psubsb(xmm_ge, xmm_b); code->movd(reg_ge, xmm_ge); } + code->psubb(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { code->not_(reg_ge); ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_ge; - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - xmm_ge = reg_alloc.ScratchXmm(); - } + reg_ge = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); - code->movd(xmm_a, reg_a); - code->movd(xmm_b, reg_b); - if (ge_inst) { code->movdqa(xmm_ge, xmm_a); code->pmaxuw(xmm_ge, xmm_b); code->pcmpeqw(xmm_ge, xmm_a); code->movd(reg_ge, xmm_ge); } + code->psubw(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSubS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = 
reg_alloc.DefGpr(ge_inst).cvt32(); - } - - code->movd(xmm_b, reg_b); - code->movd(xmm_a, reg_a); - if (ge_inst) { Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); + reg_ge = reg_alloc.ScratchGpr().cvt32(); + code->movdqa(xmm_ge, xmm_a); code->psubsw(xmm_ge, xmm_b); code->movd(reg_ge, xmm_ge); } + code->psubw(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { code->not_(reg_ge); ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedHalvingAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); // This code path requires SSSE3 because of the PSHUFB instruction. // A fallback implementation is provided below. if (cpu_info.has(Xbyak::util::Cpu::tSSSE3)) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 arg = reg_alloc.UseGpr(b).cvt32(); - - // Load the operands into Xmm registers - Xbyak::Xmm xmm_scratch_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_scratch_b = reg_alloc.ScratchXmm(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseScratchXmm(args[1]); Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm(); Xbyak::Reg64 mask = reg_alloc.ScratchGpr(); - code->movd(xmm_scratch_a, result); - code->movd(xmm_scratch_b, arg); - // Set the mask to expand the values // 0xAABBCCDD becomes 0x00AA00BB00CC00DD code->mov(mask, 0x8003800280018000); code->movq(xmm_mask, mask); // Expand each 8-bit value to 16-bit - code->pshufb(xmm_scratch_a, xmm_mask); - code->pshufb(xmm_scratch_b, xmm_mask); + code->pshufb(xmm_a, xmm_mask); + code->pshufb(xmm_b, xmm_mask); // Add the individual 16-bit values - code->paddw(xmm_scratch_a, xmm_scratch_b); + code->paddw(xmm_a, xmm_b); // Shift the 16-bit values to the right to halve them - code->psrlw(xmm_scratch_a, 1); + code->psrlw(xmm_a, 1); // Set the mask to pack the values again // 0x00AA00BB00CC00DD becomes 0xAABBCCDD @@ -1766,33 +1750,33 @@ void EmitX64::EmitPackedHalvingAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->movq(xmm_mask, mask); // Shuffle them back to 8-bit values - code->pshufb(xmm_scratch_a, xmm_mask); + code->pshufb(xmm_a, xmm_mask); - code->movd(result, xmm_scratch_a); - return; + reg_alloc.DefineValue(inst, xmm_a); + } else { + // Fallback implementation in case the CPU doesn't support SSSE3 + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 and_a_b = reg_a; + Xbyak::Reg32 result = reg_a; + + code->mov(xor_a_b, reg_a); + code->and(and_a_b, reg_b); + code->xor(xor_a_b, reg_b); + code->shr(xor_a_b, 1); + code->and(xor_a_b, 0x7F7F7F7F); + code->add(result, xor_a_b); + + reg_alloc.DefineValue(inst, result); } - - // Fallback implementation in case the CPU doesn't support SSSE3 - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); - Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 and_a_b = reg_a; - Xbyak::Reg32 result = reg_a; - - code->mov(xor_a_b, reg_a); - code->and(and_a_b, reg_b); - code->xor(xor_a_b, reg_b); - code->shr(xor_a_b, 1); - code->and(xor_a_b, 0x7F7F7F7F); - code->add(result, xor_a_b); } void EmitX64::EmitPackedHalvingAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value 
b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 and_a_b = reg_a; Xbyak::Reg32 result = reg_a; @@ -1808,14 +1792,15 @@ void EmitX64::EmitPackedHalvingAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->shr(xor_a_b, 1); code->and(xor_a_b, 0x7FFF7FFF); code->add(result, xor_a_b); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitPackedHalvingAddS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 and_a_b = reg_a; Xbyak::Reg32 result = reg_a; @@ -1836,14 +1821,15 @@ void EmitX64::EmitPackedHalvingAddS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->and(xor_a_b, 0x7F7F7F7F); code->add(result, xor_a_b); code->xor(result, carry); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitPackedHalvingAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 and_a_b = reg_a; Xbyak::Reg32 result = reg_a; @@ -1864,14 +1850,15 @@ void EmitX64::EmitPackedHalvingAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->and(xor_a_b, 0x7FFF7FFF); code->add(result, xor_a_b); code->xor(result, carry); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitPackedHalvingSubU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32(); // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1). // Note that x^y always contains the LSB of the result. @@ -1894,14 +1881,14 @@ void EmitX64::EmitPackedHalvingSubU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->xor(minuend, 0x80808080); // minuend now contains the desired result. 
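// A minimal scalar sketch of the SWAR trick above (illustrative only; the helper name is
// hypothetical and assumes <cstdint>). Each byte lane of the result holds (x - y) / 2,
// using the identity x - y == (x ^ y) - (((x ^ y) & y) << 1) and a bias bit per lane so
// the partitioned subtraction cannot borrow across lane boundaries.
static uint32_t PackedHalvingSubU8Reference(uint32_t x, uint32_t y) {
    const uint32_t xor_xy = x ^ y;                  // carries the LSB of each lane's difference
    const uint32_t to_subtract = xor_xy & y;        // (x ^ y) & y, i.e. ~x & y
    uint32_t result = (xor_xy >> 1) | 0x80808080u;  // halve, then set the borrow bit of every lane
    result -= to_subtract;                          // partitioned subtraction; borrows stop at the bias bit
    result ^= 0x80808080u;                          // clear the bias bit (or set it where a borrow occurred)
    return result;
}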
+ reg_alloc.DefineValue(inst, minuend); } void EmitX64::EmitPackedHalvingSubS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32(); Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32(); @@ -1929,14 +1916,15 @@ void EmitX64::EmitPackedHalvingSubS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->sub(minuend, subtrahend); code->xor(minuend, 0x80808080); code->xor(minuend, carry); + + reg_alloc.DefineValue(inst, minuend); } void EmitX64::EmitPackedHalvingSubU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32(); // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1). // Note that x^y always contains the LSB of the result. @@ -1957,14 +1945,15 @@ void EmitX64::EmitPackedHalvingSubU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->or(minuend, 0x80008000); code->sub(minuend, subtrahend); code->xor(minuend, 0x80008000); + + reg_alloc.DefineValue(inst, minuend); } void EmitX64::EmitPackedHalvingSubS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32(); Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32(); @@ -1992,106 +1981,76 @@ void EmitX64::EmitPackedHalvingSubS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->sub(minuend, subtrahend); code->xor(minuend, 0x80008000); code->xor(minuend, carry); + + reg_alloc.DefineValue(inst, minuend); +} + +void EmitPackedHalvingSubAdd(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, bool is_signed) { + auto args = reg_alloc.GetArgumentInfo(inst); + + Xbyak::Reg32 reg_a_hi = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b_hi = reg_alloc.UseScratchGpr(args[1]).cvt32(); + Xbyak::Reg32 reg_a_lo = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 reg_b_lo = reg_alloc.ScratchGpr().cvt32(); + + // If asx is true, the high word contains the sum and the low word the difference. + // If false, the high word contains the difference and the low word the sum. + bool asx = args[2].GetImmediateU1(); + + if (is_signed) { + code->movsx(reg_a_lo, reg_a_hi.cvt16()); + code->movsx(reg_b_lo, reg_b_hi.cvt16()); + code->sar(reg_a_hi, 16); + code->sar(reg_b_hi, 16); + } else { + code->movzx(reg_a_lo, reg_a_hi.cvt16()); + code->movzx(reg_b_lo, reg_b_hi.cvt16()); + code->shr(reg_a_hi, 16); + code->shr(reg_b_hi, 16); + } + + if (asx) { + // Calculate diff such that reg_a_lo<31:16> contains diff<16:1>. + code->sub(reg_a_lo, reg_b_hi); + code->shl(reg_a_lo, 15); + + // Calculate sum such that reg_a_hi<15:0> contains sum<16:1>. 
+ code->add(reg_a_hi, reg_b_lo); + code->shr(reg_a_hi, 1); + } else { + // Calculate sum such that reg_a_lo<31:16> contains sum<16:1>. + code->add(reg_a_lo, reg_b_hi); + code->shl(reg_a_lo, 15); + + // Calculate diff such that reg_a_hi<15:0> contains diff<16:1>. + code->sub(reg_a_hi, reg_b_lo); + code->shr(reg_a_hi, 1); + } + + // reg_a_lo now contains the low word and reg_a_hi now contains the high word. + // Merge them. + code->shld(reg_a_hi, reg_a_lo, 16); + + reg_alloc.DefineValue(inst, reg_a_hi); } void EmitX64::EmitPackedHalvingSubAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - // If asx is true, the high word contains the sum and the low word the difference. - // If false, the high word contains the difference and the low word the sum. - bool asx = inst->GetArg(2).GetU1(); - - Xbyak::Reg32 reg_a_hi = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b_hi = reg_alloc.UseScratchGpr(b).cvt32(); - Xbyak::Reg32 reg_a_lo = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 reg_b_lo = reg_alloc.ScratchGpr().cvt32(); - - code->movzx(reg_a_lo, reg_a_hi.cvt16()); - code->movzx(reg_b_lo, reg_b_hi.cvt16()); - code->shr(reg_a_hi, 16); - code->shr(reg_b_hi, 16); - - if (asx) { - // Calculate diff such that reg_a_lo<31:16> contains diff<16:1>. - code->sub(reg_a_lo, reg_b_hi); - code->shl(reg_a_lo, 15); - - // Calculate sum such that reg_a_hi<15:0> contains sum<16:1>. - code->add(reg_a_hi, reg_b_lo); - code->shr(reg_a_hi, 1); - } else { - // Calculate sum such that reg_a_lo<31:16> contains sum<16:1>. - code->add(reg_a_lo, reg_b_hi); - code->shl(reg_a_lo, 15); - - // Calculate diff such that reg_a_hi<15:0> contains diff<16:1>. - code->sub(reg_a_hi, reg_b_lo); - code->shr(reg_a_hi, 1); - } - - // reg_a_lo now contains the low word and reg_a_hi now contains the high word. - // Merge them. - code->shld(reg_a_hi, reg_a_lo, 16); + EmitPackedHalvingSubAdd(code, reg_alloc, inst, false); } void EmitX64::EmitPackedHalvingSubAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - // If asx is true, the high word contains the sum and the low word the difference. - // If false, the high word contains the difference and the low word the sum. - bool asx = inst->GetArg(2).GetU1(); - - Xbyak::Reg32 reg_a_hi = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b_hi = reg_alloc.UseScratchGpr(b).cvt32(); - Xbyak::Reg32 reg_a_lo = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 reg_b_lo = reg_alloc.ScratchGpr().cvt32(); - - code->movsx(reg_a_lo, reg_a_hi.cvt16()); - code->movsx(reg_b_lo, reg_b_hi.cvt16()); - code->sar(reg_a_hi, 16); - code->sar(reg_b_hi, 16); - - if (asx) { - // Calculate diff such that reg_a_lo<31:16> contains diff<16:1>. - code->sub(reg_a_lo, reg_b_hi); - code->shl(reg_a_lo, 15); - - // Calculate sum such that reg_a_hi<15:0> contains sum<16:1>. - code->add(reg_a_hi, reg_b_lo); - code->shr(reg_a_hi, 1); - } else { - // Calculate sum such that reg_a_lo<31:16> contains sum<16:1>. - code->add(reg_a_lo, reg_b_hi); - code->shl(reg_a_lo, 15); - - // Calculate diff such that reg_a_hi<15:0> contains diff<16:1>. - code->sub(reg_a_hi, reg_b_lo); - code->shr(reg_a_hi, 1); - } - - // reg_a_lo now contains the low word and reg_a_hi now contains the high word. - // Merge them. 
- code->shld(reg_a_hi, reg_a_lo, 16); + EmitPackedHalvingSubAdd(code, reg_alloc, inst, true); } static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 arg = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); - Xbyak::Xmm xmm_scratch_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_scratch_b = reg_alloc.ScratchXmm(); + (code->*fn)(xmm_a, xmm_b); - code->movd(xmm_scratch_a, result); - code->movd(xmm_scratch_b, arg); - - (code->*fn)(xmm_scratch_a, xmm_scratch_b); - - code->movd(result, xmm_scratch_a); + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSaturatedAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { @@ -2224,11 +2183,10 @@ static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_ } static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); - Xbyak::Xmm operand = reg_alloc.UseXmm(b); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm operand = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); if (block.Location().FPSCR().FTZ()) { @@ -2242,14 +2200,15 @@ static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block if (block.Location().FPSCR().DN()) { DefaultNaN32(code, result); } + + reg_alloc.DefineValue(inst, result); } static void FPThreeOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); - Xbyak::Xmm operand = reg_alloc.UseXmm(b); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm operand = reg_alloc.UseXmm(args[1]); Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); if (block.Location().FPSCR().FTZ()) { @@ -2263,12 +2222,14 @@ static void FPThreeOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block if (block.Location().FPSCR().DN()) { DefaultNaN64(code, result); } + + reg_alloc.DefineValue(inst, result); } static void FPTwoOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); if (block.Location().FPSCR().FTZ()) { @@ -2282,12 +2243,14 @@ static void FPTwoOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, if (block.Location().FPSCR().DN()) { DefaultNaN32(code, result); } + + reg_alloc.DefineValue(inst, result); } static void FPTwoOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const 
Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); if (block.Location().FPSCR().FTZ()) { @@ -2301,76 +2264,76 @@ static void FPTwoOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, if (block.Location().FPSCR().DN()) { DefaultNaN64(code, result); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitTransferFromFP32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Xmm source = reg_alloc.UseXmm(inst->GetArg(0)); - // TODO: Eliminate this. - code->movd(result, source); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitTransferFromFP64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg64 result = reg_alloc.DefGpr(inst); - Xbyak::Xmm source = reg_alloc.UseXmm(inst->GetArg(0)); - // TODO: Eliminate this. - code->movq(result, source); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitTransferToFP32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - if (inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetU32() == 0) { - Xbyak::Xmm result = reg_alloc.DefXmm(inst); + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate() && args[0].GetImmediateU32() == 0) { + Xbyak::Xmm result = reg_alloc.ScratchXmm(); code->xorps(result, result); + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Xmm result = reg_alloc.DefXmm(inst); - Xbyak::Reg32 source = reg_alloc.UseGpr(inst->GetArg(0)).cvt32(); - // TODO: Eliminate this. - code->movd(result, source); + reg_alloc.DefineValue(inst, args[0]); } } void EmitX64::EmitTransferToFP64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - if (inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetU64() == 0) { - Xbyak::Xmm result = reg_alloc.DefXmm(inst); - code->xorpd(result, result); + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate() && args[0].GetImmediateU64() == 0) { + Xbyak::Xmm result = reg_alloc.ScratchXmm(); + code->xorps(result, result); + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Xmm result = reg_alloc.DefXmm(inst); - Xbyak::Reg64 source = reg_alloc.UseGpr(inst->GetArg(0)); - // TODO: Eliminate this. 
- code->movq(result, source); + reg_alloc.DefineValue(inst, args[0]); } } void EmitX64::EmitFPAbs32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); code->pand(result, code->MFloatNonSignMask32()); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPAbs64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); code->pand(result, code->MFloatNonSignMask64()); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPNeg32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); code->pxor(result, code->MFloatNegativeZero32()); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPNeg64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); code->pxor(result, code->MFloatNegativeZero64()); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPAdd32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { @@ -2437,12 +2400,10 @@ static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) { } void EmitX64::EmitFPCompare32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - bool quiet = inst->GetArg(2).GetU1(); - - Xbyak::Xmm reg_a = reg_alloc.UseXmm(a); - Xbyak::Xmm reg_b = reg_alloc.UseXmm(b); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm reg_a = reg_alloc.UseXmm(args[0]); + Xbyak::Xmm reg_b = reg_alloc.UseXmm(args[1]); + bool quiet = args[2].GetImmediateU1(); if (quiet) { code->ucomiss(reg_a, reg_b); @@ -2454,12 +2415,10 @@ void EmitX64::EmitFPCompare32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitFPCompare64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - bool quiet = inst->GetArg(2).GetU1(); - - Xbyak::Xmm reg_a = reg_alloc.UseXmm(a); - Xbyak::Xmm reg_b = reg_alloc.UseXmm(b); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm reg_a = reg_alloc.UseXmm(args[0]); + Xbyak::Xmm reg_b = reg_alloc.UseXmm(args[1]); + bool quiet = args[2].GetImmediateU1(); if (quiet) { code->ucomisd(reg_a, reg_b); @@ -2471,9 +2430,8 @@ void EmitX64::EmitFPCompare64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitFPSingleToDouble(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); if (block.Location().FPSCR().FTZ()) { @@ -2486,12 +2444,13 @@ void EmitX64::EmitFPSingleToDouble(RegAlloc& reg_alloc, IR::Block& block, IR::In if (block.Location().FPSCR().DN()) { DefaultNaN64(code, result); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPDoubleToSingle(RegAlloc& reg_alloc, 
IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); if (block.Location().FPSCR().FTZ()) { @@ -2504,29 +2463,29 @@ void EmitX64::EmitFPDoubleToSingle(RegAlloc& reg_alloc, IR::Block& block, IR::In if (block.Location().FPSCR().DN()) { DefaultNaN32(code, result); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_towards_zero = inst->GetArg(1).GetU1(); - - Xbyak::Xmm from = reg_alloc.UseScratchXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm(); + bool round_towards_zero = args[1].GetImmediateU1(); // ARM saturates on conversion; this differs from x64 which returns a sentinel value. // Conversion to double is lossless, and allows for clamping. if (block.Location().FPSCR().FTZ()) { - DenormalsAreZero32(code, from, gpr_scratch); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); // First time is to set flags if (round_towards_zero) { - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); @@ -2534,21 +2493,20 @@ void EmitX64::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->maxsd(from, code->MFloatMinS32()); // Second time is for real if (round_towards_zero) { - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr } - code->movd(to, gpr_scratch); + + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_towards_zero = inst->GetArg(1).GetU1(); - - Xbyak::Xmm from = reg_alloc.UseScratchXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm(); + bool round_towards_zero = args[1].GetImmediateU1(); // ARM saturates on conversion; this differs from x64 which returns a sentinel value. // Conversion to double is lossless, and allows for accurate clamping. 
@@ -2559,28 +2517,27 @@ void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* if (block.Location().FPSCR().RMode() != Arm::FPSCR::RoundingMode::TowardsZero && !round_towards_zero) { if (block.Location().FPSCR().FTZ()) { - DenormalsAreZero32(code, from, gpr_scratch); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); // Bring into SSE range code->addsd(from, code->MFloatMinS32()); // First time is to set flags - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr // Clamp to output range code->minsd(from, code->MFloatMaxS32()); code->maxsd(from, code->MFloatMinS32()); // Actually convert - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr // Bring back into original range - code->add(gpr_scratch, u32(2147483648u)); - code->movd(to, gpr_scratch); + code->add(to, u32(2147483648u)); } else { Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm(); Xbyak::Reg32 gpr_mask = reg_alloc.ScratchGpr().cvt32(); if (block.Location().FPSCR().FTZ()) { - DenormalsAreZero32(code, from, gpr_scratch); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); @@ -2593,26 +2550,26 @@ void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* // Bring into range if necessary code->addsd(from, xmm_mask); // First time is to set flags - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr // Clamp to output range code->minsd(from, code->MFloatMaxS32()); code->maxsd(from, code->MFloatMinU32()); // Actually convert - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr // Bring back into original range if necessary - code->add(gpr_scratch, gpr_mask); - code->movd(to, gpr_scratch); + code->add(to, gpr_mask); } + + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_towards_zero = inst->GetArg(1).GetU1(); - - Xbyak::Xmm from = reg_alloc.UseScratchXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm(); + Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + bool round_towards_zero = args[1].GetImmediateU1(); // ARM saturates on conversion; this differs from x64 which returns a sentinel value. 
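// A minimal scalar sketch of the saturating conversion being emitted here (illustrative only;
// the helper name is hypothetical and assumes <cmath> and <cstdint>). A bare cvttsd2si would
// produce the x64 sentinel 0x80000000 on overflow, so the value is clamped first and NaN is
// mapped to zero, mirroring the ZeroIfNaN64/minsd/maxsd sequence.
static int32_t SaturatingDoubleToS32Reference(double value, bool round_towards_zero) {
    if (std::isnan(value))
        return 0;                                   // NaN converts to zero
    if (value >= 2147483647.0)
        return INT32_MAX;                           // clamp to the top of the s32 range
    if (value <= -2147483648.0)
        return INT32_MIN;                           // clamp to the bottom of the s32 range
    const double rounded = round_towards_zero ? std::trunc(value) : std::nearbyint(value);
    return static_cast<int32_t>(rounded);           // now guaranteed to be in range
}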
@@ -2631,21 +2588,21 @@ void EmitX64::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->maxsd(from, code->MFloatMinS32()); // Second time is for real if (round_towards_zero) { - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr } - code->movd(to, gpr_scratch); + + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_towards_zero = inst->GetArg(1).GetU1(); - - Xbyak::Xmm from = reg_alloc.UseScratchXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm(); + Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + bool round_towards_zero = args[1].GetImmediateU1(); // ARM saturates on conversion; this differs from x64 which returns a sentinel value. // TODO: Use VCVTPD2UDQ when AVX512VL is available. @@ -2664,10 +2621,9 @@ void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->minsd(from, code->MFloatMaxS32()); code->maxsd(from, code->MFloatMinS32()); // Actually convert - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr // Bring back into original range - code->add(gpr_scratch, u32(2147483648u)); - code->movd(to, gpr_scratch); + code->add(to, u32(2147483648u)); } else { Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm(); Xbyak::Reg32 gpr_mask = reg_alloc.ScratchGpr().cvt32(); @@ -2690,65 +2646,64 @@ void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->minsd(from, code->MFloatMaxS32()); code->maxsd(from, code->MFloatMinU32()); // Actually convert - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr // Bring back into original range if necessary - code->add(gpr_scratch, gpr_mask); - code->movd(to, gpr_scratch); + code->add(to, gpr_mask); } + + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPS32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_to_nearest = inst->GetArg(1).GetU1(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 from = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Xmm to = reg_alloc.ScratchXmm(); + bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - Xbyak::Xmm from = reg_alloc.UseXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + code->cvtsi2ss(to, from); - code->movd(gpr_scratch, from); - code->cvtsi2ss(to, gpr_scratch); + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPU32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_to_nearest = inst->GetArg(1).GetU1(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 from = reg_alloc.UseGpr(args[0]); + Xbyak::Xmm to = reg_alloc.ScratchXmm(); + bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - Xbyak::Xmm from = reg_alloc.UseXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - // Use a 64-bit register to ensure we don't end up 
treating the input as signed - Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); + // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed + code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary + code->cvtsi2ss(to, from); - code->movq(gpr_scratch, from); - code->cvtsi2ss(to, gpr_scratch); + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPS32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_to_nearest = inst->GetArg(1).GetU1(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 from = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Xmm to = reg_alloc.ScratchXmm(); + bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - Xbyak::Xmm from = reg_alloc.UseXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + code->cvtsi2sd(to, from); - code->movd(gpr_scratch, from); - code->cvtsi2sd(to, gpr_scratch); + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_to_nearest = inst->GetArg(1).GetU1(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 from = reg_alloc.UseGpr(args[0]); + Xbyak::Xmm to = reg_alloc.ScratchXmm(); + bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - Xbyak::Xmm from = reg_alloc.UseXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - // Use a 64-bit register to ensure we don't end up treating the input as signed - Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); + // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed + code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary + code->cvtsi2sd(to, from); - code->movq(gpr_scratch, from); - code->cvtsi2sd(to, gpr_scratch); + reg_alloc.DefineValue(inst, to); } @@ -2761,8 +2716,9 @@ void EmitX64::EmitClearExclusive(RegAlloc&, IR::Block&, IR::Inst*) { void EmitX64::EmitSetExclusive(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { using namespace Xbyak::util; - ASSERT(inst->GetArg(1).IsImmediate()); - Xbyak::Reg32 address = reg_alloc.UseGpr(inst->GetArg(0)).cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + ASSERT(args[1].IsImmediate()); + Xbyak::Reg32 address = reg_alloc.UseGpr(args[0]).cvt32(); code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(1)); code->mov(dword[r15 + offsetof(JitState, exclusive_address)], address); @@ -2770,16 +2726,16 @@ template <typename FunctionPointer> static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) { + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.HostCall(inst, args[0]); + if (!cb.page_table) { - reg_alloc.HostCall(inst, inst->GetArg(0)); code->CallFunction(fn); return; } using namespace Xbyak::util; - reg_alloc.HostCall(inst, inst->GetArg(0)); - Xbyak::Reg64 result = code->ABI_RETURN; Xbyak::Reg32 vaddr = code->ABI_PARAM1.cvt32(); Xbyak::Reg64 page_index = code->ABI_PARAM3; @@ -2787,26 +2743,26 @@ static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, U Xbyak::Label abort, end; - code->mov(rax, reinterpret_cast<u64>(cb.page_table)); + code->mov(result, reinterpret_cast<u64>(cb.page_table)); code->mov(page_index.cvt32(), vaddr);
code->shr(page_index.cvt32(), 12); - code->mov(rax, qword[rax + page_index * 8]); - code->test(rax, rax); + code->mov(result, qword[result + page_index * 8]); + code->test(result, result); code->jz(abort); code->mov(page_offset.cvt32(), vaddr); code->and_(page_offset.cvt32(), 4095); switch (bit_size) { case 8: - code->movzx(result, code->byte[rax + page_offset]); + code->movzx(result, code->byte[result + page_offset]); break; case 16: - code->movzx(result, word[rax + page_offset]); + code->movzx(result, word[result + page_offset]); break; case 32: - code->mov(result.cvt32(), dword[rax + page_offset]); + code->mov(result.cvt32(), dword[result + page_offset]); break; case 64: - code->mov(result.cvt64(), qword[rax + page_offset]); + code->mov(result.cvt64(), qword[result + page_offset]); break; default: ASSERT_MSG(false, "Invalid bit_size"); @@ -2820,16 +2776,16 @@ static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, U template <typename FunctionPointer> static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) { + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.HostCall(nullptr, args[0], args[1]); + if (!cb.page_table) { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); code->CallFunction(fn); return; } using namespace Xbyak::util; - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); - Xbyak::Reg32 vaddr = code->ABI_PARAM1.cvt32(); Xbyak::Reg64 value = code->ABI_PARAM2; Xbyak::Reg64 page_index = code->ABI_PARAM3; @@ -2902,17 +2858,18 @@ void EmitX64::EmitWriteMemory64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) template <typename FunctionPointer> static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, FunctionPointer fn, bool prepend_high_word) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (prepend_high_word) { + reg_alloc.HostCall(nullptr, args[0], args[1], args[2]); + } else { + reg_alloc.HostCall(nullptr, args[0], args[1]); + } + Xbyak::Reg32 passed = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unused HostCall registers. + using namespace Xbyak::util; Xbyak::Label end; - if (prepend_high_word) { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1), inst->GetArg(2)); - } else { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); - } - Xbyak::Reg32 passed = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers.
- code->mov(passed, u32(1)); code->cmp(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0)); code->je(end); @@ -2929,6 +2886,8 @@ static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* ins code->CallFunction(fn); code->xor_(passed, passed); code->L(end); + + reg_alloc.DefineValue(inst, passed); } void EmitX64::EmitExclusiveWriteMemory8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { @@ -2951,7 +2910,7 @@ static void EmitCoprocessorException() { ASSERT_MSG(false, "Should raise coproc exception here"); } -static void CallCoprocCallback(BlockOfCode* code, RegAlloc& reg_alloc, Jit* jit_interface, Coprocessor::Callback callback, IR::Inst* inst = nullptr, IR::Value arg0 = {}, IR::Value arg1 = {}) { +static void CallCoprocCallback(BlockOfCode* code, RegAlloc& reg_alloc, Jit* jit_interface, Coprocessor::Callback callback, IR::Inst* inst = nullptr, boost::optional<Argument&> arg0 = {}, boost::optional<Argument&> arg1 = {}) { reg_alloc.HostCall(inst, {}, {}, arg0, arg1); code->mov(code->ABI_PARAM1, reinterpret_cast<u64>(jit_interface)); @@ -2989,6 +2948,7 @@ void EmitX64::EmitCoprocInternalOperation(RegAlloc& reg_alloc, IR::Block&, IR::I } void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto coproc_info = inst->GetArg(0).GetCoprocInfo(); size_t coproc_num = coproc_info[0]; @@ -2998,8 +2958,6 @@ void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i Arm::CoprocReg CRm = static_cast<Arm::CoprocReg>(coproc_info[4]); unsigned opc2 = static_cast<unsigned>(coproc_info[5]); - IR::Value word = inst->GetArg(1); - std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -3012,12 +2970,12 @@ void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i EmitCoprocessorException(); return; case 1: - CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, word); + CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, args[1]); return; case 2: { u32* destination_ptr = boost::get<u32*>(action); - Xbyak::Reg32 reg_word = reg_alloc.UseGpr(word).cvt32(); + Xbyak::Reg32 reg_word = reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr(); code->mov(reg_destination_addr, reinterpret_cast<u64>(destination_ptr)); @@ -3031,6 +2989,7 @@ void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i } void EmitX64::EmitCoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto coproc_info = inst->GetArg(0).GetCoprocInfo(); size_t coproc_num = coproc_info[0]; @@ -3038,9 +2997,6 @@ void EmitX64::EmitCoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* unsigned opc = static_cast<unsigned>(coproc_info[2]); Arm::CoprocReg CRm = static_cast<Arm::CoprocReg>(coproc_info[3]); - IR::Value word1 = inst->GetArg(1); - IR::Value word2 = inst->GetArg(2); - std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -3053,13 +3009,13 @@ void EmitX64::EmitCoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* EmitCoprocessorException(); return; case 1: - CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, word1, word2); + CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, args[1], args[2]); return; case 2: { auto destination_ptrs = boost::get<std::array<u32*, 2>>(action); - Xbyak::Reg32 reg_word1 = reg_alloc.UseGpr(word1).cvt32(); - Xbyak::Reg32 reg_word2 =
reg_alloc.UseGpr(word2).cvt32(); + Xbyak::Reg32 reg_word1 = reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 reg_word2 = reg_alloc.UseGpr(args[2]).cvt32(); Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr(); code->mov(reg_destination_addr, reinterpret_cast<u64>(destination_ptrs[0])); @@ -3101,12 +3057,14 @@ void EmitX64::EmitCoprocGetOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in case 2: { u32* source_ptr = boost::get<u32*>(action); - Xbyak::Reg32 reg_word = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 reg_word = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg64 reg_source_addr = reg_alloc.ScratchGpr(); code->mov(reg_source_addr, reinterpret_cast<u64>(source_ptr)); code->mov(reg_word, code->dword[reg_source_addr]); + reg_alloc.DefineValue(inst, reg_word); + return; } default: @@ -3139,7 +3097,7 @@ void EmitX64::EmitCoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i case 2: { auto source_ptrs = boost::get<std::array<u32*, 2>>(action); - Xbyak::Reg64 reg_result = reg_alloc.DefGpr(inst); + Xbyak::Reg64 reg_result = reg_alloc.ScratchGpr(); Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr(); Xbyak::Reg64 reg_tmp = reg_alloc.ScratchGpr(); @@ -3150,6 +3108,8 @@ void EmitX64::EmitCoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i code->mov(reg_tmp.cvt32(), code->dword[reg_destination_addr]); code->or_(reg_result, reg_tmp); + reg_alloc.DefineValue(inst, reg_result); + return; } default: @@ -3158,6 +3118,7 @@ void EmitX64::EmitCoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i } void EmitX64::EmitCoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto coproc_info = inst->GetArg(0).GetCoprocInfo(); size_t coproc_num = coproc_info[0]; @@ -3167,8 +3128,6 @@ void EmitX64::EmitCoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* ins bool has_option = coproc_info[4] != 0; boost::optional<u8> option{has_option, coproc_info[5]}; - IR::Value address = inst->GetArg(1); - std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -3181,10 +3140,11 @@ void EmitX64::EmitCoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* ins return; } - CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, address); + CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, args[1]); } void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto coproc_info = inst->GetArg(0).GetCoprocInfo(); size_t coproc_num = coproc_info[0]; @@ -3194,8 +3154,6 @@ void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in bool has_option = coproc_info[4] != 0; boost::optional<u8> option{has_option, coproc_info[5]}; - IR::Value address = inst->GetArg(1); - std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -3208,7 +3166,7 @@ void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in return; } - CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, address); + CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, args[1]); } void EmitX64::EmitAddCycles(size_t cycles) { diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index 46e003d9..b2dd8276 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -56,9 +56,9 @@ static void EmitMove(BlockOfCode* code, HostLoc to, HostLoc from) {
code->mov(HostLocToReg64(to), HostLocToReg64(from)); } else if (HostLocIsXMM(to) && HostLocIsGPR(from)) { - ASSERT_MSG(false, "TODO"); + code->movq(HostLocToXmm(to), HostLocToReg64(from)); } else if (HostLocIsGPR(to) && HostLocIsXMM(from)) { - ASSERT_MSG(false, "TODO"); + code->movq(HostLocToReg64(to), HostLocToXmm(from)); } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) { code->movsd(HostLocToXmm(to), SpillToOpArg(from)); } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) { @@ -82,6 +82,10 @@ static void EmitExchange(BlockOfCode* code, HostLoc a, HostLoc b) { } } +bool Argument::GetImmediateU1() const { + return value.GetU1(); +} + u8 Argument::GetImmediateU8() const { u64 imm = ImmediateToU64(value); ASSERT(imm < 0x100); diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h index cbc3e56c..e854ddfe 100644 --- a/src/backend_x64/reg_alloc.h +++ b/src/backend_x64/reg_alloc.h @@ -76,6 +76,7 @@ public: return value.IsImmediate(); } + bool GetImmediateU1() const; u8 GetImmediateU8() const; u16 GetImmediateU16() const; u32 GetImmediateU32() const; @@ -113,6 +114,9 @@ public: arg.allocated = true; return HostLocToXmm(UseHostLocReg(arg.value, any_xmm)); } + OpArg UseOpArg(Argument& arg) { + return UseGpr(arg); + } void Use(Argument& arg, HostLoc host_loc) { ASSERT(!arg.allocated); arg.allocated = true; @@ -135,12 +139,9 @@ public: UseScratchHostLocReg(arg.value, {host_loc}); } - void DefineValue(IR::Inst* inst, const Xbyak::Reg64& reg) { - HostLoc hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(HostLoc::RAX)); - DefineValue(inst, hostloc); - } - void DefineValue(IR::Inst* inst, const Xbyak::Xmm& reg) { - HostLoc hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(HostLoc::XMM0)); + void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) { + ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG); + HostLoc hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX)); DefineValue(inst, hostloc); } void DefineValue(IR::Inst* inst, Argument& arg) {