diff --git a/src/backend/A64/emit_a64_data_processing.cpp b/src/backend/A64/emit_a64_data_processing.cpp
index 8f548af8..e565312e 100644
--- a/src/backend/A64/emit_a64_data_processing.cpp
+++ b/src/backend/A64/emit_a64_data_processing.cpp
@@ -191,4 +191,530 @@ void EmitA64::EmitConditionalSelect64(EmitContext& ctx, IR::Inst* inst) {
 void EmitA64::EmitConditionalSelectNZCV(EmitContext& ctx, IR::Inst* inst) {
     EmitConditionalSelect(code, ctx, inst, 32);
 }
+
+void EmitA64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
+    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+    auto& carry_arg = args[2];
+
+    if (!carry_inst) {
+        if (shift_arg.IsImmediate()) {
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            u8 shift = shift_arg.GetImmediateU8();
+
+            if (shift <= 31) {
+                code.LSL(result, result, shift);
+            } else {
+                code.MOV(result, WZR);
+            }
+
+            ctx.reg_alloc.DefineValue(inst, result);
+        } else {
+            //ctx.reg_alloc.Use(shift_arg, HostLoc::X0);
+            Arm64Gen::ARM64Reg shift = DecodeReg(ctx.reg_alloc.UseScratchGpr(shift_arg));
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+
+            // The 32-bit AArch64 LSLV instruction masks the shift count by 0x1F before performing the shift.
+            // ARM differs from this behaviour: it does not mask the count, so shifts above 31 result in zeros.
+
+            code.ANDI2R(shift, shift, 0xFF);
+            code.LSLV(result, result, shift);
+            code.CMPI2R(shift, 32);
+            code.CSEL(result, WZR, result, CC_GE);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+        }
+    } else {
+        if (shift_arg.IsImmediate()) {
+            u8 shift = shift_arg.GetImmediateU8();
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(carry_arg));
+
+            if (shift == 0) {
+                // There is nothing more to do.
+            } else if (shift < 32) {
+                code.LSL(carry, result, shift - 1);
+                code.LSR(carry, carry, 31);
+                code.LSL(result, result, shift);
+            } else if (shift > 32) {
+                code.MOV(result, WZR);
+                code.MOV(carry, WZR);
+            } else {
+                code.ANDI2R(carry, result, 1);
+                code.MOV(result, WZR);
+            }
+
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
+        } else {
+            //ctx.reg_alloc.Use(shift_arg, HostLoc::X0);
+            Arm64Gen::ARM64Reg shift = DecodeReg(ctx.reg_alloc.UseScratchGpr(shift_arg));
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(carry_arg));
+
+            // TODO: Optimize this.
+            // TODO: Use CSEL instead?
+            FixupBranch Rs_gt32, Rs_eq32;
+            std::vector<FixupBranch> end;
+
+            code.ANDI2R(shift, shift, 0xFF);
+            code.CMP(shift, WZR);
+            // if ((Rs & 0xFF) == 0) goto end;
+            end.push_back(code.B(CC_EQ));
+            code.CMPI2R(shift, 32);
+            Rs_gt32 = code.B(CC_GT);
+            Rs_eq32 = code.B(CC_EQ);
+            // if ((Rs & 0xFF) < 32) {
+            code.SUBI2R(shift, shift, 1); // Subtract 1 so the bit that is shifted out lands in the MSB.
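+            // In outline, this branch computes (pseudocode):
+            //     result = operand << (shift - 1);
+            //     carry  = result >> 31;    // the last bit shifted out
+            //     result = result << 1;     // complete the shift by `shift`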
+            code.LSLV(result, result, shift);
+            code.UBFX(carry, result, 31, 1);
+            code.LSL(result, result, 1);
+            end.push_back(code.B());
+            // } else if ((Rs & 0xFF) > 32) {
+            code.SetJumpTarget(Rs_gt32);
+            code.MOV(result, WZR);
+            code.MOV(carry, WZR);
+            end.push_back(code.B());
+            // } else if ((Rs & 0xFF) == 32) {
+            code.SetJumpTarget(Rs_eq32);
+            code.ANDI2R(carry, result, 1);
+            code.MOV(result, WZR);
+            // }
+
+            for (FixupBranch e : end) {
+                code.SetJumpTarget(e);
+            }
+
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
+        }
+    }
+}
+
+//void EmitA64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
+//    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+//    auto& operand_arg = args[0];
+//    auto& shift_arg = args[1];
+//
+//    if (shift_arg.IsImmediate()) {
+//        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+//        u8 shift = shift_arg.GetImmediateU8();
+//
+//        if (shift < 64) {
+//            code.shl(result, shift);
+//        } else {
+//            code.xor_(result.cvt32(), result.cvt32());
+//        }
+//
+//        ctx.reg_alloc.DefineValue(inst, result);
+//    } else {
+//        ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+//        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+//        Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr();
+//
+//        // The 64-bit x64 SHL instruction masks the shift count by 0x3F before performing the shift.
+//        // ARM differs from the behaviour: It does not mask the count, so shifts above 63 result in zeros.
+//
+//        code.shl(result, code.cl);
+//        code.xor_(zero.cvt32(), zero.cvt32());
+//        code.cmp(code.cl, 64);
+//        code.cmovnb(result, zero);
+//
+//        ctx.reg_alloc.DefineValue(inst, result);
+//    }
+//}
+
+void EmitA64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
+    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+    auto& carry_arg = args[2];
+
+    if (!carry_inst) {
+        if (shift_arg.IsImmediate()) {
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            u8 shift = shift_arg.GetImmediateU8();
+
+            if (shift <= 31) {
+                code.LSR(result, result, shift);
+            } else {
+                code.MOVI2R(result, 0);
+            }
+
+            ctx.reg_alloc.DefineValue(inst, result);
+        } else {
+            //ctx.reg_alloc.Use(shift_arg, HostLoc::X0);
+            Arm64Gen::ARM64Reg shift = DecodeReg(ctx.reg_alloc.UseScratchGpr(shift_arg));
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+
+            // The 32-bit AArch64 LSRV instruction masks the shift count by 0x1F before performing the shift.
+            // ARM differs from this behaviour: it does not mask the count, so shifts above 31 result in zeros.
+
+            code.ANDI2R(shift, shift, 0xFF);
+            code.LSRV(result, result, shift);
+            code.CMPI2R(shift, 31);
+            code.CSEL(result, WZR, result, CC_GT);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+        }
+    } else {
+        if (shift_arg.IsImmediate()) {
+            u8 shift = shift_arg.GetImmediateU8();
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(carry_arg));
+
+            if (shift == 0) {
+                // There is nothing more to do.
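+                // (`carry` was loaded from carry_arg above, so a zero shift
+                // passes both the result and the carry through unchanged.)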
+            } else if (shift < 32) {
+                code.LSR(carry, result, shift - 1);
+                code.ANDI2R(carry, carry, 1);
+                code.LSR(result, result, shift);
+            } else if (shift == 32) {
+                code.LSR(carry, result, 31);
+                code.ANDI2R(carry, carry, 1);
+                code.MOV(result, WZR);
+            } else {
+                code.MOV(result, WZR);
+                code.MOV(carry, WZR);
+            }
+
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
+        } else {
+            //ctx.reg_alloc.Use(shift_arg, HostLoc::X0);
+            Arm64Gen::ARM64Reg shift = DecodeReg(ctx.reg_alloc.UseScratchGpr(shift_arg));
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(carry_arg));
+
+            // TODO: Optimize this.
+            // TODO: Use CSEL instead?
+            FixupBranch Rs_gt32, Rs_eq32;
+            std::vector<FixupBranch> end;
+
+            code.ANDI2R(shift, shift, 0xFF);
+            code.CMPI2R(shift, 32);
+            Rs_gt32 = code.B(CC_GT);
+            Rs_eq32 = code.B(CC_EQ);
+            // if ((Rs & 0xFF) == 0) goto end;
+            code.CMP(shift, WZR);
+            end.push_back(code.B(CC_EQ));
+            // if ((Rs & 0xFF) < 32) {
+            code.SUBI2R(shift, shift, 1); // Subtract 1 to get the bit that is shifted out into the carry.
+            code.LSRV(result, result, shift);
+            code.ANDI2R(carry, result, 1);
+            code.LSR(result, result, 1);
+            end.push_back(code.B());
+            // } else if ((Rs & 0xFF) == 32) {
+            code.SetJumpTarget(Rs_eq32);
+            code.LSR(carry, result, 31);
+            code.ANDI2R(carry, carry, 1);
+            code.MOV(result, WZR);
+            end.push_back(code.B());
+            // } else if ((Rs & 0xFF) > 32) {
+            code.SetJumpTarget(Rs_gt32);
+            code.MOV(result, WZR);
+            code.MOV(carry, WZR);
+            // }
+
+            for (FixupBranch e : end) {
+                code.SetJumpTarget(e);
+            }
+
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
+        }
+    }
+}
+
+void EmitA64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+
+    if (shift_arg.IsImmediate()) {
+        ARM64Reg result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+        u8 shift = shift_arg.GetImmediateU8();
+
+        if (shift < 64) {
+            code.LSR(result, result, shift);
+        } else {
+            code.MOV(result, ZR);
+        }
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    } else {
+        ARM64Reg shift = ctx.reg_alloc.UseScratchGpr(shift_arg);
+        ARM64Reg result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+
+        // The 64-bit AArch64 LSRV instruction masks the shift count by 0x3F before performing the shift.
+        // ARM differs from this behaviour: it does not mask the count, so shifts above 63 result in zeros.
+
+        code.ANDI2R(shift, shift, 0xFF);
+        code.LSRV(result, result, shift);
+        code.CMPI2R(shift, 63);
+        code.CSEL(result, ZR, result, CC_GT);
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    }
+}
+
+void EmitA64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) {
+    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+    auto& carry_arg = args[2];
+
+    if (!carry_inst) {
+        if (shift_arg.IsImmediate()) {
+            u8 shift = shift_arg.GetImmediateU8();
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+
+            code.ASR(result, result, u8(shift < 31 ? shift : 31));
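+            // Shifts of 31..255 all produce 32 copies of the sign bit, so clamping
+            // the immediate to 31 above is exact.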
+
+            ctx.reg_alloc.DefineValue(inst, result);
+        } else {
+            //ctx.reg_alloc.UseScratch(shift_arg, HostLoc::X0);
+            Arm64Gen::ARM64Reg shift = DecodeReg(ctx.reg_alloc.UseScratchGpr(shift_arg));
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg const31 = DecodeReg(ctx.reg_alloc.ScratchGpr());
+
+            // The 32-bit AArch64 ASRV instruction masks the shift count by 0x1F before performing the shift.
+            // ARM differs from this behaviour: it does not mask the count.
+
+            // We note that all shift values above 31 have the same behaviour as 31 does, so we saturate `shift` to 31.
+            code.ANDI2R(shift, shift, 0xFF);
+            code.MOVI2R(const31, 31);
+            code.CMPI2R(shift, u32(31));
+            code.CSEL(shift, shift, const31, CC_LE);
+            code.ASRV(result, result, shift);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+        }
+    } else {
+        if (shift_arg.IsImmediate()) {
+            u8 shift = shift_arg.GetImmediateU8();
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(carry_arg));
+
+            if (shift == 0) {
+                // There is nothing more to do.
+            } else if (shift <= 31) {
+                code.ASR(result, result, shift - 1);
+                code.ANDI2R(carry, result, 1);
+                code.ASR(result, result, 1);
+            } else {
+                code.ASR(result, result, 31);
+                code.ANDI2R(carry, result, 1);
+            }
+
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
+        } else {
+            //ctx.reg_alloc.Use(shift_arg, HostLoc::X0);
+            Arm64Gen::ARM64Reg shift = DecodeReg(ctx.reg_alloc.UseScratchGpr(shift_arg));
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(carry_arg));
+
+            // TODO: Optimize this.
+
+            std::vector<FixupBranch> end;
+            FixupBranch Rs_gt31;
+
+            code.ANDI2R(shift, shift, 0xFF);
+            code.CMPI2R(shift, u32(31));
+            Rs_gt31 = code.B(CC_GT);
+            // if ((Rs & 0xFF) == 0) goto end;
+            code.CMP(shift, WZR);
+            end.push_back(code.B(CC_EQ));
+            // if ((Rs & 0xFF) <= 31) {
+            code.SUBI2R(shift, shift, 1);
+            code.ASRV(result, result, shift);
+            code.ANDI2R(carry, result, 1);
+            code.ASR(result, result, 1);
+            end.push_back(code.B());
+            // } else if ((Rs & 0xFF) > 31) {
+            code.SetJumpTarget(Rs_gt31);
+            code.ASR(result, result, 31); // 31 produces the same results as anything above 31.
+            code.ANDI2R(carry, result, 1);
+            // }
+
+            for (FixupBranch e : end) {
+                code.SetJumpTarget(e);
+            }
+
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
+        }
+    }
+}
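+
+// In outline, the register-shift carry path of EmitArithmeticShiftRight32 above
+// implements (pseudocode, with arithmetic >>):
+//     shift &= 0xFF;
+//     if (shift == 0)       { /* result and carry unchanged */ }
+//     else if (shift <= 31) { carry = (operand >> (shift - 1)) & 1; result = operand >> shift; }
+//     else                  { result = operand >> 31; carry = result & 1; }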
+
+//void EmitA64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) {
+//    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+//    auto& operand_arg = args[0];
+//    auto& shift_arg = args[1];
+//
+//    if (shift_arg.IsImmediate()) {
+//        u8 shift = shift_arg.GetImmediateU8();
+//        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+//
+//        code.sar(result, u8(shift < 63 ? shift : 63));
+//
+//        ctx.reg_alloc.DefineValue(inst, result);
+//    } else {
+//        ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX);
+//        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+//        Xbyak::Reg64 const63 = ctx.reg_alloc.ScratchGpr();
+//
+//        // The 64-bit x64 SAR instruction masks the shift count by 0x3F before performing the shift.
+//        // ARM differs from the behaviour: It does not mask the count.
+//
+//        // We note that all shift values above 63 have the same behaviour as 63 does, so we saturate `shift` to 63.
+//        code.mov(const63, 63);
+//        code.movzx(code.ecx, code.cl);
+//        code.cmp(code.ecx, u32(63));
+//        code.cmovg(code.ecx, const63);
+//        code.sar(result, code.cl);
+//
+//        ctx.reg_alloc.DefineValue(inst, result);
+//    }
+//}
+
+void EmitA64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
+    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+    auto& carry_arg = args[2];
+
+    if (!carry_inst) {
+        if (shift_arg.IsImmediate()) {
+            u8 shift = shift_arg.GetImmediateU8();
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+
+            code.ROR(result, result, u8(shift & 0x1F));
+
+            ctx.reg_alloc.DefineValue(inst, result);
+        } else {
+            ctx.reg_alloc.Use(shift_arg, HostLoc::X0);
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+
+            // The AArch64 RORV instruction does the (shift & 0x1F) masking for us.
+            code.RORV(result, result, W0);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+        }
+    } else {
+        if (shift_arg.IsImmediate()) {
+            u8 shift = shift_arg.GetImmediateU8();
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(carry_arg));
+
+            if (shift == 0) {
+                // There is nothing more to do.
+            } else if ((shift & 0x1F) == 0) {
+                code.MOV(carry, result, ArithOption{result, ST_LSR, 31});
+            } else {
+                code.ROR(result, result, (shift & 0x1F) - 1);
+                code.ANDI2R(carry, result, 1);
+                code.ROR(result, result, 1);
+            }
+
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
+        } else {
+            //ctx.reg_alloc.UseScratch(shift_arg, HostLoc::X0);
+            Arm64Gen::ARM64Reg shift = DecodeReg(ctx.reg_alloc.UseScratchGpr(shift_arg));
+            Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(operand_arg));
+            Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(carry_arg));
+
+            // TODO: Optimize this.
+
+            std::vector<FixupBranch> end;
+            FixupBranch zero_1F;
+
+            code.ANDSI2R(shift, shift, u32(0xFF));
+            // if ((Rs & 0xFF) == 0) goto end;
+            code.CMP(shift, WZR);
+            end.push_back(code.B(CC_EQ));
+            code.ANDSI2R(shift, shift, u32(0x1F));
+            zero_1F = code.B(CC_EQ);
+            // if ((Rs & 0x1F) != 0) {
+            code.SUBI2R(shift, shift, 1);
+            code.RORV(result, result, shift);
+            code.ANDI2R(carry, result, 1);
+            code.ROR(result, result, 1);
+            end.push_back(code.B());
+            // } else {
+            code.SetJumpTarget(zero_1F);
+            code.MOV(carry, result, ArithOption{result, ST_LSR, 31});
+            // }
+
+            for (FixupBranch e : end) {
+                code.SetJumpTarget(e);
+            }
+
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
+        }
+    }
+}
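+
+// In outline, the register-shift carry path of EmitRotateRight32 above implements (pseudocode):
+//     if ((Rs & 0xFF) == 0)      { /* result and carry unchanged */ }
+//     else if ((Rs & 0x1F) == 0) { carry = result >> 31; /* result unchanged */ }
+//     else                       { result = rotr(result, Rs & 0x1F); carry = result >> 31; }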
+
+//void EmitA64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) {
+//    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+//    auto& operand_arg = args[0];
+//    auto& shift_arg = args[1];
+//
+//    if (shift_arg.IsImmediate()) {
+//        u8 shift = shift_arg.GetImmediateU8();
+//        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+//
+//        code.ror(result, u8(shift & 0x3F));
+//
+//        ctx.reg_alloc.DefineValue(inst, result);
+//    } else {
+//        ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+//        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+//
+//        // The x64 ROR instruction does (shift & 0x3F) for us.
+//        code.ror(result, code.cl);
+//
+//        ctx.reg_alloc.DefineValue(inst, result);
+//    }
+//}
+
+void EmitA64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
+    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
+    Arm64Gen::ARM64Reg carry = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[1]));
+    Arm64Gen::ARM64Reg temp = DecodeReg(ctx.reg_alloc.ScratchGpr());
+
+    if (carry_inst) {
+        // Preserve the original operand so its LSB can become the carry-out.
+        code.MOV(temp, result);
+    }
+
+    // Insert the carry-in into the LSB, then rotate it up into the MSB.
+    code.BFI(result, carry, 0, 1);
+    code.ROR(result, result, 1);
+
+    if (carry_inst) {
+        code.ANDI2R(carry, temp, 1);
+
+        ctx.reg_alloc.DefineValue(carry_inst, carry);
+        ctx.EraseInstruction(carry_inst);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
 } // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc
index d1dff550..2ebd7bc3 100644
--- a/src/backend/A64/opcodes.inc
+++ b/src/backend/A64/opcodes.inc
@@ -97,24 +97,24 @@ OPCODE(MostSignificantBit,          U1,     U32                 )
 OPCODE(IsZero32,                    U1,     U32                 )
 OPCODE(IsZero64,                    U1,     U64                 )
 //OPCODE(TestBit,                   U1,     U64, U8             )
-//OPCODE(LogicalShiftLeft32,        U32,    U32, U8, U1         )
 OPCODE(ConditionalSelect32,         U32,    Cond, U32, U32      )
 OPCODE(ConditionalSelect64,         U64,    Cond, U64, U64      )
 OPCODE(ConditionalSelectNZCV,       NZCV,   Cond, NZCV, NZCV    )
+OPCODE(LogicalShiftLeft32,          U32,    U32, U8, U1         )
 //OPCODE(LogicalShiftLeft64,        U64,    U64, U8             )
-//OPCODE(LogicalShiftRight32,       U32,    U32, U8, U1         )
-//OPCODE(LogicalShiftRight64,       U64,    U64, U8             )
-//OPCODE(ArithmeticShiftRight32,    U32,    U32, U8, U1         )
+OPCODE(LogicalShiftRight32,         U32,    U32, U8, U1         )
+OPCODE(LogicalShiftRight64,         U64,    U64, U8             )
+OPCODE(ArithmeticShiftRight32,      U32,    U32, U8, U1         )
 //OPCODE(ArithmeticShiftRight64,    U64,    U64, U8             )
-//OPCODE(RotateRight32,             U32,    U32, U8, U1         )
+OPCODE(RotateRight32,               U32,    U32, U8, U1         )
 //OPCODE(RotateRight64,             U64,    U64, U8             )
-//OPCODE(RotateRightExtended,       U32,    U32, U1             )
 //OPCODE(Add32,                     U32,    U32, U32, U1        )
 //OPCODE(Add64,                     U64,    U64, U64, U1        )
 //OPCODE(Sub32,                     U32,    U32, U32, U1        )
 //OPCODE(Sub64,                     U64,    U64, U64, U1        )
 //OPCODE(Mul32,                     U32,    U32, U32            )
 //OPCODE(Mul64,                     U64,    U64, U64            )
+OPCODE(RotateRightExtended,         U32,    U32, U1             )
 //OPCODE(SignedMultiplyHigh64,      U64,    U64, U64            )
 //OPCODE(UnsignedMultiplyHigh64,    U64,    U64, U64            )
 //OPCODE(UnsignedDiv32,             U32,    U32, U32            )