diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d4811f6e..078d9d84 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -358,7 +358,7 @@ elseif(ARCHITECTURE_Aarch64)
     # backend/A64/emit_a64_aes.cpp
     # backend/A64/emit_a64_crc32.cpp
     backend/A64/emit_a64_data_processing.cpp
-    # backend/A64/emit_a64_floating_point.cpp
+    backend/A64/emit_a64_floating_point.cpp
     # backend/A64/emit_a64_packed.cpp
     # backend/A64/emit_a64_saturation.cpp
     # backend/A64/emit_a64_sm4.cpp
diff --git a/src/backend/A64/emit_a64_floating_point.cpp b/src/backend/A64/emit_a64_floating_point.cpp
index 708fb46c..94ce042a 100644
--- a/src/backend/A64/emit_a64_floating_point.cpp
+++ b/src/backend/A64/emit_a64_floating_point.cpp
@@ -297,4 +297,262 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
     ctx.reg_alloc.DefineValue(inst, result);
 }
 } // anonymous namespace
+
+void EmitA64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
+
+    code.fp_emitter.FCVT(64, 16, result, result);
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
+    code.fp_emitter.FCVT(32, 16, result, result);
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
+
+    code.fp_emitter.FCVT(64, 32, result, result);
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
+    code.fp_emitter.FCVT(16, 32, result, result);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    code.fp_emitter.FCVT(16, 64, result, result);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    code.fp_emitter.FCVT(32, 64, result, result);
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+template<size_t fsize, bool unsigned_, size_t isize>
+static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const size_t fbits = args[1].GetImmediateU8();
+    const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    const auto round_imm = ConvertRoundingModeToA64RoundingMode(rounding_mode);
+
+    ASSERT_MSG(fbits == 0, "fixed point conversions are not supported yet");
+
+    ARM64Reg src = ctx.reg_alloc.UseScratchFpr(args[0]);
+    ARM64Reg result = ctx.reg_alloc.ScratchGpr();
+    src = fsize == 64 ? EncodeRegToDouble(src) : EncodeRegToSingle(src);
+    result = isize == 64 ? result : DecodeReg(result);
+
+    if constexpr (unsigned_) {
+        code.fp_emitter.FCVTU(result, src, round_imm);
+    }
+    else {
+        code.fp_emitter.FCVTS(result, src, round_imm);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<64, false, 32>(code, ctx, inst);
+}
+
+void EmitA64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<64, false, 64>(code, ctx, inst);
+}
+
+void EmitA64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<64, true, 32>(code, ctx, inst);
+}
+
+void EmitA64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<64, true, 64>(code, ctx, inst);
+}
+
+void EmitA64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<32, false, 32>(code, ctx, inst);
+}
+
+void EmitA64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<32, false, 64>(code, ctx, inst);
+}
+
+void EmitA64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<32, true, 32>(code, ctx, inst);
+}
+
+void EmitA64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<32, true, 64>(code, ctx, inst);
+}
+
+void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
+    const size_t fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (fbits != 0) {
+        code.fp_emitter.SCVTF(result, from, fbits);
+    }
+    else {
+        code.fp_emitter.SCVTF(result, from);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
+    const size_t fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (fbits != 0) {
+        code.fp_emitter.UCVTF(result, from, fbits);
+    }
+    else {
+        code.fp_emitter.UCVTF(result, from);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
+    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
+    const size_t fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (fbits != 0) {
+        code.fp_emitter.SCVTF(result, from, fbits);
+    }
+    else {
+        code.fp_emitter.SCVTF(result, from);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
+    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
+    const size_t fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (fbits != 0) {
+        code.fp_emitter.SCVTF(result, from, fbits);
+    }
+    else {
+        code.fp_emitter.SCVTF(result, from);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
+    const size_t fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (fbits != 0) {
+        code.fp_emitter.SCVTF(result, from, fbits);
+    }
+    else {
+        code.fp_emitter.SCVTF(result, from);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
+    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
+    const size_t fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (fbits != 0) {
+        code.fp_emitter.UCVTF(result, from, fbits);
+    }
+    else {
+        code.fp_emitter.UCVTF(result, from);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
+    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
+    const size_t fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (fbits != 0) {
+        code.fp_emitter.UCVTF(result, from, fbits);
+    }
+    else {
+        code.fp_emitter.UCVTF(result, from);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
+    const size_t fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (fbits != 0) {
+        code.fp_emitter.UCVTF(result, from, fbits);
+    }
+    else {
+        code.fp_emitter.UCVTF(result, from);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
 } // namespace Dynarmic::BackendX64
diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc
index 1723184a..46866b42 100644
--- a/src/backend/A64/opcodes.inc
+++ b/src/backend/A64/opcodes.inc
@@ -515,32 +515,32 @@ OPCODE(CountLeadingZeros32, U32, U32
 //OPCODE(FPSub64, U64, U64, U64 )
 
 // Floating-point conversions
-//OPCODE(FPHalfToDouble, U64, U16, U8 )
-//OPCODE(FPHalfToSingle, U32, U16, U8 )
-//OPCODE(FPSingleToDouble, U64, U32, U8 )
-//OPCODE(FPSingleToHalf, U16, U32, U8 )
-//OPCODE(FPDoubleToHalf, U16, U64, U8 )
-//OPCODE(FPDoubleToSingle, U32, U64, U8 )
-//OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 )
-//OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
-//OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
-//OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
+OPCODE(FPHalfToDouble, U64, U16, U8 )
+OPCODE(FPHalfToSingle, U32, U16, U8 )
+OPCODE(FPSingleToDouble, U64, U32, U8 )
+OPCODE(FPSingleToHalf, U16, U32, U8 )
+OPCODE(FPDoubleToHalf, U16, U64, U8 )
+OPCODE(FPDoubleToSingle, U32, U64, U8 )
+OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 )
+OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
+OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
+OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
 //OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 )
 //OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 )
 //OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 )
 //OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 )
-//OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
-//OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
-//OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
-//OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 )
-//OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 )
-//OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 )
-//OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 )
-//OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 )
-//OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 )
-//OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 )
-//OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 )
-//OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 )
+OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
+OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
+OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
+OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 )
+OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 )
+OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 )
+OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 )
+OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 )
+OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 )
+OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 )
+OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 )
+OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 )
 
 // Floating-point vector instructions
 //OPCODE(FPVectorAbs16, U128, U128 )
diff --git a/src/frontend/A32/decoder/vfp2_a64.inc b/src/frontend/A32/decoder/vfp2_a64.inc
index c1a42494..33cc0103 100644
--- a/src/frontend/A32/decoder/vfp2_a64.inc
+++ b/src/frontend/A32/decoder/vfp2_a64.inc
@@ -26,10 +26,10 @@
 //INST(vfp_VABS, "VABS", "cccc11101D110000dddd101z11M0mmmm") // VFPv2
 //INST(vfp_VNEG, "VNEG", "cccc11101D110001dddd101z01M0mmmm") // VFPv2
 //INST(vfp_VSQRT, "VSQRT", "cccc11101D110001dddd101z11M0mmmm") // VFPv2
-//INST(vfp_VCVT_f_to_f, "VCVT (f32<->f64)", "cccc11101D110111dddd101z11M0mmmm") // VFPv2
-//INST(vfp_VCVT_to_float, "VCVT (to float)", "cccc11101D111000dddd101zs1M0mmmm") // VFPv2
-//INST(vfp_VCVT_to_u32, "VCVT (to u32)", "cccc11101D111100dddd101zr1M0mmmm") // VFPv2
-//INST(vfp_VCVT_to_s32, "VCVT (to s32)", "cccc11101D111101dddd101zr1M0mmmm") // VFPv2
+INST(vfp_VCVT_f_to_f, "VCVT (f32<->f64)", "cccc11101D110111dddd101z11M0mmmm") // VFPv2
+INST(vfp_VCVT_to_float, "VCVT (to float)", "cccc11101D111000dddd101zs1M0mmmm") // VFPv2
+INST(vfp_VCVT_to_u32, "VCVT (to u32)", "cccc11101D111100dddd101zr1M0mmmm") // VFPv2
+INST(vfp_VCVT_to_s32, "VCVT (to s32)", "cccc11101D111101dddd101zr1M0mmmm") // VFPv2
 //INST(vfp_VCMP, "VCMP", "cccc11101D110100dddd101zE1M0mmmm") // VFPv2
 //INST(vfp_VCMP_zero, "VCMP (with zero)", "cccc11101D110101dddd101zE1000000") // VFPv2
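
A note on the rounding-mode plumbing (illustration, not part of the patch): EmitFPToFixed
selects an FCVT{N,P,M,Z,A}{S,U} variant at JIT time, because AArch64 encodes the rounding
mode for these float-to-integer conversions in the opcode rather than reading FPCR. The
sketch below shows the kind of one-to-one mapping ConvertRoundingModeToA64RoundingMode is
expected to perform. The RoundingMode enumerator names mirror dynarmic's common/fp headers
and RoundingA64 is a hypothetical stand-in for the emitter's rounding selector; treat both
as assumptions, not the tree's actual API.

    #include <cstdio>

    // Assumed mirror of dynarmic's FP::RoundingMode; names may differ from the tree.
    enum class RoundingMode {
        ToNearest_TieEven,
        TowardsPlusInfinity,
        TowardsMinusInfinity,
        TowardsZero,
        ToNearest_TieAwayFromZero,
    };

    // Hypothetical stand-in for the emitter's rounding selector; each value picks
    // an instruction variant, e.g. Z -> FCVTZS/FCVTZU, N -> FCVTNS/FCVTNU.
    enum class RoundingA64 { N, P, M, Z, A };

    // One-to-one translation from the IR rounding mode to the A64 opcode variant.
    static RoundingA64 ConvertRoundingModeToA64RoundingMode(RoundingMode rm) {
        switch (rm) {
        case RoundingMode::ToNearest_TieEven:         return RoundingA64::N;
        case RoundingMode::TowardsPlusInfinity:       return RoundingA64::P;
        case RoundingMode::TowardsMinusInfinity:      return RoundingA64::M;
        case RoundingMode::TowardsZero:               return RoundingA64::Z;
        case RoundingMode::ToNearest_TieAwayFromZero: return RoundingA64::A;
        }
        return RoundingA64::N; // unreachable for valid input
    }

    int main() {
        // TowardsZero is the C-style truncation used by the FP*ToFixed* opcodes above.
        std::printf("%d\n", static_cast<int>(
            ConvertRoundingModeToA64RoundingMode(RoundingMode::TowardsZero))); // prints 3
    }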