From 9c789ded58652c69860a72fbef6a8791830faff3 Mon Sep 17 00:00:00 2001
From: SachinVin
Date: Sun, 14 Jul 2019 22:06:28 +0530
Subject: [PATCH] backend/A64/emit_a64_floating_point.cpp: Implement VABS,
 VNEG, VCMP and a few others

---
 src/backend/A64/emit_a64_floating_point.cpp | 101 ++++++++++++++++++++
 src/backend/A64/opcodes.inc                 |  16 ++--
 src/frontend/A32/decoder/vfp2_a64.inc       |  28 +++---
 3 files changed, 123 insertions(+), 22 deletions(-)

diff --git a/src/backend/A64/emit_a64_floating_point.cpp b/src/backend/A64/emit_a64_floating_point.cpp
index 94ce042a..2b1999f1 100644
--- a/src/backend/A64/emit_a64_floating_point.cpp
+++ b/src/backend/A64/emit_a64_floating_point.cpp
@@ -298,6 +298,107 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
 }
 } // anonymous namespace
+//void EmitA64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
+//    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+//    const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
+//
+//    code.pand(result, code.MConst(xword, f16_non_sign_mask));
+//
+//    ctx.reg_alloc.DefineValue(inst, result);
+//}
+
+void EmitA64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
+
+    code.fp_emitter.FABS(result, result);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+
+    code.fp_emitter.FABS(result, result);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+//void EmitA64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
+//    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+//    const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
+//
+//    code.pxor(result, code.MConst(xword, f16_negative_zero));
+//
+//    ctx.reg_alloc.DefineValue(inst, result);
+//}
+
+void EmitA64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
+
+    code.fp_emitter.FNEG(result, result);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+
+    code.fp_emitter.FNEG(result, result);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitA64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
+    FPTwoOp<32>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
+}
+
+void EmitA64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) {
+    FPTwoOp<64>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
+}
+
+static ARM64Reg SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
+    const ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
+    // On A64, FCMP/FCMPE set the integer NZCV flags directly, so read them with MRS.
+    code.MRS(nzcv, FIELD_NZCV);
+    return nzcv;
+}
+
+void EmitA64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg reg_a = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[0]));
+    const ARM64Reg reg_b = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[1]));
+    const bool exc_on_qnan = args[2].GetImmediateU1();
+
+    if (exc_on_qnan) {
+        code.fp_emitter.FCMPE(reg_a, reg_b);
+    } else {
+        code.fp_emitter.FCMP(reg_a, reg_b);
+    }
+
+    const ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
+    ctx.reg_alloc.DefineValue(inst, nzcv);
+}
+
+void EmitA64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const ARM64Reg reg_a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[0]));
+    const ARM64Reg reg_b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+    const bool exc_on_qnan = args[2].GetImmediateU1();
+
+    if (exc_on_qnan) {
+        code.fp_emitter.FCMPE(reg_a, reg_b);
+    } else {
+        code.fp_emitter.FCMP(reg_a, reg_b);
+    }
+
+    const ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
+    ctx.reg_alloc.DefineValue(inst, nzcv);
+}
+
 void EmitA64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
 
diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc
index 46866b42..253626d9 100644
--- a/src/backend/A64/opcodes.inc
+++ b/src/backend/A64/opcodes.inc
@@ -465,14 +465,14 @@ OPCODE(CountLeadingZeros32,                                 U32,            U32
 
 // Floating-point operations
 //OPCODE(FPAbs16,                                           U16,            U16                                             )
-//OPCODE(FPAbs32,                                           U32,            U32                                             )
-//OPCODE(FPAbs64,                                           U64,            U64                                             )
 //OPCODE(FPAdd32,                                           U32,            U32,            U32                             )
 //OPCODE(FPAdd64,                                           U64,            U64,            U64                             )
-//OPCODE(FPCompare32,                                       NZCV,           U32,            U32,            U1              )
-//OPCODE(FPCompare64,                                       NZCV,           U64,            U64,            U1              )
 //OPCODE(FPDiv32,                                           U32,            U32,            U32                             )
 //OPCODE(FPDiv64,                                           U64,            U64,            U64                             )
+OPCODE(FPAbs32,                                             U32,            U32                                             )
+OPCODE(FPAbs64,                                             U64,            U64                                             )
+OPCODE(FPCompare32,                                         NZCV,           U32,            U32,            U1              )
+OPCODE(FPCompare64,                                         NZCV,           U64,            U64,            U1              )
 //OPCODE(FPMax32,                                           U32,            U32,            U32                             )
 //OPCODE(FPMax64,                                           U64,            U64,            U64                             )
 //OPCODE(FPMaxNumeric32,                                    U32,            U32,            U32                             )
@@ -489,8 +489,8 @@ OPCODE(CountLeadingZeros32,                                 U32,            U32
 //OPCODE(FPMulX32,                                          U32,            U32,            U32                             )
 //OPCODE(FPMulX64,                                          U64,            U64,            U64                             )
 //OPCODE(FPNeg16,                                           U16,            U16                                             )
-//OPCODE(FPNeg32,                                           U32,            U32                                             )
-//OPCODE(FPNeg64,                                           U64,            U64                                             )
+OPCODE(FPNeg32,                                             U32,            U32                                             )
+OPCODE(FPNeg64,                                             U64,            U64                                             )
 //OPCODE(FPRecipEstimate16,                                 U16,            U16                                             )
 //OPCODE(FPRecipEstimate32,                                 U32,            U32                                             )
 //OPCODE(FPRecipEstimate64,                                 U64,            U64                                             )
@@ -509,10 +509,10 @@ OPCODE(CountLeadingZeros32,                                 U32,            U32
 //OPCODE(FPRSqrtStepFused16,                                U16,            U16,            U16                             )
 //OPCODE(FPRSqrtStepFused32,                                U32,            U32,            U32                             )
 //OPCODE(FPRSqrtStepFused64,                                U64,            U64,            U64                             )
-//OPCODE(FPSqrt32,                                          U32,            U32                                             )
-//OPCODE(FPSqrt64,                                          U64,            U64                                             )
 //OPCODE(FPSub32,                                           U32,            U32,            U32                             )
 //OPCODE(FPSub64,                                           U64,            U64,            U64                             )
+OPCODE(FPSqrt32,                                            U32,            U32                                             )
+OPCODE(FPSqrt64,                                            U64,            U64                                             )
 
 // Floating-point conversions
 OPCODE(FPHalfToDouble,                                      U64,            U16,            U8                              )
diff --git a/src/frontend/A32/decoder/vfp2_a64.inc b/src/frontend/A32/decoder/vfp2_a64.inc
index 128b34cf..8d211b4f 100644
--- a/src/frontend/A32/decoder/vfp2_a64.inc
+++ b/src/frontend/A32/decoder/vfp2_a64.inc
@@ -23,25 +23,25 @@ INST(vfp_VMOV_f64_2u32,       "VMOV (f64 to 2xcore)", "cccc11000101uuuutttt10110
 INST(vfp_VMOV_reg,            "VMOV (reg)",           "cccc11101D110000dddd101z01M0mmmm") // VFPv2
 
 // Floating-point other instructions
-//INST(vfp_VABS,              "VABS",                 "cccc11101D110000dddd101z11M0mmmm") // VFPv2
-//INST(vfp_VNEG,              "VNEG",                 "cccc11101D110001dddd101z01M0mmmm") // VFPv2
-//INST(vfp_VSQRT,             "VSQRT",                "cccc11101D110001dddd101z11M0mmmm") // VFPv2
+INST(vfp_VABS,                "VABS",                 "cccc11101D110000dddd101z11M0mmmm") // VFPv2
+INST(vfp_VNEG,                "VNEG",                 "cccc11101D110001dddd101z01M0mmmm") // VFPv2
+INST(vfp_VSQRT,               "VSQRT",                "cccc11101D110001dddd101z11M0mmmm") // VFPv2
 INST(vfp_VCVT_f_to_f,         "VCVT (f32<->f64)",     "cccc11101D110111dddd101z11M0mmmm") // VFPv2
 INST(vfp_VCVT_to_float,       "VCVT (to float)",      "cccc11101D111000dddd101zs1M0mmmm") // VFPv2
 INST(vfp_VCVT_to_u32,         "VCVT (to u32)",        "cccc11101D111100dddd101zr1M0mmmm") // VFPv2
 INST(vfp_VCVT_to_s32,         "VCVT (to s32)",        "cccc11101D111101dddd101zr1M0mmmm") // VFPv2
-//INST(vfp_VCMP,              "VCMP",                 "cccc11101D110100dddd101zE1M0mmmm") // VFPv2
-//INST(vfp_VCMP_zero,         "VCMP (with zero)",     "cccc11101D110101dddd101zE1000000") // VFPv2
+INST(vfp_VCMP,                "VCMP",                 "cccc11101D110100dddd101zE1M0mmmm") // VFPv2
+INST(vfp_VCMP_zero,           "VCMP (with zero)",     "cccc11101D110101dddd101zE1000000") // VFPv2
 
 // Floating-point system register access
-//INST(vfp_VMSR,              "VMSR",                 "cccc111011100001tttt101000010000") // VFPv2
-//INST(vfp_VMRS,              "VMRS",                 "cccc111011110001tttt101000010000") // VFPv2
+INST(vfp_VMSR,                "VMSR",                 "cccc111011100001tttt101000010000") // VFPv2
+INST(vfp_VMRS,                "VMRS",                 "cccc111011110001tttt101000010000") // VFPv2
 
 // Extension register load-store instructions
-//INST(vfp_VPUSH,             "VPUSH",                "cccc11010D101101dddd101zvvvvvvvv") // VFPv2
-//INST(vfp_VPOP,              "VPOP",                 "cccc11001D111101dddd101zvvvvvvvv") // VFPv2
-//INST(vfp_VLDR,              "VLDR",                 "cccc1101UD01nnnndddd101zvvvvvvvv") // VFPv2
-//INST(vfp_VSTR,              "VSTR",                 "cccc1101UD00nnnndddd101zvvvvvvvv") // VFPv2
-//INST(vfp_VSTM_a1,           "VSTM (A1)",            "cccc110puDw0nnnndddd1011vvvvvvvv") // VFPv2
-//INST(vfp_VSTM_a2,           "VSTM (A2)",            "cccc110puDw0nnnndddd1010vvvvvvvv") // VFPv2
-//INST(vfp_VLDM_a2,           "VLDM (A2)",            "cccc110puDw1nnnndddd1010vvvvvvvv") // VFPv2
+INST(vfp_VPUSH,               "VPUSH",                "cccc11010D101101dddd101zvvvvvvvv") // VFPv2
+INST(vfp_VPOP,                "VPOP",                 "cccc11001D111101dddd101zvvvvvvvv") // VFPv2
+INST(vfp_VLDR,                "VLDR",                 "cccc1101UD01nnnndddd101zvvvvvvvv") // VFPv2
+INST(vfp_VSTR,                "VSTR",                 "cccc1101UD00nnnndddd101zvvvvvvvv") // VFPv2
+INST(vfp_VSTM_a1,             "VSTM (A1)",            "cccc110puDw0nnnndddd1011vvvvvvvv") // VFPv2
+INST(vfp_VSTM_a2,             "VSTM (A2)",            "cccc110puDw0nnnndddd1010vvvvvvvv") // VFPv2
+INST(vfp_VLDM_a2,             "VLDM (A2)",            "cccc110puDw1nnnndddd1010vvvvvvvv") // VFPv2
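
Note on the disabled half-precision stubs: the commented-out EmitFPAbs16/EmitFPNeg16 bodies are still the x64 backend's SSE sequences (pand/pxor against the f16_non_sign_mask/f16_negative_zero constants) and need NEON equivalents before they can be enabled. The idea they encode is the same one FABS/FNEG implement in hardware: IEEE-754 abs and neg are pure sign-bit operations that never trap and pass NaN payloads through with only the sign changed. A minimal host-independent sketch of that semantics (illustrative only, not part of this patch):

#include <cstdint>
#include <cstring>

// IEEE-754 single-precision abs/neg as sign-bit operations, mirroring what
// FABS/FNEG (and the x64 pand/pxor mask sequences) do to the bit pattern.
float fp32_abs(float x) {
    std::uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits &= 0x7fffffffu;  // clear the sign bit (the "non-sign" mask)
    std::memcpy(&x, &bits, sizeof(bits));
    return x;
}

float fp32_neg(float x) {
    std::uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits ^= 0x80000000u;  // flip the sign bit (XOR with negative zero)
    std::memcpy(&x, &bits, sizeof(bits));
    return x;
}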
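The compare path works because, unlike AArch32 VCMP (which writes FPSCR.NZCV), A64's FCMP/FCMPE set the integer PSTATE.NZCV flags directly; SetFpscrNzcvFromFlags reads them back with MRS so the IR can later store them into the guest FPSCR. For reference, this is the NZCV encoding an IEEE-754 comparison produces, placed in bits 31:28 as MRS returns it (a sketch per the ARM ARM; the function name is illustrative, not dynarmic API):

#include <cmath>
#include <cstdint>

// NZCV result of an IEEE-754 comparison, in bits 31:28:
// equal -> 0110 (Z,C), less -> 1000 (N), greater -> 0010 (C), unordered -> 0011 (C,V).
std::uint32_t fp_compare_nzcv(double a, double b) {
    if (std::isnan(a) || std::isnan(b))
        return 0b0011u << 28;  // unordered
    if (a == b)
        return 0b0110u << 28;  // equal
    if (a < b)
        return 0b1000u << 28;  // less than
    return 0b0010u << 28;      // greater than
}

FCMP and FCMPE produce the same flags; they differ only in that FCMPE also raises the Invalid Operation exception for quiet NaN operands, which is exactly what the exc_on_qnan immediate selects between.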