From 506e544bfe38b57cffc157d9dffe71db80696102 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Mon, 23 Jul 2018 22:02:28 +0100
Subject: [PATCH] IR: Implement FPRSqrtStepFused

Add the FPRSqrtStepFused32 and FPRSqrtStepFused64 IR opcodes together
with IREmitter::FPRSqrtStepFused, which selects between them based on
the type of its first operand.

The x64 backend implements both opcodes as a host call to
FP::FPRSqrtStepFused: the two IR operands are passed as the first two
arguments, the FPCR value as the third, and the address
r15 + offsetof_fpsr_exc as the fourth.
---
 src/backend_x64/emit_x64_floating_point.cpp | 17 +++++++++++++++++
 src/frontend/ir/ir_emitter.cpp              |  7 +++++++
 src/frontend/ir/ir_emitter.h                |  1 +
 src/frontend/ir/opcodes.inc                 |  2 ++
 4 files changed, 27 insertions(+)

diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 16bc49ce..007f6438 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -912,6 +912,23 @@ void EmitX64::EmitFPRSqrtEstimate64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPRSqrtEstimate<u64>(code, ctx, inst);
 }
 
+template<typename FPT>
+static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
+    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+    code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
+}
+
+void EmitX64::EmitFPRSqrtStepFused32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRSqrtStepFused<u32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPRSqrtStepFused64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRSqrtStepFused<u64>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
     FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index df822fe4..da07d05d 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1463,6 +1463,13 @@ U32U64 IREmitter::FPRSqrtEstimate(const U32U64& a) {
     return Inst<U64>(Opcode::FPRSqrtEstimate64, a);
 }
 
+U32U64 IREmitter::FPRSqrtStepFused(const U32U64& a, const U32U64& b) {
+    if (a.GetType() == Type::U32) {
+        return Inst<U32>(Opcode::FPRSqrtStepFused32, a, b);
+    }
+    return Inst<U64>(Opcode::FPRSqrtStepFused64, a, b);
+}
+
 U32U64 IREmitter::FPSqrt(const U32U64& a) {
     if (a.GetType() == Type::U32) {
         return Inst<U32>(Opcode::FPSqrt32, a);
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 2a4c240c..dde77933 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -268,6 +268,7 @@ public:
     U32U64 FPNeg(const U32U64& a);
     U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
     U32U64 FPRSqrtEstimate(const U32U64& a);
+    U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b);
     U32U64 FPSqrt(const U32U64& a);
     U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 9e613abb..63756145 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -388,6 +388,8 @@ OPCODE(FPRoundInt32, T::U32, T::U32, T::U
 OPCODE(FPRoundInt64, T::U64, T::U64, T::U8, T::U1 )
 OPCODE(FPRSqrtEstimate32, T::U32, T::U32 )
 OPCODE(FPRSqrtEstimate64, T::U64, T::U64 )
+OPCODE(FPRSqrtStepFused32, T::U32, T::U32, T::U32 )
+OPCODE(FPRSqrtStepFused64, T::U64, T::U64, T::U64 )
 OPCODE(FPSqrt32, T::U32, T::U32 )
 OPCODE(FPSqrt64, T::U64, T::U64 )
 OPCODE(FPSub32, T::U32, T::U32, T::U32 )