From 3f6c529da26fbfc9daa1af74c3641ef5196a58ca Mon Sep 17 00:00:00 2001
From: Lioncash
Date: Tue, 8 May 2018 11:18:13 -0400
Subject: [PATCH] ir: Add opcode to perform the vector conversion S64->F64

Unfortunately x86 prior to AVX-512 doesn't really give us any convenient
instruction to do the work for us
---
 .../emit_x64_vector_floating_point.cpp | 41 +++++++++++++++++++
 src/frontend/ir/ir_emitter.cpp | 4 ++
 src/frontend/ir/ir_emitter.h | 1 +
 src/frontend/ir/opcodes.inc | 1 +
 4 files changed, 47 insertions(+)

diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index 9ebbcb1e..9f258145 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -206,6 +206,47 @@ void EmitX64::EmitFPVectorS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, xmm);
 }
 
+void EmitX64::EmitFPVectorS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]); // input vector; also receives the converted result
+    // Converts the two signed 64-bit lanes of xmm to doubles in place; the instruction sequence is chosen by CPU feature.
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ)) {
+        code.vcvtqq2pd(xmm, xmm); // one packed instruction converts both quadwords
+    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(); // receives the converted second quadword
+        const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); // integer staging register feeding cvtsi2sd
+
+        // First quadword: move the low 64 bits of xmm to a GPR, then convert them back into the low half of xmm
+        code.movq(tmp, xmm);
+        code.cvtsi2sd(xmm, tmp);
+
+        // Second quadword: extract element 1 of xmm and convert it into the low half of xmm_tmp
+        code.pextrq(tmp, xmm, 1); // still the original input: legacy-SSE cvtsi2sd only replaced the low 64 bits of xmm
+        code.cvtsi2sd(xmm_tmp, tmp);
+
+        // Combine: the low double of xmm_tmp becomes the high half of xmm
+        code.unpcklpd(xmm, xmm_tmp);
+    } else {
+        const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm(); // holds a copy of the second input quadword
+        const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(); // receives the converted second quadword
+        const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); // integer staging register feeding cvtsi2sd
+
+        // First quadword (this path does not use pextrq, so the second quadword is copied out beforehand)
+        code.movhlps(high_xmm, xmm); // high 64 bits of xmm -> low 64 bits of high_xmm
+        code.movq(tmp, xmm);
+        code.cvtsi2sd(xmm, tmp);
+
+        // Second quadword: convert the copy saved in high_xmm
+        code.movq(tmp, high_xmm);
+        code.cvtsi2sd(xmm_tmp, tmp);
+
+        // Combine: the low double of xmm_tmp becomes the high half of xmm
+        code.unpcklpd(xmm, xmm_tmp);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, xmm); // xmm is the value of this IR instruction
+}
+
 void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 87a32049..962f7027 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1488,6 +1488,10 @@ U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
     return Inst(Opcode::FPVectorS32ToSingle, a);
 }
 
+U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
+    return Inst(Opcode::FPVectorS64ToDouble, a);
+}
+
 void IREmitter::Breakpoint() {
     Inst(Opcode::Breakpoint);
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index a95c7010..a9d91cdb 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -274,6 +274,7 @@ public:
     U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
     U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
     U128 FPVectorS32ToSingle(const U128& a);
+    U128 FPVectorS64ToDouble(const U128& a);
 
     void Breakpoint();
 
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 48901af0..0f442998 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -392,6 +392,7 @@ OPCODE(FPVectorDiv64, T::U128, T::U128, T::U
 OPCODE(FPVectorMul32, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorMul64, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorS32ToSingle, T::U128, T::U128 )
+OPCODE(FPVectorS64ToDouble, T::U128, T::U128 )
 OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorSub64, T::U128, T::U128, T::U128 )
 