diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 17b74679..8a468fbf 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -17,6 +17,104 @@ namespace BackendX64 {
 
 using namespace Xbyak::util;
 
+// Emits code that reads the 8-bit element selected by the immediate index (args[1])
+// out of the vector in args[0] and defines the result in a 32-bit GPR.
+void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ASSERT(args[1].IsImmediate());
+    u8 index = args[1].GetImmediateU8();
+    ASSERT(index < 16);
+
+    Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
+    Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
+
+    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code->pextrb(dest, source, index);
+    } else {
+        // PEXTRW selects 16-bit lanes, so fetch the word that contains the byte
+        // and then isolate the requested half, leaving it zero-extended exactly
+        // as PEXTRB would.
+        code->pextrw(dest, source, index / 2);
+        if (index % 2 == 1) {
+            code->shr(dest, 8);
+        } else {
+            code->and_(dest, 0xFF);
+        }
+    }
+
+    ctx.reg_alloc.DefineValue(inst, dest);
+}
+
+// Emits code that reads the 16-bit element selected by the immediate index (args[1])
+// out of the vector in args[0] with PEXTRW.
+void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ASSERT(args[1].IsImmediate());
+    u8 index = args[1].GetImmediateU8();
+    ASSERT(index < 8);
+
+    Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
+    Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
+    code->pextrw(dest, source, index);
+    ctx.reg_alloc.DefineValue(inst, dest);
+}
+
+// Emits code that reads the 32-bit element selected by the immediate index (args[1])
+// out of the vector in args[0]. Element 0 is a plain MOVD; other elements use PEXTRD
+// when SSE4.1 is available and a PSHUFD + MOVD pair otherwise.
+void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ASSERT(args[1].IsImmediate());
+    u8 index = args[1].GetImmediateU8();
+    ASSERT(index < 4);
+
+    Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
+
+    if (index == 0) {
+        Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
+        code->movd(dest, source);
+    } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
+        code->pextrd(dest, source, index);
+    } else {
+        // PSHUFD writes to its operand, so source is requested via UseScratchXmm. With
+        // index < 4 the immediate's low two bits route the requested element into lane 0.
+        Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]);
+        code->pshufd(source, source, index);
+        code->movd(dest, source);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, dest);
+}
+
+// Emits code that reads the 64-bit element selected by the immediate index (args[1])
+// out of the vector in args[0]. Element 0 is a plain MOVQ; element 1 uses PEXTRQ when
+// SSE4.1 is available and a PUNPCKHQDQ + MOVQ pair otherwise.
+void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ASSERT(args[1].IsImmediate());
+    u8 index = args[1].GetImmediateU8();
+    // Every non-zero index is lowered as "upper half", so reject anything but 0 or 1.
+    ASSERT(index < 2);
+
+    Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr().cvt64();
+
+    if (index == 0) {
+        Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
+        code->movq(dest, source);
+    } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
+        code->pextrq(dest, source, 1);
+    } else {
+        // PUNPCKHQDQ writes to its operand, so source is requested via UseScratchXmm.
+        Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]);
+        code->punpckhqdq(source, source);
+        code->movq(dest, source);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, dest);
+}
+
 static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 3cb2f564..155cbfef 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -636,6 +636,27 @@ U32 IREmitter::PackedSelect(const U32& ge, const U32& a, const U32& b) {
     return Inst(Opcode::PackedSelect, ge, a, b);
 }
 
+// Builds the VectorGetElement{8,16,32,64} instruction that matches esize (in bits),
+// reading element `index` of vector `a`. The index is carried as an 8-bit immediate.
+// NOTE(review): explicit template arguments on the Inst(...) calls may have been
+// lost along with the one on static_cast - confirm against the repository.
+UAny IREmitter::VectorGetElement(size_t esize, const U128& a, size_t index) {
+    ASSERT_MSG(esize * index < 128, "Invalid index");
+    switch (esize) {
+    case 8:
+        return Inst(Opcode::VectorGetElement8, a, Imm8(static_cast<u8>(index)));
+    case 16:
+        return Inst(Opcode::VectorGetElement16, a, Imm8(static_cast<u8>(index)));
+    case 32:
+        return Inst(Opcode::VectorGetElement32, a, Imm8(static_cast<u8>(index)));
+    case 64:
+        return Inst(Opcode::VectorGetElement64, a, Imm8(static_cast<u8>(index)));
+    default:
+        ASSERT_MSG(false, "Unreachable");
+        return {};
+    }
+}
+
 U128 IREmitter::VectorAdd8(const U128& a, const U128& b) {
     return Inst(Opcode::VectorAdd8, a, b);
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 7087d9eb..102a67d5 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -178,6 +178,7 @@ public:
     U32 PackedAbsDiffSumS8(const U32& a, const U32& b);
     U32 PackedSelect(const U32& ge, const U32& a, const U32& b);
 
+    UAny VectorGetElement(size_t esize, const U128& a, size_t index);
     U128 VectorAdd8(const U128& a, const U128& b);
     U128 VectorAdd16(const U128& a, const U128& b);
     U128 VectorAdd32(const U128& a, const U128& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index eebf95fc..f08b8c4e 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -162,6 +162,10 @@ OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32
 OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
 
 // Vector instructions
+OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
+OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
+OPCODE(VectorGetElement32, T::U32, T::U128, T::U8 )
+OPCODE(VectorGetElement64, T::U64, T::U128, T::U8 )
 OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
 OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
 OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )