diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index bc57c5b0..d22195aa 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -94,6 +94,7 @@ add_library(dynarmic
     frontend/A64/translate/impl/simd_aes.cpp
     frontend/A64/translate/impl/simd_copy.cpp
     frontend/A64/translate/impl/simd_modified_immediate.cpp
+    frontend/A64/translate/impl/simd_permute.cpp
     frontend/A64/translate/impl/simd_scalar_three_same.cpp
     frontend/A64/translate/impl/simd_three_same.cpp
     frontend/A64/translate/impl/system.cpp
diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 12b78746..618ff076 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -390,6 +390,46 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) {
     }
 }
 
+static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
+
+    switch (size) {
+    case 8:
+        code.punpcklbw(a, b);
+        break;
+    case 16:
+        code.punpcklwd(a, b);
+        break;
+    case 32:
+        code.punpckldq(a, b);
+        break;
+    case 64:
+        code.punpcklqdq(a, b);
+        break;
+    }
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+void EmitX64::EmitVectorInterleaveLower8(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveLower(code, ctx, inst, 8);
+}
+
+void EmitX64::EmitVectorInterleaveLower16(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveLower(code, ctx, inst, 16);
+}
+
+void EmitX64::EmitVectorInterleaveLower32(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveLower(code, ctx, inst, 32);
+}
+
+void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveLower(code, ctx, inst, 64);
+}
+
 void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index b4a6e104..14951b9c 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -583,7 +583,7 @@ INST(SUB_1,                 "SUB (vector)",                                             "01111
 // Data Processing - FP and SIMD - SIMD Permute
 //INST(UZP1,                "UZP1",                                                     "0Q001110zz0mmmmm000110nnnnnddddd")
 //INST(TRN1,                "TRN1",                                                     "0Q001110zz0mmmmm001010nnnnnddddd")
-//INST(ZIP1,                "ZIP1",                                                     "0Q001110zz0mmmmm001110nnnnnddddd")
+INST(ZIP1,                  "ZIP1",                                                     "0Q001110zz0mmmmm001110nnnnnddddd")
 //INST(UZP2,                "UZP2",                                                     "0Q001110zz0mmmmm010110nnnnnddddd")
 //INST(TRN2,                "TRN2",                                                     "0Q001110zz0mmmmm011010nnnnnddddd")
 //INST(ZIP2,                "ZIP2",                                                     "0Q001110zz0mmmmm011110nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_permute.cpp b/src/frontend/A64/translate/impl/simd_permute.cpp
new file mode 100644
index 00000000..00d28c8b
--- /dev/null
+++ b/src/frontend/A64/translate/impl/simd_permute.cpp
@@ -0,0 +1,39 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "frontend/A64/translate/impl/impl.h"
+
+namespace Dynarmic::A64 {
+
+bool TranslatorVisitor::ZIP1(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
+    if (size == 0b11 && !Q) {
+        return ReservedValue();
+    }
+
+    const size_t datasize = Q ? 128 : 64;
+
+    const IR::U128 result = [&] {
+        const IR::U128 operand1 = V(datasize, Vn);
+        const IR::U128 operand2 = V(datasize, Vm);
+
+        switch (size.ZeroExtend()) {
+        case 0b00:
+            return ir.VectorInterleaveLower8(operand1, operand2);
+        case 0b01:
+            return ir.VectorInterleaveLower16(operand1, operand2);
+        case 0b10:
+            return ir.VectorInterleaveLower32(operand1, operand2);
+        case 0b11:
+        default:
+            return ir.VectorInterleaveLower64(operand1, operand2);
+        }
+    }();
+
+    V(datasize, Vd, result);
+    return true;
+}
+
+} // namespace Dynarmic::A64
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index ded7f2d1..2697178d 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -865,6 +865,22 @@ U128 IREmitter::VectorEqual128(const U128& a, const U128& b) {
     return Inst<U128>(Opcode::VectorEqual128, a, b);
 }
 
+U128 IREmitter::VectorInterleaveLower8(const U128& a, const U128& b) {
+    return Inst<U128>(Opcode::VectorInterleaveLower8, a, b);
+}
+
+U128 IREmitter::VectorInterleaveLower16(const U128& a, const U128& b) {
+    return Inst<U128>(Opcode::VectorInterleaveLower16, a, b);
+}
+
+U128 IREmitter::VectorInterleaveLower32(const U128& a, const U128& b) {
+    return Inst<U128>(Opcode::VectorInterleaveLower32, a, b);
+}
+
+U128 IREmitter::VectorInterleaveLower64(const U128& a, const U128& b) {
+    return Inst<U128>(Opcode::VectorInterleaveLower64, a, b);
+}
+
 U128 IREmitter::VectorLowerPairedAdd8(const U128& a, const U128& b) {
     return Inst<U128>(Opcode::VectorLowerPairedAdd8, a, b);
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 1c5cc261..f9812a21 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -227,6 +227,10 @@ public:
     U128 VectorEqual32(const U128& a, const U128& b);
     U128 VectorEqual64(const U128& a, const U128& b);
     U128 VectorEqual128(const U128& a, const U128& b);
+    U128 VectorInterleaveLower8(const U128& a, const U128& b);
+    U128 VectorInterleaveLower16(const U128& a, const U128& b);
+    U128 VectorInterleaveLower32(const U128& a, const U128& b);
+    U128 VectorInterleaveLower64(const U128& a, const U128& b);
     U128 VectorLowerPairedAdd8(const U128& a, const U128& b);
     U128 VectorLowerPairedAdd16(const U128& a, const U128& b);
     U128 VectorLowerPairedAdd32(const U128& a, const U128& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 8a04a217..772e776c 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -218,6 +218,10 @@ OPCODE(VectorEqual16,           T::U128, T::U128, T::U128 )
 OPCODE(VectorEqual32,           T::U128, T::U128, T::U128 )
 OPCODE(VectorEqual64,           T::U128, T::U128, T::U128 )
 OPCODE(VectorEqual128,          T::U128, T::U128, T::U128 )
+OPCODE(VectorInterleaveLower8,  T::U128, T::U128, T::U128 )
+OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128 )
+OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128 )
+OPCODE(VectorInterleaveLower64, T::U128, T::U128, T::U128 )
 OPCODE(VectorLowerPairedAdd8,   T::U128, T::U128, T::U128 )
 OPCODE(VectorLowerPairedAdd16,  T::U128, T::U128, T::U128 )
 OPCODE(VectorLowerPairedAdd32,  T::U128, T::U128, T::U128 )
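
For reference, the short standalone program below (not part of the patch; the InterleaveLower helper and its test values are purely illustrative) sketches the semantics the new VectorInterleaveLower* opcodes are assumed to have, which is what AArch64 ZIP1 specifies for the 128-bit arrangement and what the punpckl* instructions chosen in the backend compute: the low halves of the two source vectors are interleaved element-wise, first operand first.

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Reference model: interleave the low N/2 elements of a and b, so that
// result[2*i] = a[i] and result[2*i + 1] = b[i] for i in [0, N/2).
template <typename E, std::size_t N = 16 / sizeof(E)>
std::array<E, N> InterleaveLower(const std::array<E, N>& a, const std::array<E, N>& b) {
    std::array<E, N> result{};
    for (std::size_t i = 0; i < N / 2; ++i) {
        result[2 * i + 0] = a[i];
        result[2 * i + 1] = b[i];
    }
    return result;
}

int main() {
    // Byte elements model VectorInterleaveLower8, i.e. ZIP1 Vd.16B, Vn.16B, Vm.16B
    // (punpcklbw in the backend above).
    const std::array<std::uint8_t, 16> a{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    const std::array<std::uint8_t, 16> b{16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};

    // Prints: 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23
    for (const unsigned elem : InterleaveLower(a, b)) {
        std::printf("%u ", elem);
    }
    std::printf("\n");
    return 0;
}

For the Q = 0 (64-bit) arrangements, the translator reads and writes only the low 64 bits of each register through V(64, ...), and the low 64 bits of this 128-bit interleave are exactly the 64-bit ZIP1 result, so the same IR opcodes cover both cases.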