From 61d509dda2312390c12ba3e24a9f899d3b255112 Mon Sep 17 00:00:00 2001
From: Merry
Date: Tue, 2 Aug 2022 11:03:54 +0100
Subject: [PATCH] IR: Add VectorMultiply{Signed,Unsigned}Widen instructions

Polyfill for x86-64 backend
---
 .../backend/arm64/emit_arm64_vector.cpp       | 48 +++++++++++++++++++
 src/dynarmic/backend/x64/a32_interface.cpp    |  1 +
 src/dynarmic/backend/x64/a64_interface.cpp    |  1 +
 src/dynarmic/backend/x64/emit_x64_vector.cpp  | 24 ++++++++++
 .../A32/translate/impl/asimd_three_regs.cpp   | 36 ++++++++++----
 .../translate/impl/asimd_two_regs_scalar.cpp  |  9 ++--
 src/dynarmic/ir/ir_emitter.cpp                | 24 ++++++++++
 src/dynarmic/ir/ir_emitter.h                  |  2 +
 src/dynarmic/ir/opcodes.inc                   |  6 +++
 src/dynarmic/ir/opt/passes.h                  |  1 +
 src/dynarmic/ir/opt/polyfill_pass.cpp         | 43 +++++++++++++++++
 11 files changed, 180 insertions(+), 15 deletions(-)

diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp
index 758e7a54..5077f33f 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp
@@ -1001,6 +1001,54 @@ void EmitIR<IR::Opcode::VectorMultiply64>(oaknut::CodeGenerator& code, EmitConte
     ASSERT_FALSE("Unimplemented");
 }
 
+template<>
+void EmitIR<IR::Opcode::VectorMultiplySignedWiden8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorMultiplySignedWiden16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorMultiplySignedWiden32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
 template<>
 void EmitIR<IR::Opcode::VectorNarrow16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     (void)code;
diff --git a/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/backend/x64/a32_interface.cpp
index 5e7fa8e8..e671c192 100644
--- a/src/dynarmic/backend/x64/a32_interface.cpp
+++ b/src/dynarmic/backend/x64/a32_interface.cpp
@@ -55,6 +55,7 @@ static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& conf) {
 
 static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
     return Optimization::PolyfillOptions{
         .sha256 = !code.HasHostFeature(HostFeature::SHA),
+        .vector_multiply_widen = true,
     };
 }
diff --git a/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/backend/x64/a64_interface.cpp
index 7440ab57..f7f53c99 100644
--- a/src/dynarmic/backend/x64/a64_interface.cpp
+++ b/src/dynarmic/backend/x64/a64_interface.cpp
@@ -51,6 +51,7 @@ static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
 
 static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
     return Optimization::PolyfillOptions{
         .sha256 = !code.HasHostFeature(HostFeature::SHA),
+        .vector_multiply_widen = true,
     };
 }
diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp
index 4084ac98..c7293e02 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp
@@ -2221,6 +2221,30 @@ void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, tmp2);
 }
 
+void EmitX64::EmitVectorMultiplySignedWiden8(EmitContext&, IR::Inst*) {
+    ASSERT_FALSE("Unexpected VectorMultiplySignedWiden8");
+}
+
+void EmitX64::EmitVectorMultiplySignedWiden16(EmitContext&, IR::Inst*) {
+    ASSERT_FALSE("Unexpected VectorMultiplySignedWiden16");
+}
+
+void EmitX64::EmitVectorMultiplySignedWiden32(EmitContext&, IR::Inst*) {
+    ASSERT_FALSE("Unexpected VectorMultiplySignedWiden32");
+}
+
+void EmitX64::EmitVectorMultiplyUnsignedWiden8(EmitContext&, IR::Inst*) {
+    ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden8");
+}
+
+void EmitX64::EmitVectorMultiplyUnsignedWiden16(EmitContext&, IR::Inst*) {
+    ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden16");
+}
+
+void EmitX64::EmitVectorMultiplyUnsignedWiden32(EmitContext&, IR::Inst*) {
+    ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden32");
+}
+
 void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp b/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp
index bf7abde2..f6da2700 100644
--- a/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp
+++ b/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp
@@ -909,11 +909,30 @@ bool TranslatorVisitor::asimd_VABDL(bool U, bool D, size_t sz, size_t Vn, size_t
 }
 
 bool TranslatorVisitor::asimd_VMLAL(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool op, bool N, bool M, size_t Vm) {
-    return WideInstruction(*this, U, D, sz, Vn, Vd, N, M, Vm, WidenBehaviour::Both, [this, op](size_t esize, const auto& reg_d, const auto& reg_n, const auto& reg_m) {
-        const auto multiply = ir.VectorMultiply(esize, reg_n, reg_m);
-        return op ? ir.VectorSub(esize, reg_d, multiply)
-                  : ir.VectorAdd(esize, reg_d, multiply);
-    });
+    const size_t esize = 8U << sz;
+
+    if (sz == 0b11) {
+        return DecodeError();
+    }
+
+    if (mcl::bit::get_bit<0>(Vd)) {
+        return UndefinedInstruction();
+    }
+
+    const auto d = ToVector(true, Vd, D);
+    const auto m = ToVector(false, Vm, M);
+    const auto n = ToVector(false, Vn, N);
+
+    const auto reg_d = ir.GetVector(d);
+    const auto reg_m = ir.GetVector(m);
+    const auto reg_n = ir.GetVector(n);
+    const auto multiply = U ? ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
+                            : ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);
+    const auto result = op ? ir.VectorSub(esize * 2, reg_d, multiply)
+                           : ir.VectorAdd(esize * 2, reg_d, multiply);
+
+    ir.SetVector(d, result);
+    return true;
 }
 
 bool TranslatorVisitor::asimd_VMULL(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool P, bool N, bool M, size_t Vm) {
@@ -930,14 +949,11 @@ bool TranslatorVisitor::asimd_VMULL(bool U, bool D, size_t sz, size_t Vn, size_t
     const auto m = ToVector(false, Vm, M);
     const auto n = ToVector(false, Vn, N);
 
-    const auto extend_reg = [&](const auto& reg) {
-        return U ? ir.VectorZeroExtend(esize, reg) : ir.VectorSignExtend(esize, reg);
-    };
-
     const auto reg_n = ir.GetVector(n);
     const auto reg_m = ir.GetVector(m);
     const auto result = P ? ir.VectorPolynomialMultiplyLong(esize, reg_n, reg_m)
-                          : ir.VectorMultiply(2 * esize, extend_reg(reg_n), extend_reg(reg_m));
+                          : U ? ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
+                              : ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);
 
     ir.SetVector(d, result);
     return true;
diff --git a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp
index 4e774d41..534612fe 100644
--- a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp
+++ b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp
@@ -85,11 +85,10 @@ bool ScalarMultiplyLong(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t
     const auto [m, index] = GetScalarLocation(esize, M, Vm);
 
     const auto scalar = v.ir.VectorGetElement(esize, v.ir.GetVector(m), index);
-    const auto ext_scalar = U ? (esize == 16 ? IR::U32U64{v.ir.ZeroExtendToWord(scalar)} : IR::U32U64{v.ir.ZeroExtendToLong(scalar)})
-                              : (esize == 16 ? IR::U32U64{v.ir.SignExtendToWord(scalar)} : IR::U32U64{v.ir.SignExtendToLong(scalar)});
-    const auto reg_n = U ? v.ir.VectorZeroExtend(esize, v.ir.GetVector(n)) : v.ir.VectorSignExtend(esize, v.ir.GetVector(n));
-    const auto reg_m = v.ir.VectorBroadcast(esize * 2, ext_scalar);
-    const auto addend = v.ir.VectorMultiply(esize * 2, reg_n, reg_m);
+    const auto reg_n = v.ir.GetVector(n);
+    const auto reg_m = v.ir.VectorBroadcast(esize, scalar);
+    const auto addend = U ? v.ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
+                          : v.ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);
     const auto result = [&] {
         switch (multiply) {
         case MultiplyBehavior::Multiply:
diff --git a/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/ir/ir_emitter.cpp
index d3079ebe..d05e959c 100644
--- a/src/dynarmic/ir/ir_emitter.cpp
+++ b/src/dynarmic/ir/ir_emitter.cpp
@@ -1404,6 +1404,30 @@ U128 IREmitter::VectorMultiply(size_t esize, const U128& a, const U128& b) {
     UNREACHABLE();
 }
 
+U128 IREmitter::VectorMultiplySignedWiden(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorMultiplySignedWiden8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorMultiplySignedWiden16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorMultiplySignedWiden32, a, b);
+    }
+    UNREACHABLE();
+}
+
+U128 IREmitter::VectorMultiplyUnsignedWiden(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden32, a, b);
+    }
+    UNREACHABLE();
+}
+
 U128 IREmitter::VectorNarrow(size_t original_esize, const U128& a) {
     switch (original_esize) {
     case 16:
diff --git a/src/dynarmic/ir/ir_emitter.h b/src/dynarmic/ir/ir_emitter.h
index d19d5db5..17b27de8 100644
--- a/src/dynarmic/ir/ir_emitter.h
+++ b/src/dynarmic/ir/ir_emitter.h
@@ -264,6 +264,8 @@ public:
     U128 VectorMinSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorMinUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorMultiply(size_t esize, const U128& a, const U128& b);
+    U128 VectorMultiplySignedWiden(size_t esize, const U128& a, const U128& b);
+    U128 VectorMultiplyUnsignedWiden(size_t esize, const U128& a, const U128& b);
     U128 VectorNarrow(size_t original_esize, const U128& a);
     U128 VectorNot(const U128& a);
     U128 VectorOr(const U128& a, const U128& b);
diff --git a/src/dynarmic/ir/opcodes.inc b/src/dynarmic/ir/opcodes.inc
index 59391944..75d2af2e 100644
--- a/src/dynarmic/ir/opcodes.inc
+++ b/src/dynarmic/ir/opcodes.inc
@@ -402,6 +402,12 @@ OPCODE(VectorMultiply8,                                     U128,           U128
 OPCODE(VectorMultiply16,                                    U128,           U128,           U128                            )
 OPCODE(VectorMultiply32,                                    U128,           U128,           U128                            )
 OPCODE(VectorMultiply64,                                    U128,           U128,           U128                            )
+OPCODE(VectorMultiplySignedWiden8,                          U128,           U128,           U128                            )
+OPCODE(VectorMultiplySignedWiden16,                         U128,           U128,           U128                            )
+OPCODE(VectorMultiplySignedWiden32,                         U128,           U128,           U128                            )
+OPCODE(VectorMultiplyUnsignedWiden8,                        U128,           U128,           U128                            )
+OPCODE(VectorMultiplyUnsignedWiden16,                       U128,           U128,           U128                            )
+OPCODE(VectorMultiplyUnsignedWiden32,                       U128,           U128,           U128                            )
 OPCODE(VectorNarrow16,                                      U128,           U128                                            )
 OPCODE(VectorNarrow32,                                      U128,           U128                                            )
 OPCODE(VectorNarrow64,                                      U128,           U128                                            )
diff --git a/src/dynarmic/ir/opt/passes.h b/src/dynarmic/ir/opt/passes.h
index 08bfe4fe..9a77accb 100644
--- a/src/dynarmic/ir/opt/passes.h
+++ b/src/dynarmic/ir/opt/passes.h
@@ -22,6 +22,7 @@ namespace Dynarmic::Optimization {
 
 struct PolyfillOptions {
     bool sha256 = false;
+    bool vector_multiply_widen = false;
 
     bool operator==(const PolyfillOptions&) const = default;
 };
diff --git a/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/ir/opt/polyfill_pass.cpp
index 8580f482..6aac3b39 100644
--- a/src/dynarmic/ir/opt/polyfill_pass.cpp
+++ b/src/dynarmic/ir/opt/polyfill_pass.cpp
@@ -138,6 +138,19 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(part1 ? x : y);
 }
 
+template<size_t esize, bool is_signed>
+void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
+    IR::U128 n = (IR::U128)inst.GetArg(0);
+    IR::U128 m = (IR::U128)inst.GetArg(1);
+
+    const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n);
+    const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m);
+
+    const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m);
+
+    inst.ReplaceUsesWith(result);
+}
+
 }  // namespace
 
 void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
@@ -166,6 +179,36 @@ void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
                 PolyfillSHA256Hash(ir, inst);
             }
             break;
+        case IR::Opcode::VectorMultiplySignedWiden8:
+            if (polyfill.vector_multiply_widen) {
+                PolyfillVectorMultiplyWiden<8, true>(ir, inst);
+            }
+            break;
+        case IR::Opcode::VectorMultiplySignedWiden16:
+            if (polyfill.vector_multiply_widen) {
+                PolyfillVectorMultiplyWiden<16, true>(ir, inst);
+            }
+            break;
+        case IR::Opcode::VectorMultiplySignedWiden32:
+            if (polyfill.vector_multiply_widen) {
+                PolyfillVectorMultiplyWiden<32, true>(ir, inst);
+            }
+            break;
+        case IR::Opcode::VectorMultiplyUnsignedWiden8:
+            if (polyfill.vector_multiply_widen) {
+                PolyfillVectorMultiplyWiden<8, false>(ir, inst);
+            }
+            break;
+        case IR::Opcode::VectorMultiplyUnsignedWiden16:
+            if (polyfill.vector_multiply_widen) {
+                PolyfillVectorMultiplyWiden<16, false>(ir, inst);
+            }
+            break;
+        case IR::Opcode::VectorMultiplyUnsignedWiden32:
+            if (polyfill.vector_multiply_widen) {
+                PolyfillVectorMultiplyWiden<32, false>(ir, inst);
+            }
+            break;
         default:
             break;
         }
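-- 
Reviewer note, not part of the patch: each new opcode takes two U128 operands,
widens every esize-wide lane in the low 64 bits of both operands to 2*esize,
and multiplies elementwise, which is what PolyfillVectorMultiplyWiden expands
to via VectorSignExtend/VectorZeroExtend followed by VectorMultiply. Because
GenPolyfillOptions sets vector_multiply_widen unconditionally on x86-64, the
EmitX64 stubs above should be unreachable. Below is a minimal scalar model of
the esize=8 signed case, assuming little-endian lane order; Vec128 and the
function name are illustrative only, not dynarmic APIs:

#include <array>
#include <cstddef>
#include <cstdint>

using Vec128 = std::array<std::uint8_t, 16>;  // models one IR::U128 as byte lanes

// Sign-extend each 8-bit lane of the low 64 bits of n and m to 16 bits,
// multiply, and pack the eight 16-bit products into the 128-bit result.
Vec128 VectorMultiplySignedWiden8_model(const Vec128& n, const Vec128& m) {
    Vec128 result{};
    for (std::size_t i = 0; i < 8; ++i) {
        const std::int16_t wide_n = static_cast<std::int8_t>(n[i]);
        const std::int16_t wide_m = static_cast<std::int8_t>(m[i]);
        const auto product = static_cast<std::uint16_t>(wide_n * wide_m);
        result[2 * i + 0] = static_cast<std::uint8_t>(product & 0xff);  // low byte of lane
        result[2 * i + 1] = static_cast<std::uint8_t>(product >> 8);    // high byte of lane
    }
    return result;
}

The unsigned variants are identical except that the lanes are zero-extended
before the multiply.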