diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index e47eeb6e..79a17582 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -2419,6 +2419,42 @@ void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst*
     EmitVectorUnsignedAbsoluteDifference(32, ctx, inst, code);
 }
 
+void EmitX64::EmitVectorUnsignedSaturatedNarrow16(EmitContext& ctx, IR::Inst* inst) {
+    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u16>& a) {
+        bool qc_flag = false;
+        for (size_t i = 0; i < a.size(); ++i) {
+            const u16 saturated = std::clamp<u16>(a[i], 0, 0xFF);
+            result[i] = static_cast<u8>(saturated);
+            qc_flag |= saturated != a[i];
+        }
+        return qc_flag;
+    });
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedNarrow32(EmitContext& ctx, IR::Inst* inst) {
+    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u32>& a) {
+        bool qc_flag = false;
+        for (size_t i = 0; i < a.size(); ++i) {
+            const u32 saturated = std::clamp<u32>(a[i], 0, 0xFFFF);
+            result[i] = static_cast<u16>(saturated);
+            qc_flag |= saturated != a[i];
+        }
+        return qc_flag;
+    });
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* inst) {
+    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u32>& result, const VectorArray<u64>& a) {
+        bool qc_flag = false;
+        for (size_t i = 0; i < a.size(); ++i) {
+            const u64 saturated = std::clamp<u64>(a[i], 0, 0xFFFFFFFF);
+            result[i] = static_cast<u32>(saturated);
+            qc_flag |= saturated != a[i];
+        }
+        return qc_flag;
+    });
+}
+
 void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index eca666e7..c5cc28cc 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -624,7 +624,7 @@ INST(CMLE_2, "CMLE (zero)", "0Q101
 INST(NEG_2,                  "NEG (vector)",                                         "0Q101110zz100000101110nnnnnddddd")
 INST(SQXTUN_2,               "SQXTUN, SQXTUN2",                                      "0Q101110zz100001001010nnnnnddddd")
 INST(SHLL,                   "SHLL, SHLL2",                                          "0Q101110zz100001001110nnnnnddddd")
-//INST(UQXTN_2,              "UQXTN, UQXTN2",                                        "0Q101110zz100001010010nnnnnddddd")
+INST(UQXTN_2,                "UQXTN, UQXTN2",                                        "0Q101110zz100001010010nnnnnddddd")
 //INST(FCVTXN_2,             "FCVTXN, FCVTXN2",                                      "0Q1011100z100001011010nnnnnddddd")
 //INST(FRINTA_1,              "FRINTA (vector)",                                     "0Q10111001111001100010nnnnnddddd")
 //INST(FRINTA_2,             "FRINTA (vector)",                                      "0Q1011100z100001100010nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h
index 97c9456d..f2fb710e 100644
--- a/src/frontend/A64/translate/impl/impl.h
+++ b/src/frontend/A64/translate/impl/impl.h
@@ -727,7 +727,7 @@ struct TranslatorVisitor final {
     bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
     bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
     bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
-    bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
+    bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
     bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd);
     bool FRINTN_1(bool Q, Vec Vn, Vec Vd);
     bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd);
diff --git a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
index f26f6caa..1eaa9fb7 100644
--- a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
+++ b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
@@ -106,6 +106,23 @@ bool IntegerConvertToFloat(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd
     v.V(datasize, Vd, result);
     return true;
 }
+
+bool SaturatedNarrow(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, IR::U128 (IR::IREmitter::*fn)(size_t, const IR::U128&)) {
+    if (size == 0b11) {
+        return v.ReservedValue();
+    }
+
+    const size_t esize = 8 << size.ZeroExtend();
+    const size_t datasize = 64;
+    const size_t part = Q ? 1 : 0;
+
+    const IR::U128 operand = v.V(2 * datasize, Vn);
+    const IR::U128 result = (v.ir.*fn)(2 * esize, operand);
+
+    v.Vpart(datasize, Vd, part, result);
+    return true;
+}
+
 } // Anonymous namespace
 
 bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
@@ -276,35 +293,15 @@ bool TranslatorVisitor::NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
 }
 
 bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
-    if (size == 0b11) {
-        return ReservedValue();
-    }
-
-    const size_t esize = 8 << size.ZeroExtend();
-    const size_t datasize = 64;
-    const size_t part = Q ? 1 : 0;
-
-    const IR::U128 operand = V(2 * datasize, Vn);
-    const IR::U128 result = ir.VectorSignedSaturatedNarrowToUnsigned(2 * esize, operand);
-
-    Vpart(datasize, Vd, part, result);
-    return true;
+    return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorSignedSaturatedNarrowToUnsigned);
 }
 
 bool TranslatorVisitor::SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
-    if (size == 0b11) {
-        return ReservedValue();
-    }
+    return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorSignedSaturatedNarrowToSigned);
+}
 
-    const size_t esize = 8 << size.ZeroExtend();
-    const size_t datasize = 64;
-    const size_t part = Q ? 1 : 0;
-
-    const IR::U128 operand = V(2 * datasize, Vn);
-    const IR::U128 result = ir.VectorSignedSaturatedNarrowToSigned(2 * esize, operand);
-
-    Vpart(datasize, Vd, part, result);
-    return true;
+bool TranslatorVisitor::UQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
+    return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorUnsignedSaturatedNarrow);
 }
 
 bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 5b3055b1..9234b660 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1346,6 +1346,19 @@ U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, co
     return {};
 }
 
+U128 IREmitter::VectorUnsignedSaturatedNarrow(size_t esize, const U128& a) {
+    switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow16, a);
+    case 32:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow32, a);
+    case 64:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow64, a);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
     switch (original_esize) {
     case 8:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 2e879bc0..e5f73c60 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -253,6 +253,7 @@ public:
     U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
     U128 VectorSub(size_t esize, const U128& a, const U128& b);
     U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
+    U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
     U128 VectorZeroExtend(size_t original_esize, const U128& a);
     U128 VectorZeroUpper(const U128& a);
     U128 ZeroVector();
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index 76a5af53..7bc1fd4c 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -347,6 +347,9 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
     case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
     case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
     case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
+    case Opcode::VectorUnsignedSaturatedNarrow16:
+    case Opcode::VectorUnsignedSaturatedNarrow32:
+    case Opcode::VectorUnsignedSaturatedNarrow64:
         return true;
 
     default:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index a0bca9a1..2cbe0ca2 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -360,6 +360,9 @@ OPCODE(VectorSub64, T::U128, T::U128,
 OPCODE(VectorUnsignedAbsoluteDifference8,   T::U128, T::U128, T::U128 )
 OPCODE(VectorUnsignedAbsoluteDifference16,  T::U128, T::U128, T::U128 )
 OPCODE(VectorUnsignedAbsoluteDifference32,  T::U128, T::U128, T::U128 )
+OPCODE(VectorUnsignedSaturatedNarrow16,     T::U128, T::U128 )
+OPCODE(VectorUnsignedSaturatedNarrow32,     T::U128, T::U128 )
+OPCODE(VectorUnsignedSaturatedNarrow64,     T::U128, T::U128 )
 OPCODE(VectorZeroExtend8,                   T::U128, T::U128 )
 OPCODE(VectorZeroExtend16,                  T::U128, T::U128 )
 OPCODE(VectorZeroExtend32,                  T::U128, T::U128 )
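
For reference, all three EmitVectorUnsignedSaturatedNarrow* fallbacks share one per-element recipe, which is also what UQXTN requires architecturally: clamp each wide unsigned lane to the narrow lane's maximum, truncate, and accumulate a flag (feeding FPSR.QC) whenever any lane actually saturated. The standalone C++ sketch below illustrates that recipe in isolation; the helper name narrow_saturate and the std::array-based signature are assumptions for the example, not dynarmic API.

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>
#include <limits>

// Illustrative stand-in for the fallback lambdas in the patch: narrow each
// unsigned element with saturation and report whether any element changed
// (the result that would feed the cumulative FPSR.QC flag).
template <typename Narrow, typename Wide, std::size_t N>
bool narrow_saturate(std::array<Narrow, N>& result, const std::array<Wide, N>& a) {
    constexpr Wide max = std::numeric_limits<Narrow>::max();
    bool qc_flag = false;
    for (std::size_t i = 0; i < N; ++i) {
        const Wide saturated = std::min(a[i], max);
        result[i] = static_cast<Narrow>(saturated);
        qc_flag |= saturated != a[i];
    }
    return qc_flag;
}

int main() {
    // UQXTN-style example: four u16 lanes narrowed to u8.
    const std::array<std::uint16_t, 4> wide{0x00FF, 0x0100, 0x1234, 0x0001};
    std::array<std::uint8_t, 4> narrow{};
    const bool qc = narrow_saturate(narrow, wide);
    // Prints "FF FF FF 01 qc=1": the two lanes above 0xFF clamp and set QC.
    std::printf("%02X %02X %02X %02X qc=%d\n",
                narrow[0], narrow[1], narrow[2], narrow[3], qc);
}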