From fd01d6fe0a7ca03f3db88e8408d08e276062dcde Mon Sep 17 00:00:00 2001 From: SachinVin Date: Sat, 27 Jul 2019 09:49:19 +0530 Subject: [PATCH] backend\A64\emit_a64_packed.cpp: Implement Packed Saturating instructions --- src/backend/A64/emit_a64_packed.cpp | 80 ++++++++++++++++++++++++++++ src/backend/A64/opcodes.inc | 16 +++--- src/frontend/A32/decoder/arm_a64.inc | 16 +++--- 3 files changed, 96 insertions(+), 16 deletions(-) diff --git a/src/backend/A64/emit_a64_packed.cpp b/src/backend/A64/emit_a64_packed.cpp index 14c7a5fa..231b5f34 100644 --- a/src/backend/A64/emit_a64_packed.cpp +++ b/src/backend/A64/emit_a64_packed.cpp @@ -181,4 +181,84 @@ void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, a); } + +void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) { /* Emitter for the PackedSaturatedAddU8 IR opcode: a = args[0] taken via UseScratchFpr, b = args[1] taken via UseFpr, each passed through EncodeRegToDouble; emits UQADD(B, a, a, b) so the result lands in a, which is then defined as the value of inst. NOTE(review): presumably an unsigned saturating add on 8-bit lanes -- confirm against the fp_emitter API. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.UQADD(B, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} + +void EmitA64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) { /* Emitter for the PackedSaturatedAddS8 IR opcode: a = args[0] taken via UseScratchFpr, b = args[1] taken via UseFpr, each passed through EncodeRegToDouble; emits SQADD(B, a, a, b) and defines a as the value of inst. NOTE(review): presumably a signed saturating add on 8-bit lanes -- confirm against the fp_emitter API. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.SQADD(B, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} + +void EmitA64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) { /* Emitter for the PackedSaturatedSubU8 IR opcode: a = args[0] taken via UseScratchFpr, b = args[1] taken via UseFpr, each passed through EncodeRegToDouble; emits UQSUB(B, a, a, b) and defines a as the value of inst. NOTE(review): presumably an unsigned saturating subtract (a - b) on 8-bit lanes -- confirm against the fp_emitter API. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.UQSUB(B, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} + +void EmitA64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) { /* Emitter for the PackedSaturatedSubS8 IR opcode: a = args[0] taken via UseScratchFpr, b = args[1] taken via UseFpr, each passed through EncodeRegToDouble; emits SQSUB(B, a, a, b) and defines a as the value of inst. NOTE(review): presumably a signed saturating subtract (a - b) on 8-bit lanes -- confirm against the fp_emitter API. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const ARM64Reg a = 
EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.SQSUB(B, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} + +void EmitA64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) { /* Emitter for the PackedSaturatedAddU16 IR opcode: a = args[0] taken via UseScratchFpr, b = args[1] taken via UseFpr, each passed through EncodeRegToDouble; emits UQADD(H, a, a, b) and defines a as the value of inst. NOTE(review): presumably an unsigned saturating add on 16-bit lanes -- confirm against the fp_emitter API. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.UQADD(H, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} + +void EmitA64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) { /* Emitter for the PackedSaturatedAddS16 IR opcode: a = args[0] taken via UseScratchFpr, b = args[1] taken via UseFpr, each passed through EncodeRegToDouble; emits SQADD(H, a, a, b) and defines a as the value of inst. NOTE(review): presumably a signed saturating add on 16-bit lanes -- confirm against the fp_emitter API. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.SQADD(H, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} + +void EmitA64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) { /* Emitter for the PackedSaturatedSubU16 IR opcode: a = args[0] taken via UseScratchFpr, b = args[1] taken via UseFpr, each passed through EncodeRegToDouble; emits UQSUB(H, a, a, b) and defines a as the value of inst. NOTE(review): presumably an unsigned saturating subtract (a - b) on 16-bit lanes -- confirm against the fp_emitter API. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.UQSUB(H, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} + +void EmitA64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) { /* Emitter for the PackedSaturatedSubS16 IR opcode: a = args[0] taken via UseScratchFpr, b = args[1] taken via UseFpr, each passed through EncodeRegToDouble; emits SQSUB(H, a, a, b) and defines a as the value of inst. NOTE(review): presumably a signed saturating subtract (a - b) on 16-bit lanes -- confirm against the fp_emitter API. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.SQSUB(H, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} } // namespace Dynarmic::BackendA64 diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc index 69515cba..77f8b15f 100644 --- a/src/backend/A64/opcodes.inc +++ b/src/backend/A64/opcodes.inc @@ -203,15 +203,15 @@ OPCODE(PackedSubU16, U32, U32, //OPCODE(PackedHalvingAddSubS16, U32, U32, U32 ) //OPCODE(PackedHalvingSubAddU16, U32, 
U32, U32 ) //OPCODE(PackedHalvingSubAddS16, U32, U32, U32 ) -//OPCODE(PackedSaturatedAddU8, U32, U32, U32 ) -//OPCODE(PackedSaturatedAddS8, U32, U32, U32 ) -//OPCODE(PackedSaturatedSubU8, U32, U32, U32 ) -//OPCODE(PackedSaturatedSubS8, U32, U32, U32 ) -//OPCODE(PackedSaturatedAddU16, U32, U32, U32 ) -//OPCODE(PackedSaturatedAddS16, U32, U32, U32 ) -//OPCODE(PackedSaturatedSubU16, U32, U32, U32 ) -//OPCODE(PackedSaturatedSubS16, U32, U32, U32 ) OPCODE(PackedSubS16, U32, U32, U32 ) +OPCODE(PackedSaturatedAddU8, U32, U32, U32 ) +OPCODE(PackedSaturatedAddS8, U32, U32, U32 ) +OPCODE(PackedSaturatedSubU8, U32, U32, U32 ) +OPCODE(PackedSaturatedSubS8, U32, U32, U32 ) +OPCODE(PackedSaturatedAddU16, U32, U32, U32 ) +OPCODE(PackedSaturatedAddS16, U32, U32, U32 ) +OPCODE(PackedSaturatedSubU16, U32, U32, U32 ) +OPCODE(PackedSaturatedSubS16, U32, U32, U32 ) //OPCODE(PackedAbsDiffSumS8, U32, U32, U32 ) //OPCODE(PackedSelect, U32, U32, U32, U32 ) diff --git a/src/frontend/A32/decoder/arm_a64.inc b/src/frontend/A32/decoder/arm_a64.inc index afca3626..3d76920f 100644 --- a/src/frontend/A32/decoder/arm_a64.inc +++ b/src/frontend/A32/decoder/arm_a64.inc @@ -258,18 +258,18 @@ INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6 // Parallel Add/Subtract (Saturating) instructions -//INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6 -//INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6 +INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6 +INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6 //INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6 //INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6 -//INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6 -//INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6 -//INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6 
-//INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6 +INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6 +INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6 +INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6 +INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6 //INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6 //INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6 -//INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6 -//INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6 +INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6 +INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6 // Parallel Add/Subtract (Halving) instructions //INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6