From c8a910a009502376f3ea7b516d979caa0098fc5b Mon Sep 17 00:00:00 2001
From: SachinVin
Date: Sat, 27 Jul 2019 09:46:27 +0530
Subject: [PATCH] backend\A64\emit_a64_packed.cpp: Implement SignedPacked*-
 ADD and SUB

---
 src/backend/A64/emit_a64_packed.cpp  | 85 ++++++++++++++++++++++++++++
 src/backend/A64/opcodes.inc          |  8 +--
 src/frontend/A32/decoder/arm_a64.inc |  8 +--
 3 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/src/backend/A64/emit_a64_packed.cpp b/src/backend/A64/emit_a64_packed.cpp
index 78f0aa61..14c7a5fa 100644
--- a/src/backend/A64/emit_a64_packed.cpp
+++ b/src/backend/A64/emit_a64_packed.cpp
@@ -32,6 +32,28 @@ void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, sum);
 }
 
+void EmitA64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    if (ge_inst) {
+        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
+
+        code.fp_emitter.SQADD(B, ge, a, b);
+        code.fp_emitter.CMGE_zero(B, ge, ge);
+
+        ctx.reg_alloc.DefineValue(ge_inst, ge);
+        ctx.EraseInstruction(ge_inst);
+    }
+
+    code.fp_emitter.ADD(B, a, a, b);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
 void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
@@ -53,6 +75,27 @@ void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, sum);
 }
 
+void EmitA64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    if (ge_inst) {
+        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
+
+        code.fp_emitter.SQADD(H, ge, a, b);
+        code.fp_emitter.CMGE_zero(H, ge, ge);
+
+        ctx.reg_alloc.DefineValue(ge_inst, ge);
+        ctx.EraseInstruction(ge_inst);
+    }
+
+    code.fp_emitter.ADD(H, a, a, b);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
 
 void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -75,6 +118,27 @@ void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
+void EmitA64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    if (ge_inst) {
+        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
+
+        code.fp_emitter.SQSUB(B, ge, a, b);
+        code.fp_emitter.CMGE_zero(B, ge, ge);
+
+        ctx.reg_alloc.DefineValue(ge_inst, ge);
+        ctx.EraseInstruction(ge_inst);
+    }
+
+    code.fp_emitter.SUB(B, a, a, b);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
 
 void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -96,4 +160,25 @@ void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
+void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    if (ge_inst) {
+        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
+
+        code.fp_emitter.SQSUB(H, ge, a, b);
+        code.fp_emitter.CMGE_zero(H, ge, ge);
+
+        ctx.reg_alloc.DefineValue(ge_inst, ge);
+        ctx.EraseInstruction(ge_inst);
+    }
+
+    code.fp_emitter.SUB(H, a, a, b);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
 } // namespace Dynarmic::BackendA64
diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc
index 24f27770..69515cba 100644
--- a/src/backend/A64/opcodes.inc
+++ b/src/backend/A64/opcodes.inc
@@ -181,13 +181,12 @@ OPCODE(CountLeadingZeros64, U64, U64
 
 // Packed instructions
 OPCODE(PackedAddU8,                 U32,       U32,       U32            )
-//OPCODE(PackedAddS8,               U32,       U32,       U32            )
+OPCODE(PackedAddS8,                 U32,       U32,       U32            )
 OPCODE(PackedSubU8,                 U32,       U32,       U32            )
-//OPCODE(PackedSubS8,               U32,       U32,       U32            )
+OPCODE(PackedSubS8,                 U32,       U32,       U32            )
 OPCODE(PackedAddU16,                U32,       U32,       U32            )
-//OPCODE(PackedAddS16,              U32,       U32,       U32            )
+OPCODE(PackedAddS16,                U32,       U32,       U32            )
 OPCODE(PackedSubU16,                U32,       U32,       U32            )
-//OPCODE(PackedSubS16,              U32,       U32,       U32            )
 //OPCODE(PackedAddSubU16,           U32,       U32,       U32            )
 //OPCODE(PackedAddSubS16,           U32,       U32,       U32            )
 //OPCODE(PackedSubAddU16,           U32,       U32,       U32            )
@@ -212,6 +211,7 @@ OPCODE(PackedSubU16, U32, U32,
 //OPCODE(PackedSaturatedAddS16,     U32,       U32,       U32            )
 //OPCODE(PackedSaturatedSubU16,     U32,       U32,       U32            )
 //OPCODE(PackedSaturatedSubS16,     U32,       U32,       U32            )
+OPCODE(PackedSubS16,                U32,       U32,       U32            )
 //OPCODE(PackedAbsDiffSumS8,        U32,       U32,       U32            )
 //OPCODE(PackedSelect,              U32,       U32,       U32,       U32 )
 
diff --git a/src/frontend/A32/decoder/arm_a64.inc b/src/frontend/A32/decoder/arm_a64.inc
index a8a84067..afca3626 100644
--- a/src/frontend/A32/decoder/arm_a64.inc
+++ b/src/frontend/A32/decoder/arm_a64.inc
@@ -244,12 +244,12 @@ INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn
 INST(arm_SMUSD,              "SMUSD",               "cccc01110000dddd1111mmmm01M1nnnn") // v6
 
 // Parallel Add/Subtract (Modulo) instructions
-//INST(arm_SADD8,            "SADD8",               "cccc01100001nnnndddd11111001mmmm") // v6
-//INST(arm_SADD16,           "SADD16",              "cccc01100001nnnndddd11110001mmmm") // v6
 //INST(arm_SASX,             "SASX",                "cccc01100001nnnndddd11110011mmmm") // v6
 //INST(arm_SSAX,             "SSAX",                "cccc01100001nnnndddd11110101mmmm") // v6
-//INST(arm_SSUB8,            "SSUB8",               "cccc01100001nnnndddd11111111mmmm") // v6
-//INST(arm_SSUB16,           "SSUB16",              "cccc01100001nnnndddd11110111mmmm") // v6
+INST(arm_SADD8,              "SADD8",               "cccc01100001nnnndddd11111001mmmm") // v6
+INST(arm_SADD16,             "SADD16",              "cccc01100001nnnndddd11110001mmmm") // v6
+INST(arm_SSUB8,              "SSUB8",               "cccc01100001nnnndddd11111111mmmm") // v6
+INST(arm_SSUB16,             "SSUB16",              "cccc01100001nnnndddd11110111mmmm") // v6
 INST(arm_UADD8,              "UADD8",               "cccc01100101nnnndddd11111001mmmm") // v6
 INST(arm_UADD16,             "UADD16",              "cccc01100101nnnndddd11110001mmmm") // v6
 //INST(arm_UASX,             "UASX",                "cccc01100101nnnndddd11110011mmmm") // v6
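
Editor's note on the approach used in the emit functions above (not part of the patch itself): SADD8/SADD16/SSUB8/SSUB16 write their result lanes modulo 2^8 or 2^16, but each GE bit must reflect whether the exact signed sum or difference of the corresponding lanes is greater than or equal to zero. The wrapping ADD/SUB result cannot answer that, so the patch computes a second, saturating result with SQADD/SQSUB and compares it against zero with CMGE_zero; saturation clamps an out-of-range value to the nearest representable value, which always has the same sign as the exact result, so the comparison yields the correct GE bits. The standalone C++ sketch below checks that equivalence exhaustively for 8-bit lanes. It is written for this note only; the helper names exact_add_s8 and sqadd_s8 are made up here and do not come from dynarmic.

    // Scalar model of the GE-flag trick: per lane, "saturated sum >= 0"
    // must agree with "exact sum >= 0", which is how SADD8 defines GE.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Exact signed sum of two s8 lanes (cannot overflow as int).
    static int exact_add_s8(int8_t x, int8_t y) { return int{x} + int{y}; }

    // Model of a single SQADD.8B lane: clamp the sum to [-128, 127].
    static int8_t sqadd_s8(int8_t x, int8_t y) {
        return static_cast<int8_t>(std::clamp(exact_add_s8(x, y), -128, 127));
    }

    int main() {
        // Exhaustively compare the two definitions for every s8 lane pair.
        for (int x = -128; x <= 127; ++x) {
            for (int y = -128; y <= 127; ++y) {
                const bool ge_exact = exact_add_s8(int8_t(x), int8_t(y)) >= 0;
                const bool ge_neon  = sqadd_s8(int8_t(x), int8_t(y)) >= 0;
                if (ge_exact != ge_neon) {
                    std::printf("mismatch at %d + %d\n", x, y);
                    return 1;
                }
            }
        }
        std::puts("SQADD+CMGE_zero matches the exact GE definition for all s8 pairs");
        return 0;
    }

The same argument applies per 16-bit lane for SADD16/SSUB16, and to subtraction via SQSUB, since a saturated difference also keeps the sign of the exact difference.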