From a698e35422bafca2fdbc186478a4bbfe40e50d2a Mon Sep 17 00:00:00 2001 From: SachinVin Date: Mon, 22 Jul 2019 22:28:31 +0530 Subject: [PATCH] backend\A64\emit_a64_packed.cpp: Implement UnsignedPacked*- ADD and SUB... with few other in the emitter --- src/CMakeLists.txt | 2 +- src/backend/A64/emit_a64_packed.cpp | 99 +++++++++++++++++++++++++ src/backend/A64/emitter/a64_emitter.cpp | 28 +++++++ src/backend/A64/emitter/a64_emitter.h | 8 ++ src/backend/A64/opcodes.inc | 8 +- src/frontend/A32/decoder/arm_a64.inc | 8 +- 6 files changed, 144 insertions(+), 9 deletions(-) create mode 100644 src/backend/A64/emit_a64_packed.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5c5e9cb6..08488f61 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -362,7 +362,7 @@ elseif(ARCHITECTURE_Aarch64) # backend/A64/emit_a64_crc32.cpp backend/A64/emit_a64_data_processing.cpp backend/A64/emit_a64_floating_point.cpp - # backend/A64/emit_a64_packed.cpp + backend/A64/emit_a64_packed.cpp # backend/A64/emit_a64_saturation.cpp # backend/A64/emit_a64_sm4.cpp # backend/A64/emit_a64_vector.cpp diff --git a/src/backend/A64/emit_a64_packed.cpp b/src/backend/A64/emit_a64_packed.cpp new file mode 100644 index 00000000..78f0aa61 --- /dev/null +++ b/src/backend/A64/emit_a64_packed.cpp @@ -0,0 +1,99 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include "backend/A64/block_of_code.h" +#include "backend/A64/emit_a64.h" +#include "frontend/ir/microinstruction.h" +#include "frontend/ir/opcodes.h" + +namespace Dynarmic::BackendA64 { + +void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.ADD(B, sum, sum, b); + + if (ge_inst) { + const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr()); + + code.fp_emitter.CMHI(B, ge, b, sum); + + ctx.reg_alloc.DefineValue(ge_inst, ge); + ctx.EraseInstruction(ge_inst); + } + + ctx.reg_alloc.DefineValue(inst, sum); +} + +void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + code.fp_emitter.ADD(H, sum, sum, b); + + if (ge_inst) { + const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr()); + + code.fp_emitter.CMHI(H, ge, b, sum); + + ctx.reg_alloc.DefineValue(ge_inst, ge); + ctx.EraseInstruction(ge_inst); + } + + ctx.reg_alloc.DefineValue(inst, sum); +} + + +void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + if (ge_inst) { + const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr()); + + code.fp_emitter.CMHS(B, ge, a, b); + + 
ctx.reg_alloc.DefineValue(ge_inst, ge); + ctx.EraseInstruction(ge_inst); + } + + code.fp_emitter.SUB(B, a, a, b); + + ctx.reg_alloc.DefineValue(inst, a); +} + + +void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0])); + const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1])); + + if (ge_inst) { + const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr()); + + code.fp_emitter.CMHS(H, ge, a, b); + + ctx.reg_alloc.DefineValue(ge_inst, ge); + ctx.EraseInstruction(ge_inst); + } + + code.fp_emitter.SUB(H, a, a, b); + ctx.reg_alloc.DefineValue(inst, a); +} + +} // namespace Dynarmic::BackendA64 diff --git a/src/backend/A64/emitter/a64_emitter.cpp b/src/backend/A64/emitter/a64_emitter.cpp index 6eb399e2..4e16b6a4 100644 --- a/src/backend/A64/emitter/a64_emitter.cpp +++ b/src/backend/A64/emitter/a64_emitter.cpp @@ -2837,12 +2837,32 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8) { } // Vector +void ARM64FloatEmitter::ADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { + ASSERT(!(IsDouble(Rd) && esize == D)); + EmitThreeSame(0, static_cast<u32>(esize), 0b10000, Rd, Rn, Rm); +} +void ARM64FloatEmitter::SUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { + ASSERT(!(IsDouble(Rd) && esize == D)); + EmitThreeSame(1, static_cast<u32>(esize), 0b10000, Rd, Rn, Rm); +} void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EmitThreeSame(0, 0, 3, Rd, Rn, Rm); } void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EmitThreeSame(1, 1, 3, Rd, Rn, Rm); } +void ARM64FloatEmitter::CMGT(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { + ASSERT(!(IsDouble(Rd) && esize == D)); + EmitThreeSame(0, static_cast<u32>(esize), 0b00110, Rd, Rn, Rm); +} +void ARM64FloatEmitter::CMHI(ESize esize, ARM64Reg Rd, ARM64Reg 
Rn, ARM64Reg Rm) { + ASSERT(!(IsDouble(Rd) && esize == D)); + EmitThreeSame(1, static_cast<u32>(esize), 0b00110, Rd, Rn, Rm); +} +void ARM64FloatEmitter::CMHS(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { + ASSERT(!(IsDouble(Rd) && esize == D)); + EmitThreeSame(1, static_cast<u32>(esize), 0b00111, Rd, Rn, Rm); +} void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index) { u32 imm5 = 0; @@ -2928,6 +2948,14 @@ void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn) { void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn) { Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn); } +void ARM64FloatEmitter::SMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { + ASSERT(!(IsDouble(Rd) && esize == D)); + EmitThreeSame(0, static_cast<u32>(esize), 0b01101, Rd, Rn, Rm); +} +void ARM64FloatEmitter::UMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { + ASSERT(!(IsDouble(Rd) && esize == D)); + EmitThreeSame(1, static_cast<u32>(esize), 0b01101, Rd, Rn, Rm); +} void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn) { Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn); } diff --git a/src/backend/A64/emitter/a64_emitter.h b/src/backend/A64/emitter/a64_emitter.h index 533b1175..c89b0424 100644 --- a/src/backend/A64/emitter/a64_emitter.h +++ b/src/backend/A64/emitter/a64_emitter.h @@ -963,8 +963,13 @@ public: void FMOV(ARM64Reg Rd, uint8_t imm8); // Vector + void ADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CMGT(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CMHI(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CMHS(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index); void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn); void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); @@ -992,6 +997,8 
@@ public: void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn); void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn); void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn); + void SMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void UMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn); void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn); void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale); @@ -1092,6 +1099,7 @@ private: void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn); void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc index fbd02d75..24f27770 100644 --- a/src/backend/A64/opcodes.inc +++ b/src/backend/A64/opcodes.inc @@ -180,13 +180,13 @@ OPCODE(CountLeadingZeros64, U64, U64 //OPCODE(UnsignedSaturation, U32, U32, U8 ) // Packed instructions -//OPCODE(PackedAddU8, U32, U32, U32 ) +OPCODE(PackedAddU8, U32, U32, U32 ) //OPCODE(PackedAddS8, U32, U32, U32 ) -//OPCODE(PackedSubU8, U32, U32, U32 ) +OPCODE(PackedSubU8, U32, U32, U32 ) //OPCODE(PackedSubS8, U32, U32, U32 ) -//OPCODE(PackedAddU16, U32, U32, U32 ) +OPCODE(PackedAddU16, U32, U32, U32 ) //OPCODE(PackedAddS16, U32, U32, U32 ) -//OPCODE(PackedSubU16, U32, U32, U32 ) +OPCODE(PackedSubU16, U32, U32, U32 ) //OPCODE(PackedSubS16, U32, U32, U32 ) //OPCODE(PackedAddSubU16, U32, U32, U32 ) //OPCODE(PackedAddSubS16, U32, U32, U32 ) diff --git a/src/frontend/A32/decoder/arm_a64.inc b/src/frontend/A32/decoder/arm_a64.inc index fa8efbdf..a8a84067 100644 --- 
a/src/frontend/A32/decoder/arm_a64.inc +++ b/src/frontend/A32/decoder/arm_a64.inc @@ -250,12 +250,12 @@ INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn //INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6 //INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6 //INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6 -//INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6 -//INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6 +INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6 +INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6 //INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6 //INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6 -//INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6 -//INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6 +INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6 +INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6 // Parallel Add/Subtract (Saturating) instructions //INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6