backend/A64/emit_a64_packed.cpp: Implement unsigned PackedAdd and PackedSub...

with a few other instructions added to the emitter
Authored by SachinVin on 2019-07-22 22:28:31 +05:30; committed by xperia64
parent 7b6cc4ec70
commit a698e35422
6 changed files with 144 additions and 9 deletions


@@ -362,7 +362,7 @@ elseif(ARCHITECTURE_Aarch64)
# backend/A64/emit_a64_crc32.cpp
backend/A64/emit_a64_data_processing.cpp
backend/A64/emit_a64_floating_point.cpp
# backend/A64/emit_a64_packed.cpp
backend/A64/emit_a64_packed.cpp
# backend/A64/emit_a64_saturation.cpp
# backend/A64/emit_a64_sm4.cpp
# backend/A64/emit_a64_vector.cpp


@@ -0,0 +1,99 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/

#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.ADD(B, sum, sum, b);
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
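// A byte lane carried out iff the wrapped sum is lower than the addend;
// CMHI (unsigned higher) sets exactly those lanes to all-ones, forming the GE mask.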
code.fp_emitter.CMHI(B, ge, b, sum);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
ctx.reg_alloc.DefineValue(inst, sum);
}

void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.ADD(H, sum, sum, b);
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHI(H, ge, b, sum);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
ctx.reg_alloc.DefineValue(inst, sum);
}

void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
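// GE is computed before the SUB below clobbers a: a lane does not borrow
// iff a >= b, which CMHS (unsigned higher or same) turns into an all-ones mask.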
code.fp_emitter.CMHS(B, ge, a, b);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}

void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHS(H, ge, a, b);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}

} // namespace Dynarmic::BackendA64
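As a reference for the lane semantics that the NEON code above reproduces, here is a minimal scalar sketch of PackedAddU8 together with its GE (carry-out) bits; the struct and helper names below are illustrative only, not dynarmic API.

#include <array>
#include <cstdint>

struct PackedAddU8Result {
    std::uint32_t sum;           // four byte sums, each wrapped modulo 256
    std::array<bool, 4> ge;      // GE bit per lane: set iff the lane carried out
};

inline PackedAddU8Result PackedAddU8Reference(std::uint32_t a, std::uint32_t b) {
    PackedAddU8Result r{0, {}};
    for (int lane = 0; lane < 4; ++lane) {
        const std::uint32_t x = (a >> (lane * 8)) & 0xFF;
        const std::uint32_t y = (b >> (lane * 8)) & 0xFF;
        const std::uint32_t s = x + y;              // 0..510, up to 9 bits
        r.sum |= (s & 0xFF) << (lane * 8);          // wrapped per-lane result
        r.ge[lane] = s > 0xFF;                      // carry out == GE bit set
    }
    return r;
}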


@@ -2837,12 +2837,32 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8) {
}
// Vector
void ARM64FloatEmitter::ADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ASSERT(!(IsDouble(Rd) && esize == D));
EmitThreeSame(0, static_cast<u32>(esize), 0b10000, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ASSERT(!(IsDouble(Rd) && esize == D));
EmitThreeSame(1, static_cast<u32>(esize), 0b10000, Rd, Rn, Rm);
}
void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
EmitThreeSame(0, 0, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
}
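// Vector compares: each lane of Rd becomes all-ones if the condition holds, else all-zeros.
// CMGT is signed >, CMHI is unsigned >, CMHS is unsigned >=.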
void ARM64FloatEmitter::CMGT(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ASSERT(!(IsDouble(Rd) && esize == D));
EmitThreeSame(0, static_cast<u32>(esize), 0b00110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMHI(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ASSERT(!(IsDouble(Rd) && esize == D));
EmitThreeSame(1, static_cast<u32>(esize), 0b00110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMHS(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ASSERT(!(IsDouble(Rd) && esize == D));
EmitThreeSame(1, static_cast<u32>(esize), 0b00111, Rd, Rn, Rm);
}
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index) {
u32 imm5 = 0;
@@ -2928,6 +2948,14 @@ void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn);
}
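// Per-lane minimum: SMIN treats lanes as signed, UMIN as unsigned.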
void ARM64FloatEmitter::SMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ASSERT(!(IsDouble(Rd) && esize == D));
EmitThreeSame(0, static_cast<u32>(esize), 0b01101, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
ASSERT(!(IsDouble(Rd) && esize == D));
EmitThreeSame(1, static_cast<u32>(esize), 0b01101, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn);
}


@@ -963,8 +963,13 @@ public:
void FMOV(ARM64Reg Rd, uint8_t imm8);
// Vector
void ADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMGT(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMHI(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMHS(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@@ -992,6 +997,8 @@ public:
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void SMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
@@ -1092,6 +1099,7 @@ private:
void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
ARM64Reg Rm);
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt,


@@ -180,13 +180,13 @@ OPCODE(CountLeadingZeros64, U64, U64
//OPCODE(UnsignedSaturation, U32, U32, U8 )
// Packed instructions
//OPCODE(PackedAddU8, U32, U32, U32 )
OPCODE(PackedAddU8, U32, U32, U32 )
//OPCODE(PackedAddS8, U32, U32, U32 )
//OPCODE(PackedSubU8, U32, U32, U32 )
OPCODE(PackedSubU8, U32, U32, U32 )
//OPCODE(PackedSubS8, U32, U32, U32 )
//OPCODE(PackedAddU16, U32, U32, U32 )
OPCODE(PackedAddU16, U32, U32, U32 )
//OPCODE(PackedAddS16, U32, U32, U32 )
//OPCODE(PackedSubU16, U32, U32, U32 )
OPCODE(PackedSubU16, U32, U32, U32 )
//OPCODE(PackedSubS16, U32, U32, U32 )
//OPCODE(PackedAddSubU16, U32, U32, U32 )
//OPCODE(PackedAddSubS16, U32, U32, U32 )


@@ -250,12 +250,12 @@ INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn
//INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6
//INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6
//INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6
//INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6
//INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6
INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6
INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6
//INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6
//INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6
//INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6
//INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6
INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6
INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6
// Parallel Add/Subtract (Saturating) instructions
//INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6