backend\A64\emit_a64_packed.cpp: Implement SignedPacked* ADD and SUB

This commit is contained in:
SachinVin 2019-07-27 09:46:27 +05:30 committed by xperia64
parent 2a378692fa
commit c8a910a009
3 changed files with 93 additions and 8 deletions

View File

@ -32,6 +32,28 @@ void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, sum);
}
void EmitA64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
    // Packed signed 8-bit lane-wise (modulo) addition, optionally producing
    // the per-lane GE mask for an associated GetGEFromOp pseudo-op.
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));

    if (ge_inst) {
        // Build the GE mask: saturating-add the lanes, then compare >= 0.
        // NOTE(review): relies on SQADD preserving the sign of the true
        // (non-wrapped) sum so CMGE against zero matches ARM GE semantics.
        const ARM64Reg ge_mask = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.SQADD(B, ge_mask, lhs, rhs);
        code.fp_emitter.CMGE_zero(B, ge_mask, ge_mask);
        ctx.reg_alloc.DefineValue(ge_inst, ge_mask);
        ctx.EraseInstruction(ge_inst);
    }

    // Wrapping add must come after the GE computation: it overwrites lhs.
    code.fp_emitter.ADD(B, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}
void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
@ -53,6 +75,27 @@ void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, sum);
}
void EmitA64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
    // Packed signed 16-bit lane-wise (modulo) addition; also emits the
    // per-lane GE mask when a GetGEFromOp pseudo-op is attached.
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));

    if (ge_inst) {
        // GE mask = (saturating sum >= 0) per 16-bit lane.
        // NOTE(review): assumes SQADD keeps the sign of the true sum so the
        // zero-compare reproduces ARM GE-flag semantics — as in the S8 case.
        const ARM64Reg ge_mask = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.SQADD(H, ge_mask, lhs, rhs);
        code.fp_emitter.CMGE_zero(H, ge_mask, ge_mask);
        ctx.reg_alloc.DefineValue(ge_inst, ge_mask);
        ctx.EraseInstruction(ge_inst);
    }

    // The wrapping add clobbers lhs, so it is emitted after the GE mask.
    code.fp_emitter.ADD(H, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}
void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -75,6 +118,27 @@ void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
    // Packed signed 8-bit lane-wise (modulo) subtraction, with optional
    // per-lane GE mask for an attached GetGEFromOp pseudo-op.
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg minuend = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg subtrahend = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));

    if (ge_inst) {
        // GE mask = (saturating difference >= 0) per 8-bit lane.
        // NOTE(review): relies on SQSUB preserving the sign of the true
        // difference so the zero-compare matches ARM GE semantics.
        const ARM64Reg ge_mask = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.SQSUB(B, ge_mask, minuend, subtrahend);
        code.fp_emitter.CMGE_zero(B, ge_mask, ge_mask);
        ctx.reg_alloc.DefineValue(ge_inst, ge_mask);
        ctx.EraseInstruction(ge_inst);
    }

    // Wrapping subtract last — it overwrites the minuend register.
    code.fp_emitter.SUB(B, minuend, minuend, subtrahend);
    ctx.reg_alloc.DefineValue(inst, minuend);
}
void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -96,4 +160,25 @@ void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
    // Packed signed 16-bit lane-wise (modulo) subtraction; emits the
    // per-lane GE mask when a GetGEFromOp pseudo-op is associated.
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg minuend = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg subtrahend = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));

    if (ge_inst) {
        // GE mask = (saturating difference >= 0) per 16-bit lane.
        // NOTE(review): assumes SQSUB keeps the sign of the true difference,
        // mirroring the S8 variant above.
        const ARM64Reg ge_mask = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.SQSUB(H, ge_mask, minuend, subtrahend);
        code.fp_emitter.CMGE_zero(H, ge_mask, ge_mask);
        ctx.reg_alloc.DefineValue(ge_inst, ge_mask);
        ctx.EraseInstruction(ge_inst);
    }

    // Wrapping subtract is emitted after the GE mask since it clobbers
    // the minuend register.
    code.fp_emitter.SUB(H, minuend, minuend, subtrahend);
    ctx.reg_alloc.DefineValue(inst, minuend);
}
} // namespace Dynarmic::BackendA64

View File

@ -181,13 +181,12 @@ OPCODE(CountLeadingZeros64, U64, U64
// Packed instructions
OPCODE(PackedAddU8, U32, U32, U32 )
//OPCODE(PackedAddS8, U32, U32, U32 )
OPCODE(PackedAddS8, U32, U32, U32 )
OPCODE(PackedSubU8, U32, U32, U32 )
//OPCODE(PackedSubS8, U32, U32, U32 )
OPCODE(PackedSubS8, U32, U32, U32 )
OPCODE(PackedAddU16, U32, U32, U32 )
//OPCODE(PackedAddS16, U32, U32, U32 )
OPCODE(PackedAddS16, U32, U32, U32 )
OPCODE(PackedSubU16, U32, U32, U32 )
//OPCODE(PackedSubS16, U32, U32, U32 )
//OPCODE(PackedAddSubU16, U32, U32, U32 )
//OPCODE(PackedAddSubS16, U32, U32, U32 )
//OPCODE(PackedSubAddU16, U32, U32, U32 )
@ -212,6 +211,7 @@ OPCODE(PackedSubU16, U32, U32,
//OPCODE(PackedSaturatedAddS16, U32, U32, U32 )
//OPCODE(PackedSaturatedSubU16, U32, U32, U32 )
//OPCODE(PackedSaturatedSubS16, U32, U32, U32 )
OPCODE(PackedSubS16, U32, U32, U32 )
//OPCODE(PackedAbsDiffSumS8, U32, U32, U32 )
//OPCODE(PackedSelect, U32, U32, U32, U32 )

View File

@ -244,12 +244,12 @@ INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn
INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6
// Parallel Add/Subtract (Modulo) instructions
//INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6
//INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") // v6
//INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") // v6
//INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6
//INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6
//INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6
INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6
INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") // v6
INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6
INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6
INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6
INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6
//INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6