backend\A64\emit_a64_packed.cpp: Implement Packed Saturating instructions

This commit is contained in:
SachinVin 2019-07-27 09:49:19 +05:30
parent b4fb2569ad
commit fd01d6fe0a
3 changed files with 96 additions and 16 deletions

View File

@ -181,4 +181,84 @@ void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a);
}
// Emits code for the PackedSaturatedAddU8 IR instruction.
//
// Both IR operands are placed in FP/SIMD registers, a UQADD with element
// size B is emitted over them, and the destination register is recorded as
// the value produced by `inst`.
void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is requested as a scratch register because the
    // emitted instruction overwrites it with the result.
    const auto lhs_fpr = ctx.reg_alloc.UseScratchFpr(operands[0]);
    const ARM64Reg accum = EncodeRegToDouble(lhs_fpr);

    const auto rhs_fpr = ctx.reg_alloc.UseFpr(operands[1]);
    const ARM64Reg addend = EncodeRegToDouble(rhs_fpr);

    code.fp_emitter.UQADD(B, accum, accum, addend);
    ctx.reg_alloc.DefineValue(inst, accum);
}
// Emits code for the PackedSaturatedAddS8 IR instruction.
//
// Both IR operands are placed in FP/SIMD registers, an SQADD with element
// size B is emitted over them, and the destination register is recorded as
// the value produced by `inst`.
void EmitA64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is requested as a scratch register because the
    // emitted instruction overwrites it with the result.
    const auto lhs_fpr = ctx.reg_alloc.UseScratchFpr(operands[0]);
    const ARM64Reg accum = EncodeRegToDouble(lhs_fpr);

    const auto rhs_fpr = ctx.reg_alloc.UseFpr(operands[1]);
    const ARM64Reg addend = EncodeRegToDouble(rhs_fpr);

    code.fp_emitter.SQADD(B, accum, accum, addend);
    ctx.reg_alloc.DefineValue(inst, accum);
}
// Emits code for the PackedSaturatedSubU8 IR instruction.
//
// Both IR operands are placed in FP/SIMD registers, a UQSUB with element
// size B is emitted over them, and the destination register is recorded as
// the value produced by `inst`.
void EmitA64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is requested as a scratch register because the
    // emitted instruction overwrites it with the result.
    const auto lhs_fpr = ctx.reg_alloc.UseScratchFpr(operands[0]);
    const ARM64Reg minuend = EncodeRegToDouble(lhs_fpr);

    const auto rhs_fpr = ctx.reg_alloc.UseFpr(operands[1]);
    const ARM64Reg subtrahend = EncodeRegToDouble(rhs_fpr);

    code.fp_emitter.UQSUB(B, minuend, minuend, subtrahend);
    ctx.reg_alloc.DefineValue(inst, minuend);
}
// Emits code for the PackedSaturatedSubS8 IR instruction.
//
// Both IR operands are placed in FP/SIMD registers, an SQSUB with element
// size B is emitted over them, and the destination register is recorded as
// the value produced by `inst`.
void EmitA64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is requested as a scratch register because the
    // emitted instruction overwrites it with the result.
    const auto lhs_fpr = ctx.reg_alloc.UseScratchFpr(operands[0]);
    const ARM64Reg minuend = EncodeRegToDouble(lhs_fpr);

    const auto rhs_fpr = ctx.reg_alloc.UseFpr(operands[1]);
    const ARM64Reg subtrahend = EncodeRegToDouble(rhs_fpr);

    code.fp_emitter.SQSUB(B, minuend, minuend, subtrahend);
    ctx.reg_alloc.DefineValue(inst, minuend);
}
// Emits code for the PackedSaturatedAddU16 IR instruction.
//
// Both IR operands are placed in FP/SIMD registers, a UQADD with element
// size H is emitted over them, and the destination register is recorded as
// the value produced by `inst`.
void EmitA64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is requested as a scratch register because the
    // emitted instruction overwrites it with the result.
    const auto lhs_fpr = ctx.reg_alloc.UseScratchFpr(operands[0]);
    const ARM64Reg accum = EncodeRegToDouble(lhs_fpr);

    const auto rhs_fpr = ctx.reg_alloc.UseFpr(operands[1]);
    const ARM64Reg addend = EncodeRegToDouble(rhs_fpr);

    code.fp_emitter.UQADD(H, accum, accum, addend);
    ctx.reg_alloc.DefineValue(inst, accum);
}
// Emits code for the PackedSaturatedAddS16 IR instruction.
//
// Both IR operands are placed in FP/SIMD registers, an SQADD with element
// size H is emitted over them, and the destination register is recorded as
// the value produced by `inst`.
void EmitA64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is requested as a scratch register because the
    // emitted instruction overwrites it with the result.
    const auto lhs_fpr = ctx.reg_alloc.UseScratchFpr(operands[0]);
    const ARM64Reg accum = EncodeRegToDouble(lhs_fpr);

    const auto rhs_fpr = ctx.reg_alloc.UseFpr(operands[1]);
    const ARM64Reg addend = EncodeRegToDouble(rhs_fpr);

    code.fp_emitter.SQADD(H, accum, accum, addend);
    ctx.reg_alloc.DefineValue(inst, accum);
}
// Emits code for the PackedSaturatedSubU16 IR instruction.
//
// Both IR operands are placed in FP/SIMD registers, a UQSUB with element
// size H is emitted over them, and the destination register is recorded as
// the value produced by `inst`.
void EmitA64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is requested as a scratch register because the
    // emitted instruction overwrites it with the result.
    const auto lhs_fpr = ctx.reg_alloc.UseScratchFpr(operands[0]);
    const ARM64Reg minuend = EncodeRegToDouble(lhs_fpr);

    const auto rhs_fpr = ctx.reg_alloc.UseFpr(operands[1]);
    const ARM64Reg subtrahend = EncodeRegToDouble(rhs_fpr);

    code.fp_emitter.UQSUB(H, minuend, minuend, subtrahend);
    ctx.reg_alloc.DefineValue(inst, minuend);
}
// Emits code for the PackedSaturatedSubS16 IR instruction.
//
// Both IR operands are placed in FP/SIMD registers, an SQSUB with element
// size H is emitted over them, and the destination register is recorded as
// the value produced by `inst`.
void EmitA64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is requested as a scratch register because the
    // emitted instruction overwrites it with the result.
    const auto lhs_fpr = ctx.reg_alloc.UseScratchFpr(operands[0]);
    const ARM64Reg minuend = EncodeRegToDouble(lhs_fpr);

    const auto rhs_fpr = ctx.reg_alloc.UseFpr(operands[1]);
    const ARM64Reg subtrahend = EncodeRegToDouble(rhs_fpr);

    code.fp_emitter.SQSUB(H, minuend, minuend, subtrahend);
    ctx.reg_alloc.DefineValue(inst, minuend);
}
} // namespace Dynarmic::BackendA64

View File

@ -203,15 +203,15 @@ OPCODE(PackedSubU16, U32, U32,
// Opcode table rows: each OPCODE(...) entry gives an opcode name followed by
// type columns (presumably the result type first, then the operand types —
// confirm against the macro's consumer). Rows prefixed with `//` are disabled.
//OPCODE(PackedHalvingAddSubS16, U32, U32, U32 )
//OPCODE(PackedHalvingSubAddU16, U32, U32, U32 )
//OPCODE(PackedHalvingSubAddS16, U32, U32, U32 )
//OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
//OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
//OPCODE(PackedSaturatedSubU8, U32, U32, U32 )
//OPCODE(PackedSaturatedSubS8, U32, U32, U32 )
//OPCODE(PackedSaturatedAddU16, U32, U32, U32 )
//OPCODE(PackedSaturatedAddS16, U32, U32, U32 )
//OPCODE(PackedSaturatedSubU16, U32, U32, U32 )
//OPCODE(PackedSaturatedSubS16, U32, U32, U32 )
OPCODE(PackedSubS16, U32, U32, U32 )
OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
OPCODE(PackedSaturatedSubU8, U32, U32, U32 )
OPCODE(PackedSaturatedSubS8, U32, U32, U32 )
OPCODE(PackedSaturatedAddU16, U32, U32, U32 )
OPCODE(PackedSaturatedAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedSubU16, U32, U32, U32 )
OPCODE(PackedSaturatedSubS16, U32, U32, U32 )
//OPCODE(PackedAbsDiffSumS8, U32, U32, U32 )
//OPCODE(PackedSelect, U32, U32, U32, U32 )

View File

@ -258,18 +258,18 @@ INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm
// Decoder table rows: each INST(...) entry gives a handler identifier, the
// mnemonic string, and a 32-character encoding pattern; the trailing comment
// tags an architecture version (e.g. v6). Letters in the pattern presumably
// mark operand fields (cond, Rn, Rd, Rm) — confirm against the decoder.
// Rows prefixed with `//` are disabled.
INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6
// Parallel Add/Subtract (Saturating) instructions
//INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6
//INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6
INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6
INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6
//INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6
//INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6
//INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6
//INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6
//INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6
//INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6
INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6
INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6
INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6
INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6
//INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6
//INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6
//INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6
//INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6
INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6
INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6
// Parallel Add/Subtract (Halving) instructions
//INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6