backend\A64\emit_a64_packed.cpp: Implement Packed Halving Add/Sub instructions

This commit is contained in:
SachinVin 2019-07-27 09:54:48 +05:30
parent fd01d6fe0a
commit a5564f588d
3 changed files with 96 additions and 16 deletions

View File

@ -182,6 +182,86 @@ void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View File

@ -191,19 +191,19 @@ OPCODE(PackedSubU16, U32, U32,
//OPCODE(PackedAddSubS16, U32, U32, U32 )
//OPCODE(PackedSubAddU16, U32, U32, U32 )
//OPCODE(PackedSubAddS16, U32, U32, U32 )
//OPCODE(PackedHalvingAddU8, U32, U32, U32 )
//OPCODE(PackedHalvingAddS8, U32, U32, U32 )
//OPCODE(PackedHalvingSubU8, U32, U32, U32 )
//OPCODE(PackedHalvingSubS8, U32, U32, U32 )
//OPCODE(PackedHalvingAddU16, U32, U32, U32 )
//OPCODE(PackedHalvingAddS16, U32, U32, U32 )
//OPCODE(PackedHalvingSubU16, U32, U32, U32 )
//OPCODE(PackedHalvingSubS16, U32, U32, U32 )
//OPCODE(PackedHalvingAddSubU16, U32, U32, U32 )
//OPCODE(PackedHalvingAddSubS16, U32, U32, U32 )
//OPCODE(PackedHalvingSubAddU16, U32, U32, U32 )
//OPCODE(PackedHalvingSubAddS16, U32, U32, U32 )
OPCODE(PackedSubS16, U32, U32, U32 )
OPCODE(PackedHalvingAddU8, U32, U32, U32 )
OPCODE(PackedHalvingAddS8, U32, U32, U32 )
OPCODE(PackedHalvingSubU8, U32, U32, U32 )
OPCODE(PackedHalvingSubS8, U32, U32, U32 )
OPCODE(PackedHalvingAddU16, U32, U32, U32 )
OPCODE(PackedHalvingAddS16, U32, U32, U32 )
OPCODE(PackedHalvingSubU16, U32, U32, U32 )
OPCODE(PackedHalvingSubS16, U32, U32, U32 )
OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
OPCODE(PackedSaturatedSubU8, U32, U32, U32 )

View File

@ -272,18 +272,18 @@ INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm
INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6
// Parallel Add/Subtract (Halving) instructions
//INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6
//INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6
//INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") // v6
//INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") // v6
//INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6
//INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6
//INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6
//INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6
//INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") // v6
//INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") // v6
//INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6
//INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6
INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6
INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6
INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6
INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6
INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6
INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6
INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6
INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6
// Saturated Add/Subtract instructions
//INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP