diff --git a/src/backend/A64/emit_a64_packed.cpp b/src/backend/A64/emit_a64_packed.cpp
index 231b5f34..0a39f5cd 100644
--- a/src/backend/A64/emit_a64_packed.cpp
+++ b/src/backend/A64/emit_a64_packed.cpp
@@ -182,6 +182,94 @@ void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
+// Emits UHADD (size B) on the double-register forms of args[0] and args[1]; the scratch copy of args[0] receives the result.
+void EmitA64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    code.fp_emitter.UHADD(B, a, a, b);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+// Emits UHADD (size H) on the double-register forms of args[0] and args[1]; the scratch copy of args[0] receives the result.
+void EmitA64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    code.fp_emitter.UHADD(H, a, a, b);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+// Emits SHADD (size B) on the double-register forms of args[0] and args[1]; the scratch copy of args[0] receives the result.
+void EmitA64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    code.fp_emitter.SHADD(B, a, a, b);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+// Emits SHADD (size H) on the double-register forms of args[0] and args[1]; the scratch copy of args[0] receives the result.
+void EmitA64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    code.fp_emitter.SHADD(H, a, a, b);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+// Emits UHSUB (size B) on the double-register forms of args[0] and args[1]; the scratch copy of args[0] receives the result.
+void EmitA64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    code.fp_emitter.UHSUB(B, a, a, b);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+// Emits SHSUB (size B) on the double-register forms of args[0] and args[1]; the scratch copy of args[0] receives the result.
+void EmitA64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    code.fp_emitter.SHSUB(B, a, a, b);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+// Emits UHSUB (size H) on the double-register forms of args[0] and args[1]; the scratch copy of args[0] receives the result.
+void EmitA64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    code.fp_emitter.UHSUB(H, a, a, b);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+// Emits SHSUB (size H) on the double-register forms of args[0] and args[1]; the scratch copy of args[0] receives the result.
+void EmitA64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    code.fp_emitter.SHSUB(H, a, a, b);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
 void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc
index 77f8b15f..54368515 100644
--- a/src/backend/A64/opcodes.inc
+++ b/src/backend/A64/opcodes.inc
@@ -191,19 +191,19 @@ OPCODE(PackedSubU16, U32, U32,
 //OPCODE(PackedAddSubS16, U32, U32, U32 )
 //OPCODE(PackedSubAddU16, U32, U32, U32 )
 //OPCODE(PackedSubAddS16, U32, U32, U32 )
-//OPCODE(PackedHalvingAddU8, U32, U32, U32 )
-//OPCODE(PackedHalvingAddS8, U32, U32, U32 )
-//OPCODE(PackedHalvingSubU8, U32, U32, U32 )
-//OPCODE(PackedHalvingSubS8, U32, U32, U32 )
-//OPCODE(PackedHalvingAddU16, U32, U32, U32 )
-//OPCODE(PackedHalvingAddS16, U32, U32, U32 )
-//OPCODE(PackedHalvingSubU16, U32, U32, U32 )
-//OPCODE(PackedHalvingSubS16, U32, U32, U32 )
 //OPCODE(PackedHalvingAddSubU16, U32, U32, U32 )
 //OPCODE(PackedHalvingAddSubS16, U32, U32, U32 )
 //OPCODE(PackedHalvingSubAddU16, U32, U32, U32 )
 //OPCODE(PackedHalvingSubAddS16, U32, U32, U32 )
 OPCODE(PackedSubS16, U32, U32, U32 )
+OPCODE(PackedHalvingAddU8, U32, U32, U32 )
+OPCODE(PackedHalvingAddS8, U32, U32, U32 )
+OPCODE(PackedHalvingSubU8, U32, U32, U32 )
+OPCODE(PackedHalvingSubS8, U32, U32, U32 )
+OPCODE(PackedHalvingAddU16, U32, U32, U32 )
+OPCODE(PackedHalvingAddS16, U32, U32, U32 )
+OPCODE(PackedHalvingSubU16, U32, U32, U32 )
+OPCODE(PackedHalvingSubS16, U32, U32, U32 )
 OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
 OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
 OPCODE(PackedSaturatedSubU8, U32, U32, U32 )
diff --git a/src/frontend/A32/decoder/arm_a64.inc b/src/frontend/A32/decoder/arm_a64.inc
index 3d76920f..69ae415a 100644
--- a/src/frontend/A32/decoder/arm_a64.inc
+++ b/src/frontend/A32/decoder/arm_a64.inc
@@ -272,18 +272,18 @@ INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm
 INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6
 
 // Parallel Add/Subtract (Halving) instructions
-//INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6
-//INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6
 //INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") // v6
 //INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") // v6
-//INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6
-//INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6
-//INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6
-//INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6
 //INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") // v6
 //INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") // v6
-//INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6
-//INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6
+INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6
+INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6
+INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6
+INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6
+INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6
+INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6
+INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6
+INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6
 
 // Saturated Add/Subtract instructions
 //INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP