backend/A64/emit_a64_saturation.cpp: Implement EmitSignedSaturation and EmitUnsignedSaturation

Implements SSAT, SSAT16, USAT, USAT16, QASX, QSAX, UQASX and UQSAX.
This commit is contained in:
SachinVin 2019-09-28 18:14:44 +05:30
parent 011d62d958
commit a6c2d1952a
4 changed files with 89 additions and 13 deletions

View File

@ -360,14 +360,13 @@ elseif(ARCHITECTURE_Aarch64)
backend/A64/emit_a64_data_processing.cpp
backend/A64/emit_a64_floating_point.cpp
backend/A64/emit_a64_packed.cpp
# backend/A64/emit_a64_saturation.cpp
backend/A64/emit_a64_saturation.cpp
# backend/A64/emit_a64_sm4.cpp
# backend/A64/emit_a64_vector.cpp
# backend/A64/emit_a64_vector_floating_point.cpp
backend/A64/hostloc.cpp
backend/A64/hostloc.h
backend/A64/jitstate_info.h
# backend/A64/oparg.h
backend/A64/opcodes.inc
backend/A64/perf_map.cpp
backend/A64/perf_map.h

View File

@ -20,5 +20,82 @@ namespace Dynarmic::BackendA64 {
namespace mp = Dynarmic::Common::mp;
namespace {
// Emits host code for the SignedSaturation IR opcode.
//
// args[0] is the 32-bit value to saturate and args[1] is an immediate N
// (asserted to be in [1, 32]) giving the width of the signed target range.
// The saturated value is defined as the result of `inst`. When a
// GetOverflowFromOp pseudo-operation is attached to `inst`, it is defined too
// (set when the input did not fit in N signed bits) and then erased.
void EmitA64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N >= 1 && N <= 32);

    // N == 32: the value is passed through unchanged and any attached
    // overflow query is replaced by a constant `false`; no code is emitted.
    if (N == 32) {
        if (overflow_inst) {
            overflow_inst->ReplaceUsesWith(IR::Value(false));
        }
        ctx.reg_alloc.DefineValue(inst, args[0]);
        return;
    }

    // Compile-time constants describing the signed N-bit range.
    const u32 sign_bit = 1u << (N - 1);                             // bias; also the N-bit pattern of the minimum
    const u32 max_positive = sign_bit - 1;                          // largest representable value
    const u32 biased_limit = (1u << N) - 1;                         // largest biased value that is still in range
    const u32 sext_min_negative = Common::SignExtend(N, sign_bit);  // minimum, sign-extended to 32 bits

    // NOTE: register requests are issued in the same order as before.
    const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg tmp = DecodeReg(ctx.reg_alloc.ScratchGpr());

    // Bias the input by 2^(N-1): afterwards `overflow` lies in [0, biased_limit]
    // exactly when reg_a was inside the signed N-bit range.
    code.ADDI2R(overflow, reg_a, sign_bit, overflow);

    // Preselect the value to use if clamping turns out to be necessary:
    // the maximum when reg_a is above it (signed compare), otherwise the minimum.
    code.MOVI2R(tmp, max_positive);
    code.CMP(reg_a, tmp);
    code.MOVI2R(result, sext_min_negative);
    code.CSEL(result, tmp, result, CC_GT);

    // Range check on the biased value (tmp is no longer needed and serves as
    // scratch here); keep reg_a itself when it was in range.
    code.CMPI2R(overflow, biased_limit, tmp);
    code.CSEL(result, reg_a, result, CC_LS);

    if (overflow_inst) {
        // Same comparison as the range check above: HI means out of range.
        code.CSET(overflow, CC_HI);

        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// Emits host code for the UnsignedSaturation IR opcode.
//
// args[0] is the 32-bit value (interpreted as signed) and args[1] is an
// immediate N (asserted to be <= 31). The value is clamped to [0, 2^N - 1] and
// defined as the result of `inst`. When a GetOverflowFromOp pseudo-operation
// is attached to `inst`, it is defined too and then erased.
void EmitA64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N <= 31);

    // Upper bound of the unsigned N-bit range.
    const u32 upper_bound = (1u << N) - 1;

    // NOTE: register requests are issued in the same order as before.
    const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());

    // result = clamp(reg_a, 0, upper_bound), driven by a single comparison:
    //   - start with the upper bound;
    //   - signed   reg_a <= upper_bound -> 0 (this is what negative inputs end up with);
    //   - unsigned reg_a <= upper_bound -> reg_a is already in range, take it as is.
    code.MOVI2R(result, upper_bound);
    code.CMP(reg_a, result);
    code.CSEL(result, WZR, result, CC_LE);
    code.CSEL(result, reg_a, result, CC_LS);

    if (overflow_inst) {
        // Unsigned "higher" on the comparison above means reg_a was outside [0, upper_bound].
        code.CSET(overflow, CC_HI);

        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendA64

View File

@ -168,7 +168,7 @@ OPCODE(CountLeadingZeros64, U64, U64
//OPCODE(SignedSaturatedSub16, U16, U16, U16 )
//OPCODE(SignedSaturatedSub32, U32, U32, U32 )
//OPCODE(SignedSaturatedSub64, U64, U64, U64 )
//OPCODE(SignedSaturation, U32, U32, U8 )
OPCODE(SignedSaturation, U32, U32, U8 )
//OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )
@ -177,7 +177,7 @@ OPCODE(CountLeadingZeros64, U64, U64
//OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
//OPCODE(UnsignedSaturation, U32, U32, U8 )
OPCODE(UnsignedSaturation, U32, U32, U8 )
// Packed instructions
OPCODE(PackedAddU8, U32, U32, U32 )

View File

@ -200,10 +200,10 @@ INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm
INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") // v6
// Saturation instructions
//INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6
//INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6
//INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6
//INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6
INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6
INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6
INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6
INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6
// Divide instructions
INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") // v7a
@ -260,14 +260,14 @@ INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm
// Parallel Add/Subtract (Saturating) instructions
INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6
INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6
//INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6
//INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6
INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6
INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6
INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6
INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6
INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6
INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6
//INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6
//INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6
INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6
INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6
INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6
INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6