backend/A64/emit_a64_saturation.cpp: Implement EmitSignedSaturation and EmitUnsignedSaturation
Implements SSAT, SSAT16, USAT, USAT16, QASX, QSAX, UQASX and UQSAX.
This commit is contained in:
parent
011d62d958
commit
a6c2d1952a
@ -360,14 +360,13 @@ elseif(ARCHITECTURE_Aarch64)
|
||||
backend/A64/emit_a64_data_processing.cpp
|
||||
backend/A64/emit_a64_floating_point.cpp
|
||||
backend/A64/emit_a64_packed.cpp
|
||||
# backend/A64/emit_a64_saturation.cpp
|
||||
backend/A64/emit_a64_saturation.cpp
|
||||
# backend/A64/emit_a64_sm4.cpp
|
||||
# backend/A64/emit_a64_vector.cpp
|
||||
# backend/A64/emit_a64_vector_floating_point.cpp
|
||||
backend/A64/hostloc.cpp
|
||||
backend/A64/hostloc.h
|
||||
backend/A64/jitstate_info.h
|
||||
# backend/A64/oparg.h
|
||||
backend/A64/opcodes.inc
|
||||
backend/A64/perf_map.cpp
|
||||
backend/A64/perf_map.h
|
||||
|
@ -20,5 +20,82 @@ namespace Dynarmic::BackendA64 {
|
||||
|
||||
namespace mp = Dynarmic::Common::mp;
|
||||
|
||||
namespace {
|
||||
// Emits host code for the IR SignedSaturation operation.
//
// args[0] is the 32-bit value to saturate and args[1] is an immediate bit
// width N (1..32). The defined result is args[0] clamped to the signed N-bit
// range [-2^(N-1), 2^(N-1) - 1]. If a GetOverflowFromOp pseudo-operation is
// attached to `inst`, it is defined as 1 when clamping changed the value and
// 0 otherwise, and is then erased from the instruction stream.
void EmitA64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_op = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N >= 1 && N <= 32);

    // Saturating a 32-bit value to 32 bits is the identity: forward the input
    // unchanged and report "no overflow" as a constant.
    if (N == 32) {
        if (overflow_op) {
            overflow_op->ReplaceUsesWith(IR::Value(false));
        }
        ctx.reg_alloc.DefineValue(inst, args[0]);
        return;
    }

    // From here on 1 <= N <= 31, so none of the shifts below can reach the full word width.
    const u32 sign_bit = 1u << (N - 1);                       // magnitude of the most negative N-bit value
    const u32 range_mask = (1u << N) - 1;                     // number of representable N-bit values, minus one
    const u32 max_value = sign_bit - 1;                       // largest representable N-bit value
    const u32 min_value = Common::SignExtend(N, sign_bit);    // smallest representable value, widened to 32 bits

    const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg biased = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());

    // biased = reg_a + 2^(N-1). Inputs inside the representable range land in
    // [0, range_mask]; anything outside it ends up above range_mask when
    // viewed as an unsigned number.
    code.ADDI2R(biased, reg_a, sign_bit, biased);

    // Preload result with the bound to use if the input is out of range:
    // max_value when reg_a is (signed) greater than it, min_value otherwise.
    code.MOVI2R(scratch, max_value);
    code.CMP(reg_a, scratch);
    code.MOVI2R(result, min_value);
    code.CSEL(result, scratch, result, CC_GT);

    // A single unsigned compare of the biased value decides whether the input
    // was in range; if so, the input itself replaces the preloaded bound.
    // scratch is free to be clobbered here, its value was consumed by the CSEL above.
    code.CMPI2R(biased, range_mask, scratch);
    code.CSEL(result, reg_a, result, CC_LS);

    if (overflow_op) {
        // Relies on the flags of the range compare above still being live:
        // unsigned-higher means the input was outside the range.
        code.CSET(biased, CC_HI);

        ctx.reg_alloc.DefineValue(overflow_op, biased);
        ctx.EraseInstruction(overflow_op);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||
|
||||
// Emits host code for the IR UnsignedSaturation operation.
//
// args[0] is a 32-bit value, interpreted as signed, and args[1] is an
// immediate bit width N (0..31). The defined result is args[0] clamped to the
// unsigned N-bit range [0, 2^N - 1]. If a GetOverflowFromOp pseudo-operation
// is attached to `inst`, it is defined as 1 when clamping changed the value
// and 0 otherwise, and is then erased from the instruction stream.
void EmitA64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_op = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N <= 31);

    // Largest value representable in N unsigned bits.
    const u32 upper_bound = (1u << N) - 1;

    const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg overflow_flag = DecodeReg(ctx.reg_alloc.ScratchGpr());

    // result = clamp(reg_a, 0, upper_bound), driven by one compare:
    //  - start from upper_bound (the answer for inputs that are too large);
    //  - signed "less or equal" covers negative and in-range inputs: use 0;
    //  - unsigned "lower or same" covers only in-range inputs: use reg_a.
    // Negative inputs fail the unsigned test and therefore keep the 0.
    code.MOVI2R(result, upper_bound);
    code.CMP(reg_a, result);
    code.CSEL(result, WZR, result, CC_LE);
    code.CSEL(result, reg_a, result, CC_LS);

    if (overflow_op) {
        // Still using the flags from the CMP above: unsigned-higher is true
        // both for negative inputs and for inputs above upper_bound.
        code.CSET(overflow_flag, CC_HI);

        ctx.reg_alloc.DefineValue(overflow_op, overflow_flag);
        ctx.EraseInstruction(overflow_op);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||
|
||||
} // namespace Dynarmic::BackendA64
|
||||
|
@ -168,7 +168,7 @@ OPCODE(CountLeadingZeros64, U64, U64
|
||||
//OPCODE(SignedSaturatedSub16, U16, U16, U16 )
|
||||
//OPCODE(SignedSaturatedSub32, U32, U32, U32 )
|
||||
//OPCODE(SignedSaturatedSub64, U64, U64, U64 )
|
||||
//OPCODE(SignedSaturation, U32, U32, U8 )
|
||||
OPCODE(SignedSaturation, U32, U32, U8 )
|
||||
//OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
|
||||
//OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
|
||||
//OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )
|
||||
@ -177,7 +177,7 @@ OPCODE(CountLeadingZeros64, U64, U64
|
||||
//OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
|
||||
//OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
|
||||
//OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
|
||||
//OPCODE(UnsignedSaturation, U32, U32, U8 )
|
||||
OPCODE(UnsignedSaturation, U32, U32, U8 )
|
||||
|
||||
// Packed instructions
|
||||
OPCODE(PackedAddU8, U32, U32, U32 )
|
||||
|
@ -200,10 +200,10 @@ INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm
|
||||
INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") // v6
|
||||
|
||||
// Saturation instructions
|
||||
//INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6
|
||||
//INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6
|
||||
//INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6
|
||||
//INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6
|
||||
INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6
|
||||
INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6
|
||||
INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6
|
||||
INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6
|
||||
|
||||
// Divide instructions
|
||||
INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") // v7a
|
||||
@ -260,14 +260,14 @@ INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm
|
||||
// Parallel Add/Subtract (Saturating) instructions
|
||||
INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6
|
||||
INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6
|
||||
//INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6
|
||||
//INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6
|
||||
INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6
|
||||
INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6
|
||||
INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6
|
||||
INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6
|
||||
INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6
|
||||
INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6
|
||||
//INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6
|
||||
//INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6
|
||||
INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6
|
||||
INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6
|
||||
INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6
|
||||
INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user