diff --git a/src/backend/A64/a32_emit_a64.cpp b/src/backend/A64/a32_emit_a64.cpp
index cef7d3e7..a5a429d6 100644
--- a/src/backend/A64/a32_emit_a64.cpp
+++ b/src/backend/A64/a32_emit_a64.cpp
@@ -407,11 +407,19 @@ void A32EmitA64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
 
     // TODO:Inline
     ctx.reg_alloc.HostCall(nullptr, args[0]);
+
+    ARM64Reg host_fpsr = ctx.reg_alloc.ScratchGpr();
 
     if (config.always_little_endian) {
         code.ANDI2R(code.ABI_PARAM1, code.ABI_PARAM1, 0xFFFFFDFF, ctx.reg_alloc.ScratchGpr());
     }
 
+    // Since this is one of the only places where the guest's sticky Q flag can be
+    // cleared, it is also a good place to clear the host's FPSR.QC flag.
+    code.MRS(host_fpsr, FIELD_FPSR);
+    code.ANDI2R(host_fpsr, host_fpsr, ~(1 << 27));
+    code._MSR(FIELD_FPSR, host_fpsr);
+
     code.MOV(code.ABI_PARAM2, X28);
     code.QuickCallFunction(&SetCpsrImpl);
 }
@@ -426,6 +434,7 @@ void A32EmitA64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
 
 void A32EmitA64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ARM64Reg host_fpsr = ctx.reg_alloc.ScratchGpr();
     if (args[0].IsImmediate()) {
         u32 imm = args[0].GetImmediateU32();
         ARM64Reg a = DecodeReg(ctx.reg_alloc.ScratchGpr());
@@ -443,6 +452,13 @@
         code.ANDI2R(a, a, 0xF0000000);
         code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_nzcv));
     }
+
+    // Since this is one of the only places where the guest's sticky Q flag can be
+    // cleared, it is also a good place to clear the host's FPSR.QC flag, which the
+    // saturating-arithmetic emitters read to detect saturation.
+    code.MRS(host_fpsr, FIELD_FPSR);
+    code.ANDI2R(host_fpsr, host_fpsr, ~(1 << 27));
+    code._MSR(FIELD_FPSR, host_fpsr);
 }
 
 void A32EmitA64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
diff --git a/src/backend/A64/emit_a64_saturation.cpp b/src/backend/A64/emit_a64_saturation.cpp
index 43959517..0f02be4b 100644
--- a/src/backend/A64/emit_a64_saturation.cpp
+++ b/src/backend/A64/emit_a64_saturation.cpp
@@ -20,6 +20,75 @@ namespace Dynarmic::BackendA64 {
 
 namespace mp = Dynarmic::Common::mp;
 
+namespace {
+
+enum class Op {
+    Add,
+    Sub,
+};
+
+template<Op op, size_t size>
+void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
+    ARM64Reg addend = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
+
+    if constexpr (op == Op::Add) {
+        code.fp_emitter.SQADD(size, result, result, addend);
+    }
+    else {
+        code.fp_emitter.SQSUB(size, result, result, addend);
+    }
+
+    if (overflow_inst) {
+        ARM64Reg overflow = ctx.reg_alloc.ScratchGpr();
+
+        code.MRS(overflow, FIELD_FPSR);
+        code.UBFX(overflow, overflow, 27, 1); // FPSR.QC (bit 27) is set if the operation saturated
+
+        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+} // anonymous namespace
+
+void EmitA64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
+}
+
+void EmitA64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
+}
+
+void EmitA64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
+}
+
+void EmitA64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
ctx, inst); +} + +void EmitA64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitA64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitA64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitA64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + void EmitA64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) { const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); diff --git a/src/backend/A64/opcodes.inc b/src/backend/A64/opcodes.inc index cee8895a..cfb262df 100644 --- a/src/backend/A64/opcodes.inc +++ b/src/backend/A64/opcodes.inc @@ -158,16 +158,16 @@ OPCODE(CountLeadingZeros64, U64, U64 //OPCODE(MinUnsigned64, U64, U64, U64 ) // Saturated instructions -//OPCODE(SignedSaturatedAdd8, U8, U8, U8 ) -//OPCODE(SignedSaturatedAdd16, U16, U16, U16 ) -//OPCODE(SignedSaturatedAdd32, U32, U32, U32 ) -//OPCODE(SignedSaturatedAdd64, U64, U64, U64 ) +OPCODE(SignedSaturatedAdd8, U8, U8, U8 ) +OPCODE(SignedSaturatedAdd16, U16, U16, U16 ) +OPCODE(SignedSaturatedAdd32, U32, U32, U32 ) +OPCODE(SignedSaturatedAdd64, U64, U64, U64 ) //OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 ) //OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 ) -//OPCODE(SignedSaturatedSub8, U8, U8, U8 ) -//OPCODE(SignedSaturatedSub16, U16, U16, U16 ) -//OPCODE(SignedSaturatedSub32, U32, U32, U32 ) -//OPCODE(SignedSaturatedSub64, U64, U64, U64 ) +OPCODE(SignedSaturatedSub8, U8, U8, U8 ) +OPCODE(SignedSaturatedSub16, U16, U16, U16 ) +OPCODE(SignedSaturatedSub32, U32, U32, U32 ) +OPCODE(SignedSaturatedSub64, U64, U64, U64 ) OPCODE(SignedSaturation, U32, U32, U8 ) //OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 ) //OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 ) diff --git a/src/frontend/A32/decoder/arm_a64.inc b/src/frontend/A32/decoder/arm_a64.inc index eafb39a8..1440b30c 100644 --- a/src/frontend/A32/decoder/arm_a64.inc +++ b/src/frontend/A32/decoder/arm_a64.inc @@ -286,10 +286,10 @@ INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6 // Saturated Add/Subtract instructions -//INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP -//INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") // v5xP -//INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP -//INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP +INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP +INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") // v5xP +INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP +INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP // Status Register Access instructions INST(arm_CPS, "CPS", "111100010000---00000000---0-----") // v6