diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp index bf485cc9..69fb3cf9 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -3829,29 +3829,29 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply16(EmitContext& ctx, IR:: ctx.EraseInstruction(lower_inst); } + const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); + + if (code.HasHostFeature(HostFeature::AVX)) { + code.vpsrlw(lower_tmp, lower_tmp, 15); + code.vpaddw(upper_tmp, upper_tmp, upper_tmp); + code.vpor(upper_result, upper_tmp, lower_tmp); + code.vpcmpeqw(upper_tmp, upper_result, code.XmmBConst<16>(xword, 0x8000)); + code.vpxor(upper_result, upper_result, upper_tmp); + } else { + code.paddw(upper_tmp, upper_tmp); + code.psrlw(lower_tmp, 15); + code.movdqa(upper_result, upper_tmp); + code.por(upper_result, lower_tmp); + code.movdqa(upper_tmp, code.XmmBConst<16>(xword, 0x8000)); + code.pcmpeqw(upper_tmp, upper_result); + code.pxor(upper_result, upper_tmp); + } + + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + code.pmovmskb(bit, upper_tmp); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + if (upper_inst) { - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); - - if (code.HasHostFeature(HostFeature::AVX)) { - code.vpsrlw(lower_tmp, lower_tmp, 15); - code.vpaddw(upper_tmp, upper_tmp, upper_tmp); - code.vpor(upper_result, upper_tmp, lower_tmp); - code.vpcmpeqw(upper_tmp, upper_result, code.XmmBConst<16>(xword, 0x8000)); - code.vpxor(upper_result, upper_result, upper_tmp); - } else { - code.paddw(upper_tmp, upper_tmp); - code.psrlw(lower_tmp, 15); - code.movdqa(upper_result, upper_tmp); - code.por(upper_result, lower_tmp); - code.movdqa(upper_tmp, code.XmmBConst<16>(xword, 0x8000)); - code.pcmpeqw(upper_tmp, upper_result); - code.pxor(upper_result, upper_tmp); - } - - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); - code.pmovmskb(bit, upper_tmp); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(upper_inst, upper_result); ctx.EraseInstruction(upper_inst); } @@ -3880,23 +3880,23 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR:: code.vpaddq(odds, odds, odds); code.vpaddq(even, even, even); + const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); + + code.vpsrlq(upper_result, odds, 32); + code.vblendps(upper_result, upper_result, even, 0b1010); + + const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + + code.vpcmpeqd(mask, upper_result, code.XmmBConst<32>(xword, 0x80000000)); + code.vpxor(upper_result, upper_result, mask); + code.pmovmskb(bit, mask); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + + ctx.reg_alloc.Release(mask); + ctx.reg_alloc.Release(bit); + if (upper_inst) { - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); - - code.vpsrlq(upper_result, odds, 32); - code.vblendps(upper_result, upper_result, even, 0b1010); - - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); - - code.vpcmpeqd(mask, upper_result, code.XmmBConst<32>(xword, 0x80000000)); - code.vpxor(upper_result, upper_result, mask); - code.pmovmskb(bit, mask); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - - ctx.reg_alloc.Release(mask); - ctx.reg_alloc.Release(bit); - ctx.reg_alloc.DefineValue(upper_inst, upper_result); ctx.EraseInstruction(upper_inst); } @@ -3955,15 +3955,15 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR:: code.por(lower_result, x); code.psubd(upper_result, sign_correction); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + + code.movdqa(tmp, code.XmmBConst<32>(xword, 0x80000000)); + code.pcmpeqd(tmp, upper_result); + code.pxor(upper_result, tmp); + code.pmovmskb(bit, tmp); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + if (upper_inst) { - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); - - code.movdqa(tmp, code.XmmBConst<32>(xword, 0x80000000)); - code.pcmpeqd(tmp, upper_result); - code.pxor(upper_result, tmp); - code.pmovmskb(bit, tmp); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(upper_inst, upper_result); ctx.EraseInstruction(upper_inst); } diff --git a/tests/A64/a64.cpp b/tests/A64/a64.cpp index 79520569..d6ac341b 100644 --- a/tests/A64/a64.cpp +++ b/tests/A64/a64.cpp @@ -1179,3 +1179,23 @@ TEST_CASE("A64: Memory access (fastmem)", "[a64]") { jit.Run(); REQUIRE(strncmp(backing_memory + 0x100, backing_memory + 0x1F0, 23) == 0); } + +TEST_CASE("A64: SQRDMULH QC flag when output invalidated", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x0fbcd38b); // SQRDMULH.2S V11, V28, V28[1] + env.code_mem.emplace_back(0x7ef0f8eb); // FMINP.2D D11, V7 + env.code_mem.emplace_back(0x14000000); // B . + + jit.SetPC(0); + jit.SetVector(7, {0xb1b5'd0b1'4e54'e281, 0xb4cb'4fec'8563'1032}); + jit.SetVector(28, {0x8000'0000'0000'0000, 0x0000'0000'0000'0000}); + jit.SetFpcr(0x05400000); + + env.ticks_left = 3; + jit.Run(); + + REQUIRE(jit.GetFpsr() == 0x08000000); + REQUIRE(jit.GetVector(11) == Vector{0xb4cb'4fec'8563'1032, 0x0000'0000'0000'0000}); +} diff --git a/tests/A64/fuzz_with_unicorn.cpp b/tests/A64/fuzz_with_unicorn.cpp index 0307751c..847a2ae9 100644 --- a/tests/A64/fuzz_with_unicorn.cpp +++ b/tests/A64/fuzz_with_unicorn.cpp @@ -211,7 +211,7 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv fmt::print("{:3s}: {:016x}\n", A64::RegToString(static_cast(i)), regs[i]); } for (size_t i = 0; i < vecs.size(); ++i) { - fmt::print("{:3s}: {}{}\n", A64::VecToString(static_cast(i)), vecs[i][1], vecs[i][0]); + fmt::print("{:3s}: {:016x}{:016x}\n", A64::VecToString(static_cast(i)), vecs[i][1], vecs[i][0]); } fmt::print("sp : {:016x}\n", initial_sp); fmt::print("pc : {:016x}\n", instructions_start);