From 85034beaac34bf20bef87137695b57383747d7ce Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 25 Jan 2018 18:37:03 +0000 Subject: [PATCH] emit_x64_packed: EmitPackedSubU16 modified xmm_b, which wasn't writeable For CPUs that didn't support SSE4.1, this was a bug. --- src/backend_x64/emit_x64_packed.cpp | 64 +++++++++++++++++------------ 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/src/backend_x64/emit_x64_packed.cpp b/src/backend_x64/emit_x64_packed.cpp index 30e31a26..8a8cb60e 100644 --- a/src/backend_x64/emit_x64_packed.cpp +++ b/src/backend_x64/emit_x64_packed.cpp @@ -194,38 +194,50 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + if (!ge_inst) { + Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - if (ge_inst) { - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { - Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + code->psubw(xmm_a, xmm_b); - code->movdqa(xmm_ge, xmm_a); - code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1 - code->pcmpeqw(xmm_ge, xmm_a); - - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); - ctx.EraseInstruction(ge_inst); - } else { - Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); - - // (a >= b) == !(b > a) - code->pcmpeqb(ones, ones); - code->paddw(xmm_a, code->MConst(0x80008000)); - code->paddw(xmm_b, code->MConst(0x80008000)); - code->movdqa(xmm_ge, xmm_b); - code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! 
- code->pxor(xmm_ge, ones); - - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); - ctx.EraseInstruction(ge_inst); - } + ctx.reg_alloc.DefineValue(inst, xmm_a); + return; } + if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + + code->movdqa(xmm_ge, xmm_a); + code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1 + code->pcmpeqw(xmm_ge, xmm_a); + + code->psubw(xmm_a, xmm_b); + + ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.EraseInstruction(ge_inst); + ctx.reg_alloc.DefineValue(inst, xmm_a); + return; + } + + Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); + Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); + + // (a >= b) == !(b > a) + code->pcmpeqb(ones, ones); + code->paddw(xmm_a, code->MConst(0x80008000)); + code->paddw(xmm_b, code->MConst(0x80008000)); + code->movdqa(xmm_ge, xmm_b); + code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! + code->pxor(xmm_ge, ones); + code->psubw(xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.EraseInstruction(ge_inst); ctx.reg_alloc.DefineValue(inst, xmm_a); }