From 3ccc415c521f1cc21ebb8c41be5eaebd4124e848 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 12 Jun 2020 15:24:37 +0100 Subject: [PATCH] emit_x64_saturation: Improve codegen for saturated result in EmitSignedSaturation --- src/backend/x64/emit_x64_saturation.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/backend/x64/emit_x64_saturation.cpp b/src/backend/x64/emit_x64_saturation.cpp index 1320428a..c739c0af 100644 --- a/src/backend/x64/emit_x64_saturation.cpp +++ b/src/backend/x64/emit_x64_saturation.cpp @@ -219,21 +219,18 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) { const u32 mask = (1u << N) - 1; const u32 positive_saturated_value = (1u << (N - 1)) - 1; const u32 negative_saturated_value = 1u << (N - 1); - const u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value); const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32(); const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value. code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]); // Put the appropriate saturated value in result - code.cmp(reg_a, positive_saturated_value); - code.mov(tmp, positive_saturated_value); - code.mov(result, sext_negative_satured_value); - code.cmovg(result, tmp); + code.mov(result, reg_a); + code.sar(result, 31); + code.xor_(result, positive_saturated_value); // Do the saturation code.cmp(overflow, mask);