From e9c5c01edaf650c1e83f9199d5b51b527a14ba3b Mon Sep 17 00:00:00 2001
From: Wunkolo <Wunkolo@gmail.com>
Date: Fri, 28 May 2021 17:26:33 -0700
Subject: [PATCH] emit_x64{_vector}_floating_point: AVX512 implementation of
 ZeroIfNaN

Using a single `vfixupimm` instruction to turn `QNaN`/`SNaN` into `+0`
---
 src/dynarmic/backend/x64/emit_x64_floating_point.cpp       | 7 +++++++
 .../backend/x64/emit_x64_vector_floating_point.cpp         | 7 ++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
index 9d908a08..89e9711e 100644
--- a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
@@ -16,6 +16,7 @@
 
 #include "dynarmic/backend/x64/abi.h"
 #include "dynarmic/backend/x64/block_of_code.h"
+#include "dynarmic/backend/x64/constants.h"
 #include "dynarmic/backend/x64/emit_x64.h"
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/cast_util.h"
@@ -116,6 +117,12 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
 
 template<size_t fsize>
 void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
+    if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
+        constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
+                                             FpFixup::PosZero);
+        FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0));
+        return;
+    }
     code.xorps(xmm_scratch, xmm_scratch);
     FCODE(cmpords)(xmm_scratch, xmm_value);  // true mask when ordered (i.e.: when not an NaN)
     code.pand(xmm_value, xmm_scratch);
diff --git a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
index e4f77d6d..dfbd8da0 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
@@ -19,6 +19,7 @@
 
 #include "dynarmic/backend/x64/abi.h"
 #include "dynarmic/backend/x64/block_of_code.h"
+#include "dynarmic/backend/x64/constants.h"
 #include "dynarmic/backend/x64/emit_x64.h"
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/fp/fpcr.h"
@@ -203,7 +204,11 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) {
 template<size_t fsize>
 void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
     const Xbyak::Xmm nan_mask = xmm0;
-    if (code.HasHostFeature(HostFeature::AVX)) {
+    if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
+        constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
+                                             FpFixup::PosZero);
+        FCODE(vfixupimmp)(result, result, code.MConst(ptr_b, u64(nan_to_zero)), u8(0));
+    } else if (code.HasHostFeature(HostFeature::AVX)) {
         FCODE(vcmpordp)(nan_mask, result, result);
         FCODE(vandp)(result, result, nan_mask);
     } else {