diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0887b4eb..c9ef4fcb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -28,6 +28,8 @@ add_library(dynarmic
     common/fp/op/FPMulAdd.h
     common/fp/op/FPRecipEstimate.cpp
     common/fp/op/FPRecipEstimate.h
+    common/fp/op/FPRecipExponent.cpp
+    common/fp/op/FPRecipExponent.h
     common/fp/op/FPRecipStepFused.cpp
     common/fp/op/FPRecipStepFused.h
     common/fp/op/FPRoundInt.cpp
diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp
index b6f8a06c..c499e20f 100644
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@@ -719,6 +719,23 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPRecipEstimate<u64>(code, ctx, inst);
 }
 
+template <typename FPT>
+static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, args[0]);
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.CallFunction(&FP::FPRecipExponent<FPT>);
+}
+
+void EmitX64::EmitFPRecipExponent32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRecipExponent<u32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPRecipExponent64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRecipExponent<u64>(code, ctx, inst);
+}
+
 template<size_t fsize>
 static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     using FPT = mp::unsigned_integer_of_size<fsize>;
diff --git a/src/common/fp/op.h b/src/common/fp/op.h
index f7232407..c792e50e 100644
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@@ -8,6 +8,7 @@
 
 #include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRecipEstimate.h"
+#include "common/fp/op/FPRecipExponent.h"
 #include "common/fp/op/FPRecipStepFused.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
diff --git a/src/common/fp/op/FPRecipExponent.cpp b/src/common/fp/op/FPRecipExponent.cpp
new file mode 100644
index 00000000..eaa7addb
--- /dev/null
+++ b/src/common/fp/op/FPRecipExponent.cpp
@@ -0,0 +1,70 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <tuple>
+
+#include "common/common_types.h"
+#include "common/bit_util.h"
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/info.h"
+#include "common/fp/op/FPRecipExponent.h"
+#include "common/fp/process_nan.h"
+#include "common/fp/unpacked.h"
+
+namespace Dynarmic::FP {
+namespace {
+// We don't care about unreachable code warnings here
+// TODO: Remove this warning disabling of warnings when
+//       half-float support is added.
+#ifdef _MSC_VER
+#pragma warning(disable:4702)
+#endif
+template <typename FPT>
+FPT DetermineExponentValue(size_t value) {
+    if constexpr (sizeof(FPT) == sizeof(u32)) {
+        return static_cast<FPT>(Common::Bits<23, 30>(value));
+    }
+
+    if constexpr (sizeof(FPT) == sizeof(u64)) {
+        return static_cast<FPT>(Common::Bits<52, 62>(value));
+    }
+
+    // Half-float
+    return static_cast<FPT>(Common::Bits<10, 14>(value));
+}
+#ifdef _MSC_VER
+#pragma warning(default:4702)
+#endif
+} // Anonymous namespace
+
+template <typename FPT>
+FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
+    const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
+    (void)value;
+
+    if (type == FPType::SNaN || type == FPType::QNaN) {
+        return FPProcessNaN(type, op, fpcr, fpsr);
+    }
+
+    const FPT sign_bits = FPInfo<FPT>::Zero(sign);
+    const FPT exponent = DetermineExponentValue<FPT>(op);
+
+    // Zero and denormals
+    if (exponent == 0) {
+        const FPT max_exponent = Common::Ones<FPT>(FPInfo<FPT>::exponent_width) - 1;
+        return FPT(sign_bits | (max_exponent << FPInfo<FPT>::explicit_mantissa_width));
+    }
+
+    // Infinities and normals
+    const auto negated_exponent = (~exponent << FPInfo<FPT>::explicit_mantissa_width) & FPInfo<FPT>::exponent_mask;
+    return FPT(sign_bits | negated_exponent);
+}
+
+template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
+template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/op/FPRecipExponent.h b/src/common/fp/op/FPRecipExponent.h
new file mode 100644
index 00000000..285d0e9d
--- /dev/null
+++ b/src/common/fp/op/FPRecipExponent.h
@@ -0,0 +1,17 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2019 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::FP {
+
+class FPCR;
+class FPSR;
+
+template <typename FPT>
+FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 92f5e008..2b24778f 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -423,7 +423,7 @@ INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "01011
 //INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd")
 INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd")
 //INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd")
-//INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd")
+INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd")
 //INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd")
 INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd")
 //INST(FCVTMU_1, "FCVTMU (vector)", "0111111001111001101110nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
index ebb621c1..b8283d7c 100644
--- a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
+++ b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
@@ -170,6 +170,16 @@ bool TranslatorVisitor::FRECPE_2(bool sz, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) {
+    const size_t esize = sz ? 64 : 32;
+
+    const IR::U32U64 operand = V_scalar(esize, Vn);
+    const IR::U32U64 result = ir.FPRecipExponent(operand);
+
+    V_scalar(esize, Vd, result);
+    return true;
+}
+
 bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) {
     const size_t esize = sz ? 64 : 32;
 
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 46b27c0c..6452e7f7 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1895,6 +1895,13 @@ U32U64 IREmitter::FPRecipEstimate(const U32U64& a) {
     return Inst<U32U64>(Opcode::FPRecipEstimate64, a);
 }
 
+U32U64 IREmitter::FPRecipExponent(const U32U64& a) {
+    if (a.GetType() == Type::U32) {
+        return Inst<U32U64>(Opcode::FPRecipExponent32, a);
+    }
+    return Inst<U32U64>(Opcode::FPRecipExponent64, a);
+}
+
 U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) {
     if (a.GetType() == Type::U32) {
         return Inst<U32U64>(Opcode::FPRecipStepFused32, a, b);
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 5bfb7484..f58c6efa 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -305,6 +305,7 @@ public:
     U32U64 FPMulX(const U32U64& a, const U32U64& b);
     U32U64 FPNeg(const U32U64& a);
     U32U64 FPRecipEstimate(const U32U64& a);
+    U32U64 FPRecipExponent(const U32U64& a);
     U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
     U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
     U32U64 FPRSqrtEstimate(const U32U64& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 54ea8504..7306e3d0 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -484,6 +484,8 @@ OPCODE(FPNeg32, U32, U32
 OPCODE(FPNeg64, U64, U64 )
 OPCODE(FPRecipEstimate32, U32, U32 )
 OPCODE(FPRecipEstimate64, U64, U64 )
+OPCODE(FPRecipExponent32, U32, U32 )
+OPCODE(FPRecipExponent64, U64, U64 )
 OPCODE(FPRecipStepFused32, U32, U32, U32 )
 OPCODE(FPRecipStepFused64, U64, U64, U64 )
 OPCODE(FPRoundInt32, U32, U32, U8, U1 )