diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 512e283f..f9782fb2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -25,6 +25,8 @@ add_library(dynarmic
     common/fp/op.h
     common/fp/op/FPMulAdd.cpp
     common/fp/op/FPMulAdd.h
+    common/fp/op/FPRecipEstimate.cpp
+    common/fp/op/FPRecipEstimate.h
     common/fp/op/FPRoundInt.cpp
     common/fp/op/FPRoundInt.h
     common/fp/op/FPRSqrtEstimate.cpp
diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 007f6438..85235399 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -813,6 +813,28 @@ void EmitX64::EmitFPMulAdd64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPMulAddFallback(code, ctx, inst);
 }
 
+/// Emits a host call to FP::FPRecipEstimate<FPT>. The operand (argument 0 of inst) is
+/// set up by HostCall, FPCR is loaded into the second ABI parameter register, and the
+/// address r15 + offsetof_fpsr_exc is passed in the third.
+template<typename FPT>
+static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, args[0]);
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.CallFunction(&FP::FPRecipEstimate<FPT>);
+}
+
+// Reciprocal estimate of a 32-bit operand.
+void EmitX64::EmitFPRecipEstimate32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRecipEstimate<u32>(code, ctx, inst);
+}
+
+// Reciprocal estimate of a 64-bit operand.
+void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRecipEstimate<u64>(code, ctx, inst);
+}
+
 static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize) {
     const auto rounding = static_cast(inst->GetArg(1).GetU8());
     const bool exact = inst->GetArg(2).GetU1();
diff --git a/src/common/fp/op.h b/src/common/fp/op.h
index b74cc14d..6f8749ff 100644
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@@ -7,6 +7,7 @@
 #pragma once
 
 #include "common/fp/op/FPMulAdd.h"
+#include "common/fp/op/FPRecipEstimate.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
 #include "common/fp/op/FPRSqrtStepFused.h"
diff --git a/src/common/fp/op/FPRecipEstimate.cpp
b/src/common/fp/op/FPRecipEstimate.cpp
new file mode 100644
index 00000000..3cfdd799
--- /dev/null
+++ b/src/common/fp/op/FPRecipEstimate.cpp
@@ -0,0 +1,141 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <array>
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/info.h"
+#include "common/fp/op/FPRecipEstimate.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/process_nan.h"
+#include "common/fp/unpacked.h"
+#include "common/safe_ops.h"
+
+namespace Dynarmic::FP {
+
+/// Input is a u0.9 fixed point number. Only values in [0.5, 1.0) are valid.
+/// Output is a u0.8 fixed point number, with an implied 1 prefixed.
+/// i.e.: The output is a value in [1.0, 2.0).
+/// The table is indexed by a - 256 with no range check, so a must be in [256, 512).
+static u8 RecipEstimate(u64 a) {
+    constexpr u64 offset = 256;
+    using LUT = std::array<u8, 256>;
+
+    static const LUT lut = [] {
+        LUT result{};
+        for (u64 i = 0; i < result.size(); i++) {
+            u64 a = i + offset;
+
+            // Take the midpoint of the input step, divide, then round to nearest.
+            a = a * 2 + 1;
+            u64 b = (1u << 19) / a;
+            // (b + 1) / 2 lies in [256, 511]; the u8 cast drops bit 8, the implied 1.
+            result[i] = static_cast<u8>((b + 1) / 2);
+        }
+        return result;
+    }();
+
+    return lut[a - offset];
+}
+
+/// Computes a reciprocal estimate of op; the mantissa of a finite, non-zero result
+/// comes from the RecipEstimate table (8 fraction bits).
+///  - NaN inputs are handed to FPProcessNaN.
+///  - An infinity gives a zero; a zero raises DivideByZero and gives an infinity.
+///  - An exponent below exponent_min - 2 raises Overflow and Inexact and gives an
+///    infinity or the largest normal value, chosen by the rounding mode.
+///  - With flush-to-zero enabled, an exponent of at least -exponent_min sets FPSR.UFC
+///    and gives a zero.
+/// Every result carries the sign of op.
+template<typename FPT>
+FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
+    FPType type;
+    bool sign;
+    FPUnpacked value;
+    std::tie(type, sign, value) = FPUnpack<FPT>(op, fpcr, fpsr);
+
+    if (type == FPType::SNaN || type == FPType::QNaN) {
+        return FPProcessNaN(type, op, fpcr, fpsr);
+    }
+
+    if (type == FPType::Infinity) {
+        return FPInfo<FPT>::Zero(sign);
+    }
+
+    if (type == FPType::Zero) {
+        FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
+        return FPInfo<FPT>::Infinity(sign);
+    }
+
+    if (value.exponent < FPInfo<FPT>::exponent_min - 2) {
+        const bool overflow_to_inf = [&]{
+            switch (fpcr.RMode()) {
+            case RoundingMode::ToNearest_TieEven:
+                return true;
+            case RoundingMode::TowardsPlusInfinity:
+                return !sign;
+            case RoundingMode::TowardsMinusInfinity:
+                return sign;
+            case RoundingMode::TowardsZero:
+                return false;
+            default:
+                UNREACHABLE();
+            }
+            return false;
+        }();
+
+        FPProcessException(FPExc::Overflow, fpcr, fpsr);
+        FPProcessException(FPExc::Inexact, fpcr, fpsr);
+        return overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
+    }
+
+    if ((fpcr.FZ() && !std::is_same_v<FPT, u16>) || (fpcr.FZ16() && std::is_same_v<FPT, u16>)) {
+        if (value.exponent >= -FPInfo<FPT>::exponent_min) {
+            fpsr.UFC(true);
+            return FPInfo<FPT>::Zero(sign);
+        }
+    }
+
+    const u64 scaled = Safe::LogicalShiftRight(value.mantissa, normalized_point_position - 8);
+    u64 estimate = static_cast<u64>(RecipEstimate(scaled)) << (FPInfo<FPT>::explicit_mantissa_width - 8);
+    // The table value stands for 1.xxx (implied leading 1) while the input is taken as
+    // 1.xxx * 2^exponent, so the reciprocal's exponent is one below the negated exponent.
+    // NOTE(review): assumes FPUnpack normalises to 1.xxx * 2^exponent - confirm in unpacked.h.
+    int result_exponent = -(value.exponent + 1);
+    if (result_exponent < FPInfo<FPT>::exponent_min) {
+        // Denormal result: make the leading 1 explicit and shift it into the fraction.
+        switch (result_exponent) {
+        case (FPInfo<FPT>::exponent_min - 1):
+            estimate |= FPInfo<FPT>::implicit_leading_bit;
+            estimate >>= 1;
+            break;
+        case (FPInfo<FPT>::exponent_min - 2):
+            estimate |= FPInfo<FPT>::implicit_leading_bit;
+            estimate >>= 2;
+            // Encode with the same exponent as the case above (exponent_min - 1), not
+            // with an unbiased exponent of 0, which would encode a value near 1.0.
+            result_exponent++;
+            break;
+        default:
+            UNREACHABLE();
+        }
+    }
+
+    // The sign has to be re-applied here, as the special-case returns above already do.
+    const FPT bits_sign = FPInfo<FPT>::Zero(sign);
+    const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
+    const FPT bits_mantissa = static_cast<FPT>(estimate);
+    return (bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask) | bits_sign;
+}
+
+template u32 FPRecipEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
+template u64 FPRecipEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/op/FPRecipEstimate.h b/src/common/fp/op/FPRecipEstimate.h
new file mode 100644
index 00000000..61bcecfd
--- /dev/null
+++ b/src/common/fp/op/FPRecipEstimate.h
@@ -0,0 +1,17 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::FP {
+
+class FPCR;
+class FPSR;
+
+template<typename FPT>  // explicitly instantiated for u32 and u64 in FPRecipEstimate.cpp
+FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 8be07c93..fc69608e 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1488,6 +1488,15 @@ U32U64 IREmitter::FPNeg(const U32U64& a) {
     }
 }
 
+// Emits FPRecipEstimate32 when a is of type U32 and FPRecipEstimate64 otherwise;
+// the result has the same width as the operand.
+U32U64 IREmitter::FPRecipEstimate(const U32U64& a) {
+    if (a.GetType() == Type::U32) {
+        return Inst<U32U64>(Opcode::FPRecipEstimate32, a);
+    }
+    return Inst<U32U64>(Opcode::FPRecipEstimate64, a);
+}
+
 U32U64 IREmitter::FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact) {
     if (a.GetType() == Type::U32) {
         return Inst(Opcode::FPRoundInt32, a, static_cast(rounding), Imm1(exact));
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 94c23038..85747d1b 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -269,6 +269,7 @@ public:
     U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32U64 FPMulAdd(const U32U64& addend, const U32U64& op1, const U32U64& op2, bool fpscr_controlled);
     U32U64 FPNeg(const U32U64& a);
+    U32U64 FPRecipEstimate(const U32U64& a);
     U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
     U32U64 FPRSqrtEstimate(const U32U64& a);
     U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 409bb2f8..1f115310 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -394,6 +394,8 @@ OPCODE(FPMulAdd32, T::U32, T::U32,
 OPCODE(FPMulAdd64, T::U64, T::U64, T::U64, T::U64 )
 OPCODE(FPNeg32, T::U32, T::U32 )
 OPCODE(FPNeg64, T::U64, T::U64 )
+OPCODE(FPRecipEstimate32, T::U32, T::U32 )
+OPCODE(FPRecipEstimate64, T::U64, T::U64 )
 OPCODE(FPRoundInt32, T::U32, T::U32, T::U8, T::U1 )
 OPCODE(FPRoundInt64, T::U64, T::U64, T::U8, T::U1 )
 OPCODE(FPRSqrtEstimate32, T::U32, T::U32 )