Implement high precision frame limiter

Limits frames at a much higher level of precision than 'fps_max' and 'fps_max_gfx', which is probably ideal for reducing input latency even further. Also changed the logic of the NVIDIA Reflex frame limiter so that it uses the desktop's refresh rate when set to '-1'. The new render thread frame limiter behaves similarly. Using the desktop refresh rate on the render thread or NVIDIA Reflex frame limiter requires 'fps_max' to be set to 0 (unlimited); otherwise the limiters compete with each other, which results in a major performance drop when fps_max_(gfx/rt) is set to a value similar to fps_max.
This commit is contained in:
Kawe Mazidjatari 2023-09-11 22:20:24 +02:00
parent fdb4a4d429
commit 07dade5d5d
8 changed files with 216 additions and 4 deletions

View File

@ -21,8 +21,11 @@ ConVar* debug_draw_box_depth_test = nullptr;
ConVar* developer = nullptr;
ConVar* fps_max = nullptr;
ConVar* fps_max_vsync = nullptr;
#ifndef DEDICATED
ConVar* fps_max_rt = nullptr;
ConVar* fps_max_rt_tolerance = nullptr;
ConVar* fps_max_gfx = nullptr;
#endif // !DEDICATED
@ -308,7 +311,9 @@ void ConVar_StaticInit(void)
r_drawWorldMeshesDepthAtTheEnd = ConVar::StaticCreate("r_drawWorldMeshesDepthAtTheEnd", "1", FCVAR_DEVELOPMENTONLY | FCVAR_CHEAT, "Render world meshes (depth at the end).", false, 0.f, false, 0.f, nullptr, nullptr);
#ifndef DEDICATED
fps_max_gfx = ConVar::StaticCreate("fps_max_gfx", "0", FCVAR_RELEASE, "Frame rate limiter using NVIDIA Reflex Low Latency SDK.", true, 0.f, false, 0.f, nullptr, nullptr);
fps_max_rt = ConVar::StaticCreate("fps_max_rt", "0", FCVAR_RELEASE, "Frame rate limiter within the render thread. -1 indicates use the desktop refresh. 0 is unlocked.", true, -1.f, false, 0.f, nullptr, nullptr);
fps_max_rt_tolerance = ConVar::StaticCreate("fps_max_rt_tolerance", "0.25", FCVAR_RELEASE, "Maximum amount of frame time before frame limiter restarts.", true, 0.f, false, 0.f, nullptr, nullptr);
fps_max_gfx = ConVar::StaticCreate("fps_max_gfx", "0", FCVAR_RELEASE, "Frame rate limiter using NVIDIA Reflex Low Latency SDK. -1 indicates use the desktop refresh. 0 is unlocked.", true, -1.f, false, 0.f, nullptr, nullptr);
gfx_nvnUseLowLatency = ConVar::StaticCreate("gfx_nvnUseLowLatency" , "1", FCVAR_RELEASE | FCVAR_ARCHIVE, "Enables NVIDIA Reflex Low Latency SDK." , false, 0.f, false, 0.f, nullptr, nullptr);
gfx_nvnUseLowLatencyBoost = ConVar::StaticCreate("gfx_nvnUseLowLatencyBoost", "1", FCVAR_RELEASE | FCVAR_ARCHIVE, "Enables NVIDIA Reflex Low Latency Boost.", false, 0.f, false, 0.f, nullptr, nullptr);
#endif // !DEDICATED
@ -495,6 +500,7 @@ void ConVar_InitShipped(void)
#endif // !CLIENT_DLL
developer = g_pCVar->FindVar("developer");
fps_max = g_pCVar->FindVar("fps_max");
fps_max_vsync = g_pCVar->FindVar("fps_max_vsync");
base_tickinterval_sp = g_pCVar->FindVar("base_tickinterval_sp");
base_tickinterval_mp = g_pCVar->FindVar("base_tickinterval_mp");
fs_showAllReads = g_pCVar->FindVar("fs_showAllReads");
@ -579,6 +585,7 @@ void ConVar_InitShipped(void)
origin_disconnectWhenOffline->RemoveFlags(FCVAR_DEVELOPMENTONLY);
discord_updatePresence->RemoveFlags(FCVAR_DEVELOPMENTONLY);
#endif // !DEDICATED
fps_max_vsync->RemoveFlags(FCVAR_DEVELOPMENTONLY);
base_tickinterval_sp->RemoveFlags(FCVAR_DEVELOPMENTONLY);
base_tickinterval_mp->RemoveFlags(FCVAR_DEVELOPMENTONLY);

View File

@ -12,8 +12,11 @@ extern ConVar* debug_draw_box_depth_test;
extern ConVar* developer;
extern ConVar* fps_max;
extern ConVar* fps_max_vsync;
#ifndef DEDICATED
extern ConVar* fps_max_rt;
extern ConVar* fps_max_rt_tolerance;
extern ConVar* fps_max_gfx;
#endif // !DEDICATED

View File

@ -27,6 +27,8 @@ add_sources( SOURCE_GROUP "Debug"
)
add_sources( SOURCE_GROUP "Render"
"framelimit.cpp"
"framelimit.h"
"gl_matsysiface.h"
"gl_model_private.h"
"gl_rmain.cpp"

147
r5dev/engine/framelimit.cpp Normal file
View File

@ -0,0 +1,147 @@
//===========================================================================//
//
// Purpose: High-precision render-thread based frame rate limiter
//
//===========================================================================//
#include <dwmapi.h>
#include "tier0/platform_internal.h"
#include "windows/id3dx.h"
#include "sys_mainwind.h"
#include "framelimit.h"
//-----------------------------------------------------------------------------
// Purpose: constructor; puts the limiter in a fully defined idle state.
//          Every member is initialized, including m_Start, so no timestamp
//          is ever read with an indeterminate value before the first Reset().
//-----------------------------------------------------------------------------
CFrameLimit::CFrameLimit(void)
    : m_MilliSeconds(0.0)
    , m_FramesPerSecond(0.0)
    , m_Frames(0)
    , m_bRestart(false)
{
    // LARGE_INTEGER is a union; zero it through its 64-bit view.
    m_Start.QuadPart = 0;
    m_Next.QuadPart = 0;
    m_Time.QuadPart = 0;
}
//-----------------------------------------------------------------------------
// Purpose: initializer; (re)arms the limiter for a new target frame rate and
//          restarts the frame schedule from the current counter value.
// Input  : targetFps - desired frames per second. A value of 0 yields a zero
//                      frame interval instead of dividing by zero.
//-----------------------------------------------------------------------------
void CFrameLimit::Reset(double targetFps)
{
    m_FramesPerSecond = targetFps;

    // 1000.0 / 0.0 would produce an infinite interval, and converting that
    // to LONGLONG below is undefined behavior; use a zero interval instead.
    m_MilliSeconds = (targetFps != 0.0) ? (1000.0 / targetFps) : 0.0;

    QueryPerformanceCounter(&m_Start);
    m_Time.QuadPart = 0;

    // First deadline is exactly one frame interval after the start sample.
    m_Next.QuadPart = m_Start.QuadPart + (LONGLONG)((m_MilliSeconds / 1000.0) * g_pPerformanceFrequency->QuadPart);
    m_Frames = 0;
}
//-----------------------------------------------------------------------------
// Purpose: halts the thread until the next vertical blank occurs
// Output : true on success, false otherwise
//-----------------------------------------------------------------------------
bool CFrameLimit::WaitForVBlank(void)
{
IDXGIOutput* dxgiOutput;
IDXGISwapChain* swapChain = D3D11SwapChain();
if (swapChain != nullptr &&
SUCCEEDED(swapChain->GetContainingOutput(&dxgiOutput)))
{
DwmFlush();
dxgiOutput->WaitForVBlank();
return true;
}
return false;
}
//-----------------------------------------------------------------------------
// Purpose: runs the frame limiter logic; blocks the calling thread until the
//          deadline of the current frame has been reached.
//
//          'fps_max_rt' selects the target rate:
//            0  - limiter disabled
//           -1  - use the desktop refresh rate, but only while 'fps_max' is 0
//                 (unlimited); otherwise this limiter stays out of the way
//           >0  - explicit target frame rate
//
//          If the limiter falls behind its schedule by more than
//          'fps_max_rt_tolerance' seconds, the schedule is restarted from the
//          current time instead of trying to catch up.
//-----------------------------------------------------------------------------
void CFrameLimit::Run(void)
{
    float targetFps = fps_max_rt->GetFloat();

    if (targetFps == -1.0f)
    {
        // Make sure the global fps limiter is 'unlimited'
        // before we let the rt frame limiter cap it to
        // the desktop's refresh rate; not adhering to
        // this will result in a major performance drop.
        if (fps_max->GetFloat() == 0.0f)
        {
            // NOTE: a 59/60 Hz desktop could be mapped to 59.94 here if the
            // reported integer refresh rate proves to be too coarse.
            targetFps = (float)g_pGame->GetDesktopRefreshRate();
        }
        else
        {
            targetFps = 0.0f;
        }
    }

    // Anything non-positive (disabled, refresh rate unavailable, or not
    // allowed to use it) means no limiting. Bail out before Reset() so that
    // no zero/negative rate ever reaches the frame interval computation.
    if (targetFps <= 0.0f)
    {
        // Re-base the schedule once the limiter becomes active again, rather
        // than resuming from a stale start time.
        m_bRestart = true;
        return;
    }

    if (m_FramesPerSecond != targetFps)
        Reset(targetFps);

    m_Frames++;
    QueryPerformanceCounter(&m_Time);

    const double ticksPerSecond = (double)g_pPerformanceFrequency->QuadPart;
    const double secondsPerFrame = m_MilliSeconds / 1000.0;

    // How far (in frames) the current time is past the previous deadline.
    const double framesBehind = (double)(m_Time.QuadPart - m_Next.QuadPart) / ticksPerSecond / secondsPerFrame;

    // framesBehind > tolerance * fps  <=>  seconds behind > tolerance.
    if (framesBehind > (fps_max_rt_tolerance->GetFloat() * m_FramesPerSecond))
    {
        DevMsg(eDLL_T::ENGINE, "%s: Frame time too long (expected: %3.01fx); restarting...\n",
            __FUNCTION__, framesBehind / m_FramesPerSecond);

        m_bRestart = true;
    }

    if (m_bRestart)
    {
        // Start a fresh schedule whose first deadline is one frame from now.
        m_Frames = 0;
        m_Start.QuadPart = m_Time.QuadPart + (LONGLONG)(secondsPerFrame * ticksPerSecond);
        m_bRestart = false;
    }

    // Deadlines are derived from the schedule start and the frame count, so
    // per-frame rounding errors do not accumulate. The offset is added in
    // integer space to keep the full counter precision.
    m_Next.QuadPart = m_Start.QuadPart + (LONGLONG)((double)m_Frames * secondsPerFrame * ticksPerSecond);

    if (m_Next.QuadPart > 0)
    {
        // Wait threshold of ~1/60 s; presumably one frame of a 60 Hz display.
        const double vblankWaitThreshold = 0.016666666667 * ticksPerSecond;

        while (m_Time.QuadPart < m_Next.QuadPart)
        {
            // With plenty of time left, block on the vertical blank instead
            // of spinning; the remainder is busy-waited for precision.
            if ((double)(m_Next.QuadPart - m_Time.QuadPart) > vblankWaitThreshold)
            {
                WaitForVBlank();
            }

            QueryPerformanceCounter(&m_Time);
        }
    }

    // NOTE: no D3D11 device reference is acquired by this function, so none
    // must be released here; doing so would unbalance the device's refcount.
}
// Global render-thread frame limiter instance; its Run() method is invoked
// from the swap chain Present hook (g_FrameLimiter.Run()).
CFrameLimit g_FrameLimiter;

31
r5dev/engine/framelimit.h Normal file
View File

@ -0,0 +1,31 @@
#ifndef FRAMELIMIT_H
#define FRAMELIMIT_H
//-----------------------------------------------------------------------------
// RenderThread frame limiter
//-----------------------------------------------------------------------------
// Frame rate limiter built on the high-resolution performance counter.
// Run() is called once per frame and blocks until the deadline of that frame;
// deadlines are computed from a schedule start time and a frame counter.
class CFrameLimit
{
public:
// Initializes the limiter to an idle state (no target rate set).
CFrameLimit(void);
// Sets a new target frame rate (frames per second) and restarts the
// schedule from the current performance counter value.
void Reset(double target);
// Runs the limiter for the current frame; reads the 'fps_max_rt',
// 'fps_max' and 'fps_max_rt_tolerance' console variables.
void Run(void);
// Blocks until the next vertical blank of the output containing the swap
// chain; returns false if the swap chain or its output is unavailable.
bool WaitForVBlank(void);
private:
// Target frame interval in milliseconds (1000 / target fps).
double m_MilliSeconds;
// Target frame rate last passed to Reset().
double m_FramesPerSecond;
//double m_EffectiveMilliSeconds;
//LARGE_INTEGER m_Last;
// Performance counter value the current schedule is based on.
LARGE_INTEGER m_Start;
// Performance counter value of the next frame deadline.
LARGE_INTEGER m_Next;
// Most recent performance counter sample.
LARGE_INTEGER m_Time;
// Number of frames counted since the schedule was (re)started.
uint32_t m_Frames;
// Set when the limiter fell too far behind and the schedule must restart.
bool m_bRestart;
};
extern CFrameLimit g_FrameLimiter;
#endif // FRAMELIMIT_H

View File

@ -15,11 +15,12 @@
#include "engine/sys_dll2.h"
#include "engine/host_cmd.h"
#include "engine/traceinit.h"
#include "rtech/rtech_utils.h"
#ifndef DEDICATED
#include "engine/sys_mainwind.h"
#include "windows/id3dx.h"
#include "client/vengineclient_impl.h"
#endif // !DEDICATED
#include "rtech/rtech_utils.h"
#include "filesystem/filesystem.h"
constexpr char DFS_ENABLE_PATH[] = "/vpk/enable.txt";
@ -173,7 +174,26 @@ bool CEngineAPI::MainLoop()
#ifndef DEDICATED
const bool bUseLowLatencyMode = gfx_nvnUseLowLatency->GetBool();
const bool bUseLowLatencyBoost = gfx_nvnUseLowLatencyBoost->GetBool();
const float fpsMax = fps_max_gfx->GetFloat();
float fpsMax = fps_max_gfx->GetFloat();
if (fpsMax == -1.0f)
{
const float globalFps = fps_max->GetFloat();
// Make sure the global fps limiter is 'unlimited'
// before we let the gfx frame limiter cap it to
// the desktop's refresh rate; not adhering to
// this will result in a major performance drop.
if (globalFps == 0.0f)
{
fpsMax = (float)g_pGame->GetDesktopRefreshRate();
}
else // Don't let NVIDIA limit the frame rate.
{
fpsMax = 0.0f;
}
}
NV_SET_SLEEP_MODE_PARAMS_V1 params = {};
params.version = NV_SET_SLEEP_MODE_PARAMS_VER1;

View File

@ -8,6 +8,7 @@
#include "windows/input.h"
#include "gameui/IConsole.h"
#include "gameui/IBrowser.h"
#include "engine/framelimit.h"
#include "engine/sys_mainwind.h"
#include "inputsystem/inputsystem.h"
#include "public/bitmap/stb_image.h"
@ -139,6 +140,7 @@ HRESULT __stdcall Present(IDXGISwapChain* pSwapChain, UINT nSyncInterval, UINT n
g_bImGuiInitialized = true;
}
g_FrameLimiter.Run();
DrawImGui();
///////////////////////////////////////////////////////////////////////////////
HRESULT result = s_fnSwapChainPresent(pSwapChain, nSyncInterval, nFlags);

View File

@ -23,7 +23,7 @@ typedef HRESULT(__stdcall* IDXGIResizeBuffers) (IDXGISwapChain* pSwapChain, UI
/////////////////////////////////////////////////////////////////////////////
// Globals
extern BOOL g_bImGuiInitialized;
extern UINT g_nWindowRect[2];
extern UINT g_nWindowRect[2]; // TODO[ AMOS ]: Remove this in favor of CGame's window rect members???
/////////////////////////////////////////////////////////////////////////////
// Enums