NVIDIA: move markers to actual IDXGISwapChain::Present runtime call

Testing revealed better latency reduction when the markers are moved directly here, as we then also take into account the additional buffer copies the game does in SpinPresent, our new render thread frame limiter, and ImGui. The difference is very large when the render thread frame limiter is used. The code still passes NVIDIA's Reflex testing utility after this patch.
This commit is contained in:
Kawe Mazidjatari 2023-12-26 01:42:54 +01:00
parent fde79e9f93
commit 77a20f4d9e
2 changed files with 13 additions and 15 deletions

View File

@ -115,20 +115,7 @@ void* __fastcall DispatchDrawCall(int64_t a1, uint64_t a2, int a3, int a4, int64
//---------------------------------------------------------------------------------
ssize_t SpinPresent(void)
{
// Purpose: wraps v_SpinPresent() with latency markers. RENDERSUBMIT_END and
// PRESENT_START are set before the call and PRESENT_END after it, all tagged
// with the same frame ID. The value returned by v_SpinPresent() is passed
// through to the caller unchanged.
// NOTE(review): v_SpinPresent is presumably the original (detoured) game
// function - confirm against its declaration.
//
// NOTE: -1 since we need to sync this with its corresponding frame, g_FrameNum
// gets incremented in CMaterialSystem::SwapBuffers, which is after the markers
// for simulation start/end and render submit start. The render thread (here)
// continues after to finish the frame.
const NvU64 frameID = (NvU64)MaterialSystem()->GetCurrentFrameCount() -1;
// Render submission for this frame is considered finished at this point.
GFX_SetLatencyMarker(D3D11Device(), RENDERSUBMIT_END, frameID);
// TODO[ AMOS ]: move to the actual Present runtime call? SpinPresent calls some
// other DX buffer copy APIs before the actual Present call is made.
GFX_SetLatencyMarker(D3D11Device(), PRESENT_START, frameID);
const ssize_t val = v_SpinPresent();
// The present marker pair is closed with the same frame ID used above.
GFX_SetLatencyMarker(D3D11Device(), PRESENT_END, frameID);
return val;
}

View File

@ -13,6 +13,7 @@
#include "engine/sys_engine.h"
#include "engine/sys_mainwind.h"
#include "inputsystem/inputsystem.h"
#include "materialsystem/cmaterialsystem.h"
#include "public/bitmap/stb_image.h"
#include "public/rendersystem/schema/texture.g.h"
@ -146,9 +147,19 @@ HRESULT __stdcall Present(IDXGISwapChain* pSwapChain, UINT nSyncInterval, UINT n
if (g_pEngine->GetQuitting() == IEngine::QUIT_NOTQUITTING)
DrawImGui();
///////////////////////////////////////////////////////////////////////////////
HRESULT result = s_fnSwapChainPresent(pSwapChain, nSyncInterval, nFlags);
///////////////////////////////////////////////////////////////////////////////
// NOTE: -1 since we need to sync this with its corresponding frame, g_FrameNum
// gets incremented in CMaterialSystem::SwapBuffers, which is after the markers
// for simulation start/end and render submit start. The render thread (here)
// continues after to finish the frame.
const NvU64 frameID = (NvU64)MaterialSystem()->GetCurrentFrameCount() - 1;
GFX_SetLatencyMarker(D3D11Device(), RENDERSUBMIT_END, frameID);
GFX_SetLatencyMarker(D3D11Device(), PRESENT_START, frameID);
const HRESULT result = s_fnSwapChainPresent(pSwapChain, nSyncInterval, nFlags);
GFX_SetLatencyMarker(D3D11Device(), PRESENT_END, frameID);
return result;
}