Improve threadtools

* Added and utilized auto lock/unlock mechanism.
* Rebuilt CThreadFastMutex's 'Lock' and 'Unlock' methods in the SDK.
* Forced thread intrinsics and ID checkers as inline.
This commit is contained in:
Kawe Mazidjatari 2023-07-02 11:38:36 +02:00
parent bfd093d22b
commit 9b74ac2e53
4 changed files with 222 additions and 89 deletions

View File

@ -28,8 +28,8 @@ std::unordered_set<MDLHandle_t> g_vBadMDLHandles;
//-----------------------------------------------------------------------------
studiohdr_t* CMDLCache::FindMDL(CMDLCache* cache, MDLHandle_t handle, void* a3)
{
studiohdr_t* pStudioHdr; // rax
studiodata_t* pStudioData = cache->GetStudioData(handle);
studiohdr_t* pStudioHdr;
if (pStudioData)
{
@ -116,7 +116,8 @@ void CMDLCache::FindCachedMDL(CMDLCache* cache, studiodata_t* pStudioData, void*
{
if (a3)
{
pStudioData->m_Mutex.WaitForLock();
AUTO_LOCK(pStudioData->m_Mutex);
*(_QWORD*)((int64_t)a3 + 0x880) = *(_QWORD*)&pStudioData->pad[0x24];
int64_t v6 = *(_QWORD*)&pStudioData->pad[0x24];
if (v6)
@ -124,7 +125,6 @@ void CMDLCache::FindCachedMDL(CMDLCache* cache, studiodata_t* pStudioData, void*
*(_QWORD*)&pStudioData->pad[0x24] = (int64_t)a3;
*(_QWORD*)((int64_t)a3 + 0x870) = (int64_t)cache;
*(_WORD*)((int64_t)a3 + 0x888) = pStudioData->m_Handle;
pStudioData->m_Mutex.ReleaseWaiter();
}
}
@ -138,14 +138,13 @@ void CMDLCache::FindCachedMDL(CMDLCache* cache, studiodata_t* pStudioData, void*
//-----------------------------------------------------------------------------
studiohdr_t* CMDLCache::FindUncachedMDL(CMDLCache* cache, MDLHandle_t handle, studiodata_t* pStudioData, void* a4)
{
studiohdr_t* pStudioHdr; // rdi
studiohdr_t** pAnimData; // rax
AUTO_LOCK(pStudioData->m_Mutex);
pStudioData->m_Mutex.WaitForLock();
const char* szModelName = cache->GetModelName(handle);
size_t nFileNameLen = strlen(szModelName);
studiohdr_t* pStudioHdr;
if (nFileNameLen < 5 ||
(Q_stricmp(&szModelName[nFileNameLen - 5], ".rmdl") != 0) &&
(Q_stricmp(&szModelName[nFileNameLen - 5], ".rrig") != 0) &&
@ -160,7 +159,6 @@ studiohdr_t* CMDLCache::FindUncachedMDL(CMDLCache* cache, MDLHandle_t handle, st
Error(eDLL_T::ENGINE, NO_ERROR, "Attempted to load old model \"%s\"; replacing with \"%s\".\n", szModelName, ERROR_MODEL);
}
pStudioData->m_Mutex.ReleaseWaiter();
return pStudioHdr;
}
@ -170,7 +168,7 @@ studiohdr_t* CMDLCache::FindUncachedMDL(CMDLCache* cache, MDLHandle_t handle, st
if (!pStudioData->m_MDLCache)
{
pAnimData = (studiohdr_t**)pStudioData->m_pAnimData;
studiohdr_t** pAnimData = (studiohdr_t**)pStudioData->m_pAnimData;
if (pAnimData)
{
pStudioHdr = *pAnimData;
@ -186,7 +184,6 @@ studiohdr_t* CMDLCache::FindUncachedMDL(CMDLCache* cache, MDLHandle_t handle, st
Error(eDLL_T::ENGINE, NO_ERROR, "Model \"%s\" not found; replacing with \"%s\".\n", szModelName, ERROR_MODEL);
}
pStudioData->m_Mutex.ReleaseWaiter();
return pStudioHdr;
}
}
@ -224,7 +221,6 @@ studiohdr_t* CMDLCache::FindUncachedMDL(CMDLCache* cache, MDLHandle_t handle, st
}
}
pStudioData->m_Mutex.ReleaseWaiter();
return pStudioHdr;
}

View File

@ -375,4 +375,17 @@ protected:
}
#define ExecuteOnce( x ) ExecuteNTimes( 1, x )
#define ExecuteOnce( x ) ExecuteNTimes( 1, x )
// Helpers for generating identifiers and string literals at preprocessing time.
// UID_PREFIX is the fixed stem every generated identifier starts with.
#define UID_PREFIX generated_id_
// Pastes its two arguments together exactly as written (operands of ## are not macro-expanded first).
#define UID_CAT1(a,c) a ## c
// Extra level of indirection so the arguments are macro-expanded before UID_CAT1 pastes them.
#define UID_CAT2(a,c) UID_CAT1(a,c)
// Expands to the same thing as UID_CAT2: expand the arguments, then paste them.
#define EXPAND_CONCAT(a,c) UID_CAT1(a,c)
// UNIQUE_ID expands to UID_PREFIX followed by a number: __COUNTER__ when _MSC_VER is defined,
// the current line number otherwise. With the __LINE__ variant, two uses on the same source
// line expand to the same identifier.
#ifdef _MSC_VER
#define UNIQUE_ID UID_CAT2(UID_PREFIX,__COUNTER__)
#else
#define UNIQUE_ID UID_CAT2(UID_PREFIX,__LINE__)
#endif
// _MKSTRING stringizes its argument as written; MKSTRING goes through one more level so the
// argument is macro-expanded before it is turned into a string literal.
#define _MKSTRING(arg) #arg
#define MKSTRING(arg) _MKSTRING(arg)

View File

@ -53,11 +53,6 @@ inline void ThreadPause()
#endif
}
bool ThreadInMainThread();
bool ThreadInRenderThread();
bool ThreadInServerFrameThread();
ThreadId_t ThreadGetCurrentId();
//-----------------------------------------------------------------------------
//
// Interlock methods. These perform very fast atomic thread
@ -65,10 +60,71 @@ ThreadId_t ThreadGetCurrentId();
//
//-----------------------------------------------------------------------------
int32 ThreadInterlockedCompareExchange(LONG volatile* pDest, int32 value, int32 comperand);
bool ThreadInterlockedAssignIf(LONG volatile* p, int32 value, int32 comperand);
int64 ThreadInterlockedCompareExchange64(int64 volatile* pDest, int64 value, int64 comperand);
bool ThreadInterlockedAssignIf64(int64 volatile* pDest, int64 value, int64 comperand);
// Thin wrapper over the _InterlockedCompareExchange intrinsic.
// Passes 'comperand' as the intrinsic's second argument and 'value' as its
// third, and returns whatever the intrinsic returns.
// NOTE(review): the intrinsic's documented parameter order is
// (Destination, Exchange, Comparand), so 'comperand' lands in the Exchange
// slot and 'value' in the Comparand slot - confirm this is intended before
// changing it; CThreadFastMutex::Lock relies on the current behavior.
FORCEINLINE int32 ThreadInterlockedCompareExchange(LONG volatile* pDest, int32 value, int32 comperand)
{
	const LONG nObserved = _InterlockedCompareExchange(pDest, comperand, value);
	return nObserved;
}
// Runs _InterlockedCompareExchange on a 4-byte aligned destination (alignment
// is asserted) with 'comperand' as the second argument and 'value' as the
// third. The intrinsic's return value is converted to bool, so the result is
// true exactly when that return value is non-zero.
// NOTE(review): the result is not compared against either argument, so it does
// not by itself say whether the store happened - confirm callers expect that.
FORCEINLINE bool ThreadInterlockedAssignIf(LONG volatile* p, int32 value, int32 comperand)
{
	Assert((size_t)p % 4 == 0);

	const LONG nObserved = _InterlockedCompareExchange(p, comperand, value);
	return nObserved != 0;
}
// 64-bit counterpart of ThreadInterlockedCompareExchange: forwards to
// _InterlockedCompareExchange64 with 'comperand' as the second argument and
// 'value' as the third, and returns the intrinsic's result unchanged.
FORCEINLINE int64 ThreadInterlockedCompareExchange64(int64 volatile* pDest, int64 value, int64 comperand)
{
	const int64 nObserved = _InterlockedCompareExchange64(pDest, comperand, value);
	return nObserved;
}
// 64-bit counterpart of ThreadInterlockedAssignIf: calls
// _InterlockedCompareExchange64 with 'comperand' as the second argument and
// 'value' as the third. Returns true exactly when the intrinsic's return
// value is non-zero.
FORCEINLINE bool ThreadInterlockedAssignIf64(int64 volatile* pDest, int64 value, int64 comperand)
{
	const int64 nObserved = _InterlockedCompareExchange64(pDest, comperand, value);
	return nObserved != 0;
}
//-----------------------------------------------------------------------------
//
// Thread checking methods.
//
//-----------------------------------------------------------------------------
#ifndef BUILDING_MATHLIB
inline ThreadId_t* g_ThreadMainThreadID = nullptr;
inline ThreadId_t g_ThreadRenderThreadID = NULL;
inline ThreadId_t* g_ThreadServerFrameThreadID = nullptr;
// Returns the identifier of the calling thread.
// Uses GetCurrentThreadId on _WIN32, sys_ppu_thread_get_id on _PS3 and
// pthread_self on POSIX. On any other platform it asserts, breaks into the
// debugger and returns 0.
FORCEINLINE ThreadId_t ThreadGetCurrentId()
{
#if defined( _WIN32 )
	return GetCurrentThreadId();
#elif defined( _PS3 )
	sys_ppu_thread_t ppuThread = 0;
	sys_ppu_thread_get_id(&ppuThread);
	return ppuThread;
#elif defined(POSIX)
	return (ThreadId_t)pthread_self();
#else
	// No known way to query the thread id on this platform.
	Assert(0);
	DebuggerBreak();
	return 0;
#endif
}
// True when the calling thread's id equals the id stored at
// g_ThreadMainThreadID. The pointer is dereferenced without a null check.
FORCEINLINE bool ThreadInMainThread()
{
	const ThreadId_t nCallerId = ThreadGetCurrentId();
	return nCallerId == *g_ThreadMainThreadID;
}
// True when the calling thread's id equals g_ThreadRenderThreadID.
FORCEINLINE bool ThreadInRenderThread()
{
	const ThreadId_t nCallerId = ThreadGetCurrentId();
	return nCallerId == g_ThreadRenderThreadID;
}
// True when the calling thread's id equals the id stored at
// g_ThreadServerFrameThreadID. The pointer is dereferenced without a null check.
FORCEINLINE bool ThreadInServerFrameThread()
{
	const ThreadId_t nCallerId = ThreadGetCurrentId();
	return nCallerId == *g_ThreadServerFrameThreadID;
}
#endif // !BUILDING_MATHLIB
#ifdef _WIN32
#define NOINLINE
@ -204,31 +260,17 @@ typedef CInterlockedIntT<unsigned> CInterlockedUInt;
#ifndef BUILDING_MATHLIB
//=============================================================================
class CThreadFastMutex;
inline CMemory p_MutexInternal_WaitForLock;
inline auto v_MutexInternal_WaitForLock = p_MutexInternal_WaitForLock.RCast<int (*)(CThreadFastMutex* mutex)>();
inline CMemory p_MutexInternal_ReleaseWaiter;
inline auto v_MutexInternal_ReleaseWaiter = p_MutexInternal_ReleaseWaiter.RCast<int (*)(CThreadFastMutex* mutex)>();
inline CMemory p_DeclareCurrentThreadIsMainThread;
inline auto v_DeclareCurrentThreadIsMainThread = p_DeclareCurrentThreadIsMainThread.RCast<ThreadId_t (*)(void)>();
inline ThreadId_t* g_ThreadMainThreadID = nullptr;
inline ThreadId_t g_ThreadRenderThreadID = NULL;
inline ThreadId_t* g_ThreadServerFrameThreadID = nullptr;
#endif // !BUILDING_MATHLIB
///////////////////////////////////////////////////////////////////////////////
class CThreadFastMutex
{
public:
int WaitForLock(void) {
return v_MutexInternal_WaitForLock(this);
}
int ReleaseWaiter(void) {
return v_MutexInternal_ReleaseWaiter(this);
}
int Lock(void);
int Unlock(void);
inline uint32 GetOwnerId(void) const { return m_nOwnerID; }
inline int GetDepth(void) const { return m_nDepth; }
@ -242,25 +284,65 @@ private:
HANDLE m_hSemaphore;
};
///////////////////////////////////////////////////////////////////////////////
// Scoped lock guard. The constructor calls Lock() on the supplied mutex and
// the destructor calls Unlock() on it, so the mutex is released on every path
// that leaves the guard's scope. MUTEX_TYPE must provide Lock() and Unlock().
template <class MUTEX_TYPE = CThreadFastMutex>
class CAutoLockT
{
public:
	// Locks 'lock' for the lifetime of this guard.
	FORCEINLINE CAutoLockT(MUTEX_TYPE& lock)
		: m_lock(lock)
	{
		m_lock.Lock();
	}

	// Same as above for a mutex reached through a const reference; constness
	// is cast away because Lock()/Unlock() are called on the stored reference.
	FORCEINLINE CAutoLockT(const MUTEX_TYPE& lock)
		: m_lock(const_cast<MUTEX_TYPE&>(lock))
	{
		m_lock.Lock();
	}

	// Unlocks the mutex that was locked in the constructor.
	FORCEINLINE ~CAutoLockT()
	{
		m_lock.Unlock();
	}

	// Disallow copying: a copy would call Unlock() a second time on the same
	// mutex. Declared with the injected class name; the template-id spelling
	// is rejected for constructors as of C++20.
	CAutoLockT(const CAutoLockT&) = delete;
	CAutoLockT& operator=(const CAutoLockT&) = delete;

private:
	MUTEX_TYPE& m_lock;
};
///////////////////////////////////////////////////////////////////////////////
// Maps sizeof(mutex) to the mutex class used for locking. The primary template has no
// Type_t member, so AUTO_LOCK does not compile for an object whose size has no
// specialization; the only specialization here is for sizeof(CThreadFastMutex).
template <int size> struct CAutoLockTypeDeducer {};
template <> struct CAutoLockTypeDeducer<sizeof(CThreadFastMutex)> { typedef CThreadFastMutex Type_t; };
// Declares a CAutoLockT< type > local named by UNIQUE_ID, constructed from 'mutex' cast to
// 'const type &'. The mutex is locked at the declaration and unlocked when the local is destroyed.
#define AUTO_LOCK_( type, mutex ) \
CAutoLockT< type > UNIQUE_ID( static_cast<const type &>( mutex ) )
// AUTO_LOCK( mutex ) selects the lock type from sizeof(mutex) through CAutoLockTypeDeducer.
// The __clang__ variant differs only in spelling the dependent type with 'typename'.
#if defined(__clang__)
#define AUTO_LOCK(mutex) \
AUTO_LOCK_(typename CAutoLockTypeDeducer<sizeof(mutex)>::Type_t, mutex)
#else
#define AUTO_LOCK(mutex) \
AUTO_LOCK_(CAutoLockTypeDeducer<sizeof(mutex)>::Type_t, mutex)
#endif
#ifndef BUILDING_MATHLIB
///////////////////////////////////////////////////////////////////////////////
class VThreadTools : public IDetour
{
virtual void GetAdr(void) const
{
LogFunAdr("CThreadFastMutex::WaitForLock", p_MutexInternal_WaitForLock.GetPtr());
LogFunAdr("CThreadFastMutex::ReleaseWaiter", p_MutexInternal_ReleaseWaiter.GetPtr());
LogFunAdr("DeclareCurrentThreadIsMainThread", p_DeclareCurrentThreadIsMainThread.GetPtr());
LogVarAdr("g_ThreadMainThreadID", reinterpret_cast<uintptr_t>(g_ThreadMainThreadID));
LogVarAdr("g_ThreadServerFrameThreadID", reinterpret_cast<uintptr_t>(g_ThreadServerFrameThreadID));
}
virtual void GetFun(void) const
{
p_MutexInternal_WaitForLock = g_GameDll.FindPatternSIMD("48 89 5C 24 ?? 48 89 74 24 ?? 57 48 83 EC 20 48 8B D9 FF 15 ?? ?? ?? ??");
p_MutexInternal_ReleaseWaiter = g_GameDll.FindPatternSIMD("40 53 48 83 EC 20 8B 41 04 48 8B D9 83 E8 01");
p_DeclareCurrentThreadIsMainThread = g_GameDll.FindPatternSIMD("48 83 EC 28 FF 15 ?? ?? ?? ?? 89 05 ?? ?? ?? ?? 48 83 C4 28");
v_MutexInternal_WaitForLock = p_MutexInternal_WaitForLock.RCast<int (*)(CThreadFastMutex*)>(); /*48 89 5C 24 ?? 48 89 74 24 ?? 57 48 83 EC 20 48 8B D9 FF 15 ?? ?? ?? ??*/
v_MutexInternal_ReleaseWaiter = p_MutexInternal_ReleaseWaiter.RCast<int (*)(CThreadFastMutex*)>(); /*40 53 48 83 EC 20 8B 41 04 48 8B D9 83 E8 01*/
v_DeclareCurrentThreadIsMainThread = p_DeclareCurrentThreadIsMainThread.RCast<ThreadId_t(*)(void)>(); /*48 83 EC 28 FF 15 ?? ?? ?? ?? 89 05 ?? ?? ?? ?? 48 83 C4 28 */
}
virtual void GetVar(void) const
@ -275,5 +357,4 @@ class VThreadTools : public IDetour
///////////////////////////////////////////////////////////////////////////////
#endif // !BUILDING_MATHLIB
#endif // THREADTOOLS_H

View File

@ -8,55 +8,98 @@
#include "tier0/threadtools.h"
int32 ThreadInterlockedCompareExchange(LONG volatile* pDest, int32 value, int32 comperand)
#define INIT_SEM_COUNT 0
#define MAX_SEM_COUNT 1
int CThreadFastMutex::Lock(void)
{
return _InterlockedCompareExchange(pDest, comperand, value);
DWORD threadId = GetCurrentThreadId();
LONG result = ThreadInterlockedCompareExchange((volatile LONG*)&m_lAddend, 0, 1);
if (result)
{
if (m_nOwnerID == threadId)
{
result = m_nDepth + 1;
m_nDepth = result;
return result;
}
LONG cycle = 1;
LONG64 delay;
while (true)
{
delay = (10 * cycle);
if (delay)
{
do
{
ThreadPause();
--delay;
} while (delay);
}
result = ThreadInterlockedCompareExchange((volatile LONG*)&m_lAddend, 0, 1);
if (result)
break;
if (++cycle > 5)
{
if (_InterlockedIncrement((volatile LONG*)&m_lAddend) != 1)
{
if (!m_hSemaphore)
{
HANDLE hSemaphore = CreateSemaphoreA(
NULL, INIT_SEM_COUNT, MAX_SEM_COUNT, NULL);
if (ThreadInterlockedCompareExchange64(
(volatile LONG64*)&m_hSemaphore, NULL, (LONG64)hSemaphore))
CloseHandle(hSemaphore);
}
WaitForSingleObject(m_hSemaphore, INFINITE);
}
m_nOwnerID = threadId;
m_nDepth = 1;
return m_nDepth;
}
}
}
m_nDepth = 1;
m_nOwnerID = threadId;
return result;
}
bool ThreadInterlockedAssignIf(LONG volatile* p, int32 value, int32 comperand)
// Releases one level of ownership of the mutex. When the depth reaches zero the owner id is
// cleared and m_lAddend is decremented; if the value before the decrement was not 1, the
// semaphore is released once and the result of ReleaseSemaphore is returned. Otherwise the
// function returns the remaining depth (non-zero case) or the pre-decrement value of m_lAddend.
// NOTE(review): statements using p, pDest, value and comperand (not declared in this function)
// and the nested ThreadInterlocked*/ThreadIn* definitions are interleaved with this body; they
// look like leftovers of removed free functions - confirm and drop them. The comments below
// annotate only the statements that operate on the mutex members.
int CThreadFastMutex::Unlock()
{
Assert((size_t)p % 4 == 0);
return _InterlockedCompareExchange(p, comperand, value);
}
LONG result; // eax
HANDLE SemaphoreA; // rcx
int64 ThreadInterlockedCompareExchange64(int64 volatile* pDest, int64 value, int64 comperand)
{
return _InterlockedCompareExchange64(pDest, comperand, value);
}
// Leave one recursion level.
result = m_nDepth - 1;
m_nDepth = result;
bool ThreadInterlockedAssignIf64(int64 volatile* pDest, int64 value, int64 comperand)
{
return _InterlockedCompareExchange64(pDest, comperand, value);
}
// Only the outermost Unlock (depth now zero) actually gives the mutex up.
if (!result)
{
// Clear the owner, then add 0xFFFFFFFF to m_lAddend; result receives the intrinsic's return value.
m_nOwnerID = 0;
result = _InterlockedExchangeAdd((volatile LONG*)&m_lAddend, 0xFFFFFFFF);
bool ThreadInMainThread()
{
return (ThreadGetCurrentId() == (*g_ThreadMainThreadID));
}
// Lock() increments m_lAddend before waiting on the semaphore, so a value other than 1 here
// presumably means at least one thread is waiting - TODO confirm.
if (result != 1)
{
// No semaphore installed yet: create one and try to install it.
if (!m_hSemaphore)
{
SemaphoreA = CreateSemaphoreA(NULL, INIT_SEM_COUNT, MAX_SEM_COUNT, NULL);
bool ThreadInRenderThread()
{
return (ThreadGetCurrentId() == g_ThreadRenderThreadID);
// A true result from the assign helper leads to closing the semaphore created above.
if (ThreadInterlockedAssignIf64(
(volatile LONG64*)&m_hSemaphore, NULL, (LONG64)SemaphoreA))
CloseHandle(SemaphoreA);
}
// Release the semaphore by a count of one.
return ReleaseSemaphore(m_hSemaphore, 1, NULL);
}
}
return result;
}
bool ThreadInServerFrameThread()
{
return (ThreadGetCurrentId() == (*g_ThreadServerFrameThreadID));
}
ThreadId_t ThreadGetCurrentId()
{
#ifdef _WIN32
return GetCurrentThreadId();
#elif defined( _PS3 )
sys_ppu_thread_t th = 0;
sys_ppu_thread_get_id(&th);
return th;
#elif defined(POSIX)
return (ThreadId_t)pthread_self();
#else
Assert(0);
DebuggerBreak();
return 0;
#endif
}