From 4bb3be2a8e5aa9d05bc4d601f9ffe955fbae1b1d Mon Sep 17 00:00:00 2001 From: Kawe Mazidjatari <48657826+Mauler125@users.noreply.github.com> Date: Mon, 21 Mar 2022 23:13:03 +0100 Subject: [PATCH] Implement CPU utils and FastTimer CPU system utilities and FastTimer (inline) --- r5dev/core/stdafx.h | 8 +- r5dev/tier0/cpu.cpp | 564 +++++++++++++++ r5dev/tier0/cpu.h | 21 + r5dev/tier0/cputopology.cpp | 997 ++++++++++++++++++++++++++ r5dev/tier0/cputopology.h | 38 + r5dev/tier0/fasttimer.cpp | 18 + r5dev/tier0/fasttimer.h | 549 ++++++++++++++ r5dev/tier0/platform.h | 76 ++ r5dev/vproj/dedicated.vcxproj | 7 + r5dev/vproj/dedicated.vcxproj.filters | 21 + r5dev/vproj/gamesdk.vcxproj | 7 + r5dev/vproj/gamesdk.vcxproj.filters | 21 + 12 files changed, 2324 insertions(+), 3 deletions(-) create mode 100644 r5dev/tier0/cpu.cpp create mode 100644 r5dev/tier0/cpu.h create mode 100644 r5dev/tier0/cputopology.cpp create mode 100644 r5dev/tier0/cputopology.h create mode 100644 r5dev/tier0/fasttimer.cpp create mode 100644 r5dev/tier0/fasttimer.h create mode 100644 r5dev/tier0/platform.h diff --git a/r5dev/core/stdafx.h b/r5dev/core/stdafx.h index 7b34fb74..32a6a2b7 100644 --- a/r5dev/core/stdafx.h +++ b/r5dev/core/stdafx.h @@ -5,15 +5,16 @@ #include #include #include - +#include #include #include #include #include +#include #include #include -#include #include +#include #include #include #include @@ -59,8 +60,9 @@ #include "core/assert.h" #include "core/termutil.h" -#include "common/pseudodefs.h" #include "tier0/basetypes.h" +#include "tier0/platform.h" +#include "common/pseudodefs.h" #if !defined(SDKLAUNCHER) && !defined (NETCONSOLE) namespace diff --git a/r5dev/tier0/cpu.cpp b/r5dev/tier0/cpu.cpp new file mode 100644 index 00000000..4f5a7f64 --- /dev/null +++ b/r5dev/tier0/cpu.cpp @@ -0,0 +1,564 @@ +//=============================================================================// +// +// Purpose: +// +// $NoKeywords: $ 
+//=============================================================================//
+#include "core/stdafx.h"
+#include "tier0/cpu.h"
+#include "tier0/cputopology.h"
+#include "tier0/fasttimer.h"
+
+/*******************************************************************************/
+// File-level caches; each one is filled on first use by the accessor that owns it.
+static CPUInformation s_cpuInformation;
+static char s_CpuVendorID[13] = "unknown"; // 12 vendor characters + terminator.
+bool s_bCpuBrandInitialized = false;
+bool s_bCpuVendorIdInitialized = false;
+
+/*******************************************************************************/
+// Raw register values produced by a single CPUID query.
+struct CpuIdResult_t
+{
+    unsigned long eax;
+    unsigned long ebx;
+    unsigned long ecx;
+    unsigned long edx;
+
+    // Clears all four registers.
+    void Reset(void)
+    {
+        eax = ebx = ecx = edx = 0;
+    }
+};
+
+// Maps one CPUID leaf 2 cache descriptor byte to a cache size in KiB.
+struct IntelCacheDesc_t
+{
+    uint8_t nDesc;
+    uint16_t nCacheSize;
+};
+
+/*******************************************************************************/
+// Overlays the three brand-string CPUID results with their text form.
+// NOTE(review): 'name' is one byte longer than the register data only when
+// 'unsigned long' is 4 bytes (true for the Windows targets this file builds on).
+union CpuBrand_t
+{
+    CpuIdResult_t cpuid[3];
+    char name[49];
+};
+CpuBrand_t s_CpuBrand;
+
+/*******************************************************************************/
+// Leaf 2 descriptors that describe an L1 data cache.
+static const IntelCacheDesc_t s_IntelL1DataCacheDesc[] = {
+    { 0xA, 8 },
+    { 0xC, 16 },
+    { 0xD, 16 },
+    { 0x2C, 32 },
+    { 0x30, 32 },
+    { 0x60, 16 },
+    { 0x66, 8 },
+    { 0x67, 16 },
+    { 0x68, 32 }
+};
+
+
+// Leaf 2 descriptors that describe an L2 cache.
+static const IntelCacheDesc_t s_IntelL2DataCacheDesc[] =
+{
+    { 0x21, 256 },
+    { 0x39, 128 },
+    { 0x3a, 192 },
+    { 0x3b, 128 },
+    { 0x3c, 256 },
+    { 0x3D, 384 },
+    { 0x3E, 512 },
+    { 0x41, 128 },
+    { 0x42, 256 },
+    { 0x43, 512 },
+    { 0x44, 1024 },
+    { 0x45, 2048 },
+    { 0x48, 3 * 1024 },
+    { 0x4e, 6 * 1024 },
+    { 0x78, 1024 },
+    { 0x79, 128 },
+    { 0x7a, 256 },
+    { 0x7b, 512 },
+    { 0x7c, 1024 },
+    { 0x7d, 2048 },
+    { 0x7f, 512 },
+    { 0x82, 256 },
+    { 0x83, 512 },
+    { 0x84, 1024 },
+    { 0x85, 2048 },
+    { 0x86, 512 },
+    { 0x87, 1024 }
+};
+
+
+// Leaf 2 descriptors that describe an L3 cache.
+static const IntelCacheDesc_t s_IntelL3DataCacheDesc[] = {
+    { 0x22, 512 },
+    { 0x23, 1024 },
+    { 0x25, 2 * 1024 },
+    { 0x29, 4 * 1024 },
+    { 0x46, 4 * 1024 },
+    { 0x47, 8 * 1024 },
+    // Descriptor 0x49 was left out by the original author.
+    // NOTE(review): presumably because its meaning (L2 vs. L3) depends on the
+    // processor family - confirm against the Intel descriptor table before adding it.
+    { 0x4a, 6 * 1024 },
+    { 0x4b, 8 * 1024 },
+    { 0x4c, 12 * 1024 },
+    { 0x4d, 16 * 1024 }, // Was '16 * 1014' (typo).
+    { 0xD0, 512 },
+    { 0xD1, 1024 },
+    { 0xD2, 2048 },
+    { 0xD6, 1024 },
+    { 0xD7, 2048 },
+    { 0xD8, 4096 },
+    { 0xDC, 1536 },
+    { 0xDD, 3 * 1024 },
+    { 0xDE, 6 * 1024 },
+    { 0xE2, 2048 },
+    { 0xE3, 4096 },
+    { 0xE4, 8 * 1024 },
+    { 0xEA, 12 * 1024 },
+    { 0xEB, 18 * 1024 },
+    { 0xEC, 24 * 1024 }
+};
+
+/*******************************************************************************/
+// Executes CPUID for 'function' and stores the four registers in 'out'.
+// The intrinsic cannot fail, so this always returns true; the bool exists so
+// the value-returning overload can treat the query as fallible.
+static bool cpuid(unsigned long function, CpuIdResult_t& out)
+{
+    int pCPUInfo[4];
+    __cpuid(pCPUInfo, (int)function);
+    out.eax = pCPUInfo[0];
+    out.ebx = pCPUInfo[1];
+    out.ecx = pCPUInfo[2];
+    out.edx = pCPUInfo[3];
+    return true;
+}
+
+
+// Executes CPUID for 'function' with 'subfunction' in ECX and stores the four
+// registers in 'out'. Always returns true.
+static bool cpuidex(unsigned long function, unsigned long subfunction, CpuIdResult_t& out)
+{
+    int pCPUInfo[4];
+    __cpuidex(pCPUInfo, (int)function, (int)subfunction);
+    out.eax = pCPUInfo[0];
+    out.ebx = pCPUInfo[1];
+    out.ecx = pCPUInfo[2];
+    out.edx = pCPUInfo[3];
+    // This used to return false, which made the value-returning overload below
+    // discard every result and hand back all-zero registers.
+    return true;
+}
+
+
+// Value-returning form of cpuid(); yields zeroed registers if the query fails.
+static CpuIdResult_t cpuid(unsigned long function)
+{
+    CpuIdResult_t out;
+    if (!cpuid(function, out))
+    {
+        out.Reset();
+    }
+    return out;
+}
+
+// Value-returning form of cpuidex(); yields zeroed registers if the query fails.
+static CpuIdResult_t cpuidex(unsigned long function, unsigned long subfunction)
+{
+    CpuIdResult_t out;
+    if (!cpuidex(function, subfunction, out))
+    {
+        out.Reset();
+    }
+    return out;
+}
+
+/*******************************************************************************/
+// Each Check*Technology function below returns true when the corresponding
+// CPUID feature flag is set on the executing processor.
+static bool CheckSSETechnology(void)
+{
+    return (cpuid(1).edx & 0x2000000L) != 0; // bit 25 of EDX.
+}
+
+static bool CheckSSE2Technology(void)
+{
+    return (cpuid(1).edx & 0x04000000) != 0; // bit 26 of EDX.
+}
+
+bool CheckSSE3Technology(void)
+{
+    return (cpuid(1).ecx & 0x00000001) != 0; // bit 0 of ECX.
+}
+
+bool CheckSSSE3Technology(void)
+{
+    // SSSE 3 is implemented by both Intel and AMD.
+    // Detection is done the same way for both vendors.
+    return (cpuid(1).ecx & (1 << 9)) != 0; // bit 9 of ECX.
+}
+
+bool CheckSSE41Technology(void)
+{
+    // SSE 4.1 is implemented by both Intel and AMD.
+    // Detection is done the same way for both vendors.
+
+    return (cpuid(1).ecx & (1 << 19)) != 0; // bit 19 of ECX.
+}
+
+bool CheckSSE42Technology(void)
+{
+    // SSE 4.2 is reported through the same feature bit by every vendor that
+    // implements it, so no vendor check is applied here. This also keeps the
+    // result consistent with the m_bSSE42 flag that GetCPUInformation derives
+    // from the same bit.
+    return (cpuid(1).ecx & (1 << 20)) != 0; // bit 20 of ECX.
+}
+
+
+bool CheckSSE4aTechnology(void)
+{
+    // SSE 4a is an AMD-only feature.
+
+    const char* pchVendor = GetProcessorVendorId();
+    if (0 != _stricmp(pchVendor, "AuthenticAMD"))
+    {
+        return false;
+    }
+
+    // The flag lives in the extended feature leaf 0x80000001 (bit 6 of ECX);
+    // bit 6 of the standard leaf 1 ECX is an unrelated feature.
+    if (cpuid(0x80000000).eax < 0x80000001UL)
+    {
+        return false;
+    }
+
+    return (cpuid(0x80000001).ecx & (1 << 6)) != 0;
+}
+
+
+static bool Check3DNowTechnology(void)
+{
+    if (cpuid(0x80000000).eax > 0x80000000L)
+    {
+        // 3DNow! is bit 31 of EDX in the extended feature leaf (not EAX).
+        return (cpuid(0x80000001).edx & (1UL << 31)) != 0;
+    }
+    return false;
+}
+
+static bool CheckCMOVTechnology(void)
+{
+    return (cpuid(1).edx & (1 << 15)) != 0;
+}
+
+static bool CheckFCMOVTechnology(void)
+{
+    return (cpuid(1).edx & (1 << 16)) != 0;
+}
+
+static bool CheckRDTSCTechnology(void)
+{
+    return (cpuid(1).edx & 0x10) != 0;
+}
+
+// Return the Processor's vendor identification string, or "Generic_x86" if it doesn't exist on this CPU.
+const char* GetProcessorVendorId(void)
+{
+    // The result is computed once and served from s_CpuVendorID afterwards.
+    if (s_bCpuVendorIdInitialized)
+    {
+        return s_CpuVendorID;
+    }
+
+    s_bCpuVendorIdInitialized = true;
+
+    CpuIdResult_t cpuid0 = cpuid(0);
+
+    memset(s_CpuVendorID, 0, sizeof(s_CpuVendorID));
+
+    if (!cpuid0.eax)
+    {
+        static const char szGeneric[] = "Generic_x86";
+        static_assert(sizeof(szGeneric) <= sizeof(s_CpuVendorID), "vendor id buffer too small");
+        memcpy(s_CpuVendorID, szGeneric, sizeof(szGeneric));
+    }
+    else
+    {
+        // The vendor string is the 12 bytes of EBX, EDX, ECX (in that order).
+        // Copy exactly 4 bytes per register regardless of sizeof(unsigned long).
+        const uint32_t nVendor[3] = { (uint32_t)cpuid0.ebx, (uint32_t)cpuid0.edx, (uint32_t)cpuid0.ecx };
+        static_assert(sizeof(nVendor) < sizeof(s_CpuVendorID), "vendor id buffer too small");
+        memcpy(s_CpuVendorID, nVendor, sizeof(nVendor));
+    }
+
+    return s_CpuVendorID;
+}
+
+// Returns the processor brand string, or an empty string when the CPU does not
+// expose one. The result is computed once and cached in s_CpuBrand.
+const char* GetProcessorBrand(void)
+{
+    if (s_bCpuBrandInitialized)
+    {
+        return s_CpuBrand.name;
+    }
+    s_bCpuBrandInitialized = true;
+
+    memset(&s_CpuBrand, 0, sizeof(s_CpuBrand));
+
+    // The brand string is read from extended leaves 0x80000002..0x80000004.
+    // Those leaves are not vendor specific, so the previous "GenuineIntel"
+    // gate (which left the brand empty on every other vendor) is gone; the
+    // maximum-extended-leaf check is the only requirement.
+    if (cpuid(0x80000000).eax >= 0x80000004)
+    {
+        s_CpuBrand.cpuid[0] = cpuid(0x80000002);
+        s_CpuBrand.cpuid[1] = cpuid(0x80000003);
+        s_CpuBrand.cpuid[2] = cpuid(0x80000004);
+    }
+    return s_CpuBrand.name;
+}
+
+/*******************************************************************************/
+// Returns non-zero if Hyper-Threading Technology is supported on the processors and zero if not.
+// If it's supported, it does not mean that it's been enabled. So we test another flag to see if it's enabled
+// See Intel Processor Identification and the CPUID instruction Application Note 485.
+// http://www.intel.com/Assets/PDF/appnote/241618.pdf
+static bool HTSupported(void)
+{
+    enum {
+        HT_BIT = 0x10000000,// EDX[28] - Bit 28 set indicates Hyper-Threading Technology is supported in hardware.
+        FAMILY_ID = 0x0f00, // EAX[11:8] - Bit 11 thru 8 contains family processor id.
+        EXT_FAMILY_ID = 0x0f00000, // EAX[23:20] - Bit 23 thru 20 contains extended family processor id.
+        FAMILY_ID_386 = 0x0300,
+        FAMILY_ID_486 = 0x0400, // EAX[8:12] - 486, 487 and overdrive.
+        FAMILY_ID_PENTIUM = 0x0500, // Pentium, Pentium OverDrive 60 - 200.
+        FAMILY_ID_PENTIUM_PRO = 0x0600, // P Pro, P II, P III, P M, Celeron M, Core Duo, Core Solo, Core2 Duo, Core2 Extreme, P D, Xeon model F,
+        // also 45-nm : Intel Atom, Core i7, Xeon MP ; see Intel Processor Identification and the CPUID instruction pg 20,21.
+        FAMILY_ID_EXTENDED = 0x0F00 // P IV, Xeon, Celeron D, P D, .
+    };
+
+    // This works on both newer AMD and Intel CPUs.
+    CpuIdResult_t cpuid1 = cpuid(1);
+
+    // Previously, we detected P4 specifically; now, we detect GenuineIntel with HT enabled in general.
+    // if (((cpuid1.eax & FAMILY_ID) == FAMILY_ID_EXTENDED) || (cpuid1.eax & EXT_FAMILY_ID))
+
+    // Check to see if this is an Intel Processor with HT or CMT capability , and if HT/CMT is enabled.
+    // ddk: This code is actually correct: see example code at http://software.intel.com/en-us/articles/multi-core-detect/
+    return (cpuid1.edx & HT_BIT) != 0 && // Genuine Intel Processor with Hyper-Threading Technology implemented.
+        ((cpuid1.ebx >> 16) & 0xFF) > 1; // Hyper-Threading OR Core Multi-Processing has been enabled.
+}
+
+// Returns the number of logical processors per physical processors.
+static uint8_t LogicalProcessorsPerPackage(void)
+{
+    // EBX[23:16] indicate number of logical processors per package.
+    const unsigned NUM_LOGICAL_BITS = 0x00FF0000;
+
+    if (!HTSupported())
+    {
+        return 1;
+    }
+
+    return (uint8_t)((cpuid(1).ebx & NUM_LOGICAL_BITS) >> 16);
+}
+
+// Measure the processor clock speed by sampling the cycle count, waiting
+// for some fraction of a second, then measuring the elapsed number of cycles.
+// Returns the estimated number of cycles per second.
+static int64_t CalculateClockSpeed(void)
+{
+    LARGE_INTEGER waitTime, startCount, curCount;
+    CCycleCount start, end;
+
+    // Take 1/32 of a second for the measurement.
+    QueryPerformanceFrequency(&waitTime);
+    int scale = 5;
+    waitTime.QuadPart >>= scale;
+
+    QueryPerformanceCounter(&startCount);
+    start.Sample();
+    do
+    {
+        QueryPerformanceCounter(&curCount);
+    } while (curCount.QuadPart - startCount.QuadPart < waitTime.QuadPart);
+    end.Sample();
+
+    // Scale the 1/32 s sample back up to a full second.
+    return (end.GetLongCycles() - start.GetLongCycles()) << scale;
+}
+
+// Looks up descriptor byte 'nDesc' in the table 'pDesc' (of 'nDescCount'
+// entries). On a match, writes the cache size to 'nCache' and the descriptor
+// to 'nCacheDesc'; otherwise leaves both outputs untouched.
+static void FindIntelCacheDesc(uint8_t nDesc, const IntelCacheDesc_t* pDesc, int nDescCount, uint32_t& nCache, uint32_t& nCacheDesc)
+{
+    for (int i = 0; i < nDescCount; ++i)
+    {
+        // Index the table: the previous code compared pDesc->nDesc on every
+        // iteration and therefore only ever looked at the first entry.
+        if (pDesc[i].nDesc == nDesc)
+        {
+            nCache = pDesc[i].nCacheSize;
+            nCacheDesc = nDesc;
+            break;
+        }
+    }
+}
+
+// See "Output of the CPUID instruction" from Intel, page 26.
+// Decodes the four descriptor bytes packed in one CPUID leaf 2 register and
+// records any L1/L2/L3 data cache they describe in s_cpuInformation.
+static void InterpretIntelCacheDescriptors(uint32_t nPackedDesc)
+{
+    if (nPackedDesc & 0x80000000)
+    {
+        return; // This is a wrong descriptor.
+    }
+    for (int i = 0; i < 4; ++i)
+    {
+        const uint8_t nDesc = (uint8_t)(nPackedDesc & 0xFF);
+        FindIntelCacheDesc(nDesc, s_IntelL1DataCacheDesc, ARRAYSIZE(s_IntelL1DataCacheDesc), s_cpuInformation.m_nL1CacheSizeKb, s_cpuInformation.m_nL1CacheDesc);
+        FindIntelCacheDesc(nDesc, s_IntelL2DataCacheDesc, ARRAYSIZE(s_IntelL2DataCacheDesc), s_cpuInformation.m_nL2CacheSizeKb, s_cpuInformation.m_nL2CacheDesc);
+        FindIntelCacheDesc(nDesc, s_IntelL3DataCacheDesc, ARRAYSIZE(s_IntelL3DataCacheDesc), s_cpuInformation.m_nL3CacheSizeKb, s_cpuInformation.m_nL3CacheDesc);
+        nPackedDesc >>= 8;
+    }
+}
+
+
+// Returns the cached description of the host CPU (clock speed, processor
+// counts, feature flags, vendor/brand strings and cache sizes). The structure
+// is filled on the first call; m_Size doubles as the "already initialized"
+// marker, so later calls return the cached data without re-probing.
+const CPUInformation& GetCPUInformation(void)
+{
+    CPUInformation& pi = s_cpuInformation;
+    // Has the structure already been initialized and filled out?
+    if (pi.m_Size == sizeof(pi))
+    {
+        return pi;
+    }
+
+    // Redundant, but just in case the user somehow messes with the size.
+    memset(&pi, 0x0, sizeof(pi));
+
+    // Fill out the structure, and return it:
+    pi.m_Size = sizeof(pi);
+
+    // Grab the processor frequency:
+    pi.m_Speed = CalculateClockSpeed();
+
+    // Get the logical and physical processor counts:
+    // (this CPUID-derived value is superseded by GetSystemInfo() below).
+    pi.m_nLogicalProcessors = LogicalProcessorsPerPackage();
+
+    bool bAuthenticAMD = (0 == _stricmp(GetProcessorVendorId(), "AuthenticAMD"));
+    bool bGenuineIntel = !bAuthenticAMD && (0 == _stricmp(GetProcessorVendorId(), "GenuineIntel"));
+
+    SYSTEM_INFO si;
+    ZeroMemory(&si, sizeof(si));
+
+    GetSystemInfo(&si);
+
+    // Fixing: si.dwNumberOfProcessors is the number of logical processors according to experiments on i7, P4 and a DirectX sample (Aug'09).
+    // This is contrary to MSDN documentation on GetSystemInfo().
+    pi.m_nLogicalProcessors = si.dwNumberOfProcessors;
+
+    if (bAuthenticAMD)
+    {
+        // Quick fix for AMD Phenom: it reports 3 logical cores and 4 physical cores;
+        // No AMD CPUs by the end of 2009 have HT, so we'll override HT detection here.
+        pi.m_nPhysicalProcessors = pi.m_nLogicalProcessors;
+    }
+    else
+    {
+        CpuTopology topo;
+        pi.m_nPhysicalProcessors = topo.NumberOfSystemCores();
+    }
+
+    // Make sure I always report at least one, when running WinXP with the /ONECPU switch,
+    // it likes to report 0 processors for some reason.
+    if (pi.m_nPhysicalProcessors == 0 && pi.m_nLogicalProcessors == 0)
+    {
+        Assert(!"Missing CPU detection code for this processor.");
+        pi.m_nPhysicalProcessors = 1;
+        pi.m_nLogicalProcessors = 1;
+    }
+
+    CpuIdResult_t cpuid0 = cpuid(0);
+    if (cpuid0.eax >= 1)
+    {
+        CpuIdResult_t cpuid1 = cpuid(1);
+        uint32_t bFPU = cpuid1.edx & 1; // This should always be on on anything we support.
+        // Determine Processor Features:
+        pi.m_bRDTSC = (cpuid1.edx >> 4) & 1;
+        pi.m_bCMOV = (cpuid1.edx >> 15) & 1;
+        pi.m_bFCMOV = (pi.m_bCMOV && bFPU) ? 1 : 0;
+        pi.m_bMMX = (cpuid1.edx >> 23) & 1;
+        pi.m_bSSE = (cpuid1.edx >> 25) & 1;
+        pi.m_bSSE2 = (cpuid1.edx >> 26) & 1;
+        pi.m_bSSE3 = cpuid1.ecx & 1;
+        pi.m_bSSSE3 = (cpuid1.ecx >> 9) & 1;
+        pi.m_bSSE4a = CheckSSE4aTechnology();
+        pi.m_bSSE41 = (cpuid1.ecx >> 19) & 1;
+        pi.m_bSSE42 = (cpuid1.ecx >> 20) & 1;
+        pi.m_b3DNow = Check3DNowTechnology();
+        pi.m_bAVX = (cpuid1.ecx >> 28) & 1;
+        pi.m_szProcessorID = (char*)GetProcessorVendorId();
+        pi.m_szProcessorBrand = (char*)GetProcessorBrand();
+        pi.m_bHT = (pi.m_nPhysicalProcessors < pi.m_nLogicalProcessors); //HTSupported();
+
+        pi.m_nModel = cpuid1.eax; // Full CPU model info.
+        pi.m_nFeatures[0] = cpuid1.edx; // x87+ features.
+        pi.m_nFeatures[1] = cpuid1.ecx; // sse3+ features.
+        pi.m_nFeatures[2] = cpuid1.ebx; // Some additional features.
+
+        if (bGenuineIntel)
+        {
+            if (cpuid0.eax >= 4)
+            {
+                // We have CPUID.4, use it to find all the cache parameters.
+                const uint32_t nCachesToQuery = 4; // Level 0 is not used.
+                uint32_t nCacheSizeKiB[nCachesToQuery];
+                for (uint32_t i = 0; i < nCachesToQuery; ++i)
+                {
+                    nCacheSizeKiB[i] = 0;
+                }
+                for (unsigned long nSub = 0; nSub < 1024; ++nSub)
+                {
+                    CpuIdResult_t cpuid4 = cpuidex(4, nSub);
+                    uint32_t nCacheType = cpuid4.eax & 0x1F;
+                    if (nCacheType == 0)
+                    {
+                        // No more caches.
+                        break;
+                    }
+                    if (nCacheType & 1)
+                    {
+                        // This cache includes data cache: it's either data or unified. Instruction cache type is 2.
+                        uint32_t nCacheLevel = (cpuid4.eax >> 5) & 7;
+                        if (nCacheLevel < nCachesToQuery)
+                        {
+                            // EBX[31:22] = ways - 1, EBX[21:12] = partitions - 1,
+                            // EBX[11:0] = line size - 1, ECX = sets - 1.
+                            // The masks cover the full field widths (10/10/12 bits) and the
+                            // product is formed in 64 bits so it cannot wrap.
+                            uint64_t nCacheWays = 1 + ((cpuid4.ebx >> 22) & 0x3FF);
+                            uint64_t nCachePartitions = 1 + ((cpuid4.ebx >> 12) & 0x3FF);
+                            uint64_t nCacheLineSize = 1 + (cpuid4.ebx & 0xFFF);
+                            uint64_t nCacheSets = 1 + (uint64_t)(uint32_t)cpuid4.ecx;
+                            uint64_t nCacheSizeBytes = nCacheWays * nCachePartitions * nCacheLineSize * nCacheSets;
+                            nCacheSizeKiB[nCacheLevel] = (uint32_t)(nCacheSizeBytes >> 10);
+                        }
+                    }
+                }
+
+                pi.m_nL1CacheSizeKb = nCacheSizeKiB[1];
+                pi.m_nL2CacheSizeKb = nCacheSizeKiB[2];
+                pi.m_nL3CacheSizeKb = nCacheSizeKiB[3];
+            }
+            else if (cpuid0.eax >= 2)
+            {
+                // Get the cache.
+                // The low byte of EAX is the number of times leaf 2 must be queried.
+                CpuIdResult_t cpuid2 = cpuid(2);
+                for (int i = (cpuid2.eax & 0xFF); i-- > 0; )
+                {
+                    InterpretIntelCacheDescriptors(cpuid2.eax & ~0xFF);
+                    InterpretIntelCacheDescriptors(cpuid2.ebx);
+                    InterpretIntelCacheDescriptors(cpuid2.ecx);
+                    InterpretIntelCacheDescriptors(cpuid2.edx);
+                    cpuid2 = cpuid(2); // Read the next.
+                }
+            }
+        }
+    }
+
+    CpuIdResult_t cpuid0ex = cpuid(0x80000000);
+    if (bAuthenticAMD)
+    {
+        if (cpuid0ex.eax >= 0x80000005)
+        {
+            CpuIdResult_t cpuid5ex = cpuid(0x80000005);
+            pi.m_nL1CacheSizeKb = cpuid5ex.ecx >> 24;
+            pi.m_nL1CacheDesc = cpuid5ex.ecx & 0xFFFFFF;
+        }
+        if (cpuid0ex.eax >= 0x80000006)
+        {
+            CpuIdResult_t cpuid6ex = cpuid(0x80000006);
+            pi.m_nL2CacheSizeKb = cpuid6ex.ecx >> 16;
+            pi.m_nL2CacheDesc = cpuid6ex.ecx & 0xFFFF;
+            pi.m_nL3CacheSizeKb = (cpuid6ex.edx >> 18) * 512; // EDX[31:18] is in 512 KiB units.
+            pi.m_nL3CacheDesc = cpuid6ex.edx & 0xFFFF;
+        }
+    }
+    else if (bGenuineIntel)
+    {
+        if (cpuid0ex.eax >= 0x80000006)
+        {
+            // Make sure we got the L2 cache info right.
+            pi.m_nL2CacheSizeKb = (cpuid(0x80000006).ecx >> 16);
+        }
+    }
+    return pi;
+}
diff --git a/r5dev/tier0/cpu.h b/r5dev/tier0/cpu.h
new file mode 100644
index 00000000..0543d66c
--- /dev/null
+++ b/r5dev/tier0/cpu.h
@@ -0,0 +1,21 @@
+//=============================================================================//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//=============================================================================//
+#ifndef CPU_H
+#define CPU_H
+
+bool CheckSSE3Technology(void);
+bool CheckSSSE3Technology(void);
+bool CheckSSE41Technology(void);
+bool CheckSSE42Technology(void);
+bool CheckSSE4aTechnology(void);
+
+const char* GetProcessorVendorId(void);
+const char* GetProcessorBrand(void);
+
+const CPUInformation& GetCPUInformation(void);
+
+#endif // CPU_H
diff --git a/r5dev/tier0/cputopology.cpp b/r5dev/tier0/cputopology.cpp
new file mode 100644
index 00000000..89f4c059
--- /dev/null
+++ b/r5dev/tier0/cputopology.cpp
@@ -0,0 +1,997 @@
+//-------------------------------------------------------------------------------------
+// CpuTopology.cpp
+//
+// CpuTopology class implementation.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+#include "core/stdafx.h"
+
+#if defined(_WIN32) && !defined(_X360) && !defined(_PS3)
+#include "tier0/cputopology.h"
+
+//---------------------------------------------------------------------------------
+// Name: ICpuTopology
+// Desc: Specifies the interface that each class that provides an implementation
+//       for extracting cpu topology must conform to. This is the Implementor
+//       class in the traditional Bridge Pattern.
+//--------------------------------------------------------------------------------- +class ICpuTopology +{ +public: + virtual ~ICpuTopology() + { + } + virtual BOOL IsDefaultImpl() const = 0; + virtual DWORD NumberOfProcessCores() const = 0; + virtual DWORD NumberOfSystemCores() const = 0; + virtual DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const = 0; +}; + + +namespace +{ +/////////////////////////////////////////////////////////////////////////////////// +// Local Class Definitions +/////////////////////////////////////////////////////////////////////////////////// + +//--------------------------------------------------------------------------------- +// Name: DefaultImpl +// Desc: Provides a default implementation for the ICpuTopology interface when +// GetLogicalProcessorInformation and CPUID are not supported for whatever +// reason. This is a ConcreteImplementor class in the traditional Bridge +// Pattern. +//--------------------------------------------------------------------------------- +class DefaultImpl : public ICpuTopology +{ +public: + //----------------------------------------------------------------------------- + // DefaultImpl::IsDefaultImpl + //----------------------------------------------------------------------------- + /*virtual*/ BOOL IsDefaultImpl() const + { + return TRUE; + } + + //----------------------------------------------------------------------------- + // DefaultImpl::NumberOfProcessCores + //----------------------------------------------------------------------------- + /*virtual*/ DWORD NumberOfProcessCores() const + { + return 1; + } + + //----------------------------------------------------------------------------- + // DefaultImpl::IsNumberOfSystemCores + //----------------------------------------------------------------------------- + /*virtual*/ DWORD NumberOfSystemCores() const + { + return 1; + } + + //----------------------------------------------------------------------------- + // DefaultImpl::CoreAffinityMask + 
//----------------------------------------------------------------------------- + /*virtual*/ DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const + { + DWORD_PTR coreAffinity = 0; + if( 1 == coreIdx ) + { + DWORD_PTR dwSystemAffinity; + GetProcessAffinityMask( GetCurrentProcess(), &coreAffinity, &dwSystemAffinity ); + } + return coreAffinity; + } +}; + +//--------------------------------------------------------------------------------- +// Name: GlpiImpl +// Desc: Provides the GetLogicalProcessorInformation implementation for the +// ICpuTopology interface. This is a ConcreteImplementor class in the +// traditional Bridge Pattern. +//--------------------------------------------------------------------------------- +class GlpiImpl : public ICpuTopology +{ +public: + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::GlpiImpl + // Desc: Initializes the internal structures/data with information retrieved + // from a call to GetLogicalProcessorInformation. 
+ //----------------------------------------------------------------------------- + GlpiImpl() : m_pSlpi( NULL ), + m_nItems( 0 ) + { + _ASSERT( IsSupported() ); + + GlpiFnPtr pGlpi = GetGlpiFn_(); + _ASSERT( pGlpi ); + + DWORD cbBuffer = 0; + pGlpi( 0, &cbBuffer ); + + m_pSlpi = ( SYSTEM_LOGICAL_PROCESSOR_INFORMATION* )malloc( cbBuffer ); + pGlpi( m_pSlpi, &cbBuffer ); + m_nItems = cbBuffer / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ); + } + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::~GlpiImpl + //----------------------------------------------------------------------------- + /*virtual*/ ~GlpiImpl() + { + free( m_pSlpi ); + m_pSlpi = 0; + m_nItems = 0; + } + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::IsDefaultImpl + //----------------------------------------------------------------------------- + /*virtual*/ BOOL IsDefaultImpl() const + { + return FALSE; + } + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::NumberOfProcessCores + // Desc: Gets the total number of physical processor cores available to the + // current process. + //----------------------------------------------------------------------------- + /*virtual*/ DWORD NumberOfProcessCores() const + { + DWORD_PTR dwProcessAffinity, dwSystemAffinity; + GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity ); + + DWORD nCores = 0; + for( DWORD i = 0; i < m_nItems; ++i ) + { + if( ( RelationProcessorCore == m_pSlpi[i].Relationship ) && + ( m_pSlpi[i].ProcessorMask & dwProcessAffinity ) ) + { + ++nCores; + } + } + return nCores; + } + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::NumberOfSystemCores + // Desc: Gets the total number of physical processor cores enabled on the + // system. 
+ //----------------------------------------------------------------------------- + /*virtual*/ DWORD NumberOfSystemCores() const + { + DWORD nCores = 0; + for( DWORD i = 0; i < m_nItems; ++i ) + { + if( RelationProcessorCore == m_pSlpi[i].Relationship ) + ++nCores; + } + return nCores; + } + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::CoreAffinityMask + // Desc: Gets an affinity mask that corresponds to the requested processor + // core. + //----------------------------------------------------------------------------- + /*virtual*/ DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const + { + DWORD_PTR dwProcessAffinity, dwSystemAffinity; + GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity ); + + for( DWORD i = 0; i < m_nItems; ++i ) + { + if( RelationProcessorCore == m_pSlpi[i].Relationship ) + { + if( !coreIdx-- ) + { + return m_pSlpi[i].ProcessorMask & dwProcessAffinity; + } + } + } + return 0; + } + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::IsSupported + //----------------------------------------------------------------------------- + static BOOL IsSupported() + { + return NULL != GetGlpiFn_(); + } + +private: + // GetLogicalProcessorInformation function pointer + typedef BOOL( WINAPI* GlpiFnPtr )( +SYSTEM_LOGICAL_PROCESSOR_INFORMATION*, +PDWORD +); + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::VerifyGlpiFn_ + // Desc: Gets a pointer to the GetLogicalProcessorInformation function only if + // it is supported on the current platform. + // GetLogicalProcessorInformation is supported on Windows Server 2003 and + // XP64, however there is a bug with the implementation. Therefore, only + // GetLogicalProcessorInformation on Windows Vista is supported in this + // sample. 
+ //----------------------------------------------------------------------------- + static GlpiFnPtr VerifyGlpiFn_() + { + // VerifyVersionInfo function pointer + typedef BOOL ( WINAPI* VviFnPtr )( LPOSVERSIONINFOEX, + DWORD, + DWORDLONG ); + + HMODULE hMod = GetModuleHandle( TEXT( "kernel32" ) ); +#ifdef _UNICODE + VviFnPtr pVvi = (VviFnPtr) GetProcAddress( hMod, "VerifyVersionInfoW" ); + #else + VviFnPtr pVvi = ( VviFnPtr )GetProcAddress( hMod, "VerifyVersionInfoA" ); +#endif + GlpiFnPtr pGlpi = NULL; + + if( pVvi ) + { + // VerSetConditionMask function pointer + typedef ULONGLONG ( WINAPI* VscmFnPtr )( ULONGLONG, + DWORD, + BYTE ); + + VscmFnPtr pVscm = ( VscmFnPtr )GetProcAddress( hMod, "VerSetConditionMask" ); + + _ASSERT( pVscm ); + + // Check for Windows Vista + OSVERSIONINFOEX osvi = { sizeof( OSVERSIONINFOEX ) }; + osvi.dwMajorVersion = 6; + osvi.dwMinorVersion = 0; + osvi.wServicePackMajor = 0; + osvi.wServicePackMinor = 0; + + ULONGLONG dwlMask = 0; + dwlMask = pVscm( dwlMask, VER_MAJORVERSION, VER_GREATER_EQUAL ); + dwlMask = pVscm( dwlMask, VER_MINORVERSION, VER_GREATER_EQUAL ); + dwlMask = pVscm( dwlMask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL ); + dwlMask = pVscm( dwlMask, VER_SERVICEPACKMINOR, VER_GREATER_EQUAL ); + + if( pVvi( &osvi, VER_MAJORVERSION + | VER_MINORVERSION + | VER_SERVICEPACKMAJOR + | VER_SERVICEPACKMINOR, + dwlMask ) ) + { + pGlpi = ( GlpiFnPtr )GetProcAddress( hMod, "GetLogicalProcessorInformation" ); + _ASSERT( pGlpi ); + } + } + + return pGlpi; + + } + + //----------------------------------------------------------------------------- + // Name: GlpiImpl::GetGlpiFn_ + // Desc: Gets a cached pointer to the GetLogicalProcessorInformation function. 
+ //----------------------------------------------------------------------------- + static GlpiFnPtr GetGlpiFn_() + { + static GlpiFnPtr pGlpi = VerifyGlpiFn_(); + return pGlpi; + } + + // Private Members + SYSTEM_LOGICAL_PROCESSOR_INFORMATION* m_pSlpi; + DWORD m_nItems; +}; + +//--------------------------------------------------------------------------------- +// Name: ApicExtractor +// Desc: A utility class that provides an interface for decoding a processor +// APIC ID. An APIC ID is an 8-bit identifier given to each logical +// processor on system boot and can be retrieved by the CPUID instruction. +// Each APIC ID is composed of a PACKAGE_ID, CORE_ID and SMT_ID that describe +// the relationship of a logical processor within the processor topology of +// the system. +//--------------------------------------------------------------------------------- +class ApicExtractor +{ +public: + //----------------------------------------------------------------------------- + // Name: ApicExtractor::ApicExtractor + //----------------------------------------------------------------------------- + ApicExtractor( DWORD nLogProcsPerPkg = 1, DWORD nCoresPerPkg = 1 ) + { + SetPackageTopology( nLogProcsPerPkg, nCoresPerPkg ); + } + + //----------------------------------------------------------------------------- + // Name: ApicExtractor::SmtId + //----------------------------------------------------------------------------- + BYTE SmtId( BYTE apicId ) const + { + return apicId & m_smtIdMask.mask; + } + + //----------------------------------------------------------------------------- + // Name: ApicExtractor::CoreId + //----------------------------------------------------------------------------- + BYTE CoreId( BYTE apicId ) const + { + return ( apicId & m_coreIdMask.mask ) >> m_smtIdMask.width; + } + + //----------------------------------------------------------------------------- + // Name: ApicExtractor::PackageId + 
//----------------------------------------------------------------------------- + BYTE PackageId( BYTE apicId ) const + { + return ( apicId & m_pkgIdMask.mask ) >> + ( m_smtIdMask.width + m_coreIdMask.width ); + } + + //----------------------------------------------------------------------------- + // Name: ApicExtractor::PackageCoreId + //----------------------------------------------------------------------------- + BYTE PackageCoreId( BYTE apicId ) const + { + return ( apicId & ( m_pkgIdMask.mask | m_coreIdMask.mask ) ) >> + m_smtIdMask.width; + } + + //----------------------------------------------------------------------------- + // Name: ApicExtractor::GetLogProcsPerPkg + //----------------------------------------------------------------------------- + DWORD GetLogProcsPerPkg() const + { + return m_nLogProcsPerPkg; + } + + //----------------------------------------------------------------------------- + // Name: ApicExtractor::GetCoresPerPkg + //----------------------------------------------------------------------------- + DWORD GetCoresPerPkg() const + { + return m_nCoresPerPkg; + } + + //----------------------------------------------------------------------------- + // Name: ApicExtractor::SetPackageTopology + // Desc: You should call SetPackageTopology with the number of logical + // processors per package and number of cores per package before calling + // the sub id accessors (SmtId(), CoreId(), PackageId(), PackageCoreId()) + // as this information is required to effectively decode an APIC ID into + // its sub parts. 
+ //----------------------------------------------------------------------------- + void SetPackageTopology( DWORD nLogProcsPerPkg, DWORD nCoresPerPkg ) + { + m_nLogProcsPerPkg = ( BYTE )nLogProcsPerPkg; + m_nCoresPerPkg = ( BYTE )nCoresPerPkg; + + // fix for Phenom x3 and similar CPUs - it reports 3 logical processors per package, and 4 cores per package + // so one core is probably just disabled for yield, but it causes a bug in GetMaskWidth that propagates + if( m_nCoresPerPkg > m_nLogProcsPerPkg ) + { + m_nCoresPerPkg = m_nLogProcsPerPkg; + } + + m_smtIdMask.width = GetMaskWidth_( m_nLogProcsPerPkg / m_nCoresPerPkg ); + m_coreIdMask.width = GetMaskWidth_( m_nCoresPerPkg ); + m_pkgIdMask.width = 8 - ( m_smtIdMask.width + m_coreIdMask.width ); + + m_pkgIdMask.mask = ( BYTE )( 0xFF << ( m_smtIdMask.width + m_coreIdMask.width ) ); + m_coreIdMask.mask = ( BYTE )( ( 0xFF << m_smtIdMask.width ) ^ m_pkgIdMask.mask ); + m_smtIdMask.mask = ( BYTE )~( 0xFF << m_smtIdMask.width ); + + } + +private: + //----------------------------------------------------------------------------- + // Name: ApicExtractor::GetMaskWidth_ + // Desc: Gets the width of a sub id bit field in an APIC ID. The width of a + // sub id (CORE_ID, SMT_ID) is only wide enough to support the maximum + // number of ids that needs to be represented in the topology. 
+ //----------------------------------------------------------------------------- + static BYTE GetMaskWidth_( BYTE maxIds ) + { + --maxIds; + + // find index of msb + BYTE msbIdx = 8; + BYTE msbMask = 0x80; + while( msbMask && !( msbMask & maxIds ) ) + { + --msbIdx; + msbMask >>= 1; + } + return msbIdx; + } + + struct IdMask + { + BYTE width; + BYTE mask; + }; + + // Private Members + BYTE m_nLogProcsPerPkg; + BYTE m_nCoresPerPkg; + IdMask m_smtIdMask; + IdMask m_coreIdMask; + IdMask m_pkgIdMask; +}; + +//--------------------------------------------------------------------------------- +// Name: Cpuid +// Desc: A utility class that wraps the functionality of the CPUID instruction. +// Call the Call() method with the desired CPUID function, and use the +// register accessors to retrieve the register values. +//--------------------------------------------------------------------------------- +class Cpuid +{ +public: + // FnSet values are used to indicate a CPUID function set. + enum FnSet + { + Std = 0x00000000, + Ext = 0x80000000 + }; + + //----------------------------------------------------------------------------- + // Name: Cpuid::Cpuid + //----------------------------------------------------------------------------- + Cpuid() : m_eax( 0 ), + m_ebx( 0 ), + m_ecx( 0 ), + m_edx( 0 ) + { + } + + // Register accessors + DWORD Eax() const + { + return m_eax; + } + DWORD Ebx() const + { + return m_ebx; + } + DWORD Ecx() const + { + return m_ecx; + } + DWORD Edx() const + { + return m_edx; + } + + //----------------------------------------------------------------------------- + // Name: Cpuid::Call + // Desc: Calls the CPUID instruction with the specified function. Returns TRUE + // if the CPUID function was supported, FALSE if it wasn't. 
+ //----------------------------------------------------------------------------- + BOOL Call( FnSet fnSet, DWORD fn ) + { + if( IsFnSupported( fnSet, fn ) ) + { + UncheckedCall_( fnSet, fn ); + return true; + } + return false; + } + + //----------------------------------------------------------------------------- + // Name: Cpuid::IsVendor + // Desc: Compares a string with the vendor string encoded in the CPUID + // instruction. + //----------------------------------------------------------------------------- + static BOOL IsVendor( const char* strVendor ) + { + // Cache the vendor string + static const Cpuid cpu( Std ); + return cpu.Ebx() == *reinterpret_cast( strVendor ) + && cpu.Ecx() == *reinterpret_cast( strVendor + 8 ) + && cpu.Edx() == *reinterpret_cast( strVendor + 4 ); + } + + //----------------------------------------------------------------------------- + // Name: Cpuid::IsFnSupported + // Desc: Checks to see if a CPUID function is supported. Different processors + // support different functions. This method is automatically called from + // the Call() method, so you don't need to call it beforehand. + //----------------------------------------------------------------------------- + static BOOL IsFnSupported( FnSet fnSet, DWORD fn ) + { + // Cache the maximum supported standard function + static const DWORD MaxStdFn = Cpuid( Std ).Eax(); + // Cache the maximum supported extended function + static const DWORD MaxExtFn = Cpuid( Ext ).Eax(); + + bool ret = false; + switch( fnSet ) + { + case Std: + ret = ( fn <= MaxStdFn ); + break; + case Ext: + ret = ( fn <= MaxExtFn ); + break; + default: + _ASSERT( 0 ); // should never get here + break; + } + return ret; + } + +private: + //----------------------------------------------------------------------------- + // Name: Cpuid::Cpuid + // Desc: This constructor is private and is only used to set a Cpuid object to + // initial values retrieved from CPUID functions 0x00000000 and + // 0x80000000. 
//       Good for caching values from the CPUID instruction that
    //       are not variable, like the encoded vendor string and the maximum
    //       supported CPUID function values.
    //-----------------------------------------------------------------------------
    explicit Cpuid( FnSet fnSet )
    {
        // Function 0 of the requested set, executed without a support check.
        UncheckedCall_( fnSet, 0 );
    }

    //-----------------------------------------------------------------------------
    // Name: Cpuid::UncheckedCall_
    // Desc: Calls the CPUID instruction without checking for CPUID function
    //       support. The function number passed to the instruction is
    //       ( fnSet | fn ) with ECX (the sub-function) set to 0; the resulting
    //       EAX/EBX/ECX/EDX values are stored in the member variables.
    //-----------------------------------------------------------------------------
    void UncheckedCall_( FnSet fnSet, DWORD fn )
    {
#ifdef _WIN64
        // 64-bit builds go through the __cpuidex intrinsic.
        int out[4];
        __cpuidex( out, fnSet | fn, 0 );
        m_eax = out[0];
        m_ebx = out[1];
        m_ecx = out[2];
        m_edx = out[3];
#else
        // Other builds issue the instruction with inline assembly and write the
        // four result registers into this object through edi.
        __asm
        {
            mov ecx, 0
            mov eax, fn
            or eax, fnSet
            cpuid
            mov edi, this
            mov [edi].m_eax, eax
            mov [edi].m_ebx, ebx
            mov [edi].m_ecx, ecx
            mov [edi].m_edx, edx
        }
#endif
    }

    // Private Members
    // Register values captured by the most recent CPUID call (zero until then
    // for a default-constructed object).
    DWORD m_eax;
    DWORD m_ebx;
    DWORD m_ecx;
    DWORD m_edx;
};

//---------------------------------------------------------------------------------
// Name: CpuidImpl
// Desc: Provides the CPUID instruction implementation for the ICpuTopology
//       interface. This is a ConcreteImplementor class in the traditional Bridge
//       Pattern.
//---------------------------------------------------------------------------------
class CpuidImpl : public ICpuTopology
{
public:
    // CpuidFnMasks are used when extracting bit-encoded information retrieved from
    // the CPUID instruction
    enum CpuidFnMasks
    {
        HTT = 0x10000000, // Fn0000_0001 EDX[28]
        LogicalProcessorCount = 0x00FF0000, // Fn0000_0001 EBX[23:16]
        ApicId = 0xFF000000, // Fn0000_0001 EBX[31:24]
        NC_Intel = 0xFC000000, // Fn0000_0004 EAX[31:26]
        NC_Amd = 0x000000FF, // Fn8000_0008 ECX[7:0]
        CmpLegacy_Amd = 0x00000002, // Fn8000_0001 ECX[1]
        ApicIdCoreIdSize_Amd = 0x0000F000 // Fn8000_0008 ECX[15:12]
    };

    // One slot per bit of an affinity mask (DWORD_PTR).
    enum
    {
        MaxLogicalProcessors = sizeof( DWORD_PTR ) * 8
    };

    //-----------------------------------------------------------------------------
    // Name: CpuidImpl::CpuidImpl
    // Desc: Initializes internal structures/data with information retrieved from
    //       calling the CPUID instruction. The constructor temporarily changes
    //       the process and thread affinity so that CPUID can be executed on
    //       every logical processor in the system affinity mask, then restores
    //       both.
    //-----------------------------------------------------------------------------
    CpuidImpl() : m_nItems( 0 )
    {
        _ASSERT( IsSupported() );

        DWORD nLogProcsPerPkg = 1;
        DWORD nCoresPerPkg = 1;

        Cpuid cpu;

        // Determine if hardware threading is enabled.
        cpu.Call( Cpuid::Std, 1 );
        if( cpu.Edx() & HTT )
        {
            // Determine the total number of logical processors per package.
            nLogProcsPerPkg = ( cpu.Ebx() & LogicalProcessorCount ) >> 16;

            // Determine the total number of cores per package. This info
            // is extracted differently depending on the cpu vendor.
            if( Cpuid::IsVendor( GenuineIntel ) )
            {
                if( cpu.Call( Cpuid::Std, 4 ) )
                {
                    nCoresPerPkg = ( ( cpu.Eax() & NC_Intel ) >> 26 ) + 1;
                }
            }
            else
            {
                _ASSERT( Cpuid::IsVendor( AuthenticAMD ) );
                if( cpu.Call( Cpuid::Ext, 8 ) )
                {
                    // AMD reports the msb width of the CORE_ID bit field of the APIC ID
                    // in ApicIdCoreIdSize_Amd. The maximum value represented by the msb
                    // width is the theoretical number of cores the processor can support
                    // and not the actual number of current cores, which is how the msb width
                    // of the CORE_ID bit field has been traditionally determined. If the
                    // ApicIdCoreIdSize_Amd value is zero, then you use the traditional method
                    // to determine the CORE_ID msb width.
                    DWORD msbWidth = cpu.Ecx() & ApicIdCoreIdSize_Amd;
                    if( msbWidth )
                    {
                        // Set nCoresPerPkg to the maximum theoretical number of cores
                        // the processor package can support (2 ^ width) so the APIC
                        // extractor object can be configured to extract the proper
                        // values from an APIC.
                        nCoresPerPkg = 1 << ( msbWidth >> 12 );
                    }
                    else
                    {
                        // Set nCoresPerPkg to the actual number of cores being reported
                        // by the CPUID instruction.
                        nCoresPerPkg = ( cpu.Ecx() & NC_Amd ) + 1;
                    }
                }
            }
        }

        // Configure the APIC extractor object with the information it needs to
        // be able to decode the APIC.
        m_apicExtractor.SetPackageTopology( nLogProcsPerPkg, nCoresPerPkg );

        DWORD_PTR dwProcessAffinity, dwSystemAffinity;
        HANDLE hProcess = GetCurrentProcess();
        HANDLE hThread = GetCurrentThread();
        GetProcessAffinityMask( hProcess, &dwProcessAffinity, &dwSystemAffinity );
        if( 1 == dwSystemAffinity )
        {
            // Since we only have 1 logical processor present on the system, we
            // can explicitly set a single APIC ID to zero.
            _ASSERT( 1 == nLogProcsPerPkg );
            m_apicIds[m_nItems++] = 0;
        }
        else
        {
            // Set the process affinity to the system affinity if they are not
            // equal so that all logical processors can be accounted for.
            if( dwProcessAffinity != dwSystemAffinity )
            {
                SetProcessAffinityMask( hProcess, dwSystemAffinity );
            }

            // Call cpuid on each active logical processor in the system affinity.
            // dwThreadAffinity walks a single set bit upward; the loop ends when
            // the bit shifts out of the mask type or passes the highest bit set
            // in dwSystemAffinity.
            DWORD_PTR dwPrevThreadAffinity = 0;
            for( DWORD_PTR dwThreadAffinity = 1;
                 dwThreadAffinity && dwThreadAffinity <= dwSystemAffinity;
                 dwThreadAffinity <<= 1 )
            {
                if( dwSystemAffinity & dwThreadAffinity )
                {
                    if( 0 == dwPrevThreadAffinity )
                    {
                        // Save the previous thread affinity so we can return
                        // the executing thread affinity back to this state.
                        _ASSERT( 0 == m_nItems );
                        dwPrevThreadAffinity = SetThreadAffinityMask( hThread,
                                                                      dwThreadAffinity );
                    }
                    else
                    {
                        _ASSERT( m_nItems > 0 );
                        SetThreadAffinityMask( hThread, dwThreadAffinity );
                    }

                    // Allow the thread to switch to masked logical processor.
                    Sleep( 0 );

                    // Store the APIC ID
                    cpu.Call( Cpuid::Std, 1 );
                    m_apicIds[m_nItems++] = ( BYTE )( ( cpu.Ebx() & ApicId ) >> 24 );
                }
            }

            // Restore the previous process and thread affinity state.
            SetProcessAffinityMask( hProcess, dwProcessAffinity );
            SetThreadAffinityMask( hThread, dwPrevThreadAffinity );
            Sleep( 0 );
        }

    }

    //-----------------------------------------------------------------------------
    // Name: CpuidImpl::IsDefaultImpl
    // Desc: Always FALSE for this implementation.
    //-----------------------------------------------------------------------------
    /*virtual*/ BOOL IsDefaultImpl() const
    {
        return FALSE;
    }

    //-----------------------------------------------------------------------------
    // Name: CpuidImpl::NumberOfProcessCores
    // Desc: Gets the number of processor cores available to the current process.
    //       The total accounts for cores that may have been masked out by process
    //       affinity.
+ //----------------------------------------------------------------------------- + /*virtual*/ DWORD NumberOfProcessCores() const + { + DWORD_PTR dwProcessAffinity, dwSystemAffinity; + GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity ); + + BYTE pkgCoreIds[MaxLogicalProcessors] = { 0 }; + DWORD nPkgCoreIds = 0; + + for( DWORD i = 0; i < m_nItems; ++i ) + { + if( dwProcessAffinity & ( ( DWORD_PTR )1 << i ) ) + { + AddUniquePkgCoreId_( i, pkgCoreIds, nPkgCoreIds ); + } + } + return nPkgCoreIds; + } + + //----------------------------------------------------------------------------- + // Name: CpuidImpl::NumberOfSystemCores + // Desc: Gets the number of processor cores on the system. + //----------------------------------------------------------------------------- + /*virtual*/ DWORD NumberOfSystemCores() const + { + BYTE pkgCoreIds[MaxLogicalProcessors] = { 0 }; + DWORD nPkgCoreIds = 0; + for( DWORD i = 0; i < m_nItems; ++i ) + { + AddUniquePkgCoreId_( i, pkgCoreIds, nPkgCoreIds ); + } + return nPkgCoreIds; + } + + //----------------------------------------------------------------------------- + // Name: CpuidImpl::CoreAffinityMask + // Desc: Gets an affinity mask that corresponds to a specific processor core. + // coreIdx must be less than the total number of processor cores + // recognized by the operating system (NumberOfSystemCores()). 
//-----------------------------------------------------------------------------
    /*virtual*/ DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const
    {
        // Build the list of unique package/core ids; coreIdx indexes this list.
        BYTE pkgCoreIds[MaxLogicalProcessors] = { 0 };
        DWORD nPkgCoreIds = 0;
        for( DWORD i = 0; i < m_nItems; ++i )
        {
            AddUniquePkgCoreId_( i, pkgCoreIds, nPkgCoreIds );
        }

        DWORD_PTR dwProcessAffinity, dwSystemAffinity;
        GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity );

        // An out-of-range coreIdx yields an empty mask.
        DWORD_PTR coreAffinity = 0;
        if( coreIdx < nPkgCoreIds )
        {
            for( DWORD i = 0; i < m_nItems; ++i )
            {
                if( m_apicExtractor.PackageCoreId( m_apicIds[i] ) == pkgCoreIds[coreIdx] )
                {
                    // Bit i is contributed only if it is also set in the
                    // process affinity mask.
                    coreAffinity |= ( dwProcessAffinity & ( ( DWORD_PTR )1 << i ) );
                }
            }
        }
        return coreAffinity;
    }

    //-----------------------------------------------------------------------------
    // Name: CpuidImpl::IsSupported
    // Desc: Indicates if a CpuidImpl object is supported on this platform.
    //       Support is only granted on Intel and AMD platforms where the current
    //       calling process has security rights to query process affinity and
    //       change it if the process and system affinity differ. CpuidImpl is
    //       also not supported if thread affinity cannot be set on systems with
    //       more than 1 logical processor.
    //-----------------------------------------------------------------------------
    static BOOL IsSupported()
    {
        BOOL bSupported = Cpuid::IsVendor( GenuineIntel )
            || Cpuid::IsVendor( AuthenticAMD );

        if( bSupported )
        {
            DWORD_PTR dwProcessAffinity, dwSystemAffinity;
            HANDLE hProcess = GetCurrentProcess();

            // Query process affinity mask
            bSupported = GetProcessAffinityMask( hProcess, &dwProcessAffinity, &dwSystemAffinity );
            if( bSupported )
            {
                if( dwProcessAffinity != dwSystemAffinity )
                {
                    // The process and system affinities differ. Attempt to set
                    // the process affinity to the system affinity.
                    bSupported = SetProcessAffinityMask( hProcess, dwSystemAffinity );
                    if( bSupported )
                    {
                        // Restore previous process affinity
                        bSupported = SetProcessAffinityMask( hProcess, dwProcessAffinity );
                    }
                }

                if( bSupported && ( dwSystemAffinity > 1 ) )
                {
                    // Attempt to set the thread affinity
                    HANDLE hThread = GetCurrentThread();
                    DWORD_PTR dwThreadAffinity = SetThreadAffinityMask( hThread, dwProcessAffinity );
                    if( dwThreadAffinity )
                    {
                        // Restore the previous thread affinity
                        bSupported = 0 != SetThreadAffinityMask( hThread, dwThreadAffinity );
                    }
                    else
                    {
                        bSupported = FALSE;
                    }
                }
            }
        }
        return bSupported;
    }

private:

    //-----------------------------------------------------------------------------
    // Name: CpuidImpl::AddUniquePkgCoreId_
    // Desc: Adds the package/core id extracted from the APIC ID at m_apicIds[idx]
    //       to the pkgCoreIds array if the package/core id is unique.
    //       nPkgCoreIds is an in/out parm that will reflect the total number of
    //       items in the pkgCoreIds array. It will be incremented if a unique
    //       package/core id is found and added.
+ //----------------------------------------------------------------------------- + void AddUniquePkgCoreId_( DWORD idx, BYTE* pkgCoreIds, DWORD& nPkgCoreIds ) const + { + _ASSERT( idx < m_nItems ); + _ASSERT( NULL != pkgCoreIds ); + + DWORD j; + for( j = 0; j < nPkgCoreIds; ++j ) + { + if( pkgCoreIds[j] == m_apicExtractor.PackageCoreId( m_apicIds[idx] ) ) + break; + } + if( j == nPkgCoreIds ) + { + pkgCoreIds[j] = m_apicExtractor.PackageCoreId( m_apicIds[idx] ); + ++nPkgCoreIds; + } + } + + // Private Members + BYTE m_apicIds[MaxLogicalProcessors]; + BYTE m_nItems; + ApicExtractor m_apicExtractor; + + // Supported Vendor Strings + static const char GenuineIntel[]; + static const char AuthenticAMD[]; +}; + +// Static initialization of vendor strings +const char CpuidImpl::GenuineIntel[] = "GenuineIntel"; +const char CpuidImpl::AuthenticAMD[] = "AuthenticAMD"; + +} // unnamed-namespace + +//------------------------------------------------------------------------------------- +// Name: CpuTopology::CpuTopology +// Desc: Initializes this object with the appropriately supported cpu topology +// implementation object. +//------------------------------------------------------------------------------------- +CpuTopology::CpuTopology( BOOL bForceCpuid ) : m_pImpl( NULL ) +{ + ForceCpuid( bForceCpuid ); +} + +//------------------------------------------------------------------------------------- +// Name: CpuTopology::~CpuTopology +//------------------------------------------------------------------------------------- +CpuTopology::~CpuTopology() +{ + Destroy_(); +} + +//------------------------------------------------------------------------------------- +// Name: CpuTopology::NumberOfProcessCores +// Desc: Gets the total number of physical processor cores available to the current +// process. 
//-------------------------------------------------------------------------------------
DWORD CpuTopology::NumberOfProcessCores() const
{
    // Forwarded to the implementation object selected by ForceCpuid().
    return m_pImpl->NumberOfProcessCores();
}

//-------------------------------------------------------------------------------------
// Name: CpuTopology::NumberOfSystemCores
// Desc: Gets the total number of physical processor cores enabled on the system.
//-------------------------------------------------------------------------------------
DWORD CpuTopology::NumberOfSystemCores() const
{
    // Forwarded to the implementation object selected by ForceCpuid().
    return m_pImpl->NumberOfSystemCores();
}

//-------------------------------------------------------------------------------------
// Name: CpuTopology::CoreAffinityMask
// Desc: Gets an affinity mask that corresponds to the requested processor core.
//-------------------------------------------------------------------------------------
DWORD_PTR CpuTopology::CoreAffinityMask( DWORD coreIdx ) const
{
    // Forwarded to the implementation object selected by ForceCpuid().
    return m_pImpl->CoreAffinityMask( coreIdx );
}

//-------------------------------------------------------------------------------------
// Name: CpuTopology::IsDefaultImpl
// Desc: Returns TRUE if m_pImpl is a DefaultImpl object, FALSE if not. Used to
//       indicate whether or not the prescribed methods (CPUID or
//       GetLogicalProcessorInformation) are supported on the system.
//-------------------------------------------------------------------------------------
BOOL CpuTopology::IsDefaultImpl() const
{
    // Forwarded to the implementation object selected by ForceCpuid().
    return m_pImpl->IsDefaultImpl();
}

//-------------------------------------------------------------------------------------
// Name: CpuTopology::ForceCpuid
// Desc: Constructs a cpu topology object. If bForce is FALSE, then a GlpiImpl object
//       is first attempted, then CpuidImpl, then finally DefaultImpl. If bForce is
//       TRUE, then GlpiImpl is never attempted.
+//------------------------------------------------------------------------------------- +void CpuTopology::ForceCpuid( BOOL bForce ) +{ + Destroy_(); + + if( !bForce && GlpiImpl::IsSupported() ) + { + m_pImpl = new GlpiImpl(); + } + else if( CpuidImpl::IsSupported() ) + { + m_pImpl = new CpuidImpl(); + } + else + { + m_pImpl = new DefaultImpl(); + } +} + +//------------------------------------------------------------------------------------- +// Name: CpuTopology::Destroy_ +//------------------------------------------------------------------------------------- +void CpuTopology::Destroy_() +{ + delete m_pImpl; + m_pImpl = NULL; +} +#endif \ No newline at end of file diff --git a/r5dev/tier0/cputopology.h b/r5dev/tier0/cputopology.h new file mode 100644 index 00000000..c8342277 --- /dev/null +++ b/r5dev/tier0/cputopology.h @@ -0,0 +1,38 @@ +//------------------------------------------------------------------------------------- +// CpuTopology.h +// +// CpuToplogy class declaration. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------------------------------- +#pragma once +#ifndef CPU_TOPOLOGY_H +#define CPU_TOPOLOGY_H + +class ICpuTopology; + +//--------------------------------------------------------------------------------- +// Name: CpuToplogy +// Desc: This class constructs a supported cpu topology implementation object on +// initialization and forwards calls to it. This is the Abstraction class +// in the traditional Bridge Pattern. 
+//--------------------------------------------------------------------------------- +class CpuTopology +{ +public: + CpuTopology( BOOL bForceCpuid = FALSE ); + ~CpuTopology(); + + BOOL IsDefaultImpl() const; + DWORD NumberOfProcessCores() const; + DWORD NumberOfSystemCores() const; + DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const; + + void ForceCpuid( BOOL bForce ); +private: + void Destroy_(); + + ICpuTopology* m_pImpl; +}; + +#endif // CPU_TOPOLOGY_H diff --git a/r5dev/tier0/fasttimer.cpp b/r5dev/tier0/fasttimer.cpp new file mode 100644 index 00000000..9e012a46 --- /dev/null +++ b/r5dev/tier0/fasttimer.cpp @@ -0,0 +1,18 @@ +//=============================================================================// +// +// Purpose: +// +// $NoKeywords: $ +//=============================================================================// + +#include "core/stdafx.h" +#include "tier0/fasttimer.h" + +uint64 g_ClockSpeed; // Clocks/sec +unsigned long g_dwClockSpeed; +double g_ClockSpeedMicrosecondsMultiplier; +double g_ClockSpeedMillisecondsMultiplier; +double g_ClockSpeedSecondsMultiplier; + +// Constructor init the clock speed. 
+CClockSpeedInit g_ClockSpeedInit; diff --git a/r5dev/tier0/fasttimer.h b/r5dev/tier0/fasttimer.h new file mode 100644 index 00000000..37bcdc4b --- /dev/null +++ b/r5dev/tier0/fasttimer.h @@ -0,0 +1,549 @@ +//===========================================================================// +// +// Purpose: +// +// $NoKeywords: $ +//===========================================================================// + +#ifndef FASTTIMER_H +#define FASTTIMER_H + +#include "tier0/platform.h" +#include "tier0/cpu.h" + +/*****************************************************************************/ +extern uint64_t g_ClockSpeed; +extern unsigned long g_dwClockSpeed; + +extern double g_ClockSpeedMicrosecondsMultiplier; +extern double g_ClockSpeedMillisecondsMultiplier; +extern double g_ClockSpeedSecondsMultiplier; + +// -------------------------------------------------------------------------- // +// CCycleCount +// -------------------------------------------------------------------------- // +class CCycleCount +{ + friend class CFastTimer; + +public: + CCycleCount(void); + CCycleCount(uint64_t cycles); + + void Sample(void); // Sample the clock. This takes about 34 clocks to execute (or 26,000 calls per millisecond on a P900). + void Init(void); // Set to zero. + void Init(float initTimeMsec); + void Init(double initTimeMsec) { Init((float)initTimeMsec); } + void Init(uint64_t cycles); + bool IsLessThan(CCycleCount const& other) const; // Compare two counts. + + // Convert to other time representations. These functions are slow, so it's preferable to call them during display rather than inside a timing block. 
// Raw cycle count accessors.
    unsigned long GetCycles(void) const;
    uint64_t GetLongCycles(void) const;

    // Microsecond conversions (integer, 64-bit integer, floating point) and setter.
    unsigned long GetMicroseconds(void) const;
    uint64_t GetUlMicroseconds(void) const;
    double GetMicrosecondsF(void) const;
    void SetMicroseconds(unsigned long nMicroseconds);

    // Millisecond and second conversions.
    unsigned long GetMilliseconds(void) const;
    double GetMillisecondsF(void) const;
    double GetSeconds(void) const;

    CCycleCount& operator+=(CCycleCount const& other);

    // dest = rSrc1 + rSrc2
    static void Add(CCycleCount const& rSrc1, CCycleCount const& rSrc2, CCycleCount& dest); // Add two samples together.
    // dest = rSrc1 - rSrc2
    static void Sub(CCycleCount const& rSrc1, CCycleCount const& rSrc2, CCycleCount& dest); // Subtract the second sample from the first.
    static uint64_t GetTimestamp(void);

private:
    uint64_t m_Int64{}; // The cycle count held by this object.
};


// -------------------------------------------------------------------------- //
// CClockSpeedInit
// Fills in the clock speed globals (g_ClockSpeed, g_dwClockSpeed and the three
// conversion multipliers) from GetCPUInformation().
// -------------------------------------------------------------------------- //
class CClockSpeedInit
{
public:
    // Constructing an instance runs Init().
    CClockSpeedInit(void)
    {
        Init();
    }

    static void Init(void)
    {
        const CPUInformation& pi = GetCPUInformation();
        g_ClockSpeed = pi.m_Speed;
        g_dwClockSpeed = (unsigned long)g_ClockSpeed;

        // Multipliers convert a cycle count to microseconds, milliseconds and
        // seconds respectively.
        // NOTE(review): nothing here guards against m_Speed being 0 - confirm
        // GetCPUInformation() can never report that.
        g_ClockSpeedMicrosecondsMultiplier = 1000000.0 / (double)g_ClockSpeed;
        g_ClockSpeedMillisecondsMultiplier = 1000.0 / (double)g_ClockSpeed;
        g_ClockSpeedSecondsMultiplier = 1.0f / (double)g_ClockSpeed;
    }
};


// -------------------------------------------------------------------------- //
// CFastTimer
// These functions are fast to call and should be called from your sampling code.
// -------------------------------------------------------------------------- //
class CFastTimer
{
public:
    void Start(void);
    void End(void);

    const CCycleCount& GetDuration(void) const; // Get the elapsed time between Start and End calls.
    CCycleCount GetDurationInProgress(void) const; // Call without ending. Not that cheap.
// Return number of cycles per second on this processor.
    static inline unsigned long GetClockSpeed(void);

private:
    // Holds the Start() sample while timing; End() replaces it with the
    // elapsed cycle count.
    CCycleCount m_Duration;
#ifdef DEBUG_FASTTIMER
    bool m_bRunning; // Are we currently running?
#endif
};


// -------------------------------------------------------------------------- //
// CTimeScope
// This is a helper class that times whatever block of code it's in.
// -------------------------------------------------------------------------- //
class CTimeScope
{
public:
    CTimeScope(CFastTimer* pTimer);
    ~CTimeScope(void);

private:
    CFastTimer* m_pTimer;
};

// Starts the supplied timer; the pointer is stored and used without a null check.
inline CTimeScope::CTimeScope(CFastTimer* pTotal)
{
    m_pTimer = pTotal;
    m_pTimer->Start();
}

// Ends the timer that was started by the constructor.
inline CTimeScope::~CTimeScope(void)
{
    m_pTimer->End();
}

// This is a helper class that times whatever block of code it's in and adds the total (int microseconds) to a global counter.
class CTimeAdder
{
public:
    CTimeAdder(CCycleCount* pTotal);
    ~CTimeAdder(void);

    void End();

private:
    CCycleCount* m_pTotal;
    CFastTimer m_Timer;
};

// Remembers the accumulator and starts timing immediately.
inline CTimeAdder::CTimeAdder(CCycleCount* pTotal)
{
    m_pTotal = pTotal;
    m_Timer.Start();
}

inline CTimeAdder::~CTimeAdder(void)
{
    End();
}

// Stops timing and adds the elapsed duration to the accumulator. The
// accumulator pointer is cleared afterwards, so a second call (including the
// one made by the destructor) does nothing.
inline void CTimeAdder::End(void)
{
    if (m_pTotal)
    {
        m_Timer.End();
        *m_pTotal += m_Timer.GetDuration();
        m_pTotal = 0;
    }
}


// -------------------------------------------------------------------------- //
// Simple tool to support timing a block of code, and reporting the results on
// program exit or at each iteration
//
// Macros used because dbg.h uses this header, thus Msg() is unavailable
// -------------------------------------------------------------------------- //

// PROFILE_SCOPE(name): declares a static CAverageCycleCounter-derived object
// whose destructor prints the average/total/peak milliseconds and the iteration
// count through Msg(), plus a CAverageTimeMarker that records one iteration for
// the enclosing scope.
#define PROFILE_SCOPE(name) \
    class C##name##ACC : public CAverageCycleCounter \
    { \
    public: \
        ~C##name##ACC() \
        { \
            Msg("%-48s: %6.3f avg (%8.1f total, %7.3f peak, %5d iters)\n", \
                #name, \
                GetAverageMilliseconds(), \
                GetTotalMilliseconds(), \
                GetPeakMilliseconds(), \
                GetIters() ); \
        } \
    }; \
    static C##name##ACC name##_ACC; \
    CAverageTimeMarker name##_ATM( &name##_ACC )

// TIME_SCOPE(name): declares an object that starts a CFastTimer when constructed
// and prints the elapsed milliseconds through Msg() when destroyed.
#define TIME_SCOPE(name) \
    class CTimeScopeMsg_##name \
    { \
    public: \
        CTimeScopeMsg_##name() { m_Timer.Start(); } \
        ~CTimeScopeMsg_##name() \
        { \
            m_Timer.End(); \
            Msg( #name "time: %.4fms\n", m_Timer.GetDuration().GetMillisecondsF() ); \
        } \
    private: \
        CFastTimer m_Timer; \
    } name##_TSM;


// -------------------------------------------------------------------------- //
// CAverageCycleCounter
// Accumulates iteration durations: running total, peak, and iteration count.
// -------------------------------------------------------------------------- //
class CAverageCycleCounter
{
public:
    CAverageCycleCounter(void);

    void Init(void);
    void MarkIter(const CCycleCount& duration);

    unsigned GetIters(void) const;

    double GetAverageMilliseconds(void) const;
    double GetTotalMilliseconds(void) const;
    double GetPeakMilliseconds(void) const;

private:
    unsigned m_nIters {};
    CCycleCount m_Total {};
    CCycleCount m_Peak {};
    bool m_fReport{};
    const char* m_pszName{};
};


// -------------------------------------------------------------------------- //
// CAverageTimeMarker
// Times its own lifetime and reports it to a CAverageCycleCounter.
// -------------------------------------------------------------------------- //
class CAverageTimeMarker
{
public:
    CAverageTimeMarker(CAverageCycleCounter* pCounter);
    ~CAverageTimeMarker(void);

private:
    CAverageCycleCounter* m_pCounter;
    CFastTimer m_Timer;
};


// -------------------------------------------------------------------------- //
// CCycleCount inlines.
+// -------------------------------------------------------------------------- //
+inline CCycleCount::CCycleCount(void) // Default construction: zero cycles.
+{
+    Init((uint64_t)0);
+}
+
+inline CCycleCount::CCycleCount(uint64_t cycles) // Construct from a raw cycle count.
+{
+    Init(cycles);
+}
+
+inline void CCycleCount::Init(void) // Reset to zero cycles.
+{
+    Init((uint64_t)0);
+}
+
+inline void CCycleCount::Init(float initTimeMsec) // Set the count from a time in milliseconds.
+{
+    if (g_ClockSpeedMillisecondsMultiplier > 0) // Avoids dividing by a multiplier that is zero or negative.
+        Init((uint64_t)(initTimeMsec / g_ClockSpeedMillisecondsMultiplier));
+    else
+        Init((uint64_t)0); // Falls back to zero cycles when the multiplier is not positive.
+}
+
+inline void CCycleCount::Init(uint64_t cycles) // Store a raw cycle count; the other Init overloads and both constructors end up here.
+{
+    m_Int64 = cycles;
+}
+
+inline void CCycleCount::Sample(void) // Overwrite the count with the current Plat_Rdtsc() value.
+{
+    m_Int64 = Plat_Rdtsc();
+}
+
+inline CCycleCount& CCycleCount::operator+=(CCycleCount const& other) // Add another count in place and return *this.
+{
+    m_Int64 += other.m_Int64;
+    return *this;
+}
+
+inline void CCycleCount::Add(CCycleCount const& rSrc1, CCycleCount const& rSrc2, CCycleCount& dest) // dest = rSrc1 + rSrc2.
+{
+    dest.m_Int64 = rSrc1.m_Int64 + rSrc2.m_Int64;
+}
+
+inline void CCycleCount::Sub(CCycleCount const& rSrc1, CCycleCount const& rSrc2, CCycleCount& dest) // dest = rSrc1 - rSrc2; no check that rSrc1 >= rSrc2.
+{
+    dest.m_Int64 = rSrc1.m_Int64 - rSrc2.m_Int64;
+}
+
+inline uint64_t CCycleCount::GetTimestamp(void) // Samples a temporary counter and returns its raw cycle value.
+{
+    CCycleCount c;
+    c.Sample();
+    return c.GetLongCycles();
+}
+
+inline bool CCycleCount::IsLessThan(CCycleCount const& other) const // True when this count is strictly smaller than 'other'.
+{
+    return m_Int64 < other.m_Int64;
+}
+
+inline unsigned long CCycleCount::GetCycles(void) const // Count narrowed to unsigned long by a plain cast.
+{
+    return (unsigned long)m_Int64;
+}
+
+inline uint64_t CCycleCount::GetLongCycles(void) const // Count at full width.
+{
+    return m_Int64;
+}
+
+inline unsigned long CCycleCount::GetMicroseconds(void) const // (cycles * 1000000) / g_ClockSpeed, narrowed to unsigned long.
+{
+    return (unsigned long)((m_Int64 * 1000000) / g_ClockSpeed); // NOTE(review): no guard for g_ClockSpeed == 0, unlike the multiplier check in Init(float).
+}
+
+inline uint64_t CCycleCount::GetUlMicroseconds(void) const // Same conversion as GetMicroseconds() without the narrowing cast.
+{
+    return ((m_Int64 * 1000000) / g_ClockSpeed); // NOTE(review): no guard for g_ClockSpeed == 0.
+}
+
+inline double CCycleCount::GetMicrosecondsF(void) const // cycles * g_ClockSpeedMicrosecondsMultiplier.
+{
+    return (double)(m_Int64 * g_ClockSpeedMicrosecondsMultiplier);
+}
+
+inline void CCycleCount::SetMicroseconds(unsigned long nMicroseconds) // Inverse of GetMicroseconds(): microseconds * g_ClockSpeed / 1000000.
+{
+    m_Int64 = ((uint64_t)nMicroseconds * g_ClockSpeed) / 1000000;
+}
+
+inline unsigned long CCycleCount::GetMilliseconds(void) const // (cycles * 1000) / g_ClockSpeed, narrowed to unsigned long.
+{
+    return (unsigned long)((m_Int64 * 1000) / g_ClockSpeed); // NOTE(review): no guard for g_ClockSpeed == 0.
+}
+
+inline double CCycleCount::GetMillisecondsF(void) const // cycles * g_ClockSpeedMillisecondsMultiplier.
+{
+    return (double)(m_Int64 * g_ClockSpeedMillisecondsMultiplier);
+}
+
+inline double CCycleCount::GetSeconds(void) const // cycles * g_ClockSpeedSecondsMultiplier.
+{
+    return (double)(m_Int64 * g_ClockSpeedSecondsMultiplier);
+}
+
+
+// -------------------------------------------------------------------------- //
+// CFastTimer inlines.
+// -------------------------------------------------------------------------- //
+inline void CFastTimer::Start(void) // Stores the current cycle sample in m_Duration as the start mark.
+{
+    m_Duration.Sample();
+#ifdef DEBUG_FASTTIMER
+    m_bRunning = true;
+#endif
+}
+
+inline void CFastTimer::End(void) // Converts the stored start mark into an elapsed cycle count.
+{
+    CCycleCount cnt;
+    cnt.Sample();
+
+    m_Duration.m_Int64 = cnt.m_Int64 - m_Duration.m_Int64; // m_Duration held the Start() sample; it now holds now - start.
+
+#ifdef DEBUG_FASTTIMER
+    m_bRunning = false;
+#endif
+}
+
+inline CCycleCount CFastTimer::GetDurationInProgress(void) const // Cycles elapsed since Start() without modifying the timer.
+{
+    CCycleCount cnt;
+    cnt.Sample();
+
+    CCycleCount result;
+    result.m_Int64 = cnt.m_Int64 - m_Duration.m_Int64; // Assumes End() has not run yet, because End() overwrites the start sample in m_Duration.
+
+    return result;
+}
+
+inline unsigned long CFastTimer::GetClockSpeed(void) // Returns g_dwClockSpeed.
+{
+    return g_dwClockSpeed; // NOTE(review): the CCycleCount conversions above use g_ClockSpeed instead - confirm the two globals are kept consistent.
+}
+
+inline CCycleCount const& CFastTimer::GetDuration(void) const // Returns m_Duration, which holds the elapsed cycles once End() has run.
+{
+#ifdef DEBUG_FASTTIMER
+    assert(!m_bRunning);
+#endif
+    return m_Duration;
+}
+
+
+// -------------------------------------------------------------------------- //
+// CAverageCycleCounter inlines
+// -------------------------------------------------------------------------- //
+inline CAverageCycleCounter::CAverageCycleCounter(void)
+    : m_nIters(0) // m_Total and m_Peak start at zero through CCycleCount's default constructor.
+{
+}
+
+inline void CAverageCycleCounter::Init(void) // Clears the total, the peak and the iteration count.
+{
+    m_Total.Init();
+    m_Peak.Init();
+    m_nIters = 0;
+}
+
+inline void CAverageCycleCounter::MarkIter(const CCycleCount& duration) // Records one iteration of length 'duration'.
+{
+    ++m_nIters;
+    m_Total += duration;
+    if (m_Peak.IsLessThan(duration)) // Keep the largest single duration seen so far.
+        m_Peak = duration;
+}
+
+inline unsigned CAverageCycleCounter::GetIters(void) const // Iterations recorded so far.
+{
+    return m_nIters;
+}
+
+inline double CAverageCycleCounter::GetAverageMilliseconds(void) const // Mean duration per iteration in milliseconds.
+{
+    if (m_nIters)
+        return (m_Total.GetMillisecondsF() / (double)m_nIters);
+    else
+        return 0; // Nothing recorded yet: avoids dividing by zero.
+}
+
+inline double CAverageCycleCounter::GetTotalMilliseconds(void) const // Accumulated duration in milliseconds.
+{
+    return m_Total.GetMillisecondsF();
+}
+
+inline double CAverageCycleCounter::GetPeakMilliseconds(void) const // Largest single duration in milliseconds.
+{
+    return m_Peak.GetMillisecondsF();
+}
+
+// -------------------------------------------------------------------------- //
+
+inline CAverageTimeMarker::CAverageTimeMarker(CAverageCycleCounter* pCounter) // Remembers the counter and starts timing.
+{
+    m_pCounter = pCounter;
+    m_Timer.Start();
+}
+
+inline CAverageTimeMarker::~CAverageTimeMarker(void) // Stops timing and records the duration as one iteration.
+{
+    m_Timer.End();
+    m_pCounter->MarkIter(m_Timer.GetDuration()); // NOTE(review): m_pCounter is dereferenced without a null check.
+}
+
+
+// -------------------------------------------------------------------------- //
+// CLimitTimer
+// Use this to time whether a desired interval of time has passed. It's extremely fast
+// to check while running. NOTE: CMicroSecOverage() and CMicroSecLeft() are not as fast to check.
+// -------------------------------------------------------------------------- //
+class CLimitTimer
+{
+public:
+    CLimitTimer(void) { } // Leaves m_lCycleLimit at its default of 0, so BLimitReached() returns true until SetLimit() is called.
+    CLimitTimer(uint64_t cMicroSecDuration) { SetLimit(cMicroSecDuration); } // Arms the timer immediately.
+    void SetLimit(uint64_t m_cMicroSecDuration); // The parameter is named cMicroSecDuration in the definition.
+    bool BLimitReached(void) const; // True once the sampled cycle count is at or past the limit.
+
+    int CMicroSecOverage(void) const; // Microseconds past the limit, or 0 if not reached; the result is cast to int.
+    uint64_t CMicroSecLeft(void) const; // Microseconds until the limit, or 0 if already reached.
+
+private:
+    uint64_t m_lCycleLimit{}; // Absolute cycle count at which the limit is reached.
+};
+
+//-----------------------------------------------------------------------------
+// Purpose: Initializes the limit timer with a period of time to measure.
+// Input : cMicroSecDuration - How long a time period to measure, in microseconds
+//-----------------------------------------------------------------------------
+inline void CLimitTimer::SetLimit(uint64_t cMicroSecDuration)
+{
+    uint64_t dlCycles = ((uint64_t)cMicroSecDuration * (uint64_t)g_dwClockSpeed) / (uint64_t)1000000L; // Duration converted from microseconds to cycles.
+    CCycleCount cycleCount;
+    cycleCount.Sample();
+    m_lCycleLimit = cycleCount.GetLongCycles() + dlCycles; // Absolute deadline = current cycle count + duration in cycles.
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Determines whether our specified time period has passed
+// Output: true if at least the specified time period has passed
+//-----------------------------------------------------------------------------
+inline bool CLimitTimer::BLimitReached(void) const
+{
+    CCycleCount cycleCount;
+    cycleCount.Sample();
+    return (cycleCount.GetLongCycles() >= m_lCycleLimit); // Compares the fresh sample with the deadline stored by SetLimit().
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: If we're over our specified time period, return the amount of the overage.
+// Output: # of microseconds since we reached our specified time period, 0 if we have not reached it yet
+//-----------------------------------------------------------------------------
+inline int CLimitTimer::CMicroSecOverage(void) const
+{
+    CCycleCount cycleCount;
+    cycleCount.Sample();
+    uint64_t lcCycles = cycleCount.GetLongCycles();
+
+    if (lcCycles < m_lCycleLimit) // Still before the deadline: no overage.
+        return 0;
+
+    return((int)((lcCycles - m_lCycleLimit) * (uint64_t)1000000L / g_dwClockSpeed)); // NOTE(review): 64-bit result is narrowed to int; no guard for g_dwClockSpeed == 0.
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: If we're under our specified time period, return the amount under.
+// Output: # of microseconds until we reach our specified time period, 0 if we've passed it
+//-----------------------------------------------------------------------------
+inline uint64_t CLimitTimer::CMicroSecLeft(void) const
+{
+    CCycleCount cycleCount;
+    cycleCount.Sample();
+    uint64_t lcCycles = cycleCount.GetLongCycles();
+
+    if (lcCycles >= m_lCycleLimit) // Deadline already reached: nothing left.
+        return 0;
+
+    return((uint64_t)((m_lCycleLimit - lcCycles) * (uint64_t)1000000L / g_dwClockSpeed)); // Remaining cycles converted to microseconds; no guard for g_dwClockSpeed == 0.
+}
+
+#endif // FASTTIMER_H
diff --git a/r5dev/tier0/platform.h b/r5dev/tier0/platform.h
new file mode 100644
index 00000000..f866db79
--- /dev/null
+++ b/r5dev/tier0/platform.h
@@ -0,0 +1,76 @@
+#ifndef PLATFORM_H
+#define PLATFORM_H
+
+#if defined( _WIN32 ) && defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
+#pragma intrinsic(__rdtsc)
+#endif
+
+inline uint64_t Plat_Rdtsc() // Returns the raw timestamp counter; the instruction or intrinsic used depends on the build target selected below.
+{
+#if defined( _X360 )
+    return (uint64)__mftb32(); // _X360 builds: value of __mftb32().
+#elif defined( _WIN64 )
+    return (uint64_t)__rdtsc(); // 64-bit Windows builds: __rdtsc() intrinsic.
+#elif defined( _WIN32 )
+#if defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
+    return (uint64)__rdtsc(); // NOTE(review): spelled 'uint64' here but uint64_t in the _WIN64 branch - assumes a uint64 typedef is in scope.
+#else
+    __asm rdtsc; // NOTE(review): this path has no return statement; presumably relies on the registers written by rdtsc - confirm it is still built.
+    __asm ret;
+#endif
+#elif defined( __i386__ )
+    uint64 val;
+    __asm__ __volatile__("rdtsc" : "=A" (val)); // "=A" binds the EDX:EAX register pair to val.
+    return val;
+#elif defined( __x86_64__ )
+    uint32 lo, hi;
+    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi)); // Low half arrives in 'lo' ("=a"), high half in 'hi' ("=d").
+    return (((uint64)hi) << 32) | lo; // Recombine the two 32-bit halves into one 64-bit value.
+#else
+#error
+#endif
+}
+
+// Processor Information:
+struct CPUInformation
+{
+    int m_Size; // Size of this structure, for forward compatibility.
+
+    uint8_t m_nLogicalProcessors; // Number of logical processors.
+    uint8_t m_nPhysicalProcessors; // Number of physical processors
+
+    bool m_bRDTSC : 1, // Is RDTSC supported?
+         m_bCMOV : 1, // Is CMOV supported?
+         m_bFCMOV : 1, // Is FCMOV supported?
+         m_bSSE : 1, // Is SSE supported?
+         m_bSSE2 : 1, // Is SSE2 Supported?
+         m_b3DNow : 1, // Is 3DNow! Supported?
+         m_bMMX : 1, // Is MMX supported?
+         m_bHT : 1; // Is HyperThreading supported?
+
+
+    bool m_bSSE3 : 1, // Presumably: is SSE3 supported? (named like the flags above)
+         m_bSSSE3 : 1, // Presumably: is SSSE3 supported?
+         m_bSSE4a : 1, // Presumably: is SSE4a supported?
+         m_bSSE41 : 1, // Presumably: is SSE4.1 supported?
+         m_bSSE42 : 1, // Presumably: is SSE4.2 supported?
+         m_bAVX : 1; // Is AVX supported?
+
+    int64_t m_Speed; // In cycles per second.
+
+    char* m_szProcessorID; // Processor vendor Identification.
+    char* m_szProcessorBrand; // Processor brand string, if available
+
+    uint32_t m_nModel;
+    uint32_t m_nFeatures[3];
+    uint32_t m_nL1CacheSizeKb;
+    uint32_t m_nL1CacheDesc;
+    uint32_t m_nL2CacheSizeKb;
+    uint32_t m_nL2CacheDesc;
+    uint32_t m_nL3CacheSizeKb;
+    uint32_t m_nL3CacheDesc;
+
+    CPUInformation() : m_Size(0) {} // NOTE(review): only m_Size is set here; no other member is initialized by this constructor.
+};
+
+#endif /* PLATFORM_H */
\ No newline at end of file
diff --git a/r5dev/vproj/dedicated.vcxproj b/r5dev/vproj/dedicated.vcxproj
index beb0bef5..0d65ae43 100644
--- a/r5dev/vproj/dedicated.vcxproj
+++ b/r5dev/vproj/dedicated.vcxproj
@@ -327,9 +327,13 @@ + + + +
@@ -403,7 +407,10 @@ + + +
diff --git a/r5dev/vproj/dedicated.vcxproj.filters b/r5dev/vproj/dedicated.vcxproj.filters
index f1a90b75..0bc9bc50 100644
--- a/r5dev/vproj/dedicated.vcxproj.filters
+++ b/r5dev/vproj/dedicated.vcxproj.filters
@@ -792,6 +792,18 @@ sdk\game\server + + sdk\tier0 + + + sdk\tier0 + + + sdk\tier0 + + + sdk\tier0 +
@@ -986,6 +998,15 @@ sdk\game\server + + sdk\tier0 + + + sdk\tier0 + + + sdk\tier0 +
diff --git a/r5dev/vproj/gamesdk.vcxproj b/r5dev/vproj/gamesdk.vcxproj
index 3adcb162..c14dc023 100644
--- a/r5dev/vproj/gamesdk.vcxproj
+++ b/r5dev/vproj/gamesdk.vcxproj
@@ -81,7 +81,10 @@ + + +
@@ -329,9 +332,13 @@ + + + +
diff --git a/r5dev/vproj/gamesdk.vcxproj.filters b/r5dev/vproj/gamesdk.vcxproj.filters
index 2fc3d1f5..ecd83f31 100644
--- a/r5dev/vproj/gamesdk.vcxproj.filters
+++ b/r5dev/vproj/gamesdk.vcxproj.filters
@@ -399,6 +399,15 @@ sdk\materialsystem + + sdk\tier0 + + + sdk\tier0 + + + sdk\tier0 +
@@ -1145,6 +1154,18 @@ sdk\materialsystem + + sdk\tier0 + + + sdk\tier0 + + + sdk\tier0 + + + sdk\tier0 +