mirror of
https://github.com/Mauler125/r5sdk.git
synced 2025-02-09 19:15:03 +01:00
Implement CPU utils and FastTimer
CPU system utilities and FastTimer (inline)
This commit is contained in:
parent
a49a5bb781
commit
4bb3be2a8e
@ -5,15 +5,16 @@
|
||||
#include <windows.h>
|
||||
#include <WinSock2.h>
|
||||
#include <comdef.h>
|
||||
|
||||
#include <tchar.h>
|
||||
#include <stdio.h>
|
||||
#include <Psapi.h>
|
||||
#include <shlobj.h>
|
||||
#include <objbase.h>
|
||||
#include <intrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
@ -59,8 +60,9 @@
|
||||
|
||||
#include "core/assert.h"
|
||||
#include "core/termutil.h"
|
||||
#include "common/pseudodefs.h"
|
||||
#include "tier0/basetypes.h"
|
||||
#include "tier0/platform.h"
|
||||
#include "common/pseudodefs.h"
|
||||
|
||||
#if !defined(SDKLAUNCHER) && !defined (NETCONSOLE)
|
||||
namespace
|
||||
|
564
r5dev/tier0/cpu.cpp
Normal file
564
r5dev/tier0/cpu.cpp
Normal file
@ -0,0 +1,564 @@
|
||||
//=============================================================================//
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
// $NoKeywords: $
|
||||
//=============================================================================//
|
||||
#include "core/stdafx.h"
|
||||
#include "tier0/cpu.h"
|
||||
#include "tier0/cputopology.h"
|
||||
#include "tier0/fasttimer.h"
|
||||
|
||||
/*******************************************************************************/
|
||||
static CPUInformation s_cpuInformation;
|
||||
static char s_CpuVendorID[13] = "unknown";
|
||||
bool s_bCpuBrandInitialized = false;
|
||||
bool s_bCpuVendorIdInitialized = false;
|
||||
|
||||
/*******************************************************************************/
|
||||
// Holds the four register values produced by one CPUID query.
struct CpuIdResult_t
{
	unsigned long eax;
	unsigned long ebx;
	unsigned long ecx;
	unsigned long edx;

	// Sets all four register values to zero.
	void Reset(void)
	{
		eax = 0;
		ebx = 0;
		ecx = 0;
		edx = 0;
	}
};
|
||||
|
||||
// One row of an Intel cache descriptor lookup table.
struct IntelCacheDesc_t
{
	uint8_t  nDesc;      // Descriptor byte to match.
	uint16_t nCacheSize; // Cache size associated with that descriptor.
};
|
||||
|
||||
/*******************************************************************************/
|
||||
union CpuBrand_t
|
||||
{
|
||||
CpuIdResult_t cpuid[3];
|
||||
char name[49];
|
||||
};
|
||||
CpuBrand_t s_CpuBrand;
|
||||
|
||||
/*******************************************************************************/
|
||||
// Descriptor bytes that identify an L1 data cache, paired with the cache size
// that is stored into CPUInformation::m_nL1CacheSizeKb.
inline static IntelCacheDesc_t s_IntelL1DataCacheDesc[] =
{
	{ 0x0A,  8 }, { 0x0C, 16 }, { 0x0D, 16 },
	{ 0x2C, 32 }, { 0x30, 32 }, { 0x60, 16 },
	{ 0x66,  8 }, { 0x67, 16 }, { 0x68, 32 }
};
|
||||
|
||||
|
||||
// Descriptor bytes that identify an L2 cache, paired with the cache size
// that is stored into CPUInformation::m_nL2CacheSizeKb.
inline static IntelCacheDesc_t s_IntelL2DataCacheDesc[] =
{
	{ 0x21,  256 }, { 0x39,  128 }, { 0x3A,  192 }, { 0x3B,  128 },
	{ 0x3C,  256 }, { 0x3D,  384 }, { 0x3E,  512 },
	{ 0x41,  128 }, { 0x42,  256 }, { 0x43,  512 }, { 0x44, 1024 },
	{ 0x45, 2048 }, { 0x48, 3 * 1024 }, { 0x4E, 6 * 1024 },
	{ 0x78, 1024 }, { 0x79,  128 }, { 0x7A,  256 }, { 0x7B,  512 },
	{ 0x7C, 1024 }, { 0x7D, 2048 }, { 0x7F,  512 },
	{ 0x82,  256 }, { 0x83,  512 }, { 0x84, 1024 }, { 0x85, 2048 },
	{ 0x86,  512 }, { 0x87, 1024 }
};
|
||||
|
||||
|
||||
// Descriptor bytes that identify an L3 cache, paired with the cache size
// that is stored into CPUInformation::m_nL3CacheSizeKb.
// Descriptor 0x49 is intentionally absent (it was commented out in the original table).
inline static IntelCacheDesc_t s_IntelL3DataCacheDesc[] =
{
	{ 0x22, 512 },
	{ 0x23, 1024 },
	{ 0x25, 2 * 1024 },
	{ 0x29, 4 * 1024 },
	{ 0x46, 4 * 1024 },
	{ 0x47, 8 * 1024 },
	{ 0x4A, 6 * 1024 },
	{ 0x4B, 8 * 1024 },
	{ 0x4C, 12 * 1024 },
	{ 0x4D, 16 * 1024 }, // Was '16 * 1014': a typo that under-reported the size by 160 KB.
	{ 0xD0, 512 },
	{ 0xD1, 1024 },
	{ 0xD2, 2048 },
	{ 0xD6, 1024 },
	{ 0xD7, 2048 },
	{ 0xD8, 4096 },
	{ 0xDC, 1536 },
	{ 0xDD, 3 * 1024 },
	{ 0xDE, 6 * 1024 },
	{ 0xE2, 2048 },
	{ 0xE3, 4096 },
	{ 0xE4, 8 * 1024 },
	{ 0xEA, 12 * 1024 },
	{ 0xEB, 18 * 1024 },
	{ 0xEC, 24 * 1024 }
};
|
||||
|
||||
/*******************************************************************************/
|
||||
static bool cpuid(unsigned long function, CpuIdResult_t& out)
|
||||
{
|
||||
int pCPUInfo[4];
|
||||
__cpuid(pCPUInfo, (int)function);
|
||||
out.eax = pCPUInfo[0];
|
||||
out.ebx = pCPUInfo[1];
|
||||
out.ecx = pCPUInfo[2];
|
||||
out.edx = pCPUInfo[3];
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool cpuidex(unsigned long function, unsigned long subfunction, CpuIdResult_t& out)
|
||||
{
|
||||
int pCPUInfo[4];
|
||||
__cpuidex(pCPUInfo, (int)function, (int)subfunction);
|
||||
out.eax = pCPUInfo[0];
|
||||
out.ebx = pCPUInfo[1];
|
||||
out.ecx = pCPUInfo[2];
|
||||
out.edx = pCPUInfo[3];
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// By-value convenience wrapper: returns the CPUID registers for 'function',
// or an all-zero result when the underlying query reports failure.
static CpuIdResult_t cpuid(unsigned long function)
{
	CpuIdResult_t result;
	const bool bQueried = cpuid(function, result);
	if (!bQueried)
		result.Reset();

	return result;
}
|
||||
|
||||
// By-value convenience wrapper: returns the CPUID registers for the given
// leaf/sub-leaf, or an all-zero result when the underlying query reports failure.
static CpuIdResult_t cpuidex(unsigned long function, unsigned long subfunction)
{
	CpuIdResult_t result;
	const bool bQueried = cpuidex(function, subfunction, result);
	if (!bQueried)
		result.Reset();

	return result;
}
|
||||
|
||||
/*******************************************************************************/
|
||||
static bool CheckSSETechnology(void)
|
||||
{
|
||||
return (cpuid(1).edx & 0x2000000L) != 0;
|
||||
}
|
||||
|
||||
static bool CheckSSE2Technology(void)
|
||||
{
|
||||
return (cpuid(1).edx & 0x04000000) != 0;
|
||||
}
|
||||
|
||||
bool CheckSSE3Technology(void)
|
||||
{
|
||||
return (cpuid(1).ecx & 0x00000001) != 0; // bit 1 of ECX.
|
||||
}
|
||||
|
||||
bool CheckSSSE3Technology(void)
|
||||
{
|
||||
// SSSE 3 is implemented by both Intel and AMD.
|
||||
// Detection is done the same way for both vendors.
|
||||
return (cpuid(1).ecx & (1 << 9)) != 0; // bit 9 of ECX.
|
||||
}
|
||||
|
||||
bool CheckSSE41Technology(void)
|
||||
{
|
||||
// SSE 4.1 is implemented by both Intel and AMD.
|
||||
// Detection is done the same way for both vendors.
|
||||
|
||||
return (cpuid(1).ecx & (1 << 19)) != 0; // bit 19 of ECX.
|
||||
}
|
||||
|
||||
bool CheckSSE42Technology(void)
|
||||
{
|
||||
// SSE4.2 is an Intel-only feature.
|
||||
|
||||
const char* pchVendor = GetProcessorVendorId();
|
||||
if (0 != _stricmp(pchVendor, "GenuineIntel"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return (cpuid(1).ecx & (1 << 20)) != 0; // bit 20 of ECX.
|
||||
}
|
||||
|
||||
|
||||
bool CheckSSE4aTechnology(void)
|
||||
{
|
||||
// SSE 4a is an AMD-only feature.
|
||||
|
||||
const char* pchVendor = GetProcessorVendorId();
|
||||
if (0 != _stricmp(pchVendor, "AuthenticAMD"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return (cpuid(1).ecx & (1 << 6)) != 0; // bit 6 of ECX.
|
||||
}
|
||||
|
||||
|
||||
static bool Check3DNowTechnology(void)
|
||||
{
|
||||
if (cpuid(0x80000000).eax > 0x80000000L)
|
||||
{
|
||||
return (cpuid(0x80000001).eax & (1 << 31)) != 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool CheckCMOVTechnology(void)
|
||||
{
|
||||
return (cpuid(1).edx & (1 << 15)) != 0;
|
||||
}
|
||||
|
||||
static bool CheckFCMOVTechnology(void)
|
||||
{
|
||||
return (cpuid(1).edx & (1 << 16)) != 0;
|
||||
}
|
||||
|
||||
static bool CheckRDTSCTechnology(void)
|
||||
{
|
||||
return (cpuid(1).edx & 0x10) != 0;
|
||||
}
|
||||
|
||||
// Return the Processor's vendor identification string, or "Generic_x86" if it doesn't exist on this CPU.
|
||||
const char* GetProcessorVendorId(void)
|
||||
{
|
||||
if (s_bCpuVendorIdInitialized)
|
||||
{
|
||||
return s_CpuVendorID;
|
||||
}
|
||||
|
||||
s_bCpuVendorIdInitialized = true;
|
||||
|
||||
CpuIdResult_t cpuid0 = cpuid(0);
|
||||
|
||||
memset(s_CpuVendorID, 0, sizeof(s_CpuVendorID));
|
||||
|
||||
if (!cpuid0.eax)
|
||||
{
|
||||
strcpy(s_CpuVendorID, ("Generic_x86"));
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(s_CpuVendorID + 0, &(cpuid0.ebx), sizeof(cpuid0.ebx));
|
||||
memcpy(s_CpuVendorID + 4, &(cpuid0.edx), sizeof(cpuid0.edx));
|
||||
memcpy(s_CpuVendorID + 8, &(cpuid0.ecx), sizeof(cpuid0.ecx));
|
||||
}
|
||||
|
||||
return s_CpuVendorID;
|
||||
}
|
||||
|
||||
const char* GetProcessorBrand(void)
|
||||
{
|
||||
if (s_bCpuBrandInitialized)
|
||||
{
|
||||
return s_CpuBrand.name;
|
||||
}
|
||||
s_bCpuBrandInitialized = true;
|
||||
|
||||
memset(&s_CpuBrand, 0, sizeof(s_CpuBrand));
|
||||
|
||||
const char* pchVendor = GetProcessorVendorId();
|
||||
if (0 == _stricmp(pchVendor, "GenuineIntel"))
|
||||
{
|
||||
// Intel brand string.
|
||||
if (cpuid(0x80000000).eax >= 0x80000004)
|
||||
{
|
||||
s_CpuBrand.cpuid[0] = cpuid(0x80000002);
|
||||
s_CpuBrand.cpuid[1] = cpuid(0x80000003);
|
||||
s_CpuBrand.cpuid[2] = cpuid(0x80000004);
|
||||
}
|
||||
}
|
||||
return s_CpuBrand.name;
|
||||
}
|
||||
|
||||
/*******************************************************************************/
|
||||
// Returns non-zero if Hyper-Threading Technology is supported on the processors and zero if not.
|
||||
// If it's supported, it does not mean that it's been enabled. So we test another flag to see if it's enabled
|
||||
// See Intel Processor Identification and the CPUID instruction Application Note 485.
|
||||
// http://www.intel.com/Assets/PDF/appnote/241618.pdf
|
||||
static bool HTSupported(void)
|
||||
{
|
||||
enum {
|
||||
HT_BIT = 0x10000000,// EDX[28] - Bit 28 set indicates Hyper-Threading Technology is supported in hardware.
|
||||
FAMILY_ID = 0x0f00, // EAX[11:8] - Bit 11 thru 8 contains family processor id.
|
||||
EXT_FAMILY_ID = 0x0f00000, // EAX[23:20] - Bit 23 thru 20 contains extended family processor id.
|
||||
FAMILY_ID_386 = 0x0300,
|
||||
FAMILY_ID_486 = 0x0400, // EAX[8:12] - 486, 487 and overdrive.
|
||||
FAMILY_ID_PENTIUM = 0x0500, // Pentium, Pentium OverDrive 60 - 200.
|
||||
FAMILY_ID_PENTIUM_PRO = 0x0600, // P Pro, P II, P III, P M, Celeron M, Core Duo, Core Solo, Core2 Duo, Core2 Extreme, P D, Xeon model F,
|
||||
// also 45-nm : Intel Atom, Core i7, Xeon MP ; see Intel Processor Identification and the CPUID instruction pg 20,21.
|
||||
FAMILY_ID_EXTENDED = 0x0F00 // P IV, Xeon, Celeron D, P D, .
|
||||
};
|
||||
|
||||
// This works on both newer AMD and Intel CPUs.
|
||||
CpuIdResult_t cpuid1 = cpuid(1);
|
||||
|
||||
// Previously, we detected P4 specifically; now, we detect GenuineIntel with HT enabled in general.
|
||||
// if (((cpuid1.eax & FAMILY_ID) == FAMILY_ID_EXTENDED) || (cpuid1.eax & EXT_FAMILY_ID))
|
||||
|
||||
// Check to see if this is an Intel Processor with HT or CMT capability , and if HT/CMT is enabled.
|
||||
// ddk: This codef is actually correct: see example code at http://software.intel.com/en-us/articles/multi-core-detect/
|
||||
return (cpuid1.edx & HT_BIT) != 0 && // Genuine Intel Processor with Hyper-Threading Technology implemented.
|
||||
((cpuid1.ebx >> 16) & 0xFF) > 1; // Hyper-Threading OR Core Multi-Processing has been enabled.
|
||||
}
|
||||
|
||||
// Returns the number of logical processors per physical processors.
|
||||
static uint8_t LogicalProcessorsPerPackage(void)
|
||||
{
|
||||
// EBX[23:16] indicate number of logical processors per package.
|
||||
const unsigned NUM_LOGICAL_BITS = 0x00FF0000;
|
||||
|
||||
if (!HTSupported())
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
return (uint8_t)((cpuid(1).ebx & NUM_LOGICAL_BITS) >> 16);
|
||||
}
|
||||
|
||||
// Measure the processor clock speed by sampling the cycle count, waiting
|
||||
// for some fraction of a second, then measuring the elapsed number of cycles.
|
||||
static int64_t CalculateClockSpeed(void)
|
||||
{
|
||||
LARGE_INTEGER waitTime, startCount, curCount;
|
||||
CCycleCount start, end;
|
||||
|
||||
// Take 1/32 of a second for the measurement.
|
||||
QueryPerformanceFrequency(&waitTime);
|
||||
int scale = 5;
|
||||
waitTime.QuadPart >>= scale;
|
||||
|
||||
QueryPerformanceCounter(&startCount);
|
||||
start.Sample();
|
||||
do
|
||||
{
|
||||
QueryPerformanceCounter(&curCount);
|
||||
} while (curCount.QuadPart - startCount.QuadPart < waitTime.QuadPart);
|
||||
end.Sample();
|
||||
|
||||
return (end.GetLongCycles() - start.GetLongCycles()) << scale;
|
||||
}
|
||||
|
||||
static void FindIntelCacheDesc(uint8_t nDesc, const IntelCacheDesc_t* pDesc, int nDescCount, uint32_t& nCache, uint32_t& nCacheDesc)
|
||||
{
|
||||
for (int i = 0; i < nDescCount; ++i)
|
||||
{
|
||||
if (pDesc->nDesc == nDesc)
|
||||
{
|
||||
nCache = pDesc->nCacheSize;
|
||||
nCacheDesc = nDesc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// See "Output of the CPUID instruction" from Intel, page 26.
|
||||
static void InterpretIntelCacheDescriptors(uint32_t nPackedDesc)
|
||||
{
|
||||
if (nPackedDesc & 0x80000000)
|
||||
{
|
||||
return; // This is a wrong descriptor.
|
||||
}
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
FindIntelCacheDesc(nPackedDesc & 0xFF, s_IntelL1DataCacheDesc, ARRAYSIZE(s_IntelL1DataCacheDesc), s_cpuInformation.m_nL1CacheSizeKb, s_cpuInformation.m_nL1CacheDesc);
|
||||
FindIntelCacheDesc(nPackedDesc & 0xFF, s_IntelL2DataCacheDesc, ARRAYSIZE(s_IntelL2DataCacheDesc), s_cpuInformation.m_nL2CacheSizeKb, s_cpuInformation.m_nL2CacheDesc);
|
||||
FindIntelCacheDesc(nPackedDesc & 0xFF, s_IntelL3DataCacheDesc, ARRAYSIZE(s_IntelL3DataCacheDesc), s_cpuInformation.m_nL3CacheSizeKb, s_cpuInformation.m_nL3CacheDesc);
|
||||
nPackedDesc >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const CPUInformation& GetCPUInformation(void)
|
||||
{
|
||||
CPUInformation& pi = s_cpuInformation;
|
||||
// Has the structure already been initialized and filled out?
|
||||
if (pi.m_Size == sizeof(pi))
|
||||
{
|
||||
return pi;
|
||||
}
|
||||
|
||||
// Redundant, but just in case the user somehow messes with the size.
|
||||
memset(&pi, 0x0, sizeof(pi));
|
||||
|
||||
// Fill out the structure, and return it:
|
||||
pi.m_Size = sizeof(pi);
|
||||
|
||||
// Grab the processor frequency:
|
||||
pi.m_Speed = CalculateClockSpeed();
|
||||
|
||||
// Get the logical and physical processor counts:
|
||||
pi.m_nLogicalProcessors = LogicalProcessorsPerPackage();
|
||||
|
||||
bool bAuthenticAMD = (0 == _stricmp(GetProcessorVendorId(), "AuthenticAMD"));
|
||||
bool bGenuineIntel = !bAuthenticAMD && (0 == _stricmp(GetProcessorVendorId(), "GenuineIntel"));
|
||||
|
||||
SYSTEM_INFO si;
|
||||
ZeroMemory(&si, sizeof(si));
|
||||
|
||||
GetSystemInfo(&si);
|
||||
|
||||
// Fixing: si.dwNumberOfProcessors is the number of logical processors according to experiments on i7, P4 and a DirectX sample (Aug'09).
|
||||
// This is contrary to MSDN documentation on GetSystemInfo().
|
||||
pi.m_nLogicalProcessors = si.dwNumberOfProcessors;
|
||||
|
||||
if (bAuthenticAMD)
|
||||
{
|
||||
// Quick fix for AMD Phenom: it reports 3 logical cores and 4 physical cores;
|
||||
// No AMD CPUs by the end of 2009 have HT, so we'll override HT detection here.
|
||||
pi.m_nPhysicalProcessors = pi.m_nLogicalProcessors;
|
||||
}
|
||||
else
|
||||
{
|
||||
CpuTopology topo;
|
||||
pi.m_nPhysicalProcessors = topo.NumberOfSystemCores();
|
||||
}
|
||||
|
||||
// Make sure I always report at least one, when running WinXP with the /ONECPU switch,
|
||||
// it likes to report 0 processors for some reason.
|
||||
if (pi.m_nPhysicalProcessors == 0 && pi.m_nLogicalProcessors == 0)
|
||||
{
|
||||
Assert(!"Missing CPU detection code for this processor.");
|
||||
pi.m_nPhysicalProcessors = 1;
|
||||
pi.m_nLogicalProcessors = 1;
|
||||
}
|
||||
|
||||
CpuIdResult_t cpuid0 = cpuid(0);
|
||||
if (cpuid0.eax >= 1)
|
||||
{
|
||||
CpuIdResult_t cpuid1 = cpuid(1);
|
||||
uint32_t bFPU = cpuid1.edx & 1; // This should always be on on anything we support.
|
||||
// Determine Processor Features:
|
||||
pi.m_bRDTSC = (cpuid1.edx >> 4) & 1;
|
||||
pi.m_bCMOV = (cpuid1.edx >> 15) & 1;
|
||||
pi.m_bFCMOV = (pi.m_bCMOV && bFPU) ? 1 : 0;
|
||||
pi.m_bMMX = (cpuid1.edx >> 23) & 1;
|
||||
pi.m_bSSE = (cpuid1.edx >> 25) & 1;
|
||||
pi.m_bSSE2 = (cpuid1.edx >> 26) & 1;
|
||||
pi.m_bSSE3 = cpuid1.ecx & 1;
|
||||
pi.m_bSSSE3 = (cpuid1.ecx >> 9) & 1;;
|
||||
pi.m_bSSE4a = CheckSSE4aTechnology();
|
||||
pi.m_bSSE41 = (cpuid1.ecx >> 19) & 1;
|
||||
pi.m_bSSE42 = (cpuid1.ecx >> 20) & 1;
|
||||
pi.m_b3DNow = Check3DNowTechnology();
|
||||
pi.m_bAVX = (cpuid1.ecx >> 28) & 1;
|
||||
pi.m_szProcessorID = (char*)GetProcessorVendorId();
|
||||
pi.m_szProcessorBrand = (char*)GetProcessorBrand();
|
||||
pi.m_bHT = (pi.m_nPhysicalProcessors < pi.m_nLogicalProcessors); //HTSupported();
|
||||
|
||||
pi.m_nModel = cpuid1.eax; // Full CPU model info.
|
||||
pi.m_nFeatures[0] = cpuid1.edx; // x87+ features.
|
||||
pi.m_nFeatures[1] = cpuid1.ecx; // sse3+ features.
|
||||
pi.m_nFeatures[2] = cpuid1.ebx; // Some additional features.
|
||||
|
||||
if (bGenuineIntel)
|
||||
{
|
||||
if (cpuid0.eax >= 4)
|
||||
{
|
||||
// We have CPUID.4, use it to find all the cache parameters.
|
||||
const uint32_t nCachesToQuery = 4; // Level 0 is not used.
|
||||
uint32_t nCacheSizeKiB[nCachesToQuery];
|
||||
for (uint32_t i = 0; i < nCachesToQuery; ++i)
|
||||
{
|
||||
nCacheSizeKiB[i] = 0;
|
||||
}
|
||||
for (unsigned long nSub = 0; nSub < 1024; ++nSub)
|
||||
{
|
||||
CpuIdResult_t cpuid4 = cpuidex(4, nSub);
|
||||
uint32_t nCacheType = cpuid4.eax & 0x1F;
|
||||
if (nCacheType == 0)
|
||||
{
|
||||
// No more caches.
|
||||
break;
|
||||
}
|
||||
if (nCacheType & 1)
|
||||
{
|
||||
// This cache includes data cache: it's either data or unified. Instuction cache type is 2.
|
||||
uint32_t nCacheLevel = (cpuid4.eax >> 5) & 7;
|
||||
if (nCacheLevel < nCachesToQuery)
|
||||
{
|
||||
uint32_t nCacheWays = 1 + ((cpuid4.ebx >> 22) & 0x3F);
|
||||
uint32_t nCachePartitions = 1 + ((cpuid4.ebx >> 12) & 0x3F);
|
||||
uint32_t nCacheLineSize = 1 + (cpuid4.ebx & 0xFF);
|
||||
uint32_t nCacheSets = 1 + cpuid4.ecx;
|
||||
uint32_t nCacheSizeBytes = nCacheWays * nCachePartitions * nCacheLineSize * nCacheSets;
|
||||
nCacheSizeKiB[nCacheLevel] = nCacheSizeBytes >> 10;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pi.m_nL1CacheSizeKb = nCacheSizeKiB[1];
|
||||
pi.m_nL2CacheSizeKb = nCacheSizeKiB[2];
|
||||
pi.m_nL3CacheSizeKb = nCacheSizeKiB[3];
|
||||
}
|
||||
else if (cpuid0.eax >= 2)
|
||||
{
|
||||
// Get the cache.
|
||||
CpuIdResult_t cpuid2 = cpuid(2);
|
||||
for (int i = (cpuid2.eax & 0xFF); i-- > 0; )
|
||||
{
|
||||
InterpretIntelCacheDescriptors(cpuid2.eax & ~0xFF);
|
||||
InterpretIntelCacheDescriptors(cpuid2.ebx);
|
||||
InterpretIntelCacheDescriptors(cpuid2.ecx);
|
||||
InterpretIntelCacheDescriptors(cpuid2.edx);
|
||||
cpuid2 = cpuid(2); // Read the next.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CpuIdResult_t cpuid0ex = cpuid(0x80000000);
|
||||
if (bAuthenticAMD)
|
||||
{
|
||||
if (cpuid0ex.eax >= 0x80000005)
|
||||
{
|
||||
CpuIdResult_t cpuid5ex = cpuid(0x80000005);
|
||||
pi.m_nL1CacheSizeKb = cpuid5ex.ecx >> 24;
|
||||
pi.m_nL1CacheDesc = cpuid5ex.ecx & 0xFFFFFF;
|
||||
}
|
||||
if (cpuid0ex.eax >= 0x80000006)
|
||||
{
|
||||
CpuIdResult_t cpuid6ex = cpuid(0x80000006);
|
||||
pi.m_nL2CacheSizeKb = cpuid6ex.ecx >> 16;
|
||||
pi.m_nL2CacheDesc = cpuid6ex.ecx & 0xFFFF;
|
||||
pi.m_nL3CacheSizeKb = (cpuid6ex.edx >> 18) * 512;
|
||||
pi.m_nL3CacheDesc = cpuid6ex.edx & 0xFFFF;
|
||||
}
|
||||
}
|
||||
else if (bGenuineIntel)
|
||||
{
|
||||
if (cpuid0ex.eax >= 0x80000006)
|
||||
{
|
||||
// Make sure we got the L2 cache info right.
|
||||
pi.m_nL2CacheSizeKb = (cpuid(0x80000006).ecx >> 16);
|
||||
}
|
||||
}
|
||||
return pi;
|
||||
}
|
21
r5dev/tier0/cpu.h
Normal file
21
r5dev/tier0/cpu.h
Normal file
@ -0,0 +1,21 @@
|
||||
//=============================================================================//
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
// $NoKeywords: $
|
||||
//=============================================================================//
|
||||
#ifndef CPU_H
|
||||
#define CPU_H
|
||||
|
||||
bool CheckSSE3Technology(void);
|
||||
bool CheckSSSE3Technology(void);
|
||||
bool CheckSSE41Technology(void);
|
||||
bool CheckSSE42Technology(void);
|
||||
bool CheckSSE4aTechnology(void);
|
||||
|
||||
const char* GetProcessorVendorId(void);
|
||||
const char* GetProcessorBrand(void);
|
||||
|
||||
const CPUInformation& GetCPUInformation(void);
|
||||
|
||||
#endif // CPU_H
|
997
r5dev/tier0/cputopology.cpp
Normal file
997
r5dev/tier0/cputopology.cpp
Normal file
@ -0,0 +1,997 @@
|
||||
//-------------------------------------------------------------------------------------
|
||||
// CpuTopology.cpp
|
||||
//
|
||||
// CpuTopology class implementation.
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
//-------------------------------------------------------------------------------------
|
||||
#include "core/stdafx.h"
|
||||
|
||||
#if defined(_WIN32) && !defined(_X360) && !defined(_PS3)
|
||||
#include "tier0/cputopology.h"
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// Name: ICpuTopology
|
||||
// Desc: Specifies the interface that each class that provides an implementation
|
||||
// for extracting cpu topology must conform to. This is the Implementor
|
||||
// class in the traditional Bridge Pattern.
|
||||
//---------------------------------------------------------------------------------
|
||||
class ICpuTopology
|
||||
{
|
||||
public:
|
||||
virtual ~ICpuTopology()
|
||||
{
|
||||
}
|
||||
virtual BOOL IsDefaultImpl() const = 0;
|
||||
virtual DWORD NumberOfProcessCores() const = 0;
|
||||
virtual DWORD NumberOfSystemCores() const = 0;
|
||||
virtual DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const = 0;
|
||||
};
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
// Local Class Definitions
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// Name: DefaultImpl
|
||||
// Desc: Provides a default implementation for the ICpuTopology interface when
|
||||
// GetLogicalProcessorInformation and CPUID are not supported for whatever
|
||||
// reason. This is a ConcreteImplementor class in the traditional Bridge
|
||||
// Pattern.
|
||||
//---------------------------------------------------------------------------------
|
||||
class DefaultImpl : public ICpuTopology
|
||||
{
|
||||
public:
|
||||
//-----------------------------------------------------------------------------
|
||||
// DefaultImpl::IsDefaultImpl
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ BOOL IsDefaultImpl() const
|
||||
{
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// DefaultImpl::NumberOfProcessCores
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD NumberOfProcessCores() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// DefaultImpl::IsNumberOfSystemCores
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD NumberOfSystemCores() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// DefaultImpl::CoreAffinityMask
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const
|
||||
{
|
||||
DWORD_PTR coreAffinity = 0;
|
||||
if( 1 == coreIdx )
|
||||
{
|
||||
DWORD_PTR dwSystemAffinity;
|
||||
GetProcessAffinityMask( GetCurrentProcess(), &coreAffinity, &dwSystemAffinity );
|
||||
}
|
||||
return coreAffinity;
|
||||
}
|
||||
};
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// Name: GlpiImpl
|
||||
// Desc: Provides the GetLogicalProcessorInformation implementation for the
|
||||
// ICpuTopology interface. This is a ConcreteImplementor class in the
|
||||
// traditional Bridge Pattern.
|
||||
//---------------------------------------------------------------------------------
|
||||
class GlpiImpl : public ICpuTopology
|
||||
{
|
||||
public:
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::GlpiImpl
|
||||
// Desc: Initializes the internal structures/data with information retrieved
|
||||
// from a call to GetLogicalProcessorInformation.
|
||||
//-----------------------------------------------------------------------------
|
||||
GlpiImpl() : m_pSlpi( NULL ),
|
||||
m_nItems( 0 )
|
||||
{
|
||||
_ASSERT( IsSupported() );
|
||||
|
||||
GlpiFnPtr pGlpi = GetGlpiFn_();
|
||||
_ASSERT( pGlpi );
|
||||
|
||||
DWORD cbBuffer = 0;
|
||||
pGlpi( 0, &cbBuffer );
|
||||
|
||||
m_pSlpi = ( SYSTEM_LOGICAL_PROCESSOR_INFORMATION* )malloc( cbBuffer );
|
||||
pGlpi( m_pSlpi, &cbBuffer );
|
||||
m_nItems = cbBuffer / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION );
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::~GlpiImpl
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ ~GlpiImpl()
|
||||
{
|
||||
free( m_pSlpi );
|
||||
m_pSlpi = 0;
|
||||
m_nItems = 0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::IsDefaultImpl
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ BOOL IsDefaultImpl() const
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::NumberOfProcessCores
|
||||
// Desc: Gets the total number of physical processor cores available to the
|
||||
// current process.
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD NumberOfProcessCores() const
|
||||
{
|
||||
DWORD_PTR dwProcessAffinity, dwSystemAffinity;
|
||||
GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity );
|
||||
|
||||
DWORD nCores = 0;
|
||||
for( DWORD i = 0; i < m_nItems; ++i )
|
||||
{
|
||||
if( ( RelationProcessorCore == m_pSlpi[i].Relationship ) &&
|
||||
( m_pSlpi[i].ProcessorMask & dwProcessAffinity ) )
|
||||
{
|
||||
++nCores;
|
||||
}
|
||||
}
|
||||
return nCores;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::NumberOfSystemCores
|
||||
// Desc: Gets the total number of physical processor cores enabled on the
|
||||
// system.
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD NumberOfSystemCores() const
|
||||
{
|
||||
DWORD nCores = 0;
|
||||
for( DWORD i = 0; i < m_nItems; ++i )
|
||||
{
|
||||
if( RelationProcessorCore == m_pSlpi[i].Relationship )
|
||||
++nCores;
|
||||
}
|
||||
return nCores;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::CoreAffinityMask
|
||||
// Desc: Gets an affinity mask that corresponds to the requested processor
|
||||
// core.
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const
|
||||
{
|
||||
DWORD_PTR dwProcessAffinity, dwSystemAffinity;
|
||||
GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity );
|
||||
|
||||
for( DWORD i = 0; i < m_nItems; ++i )
|
||||
{
|
||||
if( RelationProcessorCore == m_pSlpi[i].Relationship )
|
||||
{
|
||||
if( !coreIdx-- )
|
||||
{
|
||||
return m_pSlpi[i].ProcessorMask & dwProcessAffinity;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::IsSupported
|
||||
//-----------------------------------------------------------------------------
|
||||
static BOOL IsSupported()
|
||||
{
|
||||
return NULL != GetGlpiFn_();
|
||||
}
|
||||
|
||||
private:
|
||||
// GetLogicalProcessorInformation function pointer
|
||||
typedef BOOL( WINAPI* GlpiFnPtr )(
|
||||
SYSTEM_LOGICAL_PROCESSOR_INFORMATION*,
|
||||
PDWORD
|
||||
);
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::VerifyGlpiFn_
|
||||
// Desc: Gets a pointer to the GetLogicalProcessorInformation function only if
|
||||
// it is supported on the current platform.
|
||||
// GetLogicalProcessorInformation is supported on Windows Server 2003 and
|
||||
// XP64, however there is a bug with the implementation. Therefore, only
|
||||
// GetLogicalProcessorInformation on Windows Vista is supported in this
|
||||
// sample.
|
||||
//-----------------------------------------------------------------------------
|
||||
static GlpiFnPtr VerifyGlpiFn_()
|
||||
{
|
||||
// VerifyVersionInfo function pointer
|
||||
typedef BOOL ( WINAPI* VviFnPtr )( LPOSVERSIONINFOEX,
|
||||
DWORD,
|
||||
DWORDLONG );
|
||||
|
||||
HMODULE hMod = GetModuleHandle( TEXT( "kernel32" ) );
|
||||
#ifdef _UNICODE
|
||||
VviFnPtr pVvi = (VviFnPtr) GetProcAddress( hMod, "VerifyVersionInfoW" );
|
||||
#else
|
||||
VviFnPtr pVvi = ( VviFnPtr )GetProcAddress( hMod, "VerifyVersionInfoA" );
|
||||
#endif
|
||||
GlpiFnPtr pGlpi = NULL;
|
||||
|
||||
if( pVvi )
|
||||
{
|
||||
// VerSetConditionMask function pointer
|
||||
typedef ULONGLONG ( WINAPI* VscmFnPtr )( ULONGLONG,
|
||||
DWORD,
|
||||
BYTE );
|
||||
|
||||
VscmFnPtr pVscm = ( VscmFnPtr )GetProcAddress( hMod, "VerSetConditionMask" );
|
||||
|
||||
_ASSERT( pVscm );
|
||||
|
||||
// Check for Windows Vista
|
||||
OSVERSIONINFOEX osvi = { sizeof( OSVERSIONINFOEX ) };
|
||||
osvi.dwMajorVersion = 6;
|
||||
osvi.dwMinorVersion = 0;
|
||||
osvi.wServicePackMajor = 0;
|
||||
osvi.wServicePackMinor = 0;
|
||||
|
||||
ULONGLONG dwlMask = 0;
|
||||
dwlMask = pVscm( dwlMask, VER_MAJORVERSION, VER_GREATER_EQUAL );
|
||||
dwlMask = pVscm( dwlMask, VER_MINORVERSION, VER_GREATER_EQUAL );
|
||||
dwlMask = pVscm( dwlMask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL );
|
||||
dwlMask = pVscm( dwlMask, VER_SERVICEPACKMINOR, VER_GREATER_EQUAL );
|
||||
|
||||
if( pVvi( &osvi, VER_MAJORVERSION
|
||||
| VER_MINORVERSION
|
||||
| VER_SERVICEPACKMAJOR
|
||||
| VER_SERVICEPACKMINOR,
|
||||
dwlMask ) )
|
||||
{
|
||||
pGlpi = ( GlpiFnPtr )GetProcAddress( hMod, "GetLogicalProcessorInformation" );
|
||||
_ASSERT( pGlpi );
|
||||
}
|
||||
}
|
||||
|
||||
return pGlpi;
|
||||
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: GlpiImpl::GetGlpiFn_
|
||||
// Desc: Gets a cached pointer to the GetLogicalProcessorInformation function.
|
||||
//-----------------------------------------------------------------------------
|
||||
static GlpiFnPtr GetGlpiFn_()
|
||||
{
|
||||
static GlpiFnPtr pGlpi = VerifyGlpiFn_();
|
||||
return pGlpi;
|
||||
}
|
||||
|
||||
// Private Members
|
||||
SYSTEM_LOGICAL_PROCESSOR_INFORMATION* m_pSlpi;
|
||||
DWORD m_nItems;
|
||||
};
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// Name: ApicExtractor
|
||||
// Desc: A utility class that provides an interface for decoding a processor
|
||||
// APIC ID. An APIC ID is an 8-bit identifier given to each logical
|
||||
// processor on system boot and can be retrieved by the CPUID instruction.
|
||||
// Each APIC ID is composed of a PACKAGE_ID, CORE_ID and SMT_ID that describe
|
||||
// the relationship of a logical processor within the processor topology of
|
||||
// the system.
|
||||
//---------------------------------------------------------------------------------
|
||||
class ApicExtractor
|
||||
{
|
||||
public:
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::ApicExtractor
|
||||
//-----------------------------------------------------------------------------
|
||||
ApicExtractor( DWORD nLogProcsPerPkg = 1, DWORD nCoresPerPkg = 1 )
|
||||
{
|
||||
SetPackageTopology( nLogProcsPerPkg, nCoresPerPkg );
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::SmtId
|
||||
//-----------------------------------------------------------------------------
|
||||
BYTE SmtId( BYTE apicId ) const
|
||||
{
|
||||
return apicId & m_smtIdMask.mask;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::CoreId
|
||||
//-----------------------------------------------------------------------------
|
||||
BYTE CoreId( BYTE apicId ) const
|
||||
{
|
||||
return ( apicId & m_coreIdMask.mask ) >> m_smtIdMask.width;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::PackageId
|
||||
//-----------------------------------------------------------------------------
|
||||
BYTE PackageId( BYTE apicId ) const
|
||||
{
|
||||
return ( apicId & m_pkgIdMask.mask ) >>
|
||||
( m_smtIdMask.width + m_coreIdMask.width );
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::PackageCoreId
|
||||
//-----------------------------------------------------------------------------
|
||||
BYTE PackageCoreId( BYTE apicId ) const
|
||||
{
|
||||
return ( apicId & ( m_pkgIdMask.mask | m_coreIdMask.mask ) ) >>
|
||||
m_smtIdMask.width;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::GetLogProcsPerPkg
|
||||
//-----------------------------------------------------------------------------
|
||||
DWORD GetLogProcsPerPkg() const
|
||||
{
|
||||
return m_nLogProcsPerPkg;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::GetCoresPerPkg
|
||||
//-----------------------------------------------------------------------------
|
||||
DWORD GetCoresPerPkg() const
|
||||
{
|
||||
return m_nCoresPerPkg;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::SetPackageTopology
|
||||
// Desc: You should call SetPackageTopology with the number of logical
|
||||
// processors per package and number of cores per package before calling
|
||||
// the sub id accessors (SmtId(), CoreId(), PackageId(), PackageCoreId())
|
||||
// as this information is required to effectively decode an APIC ID into
|
||||
// its sub parts.
|
||||
//-----------------------------------------------------------------------------
|
||||
void SetPackageTopology( DWORD nLogProcsPerPkg, DWORD nCoresPerPkg )
|
||||
{
|
||||
m_nLogProcsPerPkg = ( BYTE )nLogProcsPerPkg;
|
||||
m_nCoresPerPkg = ( BYTE )nCoresPerPkg;
|
||||
|
||||
// fix for Phenom x3 and similar CPUs - it reports 3 logical processors per package, and 4 cores per package
|
||||
// so one core is probably just disabled for yield, but it causes a bug in GetMaskWidth that propagates
|
||||
if( m_nCoresPerPkg > m_nLogProcsPerPkg )
|
||||
{
|
||||
m_nCoresPerPkg = m_nLogProcsPerPkg;
|
||||
}
|
||||
|
||||
m_smtIdMask.width = GetMaskWidth_( m_nLogProcsPerPkg / m_nCoresPerPkg );
|
||||
m_coreIdMask.width = GetMaskWidth_( m_nCoresPerPkg );
|
||||
m_pkgIdMask.width = 8 - ( m_smtIdMask.width + m_coreIdMask.width );
|
||||
|
||||
m_pkgIdMask.mask = ( BYTE )( 0xFF << ( m_smtIdMask.width + m_coreIdMask.width ) );
|
||||
m_coreIdMask.mask = ( BYTE )( ( 0xFF << m_smtIdMask.width ) ^ m_pkgIdMask.mask );
|
||||
m_smtIdMask.mask = ( BYTE )~( 0xFF << m_smtIdMask.width );
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ApicExtractor::GetMaskWidth_
|
||||
// Desc: Gets the width of a sub id bit field in an APIC ID. The width of a
|
||||
// sub id (CORE_ID, SMT_ID) is only wide enough to support the maximum
|
||||
// number of ids that needs to be represented in the topology.
|
||||
//-----------------------------------------------------------------------------
|
||||
static BYTE GetMaskWidth_( BYTE maxIds )
|
||||
{
|
||||
--maxIds;
|
||||
|
||||
// find index of msb
|
||||
BYTE msbIdx = 8;
|
||||
BYTE msbMask = 0x80;
|
||||
while( msbMask && !( msbMask & maxIds ) )
|
||||
{
|
||||
--msbIdx;
|
||||
msbMask >>= 1;
|
||||
}
|
||||
return msbIdx;
|
||||
}
|
||||
|
||||
struct IdMask
|
||||
{
|
||||
BYTE width;
|
||||
BYTE mask;
|
||||
};
|
||||
|
||||
// Private Members
|
||||
BYTE m_nLogProcsPerPkg;
|
||||
BYTE m_nCoresPerPkg;
|
||||
IdMask m_smtIdMask;
|
||||
IdMask m_coreIdMask;
|
||||
IdMask m_pkgIdMask;
|
||||
};
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// Name: Cpuid
|
||||
// Desc: A utility class that wraps the functionality of the CPUID instruction.
|
||||
// Call the Call() method with the desired CPUID function, and use the
|
||||
// register accessors to retrieve the register values.
|
||||
//---------------------------------------------------------------------------------
|
||||
class Cpuid
|
||||
{
|
||||
public:
|
||||
// FnSet values are used to indicate a CPUID function set.
|
||||
enum FnSet
|
||||
{
|
||||
Std = 0x00000000,
|
||||
Ext = 0x80000000
|
||||
};
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cpuid::Cpuid
|
||||
//-----------------------------------------------------------------------------
|
||||
Cpuid() : m_eax( 0 ),
|
||||
m_ebx( 0 ),
|
||||
m_ecx( 0 ),
|
||||
m_edx( 0 )
|
||||
{
|
||||
}
|
||||
|
||||
// Register accessors
|
||||
DWORD Eax() const
|
||||
{
|
||||
return m_eax;
|
||||
}
|
||||
DWORD Ebx() const
|
||||
{
|
||||
return m_ebx;
|
||||
}
|
||||
DWORD Ecx() const
|
||||
{
|
||||
return m_ecx;
|
||||
}
|
||||
DWORD Edx() const
|
||||
{
|
||||
return m_edx;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cpuid::Call
|
||||
// Desc: Calls the CPUID instruction with the specified function. Returns TRUE
|
||||
// if the CPUID function was supported, FALSE if it wasn't.
|
||||
//-----------------------------------------------------------------------------
|
||||
BOOL Call( FnSet fnSet, DWORD fn )
|
||||
{
|
||||
if( IsFnSupported( fnSet, fn ) )
|
||||
{
|
||||
UncheckedCall_( fnSet, fn );
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cpuid::IsVendor
|
||||
// Desc: Compares a string with the vendor string encoded in the CPUID
|
||||
// instruction.
|
||||
//-----------------------------------------------------------------------------
|
||||
static BOOL IsVendor( const char* strVendor )
|
||||
{
|
||||
// Cache the vendor string
|
||||
static const Cpuid cpu( Std );
|
||||
return cpu.Ebx() == *reinterpret_cast<const DWORD*>( strVendor )
|
||||
&& cpu.Ecx() == *reinterpret_cast<const DWORD*>( strVendor + 8 )
|
||||
&& cpu.Edx() == *reinterpret_cast<const DWORD*>( strVendor + 4 );
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cpuid::IsFnSupported
|
||||
// Desc: Checks to see if a CPUID function is supported. Different processors
|
||||
// support different functions. This method is automatically called from
|
||||
// the Call() method, so you don't need to call it beforehand.
|
||||
//-----------------------------------------------------------------------------
|
||||
static BOOL IsFnSupported( FnSet fnSet, DWORD fn )
|
||||
{
|
||||
// Cache the maximum supported standard function
|
||||
static const DWORD MaxStdFn = Cpuid( Std ).Eax();
|
||||
// Cache the maximum supported extended function
|
||||
static const DWORD MaxExtFn = Cpuid( Ext ).Eax();
|
||||
|
||||
bool ret = false;
|
||||
switch( fnSet )
|
||||
{
|
||||
case Std:
|
||||
ret = ( fn <= MaxStdFn );
|
||||
break;
|
||||
case Ext:
|
||||
ret = ( fn <= MaxExtFn );
|
||||
break;
|
||||
default:
|
||||
_ASSERT( 0 ); // should never get here
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
private:
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cpuid::Cpuid
|
||||
// Desc: This constructor is private and is only used to set a Cpuid object to
|
||||
// initial values retrieved from CPUID functions 0x00000000 and
|
||||
// 0x80000000. Good for caching values from the CPUID instruction that
|
||||
// are not variable, like the encoded vendor string and the maximum
|
||||
// supported CPUID function values.
|
||||
//-----------------------------------------------------------------------------
|
||||
explicit Cpuid( FnSet fnSet )
|
||||
{
|
||||
UncheckedCall_( fnSet, 0 );
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cpuid::UncheckedCall_
|
||||
// Desc: Calls the CPUID instruction without checking for CPUID function
|
||||
// support.
|
||||
//-----------------------------------------------------------------------------
|
||||
void UncheckedCall_( FnSet fnSet, DWORD fn )
|
||||
{
|
||||
#ifdef _WIN64
|
||||
int out[4];
|
||||
__cpuidex( out, fnSet | fn, 0 );
|
||||
m_eax = out[0];
|
||||
m_ebx = out[1];
|
||||
m_ecx = out[2];
|
||||
m_edx = out[3];
|
||||
#else
|
||||
__asm
|
||||
{
|
||||
mov ecx, 0
|
||||
mov eax, fn
|
||||
or eax, fnSet
|
||||
cpuid
|
||||
mov edi, this
|
||||
mov [edi].m_eax, eax
|
||||
mov [edi].m_ebx, ebx
|
||||
mov [edi].m_ecx, ecx
|
||||
mov [edi].m_edx, edx
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Private Members
|
||||
DWORD m_eax;
|
||||
DWORD m_ebx;
|
||||
DWORD m_ecx;
|
||||
DWORD m_edx;
|
||||
};
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// Name: CpuidImpl
|
||||
// Desc: Provides the CPUID instruction implementation for the ICpuTopology
|
||||
// interface. This is a ConcreteImplementor class in the traditional Bridge
|
||||
// Pattern.
|
||||
//---------------------------------------------------------------------------------
|
||||
class CpuidImpl : public ICpuTopology
|
||||
{
|
||||
public:
|
||||
// CpuidFnMasks are used when extracting bit-encoded information retrieved from
|
||||
// the CPUID instruction
|
||||
enum CpuidFnMasks
|
||||
{
|
||||
HTT = 0x10000000, // Fn0000_0001 EDX[28]
|
||||
LogicalProcessorCount = 0x00FF0000, // Fn0000_0001 EBX[23:16]
|
||||
ApicId = 0xFF000000, // Fn0000_0001 EBX[31:24]
|
||||
NC_Intel = 0xFC000000, // Fn0000_0004 EAX[31:26]
|
||||
NC_Amd = 0x000000FF, // Fn8000_0008 ECX[7:0]
|
||||
CmpLegacy_Amd = 0x00000002, // Fn8000_0001 ECX[1]
|
||||
ApicIdCoreIdSize_Amd = 0x0000F000 // Fn8000_0008 ECX[15:12]
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
MaxLogicalProcessors = sizeof( DWORD_PTR ) * 8
|
||||
};
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: CpuidImpl::CpuidImpl
|
||||
// Desc: Initializes internal structures/data with information retrieved from
|
||||
// calling the CPUID instruction.
|
||||
//-----------------------------------------------------------------------------
|
||||
CpuidImpl() : m_nItems( 0 )
|
||||
{
|
||||
_ASSERT( IsSupported() );
|
||||
|
||||
DWORD nLogProcsPerPkg = 1;
|
||||
DWORD nCoresPerPkg = 1;
|
||||
|
||||
Cpuid cpu;
|
||||
|
||||
// Determine if hardware threading is enabled.
|
||||
cpu.Call( Cpuid::Std, 1 );
|
||||
if( cpu.Edx() & HTT )
|
||||
{
|
||||
// Determine the total number of logical processors per package.
|
||||
nLogProcsPerPkg = ( cpu.Ebx() & LogicalProcessorCount ) >> 16;
|
||||
|
||||
// Determine the total number of cores per package. This info
|
||||
// is extracted differently dependending on the cpu vendor.
|
||||
if( Cpuid::IsVendor( GenuineIntel ) )
|
||||
{
|
||||
if( cpu.Call( Cpuid::Std, 4 ) )
|
||||
{
|
||||
nCoresPerPkg = ( ( cpu.Eax() & NC_Intel ) >> 26 ) + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
_ASSERT( Cpuid::IsVendor( AuthenticAMD ) );
|
||||
if( cpu.Call( Cpuid::Ext, 8 ) )
|
||||
{
|
||||
// AMD reports the msb width of the CORE_ID bit field of the APIC ID
|
||||
// in ApicIdCoreIdSize_Amd. The maximum value represented by the msb
|
||||
// width is the theoretical number of cores the processor can support
|
||||
// and not the actual number of current cores, which is how the msb width
|
||||
// of the CORE_ID bit field has been traditionally determined. If the
|
||||
// ApicIdCoreIdSize_Amd value is zero, then you use the traditional method
|
||||
// to determine the CORE_ID msb width.
|
||||
DWORD msbWidth = cpu.Ecx() & ApicIdCoreIdSize_Amd;
|
||||
if( msbWidth )
|
||||
{
|
||||
// Set nCoresPerPkg to the maximum theortical number of cores
|
||||
// the processor package can support (2 ^ width) so the APIC
|
||||
// extractor object can be configured to extract the proper
|
||||
// values from an APIC.
|
||||
nCoresPerPkg = 1 << ( msbWidth >> 12 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Set nCoresPerPkg to the actual number of cores being reported
|
||||
// by the CPUID instruction.
|
||||
nCoresPerPkg = ( cpu.Ecx() & NC_Amd ) + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Configure the APIC extractor object with the information it needs to
|
||||
// be able to decode the APIC.
|
||||
m_apicExtractor.SetPackageTopology( nLogProcsPerPkg, nCoresPerPkg );
|
||||
|
||||
DWORD_PTR dwProcessAffinity, dwSystemAffinity;
|
||||
HANDLE hProcess = GetCurrentProcess();
|
||||
HANDLE hThread = GetCurrentThread();
|
||||
GetProcessAffinityMask( hProcess, &dwProcessAffinity, &dwSystemAffinity );
|
||||
if( 1 == dwSystemAffinity )
|
||||
{
|
||||
// Since we only have 1 logical processor present on the system, we
|
||||
// can explicitly set a single APIC ID to zero.
|
||||
_ASSERT( 1 == nLogProcsPerPkg );
|
||||
m_apicIds[m_nItems++] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Set the process affinity to the system affinity if they are not
|
||||
// equal so that all logical processors can be accounted for.
|
||||
if( dwProcessAffinity != dwSystemAffinity )
|
||||
{
|
||||
SetProcessAffinityMask( hProcess, dwSystemAffinity );
|
||||
}
|
||||
|
||||
// Call cpuid on each active logical processor in the system affinity.
|
||||
DWORD_PTR dwPrevThreadAffinity = 0;
|
||||
for( DWORD_PTR dwThreadAffinity = 1;
|
||||
dwThreadAffinity && dwThreadAffinity <= dwSystemAffinity;
|
||||
dwThreadAffinity <<= 1 )
|
||||
{
|
||||
if( dwSystemAffinity & dwThreadAffinity )
|
||||
{
|
||||
if( 0 == dwPrevThreadAffinity )
|
||||
{
|
||||
// Save the previous thread affinity so we can return
|
||||
// the executing thread affinity back to this state.
|
||||
_ASSERT( 0 == m_nItems );
|
||||
dwPrevThreadAffinity = SetThreadAffinityMask( hThread,
|
||||
dwThreadAffinity );
|
||||
}
|
||||
else
|
||||
{
|
||||
_ASSERT( m_nItems > 0 );
|
||||
SetThreadAffinityMask( hThread, dwThreadAffinity );
|
||||
}
|
||||
|
||||
// Allow the thread to switch to masked logical processor.
|
||||
Sleep( 0 );
|
||||
|
||||
// Store the APIC ID
|
||||
cpu.Call( Cpuid::Std, 1 );
|
||||
m_apicIds[m_nItems++] = ( BYTE )( ( cpu.Ebx() & ApicId ) >> 24 );
|
||||
}
|
||||
}
|
||||
|
||||
// Restore the previous process and thread affinity state.
|
||||
SetProcessAffinityMask( hProcess, dwProcessAffinity );
|
||||
SetThreadAffinityMask( hThread, dwPrevThreadAffinity );
|
||||
Sleep( 0 );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: CpuidImpl::IsDefaultImpl
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ BOOL IsDefaultImpl() const
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: CpuidImpl::NumberOfProcessCores
|
||||
// Desc: Gets the number of processor cores available to the current process.
|
||||
// The total accounts for cores that may have been masked out by process
|
||||
// affinity.
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD NumberOfProcessCores() const
|
||||
{
|
||||
DWORD_PTR dwProcessAffinity, dwSystemAffinity;
|
||||
GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity );
|
||||
|
||||
BYTE pkgCoreIds[MaxLogicalProcessors] = { 0 };
|
||||
DWORD nPkgCoreIds = 0;
|
||||
|
||||
for( DWORD i = 0; i < m_nItems; ++i )
|
||||
{
|
||||
if( dwProcessAffinity & ( ( DWORD_PTR )1 << i ) )
|
||||
{
|
||||
AddUniquePkgCoreId_( i, pkgCoreIds, nPkgCoreIds );
|
||||
}
|
||||
}
|
||||
return nPkgCoreIds;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: CpuidImpl::NumberOfSystemCores
|
||||
// Desc: Gets the number of processor cores on the system.
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD NumberOfSystemCores() const
|
||||
{
|
||||
BYTE pkgCoreIds[MaxLogicalProcessors] = { 0 };
|
||||
DWORD nPkgCoreIds = 0;
|
||||
for( DWORD i = 0; i < m_nItems; ++i )
|
||||
{
|
||||
AddUniquePkgCoreId_( i, pkgCoreIds, nPkgCoreIds );
|
||||
}
|
||||
return nPkgCoreIds;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: CpuidImpl::CoreAffinityMask
|
||||
// Desc: Gets an affinity mask that corresponds to a specific processor core.
|
||||
// coreIdx must be less than the total number of processor cores
|
||||
// recognized by the operating system (NumberOfSystemCores()).
|
||||
//-----------------------------------------------------------------------------
|
||||
/*virtual*/ DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const
|
||||
{
|
||||
BYTE pkgCoreIds[MaxLogicalProcessors] = { 0 };
|
||||
DWORD nPkgCoreIds = 0;
|
||||
for( DWORD i = 0; i < m_nItems; ++i )
|
||||
{
|
||||
AddUniquePkgCoreId_( i, pkgCoreIds, nPkgCoreIds );
|
||||
}
|
||||
|
||||
DWORD_PTR dwProcessAffinity, dwSystemAffinity;
|
||||
GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity );
|
||||
|
||||
DWORD_PTR coreAffinity = 0;
|
||||
if( coreIdx < nPkgCoreIds )
|
||||
{
|
||||
for( DWORD i = 0; i < m_nItems; ++i )
|
||||
{
|
||||
if( m_apicExtractor.PackageCoreId( m_apicIds[i] ) == pkgCoreIds[coreIdx] )
|
||||
{
|
||||
coreAffinity |= ( dwProcessAffinity & ( ( DWORD_PTR )1 << i ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
return coreAffinity;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: CpuidImpl::IsSupported
|
||||
// Desc: Indicates if a CpuidImpl object is supported on this platform.
|
||||
// Support is only granted on Intel and AMD platforms where the current
|
||||
// calling process has security rights to query process affinity and
|
||||
// change it if the process and system affinity differ. CpuidImpl is
|
||||
// also not supported if thread affinity cannot be set on systems with
|
||||
// more than 1 logical processor.
|
||||
//-----------------------------------------------------------------------------
|
||||
static BOOL IsSupported()
|
||||
{
|
||||
BOOL bSupported = Cpuid::IsVendor( GenuineIntel )
|
||||
|| Cpuid::IsVendor( AuthenticAMD );
|
||||
|
||||
if( bSupported )
|
||||
{
|
||||
DWORD_PTR dwProcessAffinity, dwSystemAffinity;
|
||||
HANDLE hProcess = GetCurrentProcess();
|
||||
|
||||
// Query process affinity mask
|
||||
bSupported = GetProcessAffinityMask( hProcess, &dwProcessAffinity, &dwSystemAffinity );
|
||||
if( bSupported )
|
||||
{
|
||||
if( dwProcessAffinity != dwSystemAffinity )
|
||||
{
|
||||
// The process and system affinities differ. Attempt to set
|
||||
// the process affinity to the system affinity.
|
||||
bSupported = SetProcessAffinityMask( hProcess, dwSystemAffinity );
|
||||
if( bSupported )
|
||||
{
|
||||
// Restore previous process affinity
|
||||
bSupported = SetProcessAffinityMask( hProcess, dwProcessAffinity );
|
||||
}
|
||||
}
|
||||
|
||||
if( bSupported && ( dwSystemAffinity > 1 ) )
|
||||
{
|
||||
// Attempt to set the thread affinity
|
||||
HANDLE hThread = GetCurrentThread();
|
||||
DWORD_PTR dwThreadAffinity = SetThreadAffinityMask( hThread, dwProcessAffinity );
|
||||
if( dwThreadAffinity )
|
||||
{
|
||||
// Restore the previous thread affinity
|
||||
bSupported = 0 != SetThreadAffinityMask( hThread, dwThreadAffinity );
|
||||
}
|
||||
else
|
||||
{
|
||||
bSupported = FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return bSupported;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: CpuidImpl::AddUniquePkgCoreId_
|
||||
// Desc: Adds the package/core id extracted from the APIC ID at m_apicIds[idx]
|
||||
// in the if the package/core id is unique to the pkgCoreIds array.
|
||||
// nPkgCore is an in/out parm that will reflect the total number of items
|
||||
// in pkgCoreIds array. It will be incrememted if a unique package/core
|
||||
// id is found and added.
|
||||
//-----------------------------------------------------------------------------
|
||||
void AddUniquePkgCoreId_( DWORD idx, BYTE* pkgCoreIds, DWORD& nPkgCoreIds ) const
|
||||
{
|
||||
_ASSERT( idx < m_nItems );
|
||||
_ASSERT( NULL != pkgCoreIds );
|
||||
|
||||
DWORD j;
|
||||
for( j = 0; j < nPkgCoreIds; ++j )
|
||||
{
|
||||
if( pkgCoreIds[j] == m_apicExtractor.PackageCoreId( m_apicIds[idx] ) )
|
||||
break;
|
||||
}
|
||||
if( j == nPkgCoreIds )
|
||||
{
|
||||
pkgCoreIds[j] = m_apicExtractor.PackageCoreId( m_apicIds[idx] );
|
||||
++nPkgCoreIds;
|
||||
}
|
||||
}
|
||||
|
||||
// Private Members
|
||||
BYTE m_apicIds[MaxLogicalProcessors];
|
||||
BYTE m_nItems;
|
||||
ApicExtractor m_apicExtractor;
|
||||
|
||||
// Supported Vendor Strings
|
||||
static const char GenuineIntel[];
|
||||
static const char AuthenticAMD[];
|
||||
};
|
||||
|
||||
// Static initialization of vendor strings
|
||||
const char CpuidImpl::GenuineIntel[] = "GenuineIntel";
|
||||
const char CpuidImpl::AuthenticAMD[] = "AuthenticAMD";
|
||||
|
||||
} // unnamed-namespace
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Name: CpuTopology::CpuTopology
|
||||
// Desc: Initializes this object with the appropriately supported cpu topology
|
||||
// implementation object.
|
||||
//-------------------------------------------------------------------------------------
|
||||
CpuTopology::CpuTopology( BOOL bForceCpuid ) : m_pImpl( NULL )
|
||||
{
|
||||
ForceCpuid( bForceCpuid );
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Name: CpuTopology::~CpuTopology
|
||||
//-------------------------------------------------------------------------------------
|
||||
CpuTopology::~CpuTopology()
|
||||
{
|
||||
Destroy_();
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Name: CpuTopology::NumberOfProcessCores
|
||||
// Desc: Gets the total number of physical processor cores available to the current
|
||||
// process.
|
||||
//-------------------------------------------------------------------------------------
|
||||
DWORD CpuTopology::NumberOfProcessCores() const
|
||||
{
|
||||
return m_pImpl->NumberOfProcessCores();
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Name: CpuTopology::NumberOfSystemCores
|
||||
// Desc: Gets the total number of physical processor cores enabled on the system.
|
||||
//-------------------------------------------------------------------------------------
|
||||
DWORD CpuTopology::NumberOfSystemCores() const
|
||||
{
|
||||
return m_pImpl->NumberOfSystemCores();
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Name: CpuTopology::CoreAffinityMask
|
||||
// Desc: Gets an affinity mask that corresponds to the requested processor core.
|
||||
//-------------------------------------------------------------------------------------
|
||||
DWORD_PTR CpuTopology::CoreAffinityMask( DWORD coreIdx ) const
|
||||
{
|
||||
return m_pImpl->CoreAffinityMask( coreIdx );
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Name: CpuTopology::IsDefaultImpl
|
||||
// Desc: Returns TRUE if m_pImpl is a DefaultImpl object, FALSE if not. Used to
|
||||
// indicate whether or not the prescribed methods (CPUID or
|
||||
// GetLogicalProcessorInformation) are supported on the system.
|
||||
//-------------------------------------------------------------------------------------
|
||||
BOOL CpuTopology::IsDefaultImpl() const
|
||||
{
|
||||
return m_pImpl->IsDefaultImpl();
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Name: CpuTopology::ForceCpuid
|
||||
// Desc: Constructs a cpu topology object. If bForce is FALSE, then a GlpiImpl object
|
||||
// is first attempted, then CpuidImpl, then finally DefaultImpl. If bForce is
|
||||
// TRUE, then GlpiImpl is never attempted.
|
||||
//-------------------------------------------------------------------------------------
|
||||
void CpuTopology::ForceCpuid( BOOL bForce )
|
||||
{
|
||||
Destroy_();
|
||||
|
||||
if( !bForce && GlpiImpl::IsSupported() )
|
||||
{
|
||||
m_pImpl = new GlpiImpl();
|
||||
}
|
||||
else if( CpuidImpl::IsSupported() )
|
||||
{
|
||||
m_pImpl = new CpuidImpl();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_pImpl = new DefaultImpl();
|
||||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Name: CpuTopology::Destroy_
|
||||
//-------------------------------------------------------------------------------------
|
||||
void CpuTopology::Destroy_()
|
||||
{
|
||||
delete m_pImpl;
|
||||
m_pImpl = NULL;
|
||||
}
|
||||
#endif
|
38
r5dev/tier0/cputopology.h
Normal file
38
r5dev/tier0/cputopology.h
Normal file
@ -0,0 +1,38 @@
|
||||
//-------------------------------------------------------------------------------------
|
||||
// CpuTopology.h
|
||||
//
|
||||
// CpuTopology class declaration.
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
//-------------------------------------------------------------------------------------
|
||||
#pragma once
|
||||
#ifndef CPU_TOPOLOGY_H
|
||||
#define CPU_TOPOLOGY_H
|
||||
|
||||
class ICpuTopology;
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// Name: CpuToplogy
|
||||
// Desc: This class constructs a supported cpu topology implementation object on
|
||||
// initialization and forwards calls to it. This is the Abstraction class
|
||||
// in the traditional Bridge Pattern.
|
||||
//---------------------------------------------------------------------------------
|
||||
class CpuTopology
|
||||
{
|
||||
public:
|
||||
CpuTopology( BOOL bForceCpuid = FALSE );
|
||||
~CpuTopology();
|
||||
|
||||
BOOL IsDefaultImpl() const;
|
||||
DWORD NumberOfProcessCores() const;
|
||||
DWORD NumberOfSystemCores() const;
|
||||
DWORD_PTR CoreAffinityMask( DWORD coreIdx ) const;
|
||||
|
||||
void ForceCpuid( BOOL bForce );
|
||||
private:
|
||||
void Destroy_();
|
||||
|
||||
ICpuTopology* m_pImpl;
|
||||
};
|
||||
|
||||
#endif // CPU_TOPOLOGY_H
|
18
r5dev/tier0/fasttimer.cpp
Normal file
18
r5dev/tier0/fasttimer.cpp
Normal file
@ -0,0 +1,18 @@
|
||||
//=============================================================================//
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
// $NoKeywords: $
|
||||
//=============================================================================//
|
||||
|
||||
#include "core/stdafx.h"
|
||||
#include "tier0/fasttimer.h"
|
||||
|
||||
uint64 g_ClockSpeed; // Clocks/sec
|
||||
unsigned long g_dwClockSpeed;
|
||||
double g_ClockSpeedMicrosecondsMultiplier;
|
||||
double g_ClockSpeedMillisecondsMultiplier;
|
||||
double g_ClockSpeedSecondsMultiplier;
|
||||
|
||||
// Constructor init the clock speed.
|
||||
CClockSpeedInit g_ClockSpeedInit;
|
549
r5dev/tier0/fasttimer.h
Normal file
549
r5dev/tier0/fasttimer.h
Normal file
@ -0,0 +1,549 @@
|
||||
//===========================================================================//
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
// $NoKeywords: $
|
||||
//===========================================================================//
|
||||
|
||||
#ifndef FASTTIMER_H
|
||||
#define FASTTIMER_H
|
||||
|
||||
#include "tier0/platform.h"
|
||||
#include "tier0/cpu.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
// Clock speed as read from the CPU information by CClockSpeedInit::Init().
extern uint64_t g_ClockSpeed;
// g_ClockSpeed converted to unsigned long.
extern unsigned long g_dwClockSpeed;

// Precomputed factors set by CClockSpeedInit::Init():
// 1000000.0, 1000.0 and 1.0 divided by g_ClockSpeed respectively.
extern double g_ClockSpeedMicrosecondsMultiplier;
extern double g_ClockSpeedMillisecondsMultiplier;
extern double g_ClockSpeedSecondsMultiplier;
|
||||
|
||||
// -------------------------------------------------------------------------- //
// CCycleCount
// Wraps a single 64-bit cycle counter value together with conversions to
// wall-clock units. CFastTimer is a friend and works on the raw value.
// -------------------------------------------------------------------------- //
class CCycleCount
{
    friend class CFastTimer;

public:
    CCycleCount();
    CCycleCount(uint64_t cycles);

    // Sample the clock. This takes about 34 clocks to execute
    // (or 26,000 calls per millisecond on a P900).
    void Sample();

    // Initialisation overloads.
    void Init();                        // Set to zero.
    void Init(float initTimeMsec);
    void Init(double initTimeMsec)      // Narrows to float and forwards.
    {
        Init(static_cast<float>(initTimeMsec));
    }
    void Init(uint64_t cycles);

    // Compare two counts.
    bool IsLessThan(const CCycleCount& other) const;

    // Convert to other time representations. These functions are slow, so it's
    // preferable to call them during display rather than inside a timing block.
    unsigned long GetCycles() const;
    uint64_t GetLongCycles() const;

    unsigned long GetMicroseconds() const;
    uint64_t GetUlMicroseconds() const;
    double GetMicrosecondsF() const;
    void SetMicroseconds(unsigned long nMicroseconds);

    unsigned long GetMilliseconds() const;
    double GetMillisecondsF() const;
    double GetSeconds() const;

    CCycleCount& operator+=(const CCycleCount& other);

    // dest = rSrc1 + rSrc2
    static void Add(const CCycleCount& rSrc1, const CCycleCount& rSrc2, CCycleCount& dest);
    // dest = rSrc1 - rSrc2
    static void Sub(const CCycleCount& rSrc1, const CCycleCount& rSrc2, CCycleCount& dest);
    static uint64_t GetTimestamp();

private:
    uint64_t m_Int64 = 0;   // raw counter value; zero until set
};
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CClockSpeedInit
|
||||
// -------------------------------------------------------------------------- //
|
||||
class CClockSpeedInit
|
||||
{
|
||||
public:
|
||||
CClockSpeedInit(void)
|
||||
{
|
||||
Init();
|
||||
}
|
||||
|
||||
static void Init(void)
|
||||
{
|
||||
const CPUInformation& pi = GetCPUInformation();
|
||||
g_ClockSpeed = pi.m_Speed;
|
||||
g_dwClockSpeed = (unsigned long)g_ClockSpeed;
|
||||
|
||||
g_ClockSpeedMicrosecondsMultiplier = 1000000.0 / (double)g_ClockSpeed;
|
||||
g_ClockSpeedMillisecondsMultiplier = 1000.0 / (double)g_ClockSpeed;
|
||||
g_ClockSpeedSecondsMultiplier = 1.0f / (double)g_ClockSpeed;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CFastTimer
|
||||
// These functions are fast to call and should be called from your sampling code.
|
||||
// -------------------------------------------------------------------------- //
|
||||
class CFastTimer
|
||||
{
|
||||
public:
|
||||
void Start(void);
|
||||
void End(void);
|
||||
|
||||
const CCycleCount& GetDuration(void) const; // Get the elapsed time between Start and End calls.
|
||||
CCycleCount GetDurationInProgress(void) const; // Call without ending. Not that cheap.
|
||||
|
||||
// Return number of cycles per second on this processor.
|
||||
static inline unsigned long GetClockSpeed(void);
|
||||
|
||||
private:
|
||||
CCycleCount m_Duration;
|
||||
#ifdef DEBUG_FASTTIMER
|
||||
bool m_bRunning; // Are we currently running?
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CTimeScope
|
||||
// This is a helper class that times whatever block of code it's in.
|
||||
// -------------------------------------------------------------------------- //
|
||||
class CTimeScope
|
||||
{
|
||||
public:
|
||||
CTimeScope(CFastTimer* pTimer);
|
||||
~CTimeScope(void);
|
||||
|
||||
private:
|
||||
CFastTimer* m_pTimer;
|
||||
};
|
||||
|
||||
// Remembers the timer and starts it immediately.
inline CTimeScope::CTimeScope(CFastTimer* pTotal)
	: m_pTimer(pTotal)
{
	m_pTimer->Start();
}
|
||||
|
||||
// Ends the timer that was started by the constructor.
inline CTimeScope::~CTimeScope(void)
{
	CFastTimer* const pTimer = m_pTimer;
	pTimer->End();
}
|
||||
|
||||
// This is a helper class that times whatever block of code it's in and adds the total (int microseconds) to a global counter.
|
||||
class CTimeAdder
|
||||
{
|
||||
public:
|
||||
CTimeAdder(CCycleCount* pTotal);
|
||||
~CTimeAdder(void);
|
||||
|
||||
void End();
|
||||
|
||||
private:
|
||||
CCycleCount* m_pTotal;
|
||||
CFastTimer m_Timer;
|
||||
};
|
||||
|
||||
// Stores the accumulator pointer and starts the internal timer.
inline CTimeAdder::CTimeAdder(CCycleCount* pTotal)
	: m_pTotal(pTotal)
{
	m_Timer.Start();
}
|
||||
|
||||
// Finishes timing on scope exit (no effect if End() was already called).
inline CTimeAdder::~CTimeAdder(void)
{
	this->End();
}
|
||||
|
||||
inline void CTimeAdder::End(void)
|
||||
{
|
||||
if (m_pTotal)
|
||||
{
|
||||
m_Timer.End();
|
||||
*m_pTotal += m_Timer.GetDuration();
|
||||
m_pTotal = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// Simple tool to support timing a block of code, and reporting the results on
|
||||
// program exit or at each iteration
|
||||
//
|
||||
// Macros used because dbg.h uses this header, thus Msg() is unavailable
|
||||
// -------------------------------------------------------------------------- //
|
||||
|
||||
// Declares a static CAverageCycleCounter subclass instance named <name>_ACC and
// a local CAverageTimeMarker <name>_ATM that records one iteration into it.
// The counter's destructor reports average/total/peak milliseconds and the
// iteration count through Msg(). GetIters() returns unsigned, so the count is
// printed with %u.
#define PROFILE_SCOPE(name) \
	class C##name##ACC : public CAverageCycleCounter \
	{ \
	public: \
		~C##name##ACC() \
		{ \
			Msg("%-48s: %6.3f avg (%8.1f total, %7.3f peak, %5u iters)\n", \
				#name, \
				GetAverageMilliseconds(), \
				GetTotalMilliseconds(), \
				GetPeakMilliseconds(), \
				GetIters() ); \
		} \
	}; \
	static C##name##ACC name##_ACC; \
	CAverageTimeMarker name##_ATM( &name##_ACC )
|
||||
|
||||
// Declares a local object <name>_TSM whose constructor starts a CFastTimer and
// whose destructor ends it and reports the elapsed milliseconds through Msg().
#define TIME_SCOPE(name) \
	class CTimeScopeMsg_##name \
	{ \
		CFastTimer m_Timer; \
	public: \
		CTimeScopeMsg_##name() \
		{ \
			m_Timer.Start(); \
		} \
		~CTimeScopeMsg_##name() \
		{ \
			m_Timer.End(); \
			Msg( #name "time: %.4fms\n", m_Timer.GetDuration().GetMillisecondsF() ); \
		} \
	} name##_TSM;
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CAverageCycleCounter
|
||||
// -------------------------------------------------------------------------- //
|
||||
class CAverageCycleCounter
|
||||
{
|
||||
public:
|
||||
CAverageCycleCounter(void);
|
||||
|
||||
void Init(void);
|
||||
void MarkIter(const CCycleCount& duration);
|
||||
|
||||
unsigned GetIters(void) const;
|
||||
|
||||
double GetAverageMilliseconds(void) const;
|
||||
double GetTotalMilliseconds(void) const;
|
||||
double GetPeakMilliseconds(void) const;
|
||||
|
||||
private:
|
||||
unsigned m_nIters {};
|
||||
CCycleCount m_Total {};
|
||||
CCycleCount m_Peak {};
|
||||
bool m_fReport{};
|
||||
const char* m_pszName{};
|
||||
};
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CAverageTimeMarker
|
||||
// -------------------------------------------------------------------------- //
|
||||
class CAverageTimeMarker
|
||||
{
|
||||
public:
|
||||
CAverageTimeMarker(CAverageCycleCounter* pCounter);
|
||||
~CAverageTimeMarker(void);
|
||||
|
||||
private:
|
||||
CAverageCycleCounter* m_pCounter;
|
||||
CFastTimer m_Timer;
|
||||
};
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CCycleCount inlines.
|
||||
// -------------------------------------------------------------------------- //
|
||||
// Default construction: zero cycles.
inline CCycleCount::CCycleCount(void)
{
	const uint64_t nZero = 0;
	Init(nZero);
}
|
||||
|
||||
// Construct from a raw cycle count.
inline CCycleCount::CCycleCount(uint64_t cycles)
{
	this->Init(cycles);
}
|
||||
|
||||
inline void CCycleCount::Init(void)
|
||||
{
|
||||
Init((uint64_t)0);
|
||||
}
|
||||
|
||||
inline void CCycleCount::Init(float initTimeMsec)
|
||||
{
|
||||
if (g_ClockSpeedMillisecondsMultiplier > 0)
|
||||
Init((uint64_t)(initTimeMsec / g_ClockSpeedMillisecondsMultiplier));
|
||||
else
|
||||
Init((uint64_t)0);
|
||||
}
|
||||
|
||||
inline void CCycleCount::Init(uint64_t cycles)
|
||||
{
|
||||
m_Int64 = cycles;
|
||||
}
|
||||
|
||||
inline void CCycleCount::Sample(void)
|
||||
{
|
||||
m_Int64 = Plat_Rdtsc();
|
||||
}
|
||||
|
||||
// Add another count to this one and return this object.
inline CCycleCount& CCycleCount::operator+=(CCycleCount const& other)
{
	m_Int64 = m_Int64 + other.m_Int64;
	return *this;
}
|
||||
|
||||
// dest = rSrc1 + rSrc2
inline void CCycleCount::Add(CCycleCount const& rSrc1, CCycleCount const& rSrc2, CCycleCount& dest)
{
	const uint64_t nSum = rSrc1.m_Int64 + rSrc2.m_Int64;
	dest.m_Int64 = nSum;
}
|
||||
|
||||
// dest = rSrc1 - rSrc2
inline void CCycleCount::Sub(CCycleCount const& rSrc1, CCycleCount const& rSrc2, CCycleCount& dest)
{
	const uint64_t nDifference = rSrc1.m_Int64 - rSrc2.m_Int64;
	dest.m_Int64 = nDifference;
}
|
||||
|
||||
inline uint64_t CCycleCount::GetTimestamp(void)
|
||||
{
|
||||
CCycleCount c;
|
||||
c.Sample();
|
||||
return c.GetLongCycles();
|
||||
}
|
||||
|
||||
// True when this count is strictly smaller than the other one.
inline bool CCycleCount::IsLessThan(CCycleCount const& other) const
{
	return other.m_Int64 > m_Int64;
}
|
||||
|
||||
inline unsigned long CCycleCount::GetCycles(void) const
|
||||
{
|
||||
return (unsigned long)m_Int64;
|
||||
}
|
||||
|
||||
inline uint64_t CCycleCount::GetLongCycles(void) const
|
||||
{
|
||||
return m_Int64;
|
||||
}
|
||||
|
||||
inline unsigned long CCycleCount::GetMicroseconds(void) const
|
||||
{
|
||||
return (unsigned long)((m_Int64 * 1000000) / g_ClockSpeed);
|
||||
}
|
||||
|
||||
inline uint64_t CCycleCount::GetUlMicroseconds(void) const
|
||||
{
|
||||
return ((m_Int64 * 1000000) / g_ClockSpeed);
|
||||
}
|
||||
|
||||
inline double CCycleCount::GetMicrosecondsF(void) const
|
||||
{
|
||||
return (double)(m_Int64 * g_ClockSpeedMicrosecondsMultiplier);
|
||||
}
|
||||
|
||||
inline void CCycleCount::SetMicroseconds(unsigned long nMicroseconds)
|
||||
{
|
||||
m_Int64 = ((uint64_t)nMicroseconds * g_ClockSpeed) / 1000000;
|
||||
}
|
||||
|
||||
inline unsigned long CCycleCount::GetMilliseconds(void) const
|
||||
{
|
||||
return (unsigned long)((m_Int64 * 1000) / g_ClockSpeed);
|
||||
}
|
||||
|
||||
inline double CCycleCount::GetMillisecondsF(void) const
|
||||
{
|
||||
return (double)(m_Int64 * g_ClockSpeedMillisecondsMultiplier);
|
||||
}
|
||||
|
||||
inline double CCycleCount::GetSeconds(void) const
|
||||
{
|
||||
return (double)(m_Int64 * g_ClockSpeedSecondsMultiplier);
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CFastTimer inlines.
|
||||
// -------------------------------------------------------------------------- //
|
||||
inline void CFastTimer::Start(void)
|
||||
{
|
||||
m_Duration.Sample();
|
||||
#ifdef DEBUG_FASTTIMER
|
||||
m_bRunning = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void CFastTimer::End(void)
|
||||
{
|
||||
CCycleCount cnt;
|
||||
cnt.Sample();
|
||||
|
||||
m_Duration.m_Int64 = cnt.m_Int64 - m_Duration.m_Int64;
|
||||
|
||||
#ifdef DEBUG_FASTTIMER
|
||||
m_bRunning = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Return (now - stored sample) without modifying the timer.
inline CCycleCount CFastTimer::GetDurationInProgress(void) const
{
	CCycleCount now;
	now.Sample();

	CCycleCount elapsed;
	CCycleCount::Sub(now, m_Duration, elapsed);
	return elapsed;
}
|
||||
|
||||
inline unsigned long CFastTimer::GetClockSpeed(void)
|
||||
{
|
||||
return g_dwClockSpeed;
|
||||
}
|
||||
|
||||
// Return the stored duration. With DEBUG_FASTTIMER defined, asserts that the
// timer is not currently running.
inline CCycleCount const& CFastTimer::GetDuration(void) const
{
#ifdef DEBUG_FASTTIMER
	assert(!m_bRunning);
#endif
	const CCycleCount& duration = m_Duration;
	return duration;
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CAverageCycleCounter inlines
|
||||
// -------------------------------------------------------------------------- //
|
||||
// Start with no recorded iterations.
inline CAverageCycleCounter::CAverageCycleCounter(void)
{
	m_nIters = 0;
}
|
||||
|
||||
inline void CAverageCycleCounter::Init(void)
|
||||
{
|
||||
m_Total.Init();
|
||||
m_Peak.Init();
|
||||
m_nIters = 0;
|
||||
}
|
||||
|
||||
inline void CAverageCycleCounter::MarkIter(const CCycleCount& duration)
|
||||
{
|
||||
++m_nIters;
|
||||
m_Total += duration;
|
||||
if (m_Peak.IsLessThan(duration))
|
||||
m_Peak = duration;
|
||||
}
|
||||
|
||||
inline unsigned CAverageCycleCounter::GetIters(void) const
|
||||
{
|
||||
return m_nIters;
|
||||
}
|
||||
|
||||
inline double CAverageCycleCounter::GetAverageMilliseconds(void) const
|
||||
{
|
||||
if (m_nIters)
|
||||
return (m_Total.GetMillisecondsF() / (double)m_nIters);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline double CAverageCycleCounter::GetTotalMilliseconds(void) const
|
||||
{
|
||||
return m_Total.GetMillisecondsF();
|
||||
}
|
||||
|
||||
inline double CAverageCycleCounter::GetPeakMilliseconds(void) const
|
||||
{
|
||||
return m_Peak.GetMillisecondsF();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
|
||||
// Remember the target counter and start timing.
inline CAverageTimeMarker::CAverageTimeMarker(CAverageCycleCounter* pCounter)
	: m_pCounter(pCounter)
{
	m_Timer.Start();
}
|
||||
|
||||
// Stop timing and record the elapsed duration as one iteration.
inline CAverageTimeMarker::~CAverageTimeMarker(void)
{
	m_Timer.End();

	const CCycleCount& elapsed = m_Timer.GetDuration();
	m_pCounter->MarkIter(elapsed);
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
// CLimitTimer
|
||||
// Use this to time whether a desired interval of time has passed. It's extremely fast
|
||||
// to check while running. NOTE: CMicroSecOverage() and CMicroSecLeft() are not as fast to check.
|
||||
// -------------------------------------------------------------------------- //
|
||||
// Tracks a deadline expressed as a clock cycle count and answers whether, and
// by how much, the deadline has been reached.
class CLimitTimer
{
public:
	CLimitTimer(void) { } // Cycle limit starts at zero.
	CLimitTimer(uint64_t cMicroSecDuration) { SetLimit(cMicroSecDuration); }

	// Set the deadline to cMicroSecDuration microseconds from now.
	void SetLimit(uint64_t cMicroSecDuration);
	// True once the current clock sample has reached the deadline.
	bool BLimitReached(void) const;

	// Microseconds past the deadline, or 0 if it has not been reached.
	int CMicroSecOverage(void) const;
	// Microseconds remaining until the deadline, or 0 if it has passed.
	uint64_t CMicroSecLeft(void) const;

private:
	uint64_t m_lCycleLimit{}; // Cycle count at which the limit is reached.
};
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: Initializes the limit timer with a period of time to measure.
|
||||
// Input : cMicroSecDuration - How long a time period to measure
|
||||
//-----------------------------------------------------------------------------
|
||||
inline void CLimitTimer::SetLimit(uint64_t cMicroSecDuration)
|
||||
{
|
||||
uint64_t dlCycles = ((uint64_t)cMicroSecDuration * (uint64_t)g_dwClockSpeed) / (uint64_t)1000000L;
|
||||
CCycleCount cycleCount;
|
||||
cycleCount.Sample();
|
||||
m_lCycleLimit = cycleCount.GetLongCycles() + dlCycles;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: Determines whether our specified time period has passed
|
||||
// Output: true if at least the specified time period has passed
|
||||
//-----------------------------------------------------------------------------
|
||||
inline bool CLimitTimer::BLimitReached(void) const
|
||||
{
|
||||
CCycleCount cycleCount;
|
||||
cycleCount.Sample();
|
||||
return (cycleCount.GetLongCycles() >= m_lCycleLimit);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: If we're over our specified time period, return the amount of the overage.
|
||||
// Output: # of microseconds since we reached our specified time period.
|
||||
//-----------------------------------------------------------------------------
|
||||
inline int CLimitTimer::CMicroSecOverage(void) const
|
||||
{
|
||||
CCycleCount cycleCount;
|
||||
cycleCount.Sample();
|
||||
uint64_t lcCycles = cycleCount.GetLongCycles();
|
||||
|
||||
if (lcCycles < m_lCycleLimit)
|
||||
return 0;
|
||||
|
||||
return((int)((lcCycles - m_lCycleLimit) * (uint64_t)1000000L / g_dwClockSpeed));
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: If we're under our specified time period, return the amount under.
|
||||
// Output: # of microseconds until we reached our specified time period, 0 if we've passed it
|
||||
//-----------------------------------------------------------------------------
|
||||
inline uint64_t CLimitTimer::CMicroSecLeft(void) const
|
||||
{
|
||||
CCycleCount cycleCount;
|
||||
cycleCount.Sample();
|
||||
uint64_t lcCycles = cycleCount.GetLongCycles();
|
||||
|
||||
if (lcCycles >= m_lCycleLimit)
|
||||
return 0;
|
||||
|
||||
return((uint64_t)((m_lCycleLimit - lcCycles) * (uint64_t)1000000L / g_dwClockSpeed));
|
||||
}
|
||||
|
||||
#endif // FASTTIMER_H
|
76
r5dev/tier0/platform.h
Normal file
76
r5dev/tier0/platform.h
Normal file
@ -0,0 +1,76 @@
|
||||
#ifndef PLATFORM_H
|
||||
#define PLATFORM_H
|
||||
|
||||
#if defined( _WIN32 ) && defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#endif
|
||||
|
||||
// Reads the processor's timestamp counter and returns it as a 64-bit value.
// Every branch uses the <cstdint> fixed-width types so the function does not
// depend on project typedefs being declared before this header.
inline uint64_t Plat_Rdtsc()
{
#if defined( _X360 )
	return (uint64_t)__mftb32();
#elif defined( _WIN64 )
	return (uint64_t)__rdtsc();
#elif defined( _WIN32 )
#if defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
	return (uint64_t)__rdtsc();
#else
	// NOTE(review): legacy path kept as-is; it returns straight from the
	// inline assembly with the counter left in EDX:EAX.
	__asm rdtsc;
	__asm ret;
#endif
#elif defined( __i386__ )
	uint64_t val;
	__asm__ __volatile__("rdtsc" : "=A" (val));
	return val;
#elif defined( __x86_64__ )
	// RDTSC returns the low half in EAX and the high half in EDX.
	uint32_t lo, hi;
	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
	return (((uint64_t)hi) << 32) | lo;
#else
#error Plat_Rdtsc has no timestamp counter implementation for this platform
#endif
}
|
||||
|
||||
// Processor Information:
|
||||
// Describes the processor: core counts, feature flags, speed, identification
// strings and cache sizes. A default-constructed instance has every field
// zeroed / null.
struct CPUInformation
{
	int m_Size;		// Size of this structure, for forward compatibility.

	uint8_t m_nLogicalProcessors;	// Number of logical processors.
	uint8_t m_nPhysicalProcessors;	// Number of physical processors.

	bool m_bRDTSC : 1,	// Is RDTSC supported?
		m_bCMOV : 1,	// Is CMOV supported?
		m_bFCMOV : 1,	// Is FCMOV supported?
		m_bSSE : 1,	// Is SSE supported?
		m_bSSE2 : 1,	// Is SSE2 supported?
		m_b3DNow : 1,	// Is 3DNow! supported?
		m_bMMX : 1,	// Is MMX supported?
		m_bHT : 1;	// Is HyperThreading supported?

	bool m_bSSE3 : 1,
		m_bSSSE3 : 1,
		m_bSSE4a : 1,
		m_bSSE41 : 1,
		m_bSSE42 : 1,
		m_bAVX : 1;	// Is AVX supported?

	int64_t m_Speed;		// In cycles per second.

	char* m_szProcessorID;		// Processor vendor identification.
	char* m_szProcessorBrand;	// Processor brand string, if available.

	uint32_t m_nModel;
	uint32_t m_nFeatures[3];
	uint32_t m_nL1CacheSizeKb;
	uint32_t m_nL1CacheDesc;
	uint32_t m_nL2CacheSizeKb;
	uint32_t m_nL2CacheDesc;
	uint32_t m_nL3CacheSizeKb;
	uint32_t m_nL3CacheDesc;

	// Initialize every member (in declaration order) so that instances with
	// automatic storage never expose indeterminate flags, sizes or pointers.
	CPUInformation()
		: m_Size(0)
		, m_nLogicalProcessors(0)
		, m_nPhysicalProcessors(0)
		, m_bRDTSC(false)
		, m_bCMOV(false)
		, m_bFCMOV(false)
		, m_bSSE(false)
		, m_bSSE2(false)
		, m_b3DNow(false)
		, m_bMMX(false)
		, m_bHT(false)
		, m_bSSE3(false)
		, m_bSSSE3(false)
		, m_bSSE4a(false)
		, m_bSSE41(false)
		, m_bSSE42(false)
		, m_bAVX(false)
		, m_Speed(0)
		, m_szProcessorID(nullptr)
		, m_szProcessorBrand(nullptr)
		, m_nModel(0)
		, m_nFeatures()
		, m_nL1CacheSizeKb(0)
		, m_nL1CacheDesc(0)
		, m_nL2CacheSizeKb(0)
		, m_nL2CacheDesc(0)
		, m_nL3CacheSizeKb(0)
		, m_nL3CacheDesc(0)
	{}
};
|
||||
|
||||
#endif /* PLATFORM_H */
|
@ -327,9 +327,13 @@
|
||||
<ClInclude Include="..\tier0\commandline.h" />
|
||||
<ClInclude Include="..\tier0\cmd.h" />
|
||||
<ClInclude Include="..\tier0\completion.h" />
|
||||
<ClInclude Include="..\tier0\cpu.h" />
|
||||
<ClInclude Include="..\tier0\cputopology.h" />
|
||||
<ClInclude Include="..\tier0\cvar.h" />
|
||||
<ClInclude Include="..\tier0\fasttimer.h" />
|
||||
<ClInclude Include="..\tier0\IConVar.h" />
|
||||
<ClInclude Include="..\tier0\interface.h" />
|
||||
<ClInclude Include="..\tier0\platform.h" />
|
||||
<ClInclude Include="..\tier1\NetAdr2.h" />
|
||||
<ClInclude Include="..\tier2\socketcreator.h" />
|
||||
<ClInclude Include="..\vpc\basefilesystem.h" />
|
||||
@ -403,7 +407,10 @@
|
||||
<ClCompile Include="..\tier0\commandline.cpp" />
|
||||
<ClCompile Include="..\tier0\cmd.cpp" />
|
||||
<ClCompile Include="..\tier0\completion.cpp" />
|
||||
<ClCompile Include="..\tier0\cpu.cpp" />
|
||||
<ClCompile Include="..\tier0\cputopology.cpp" />
|
||||
<ClCompile Include="..\tier0\cvar.cpp" />
|
||||
<ClCompile Include="..\tier0\fasttimer.cpp" />
|
||||
<ClCompile Include="..\tier0\IConVar.cpp" />
|
||||
<ClCompile Include="..\tier1\NetAdr2.cpp" />
|
||||
<ClCompile Include="..\tier2\socketcreator.cpp" />
|
||||
|
@ -792,6 +792,18 @@
|
||||
<ClInclude Include="..\game\server\ai_utility.h">
|
||||
<Filter>sdk\game\server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\tier0\cpu.h">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\tier0\cputopology.h">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\tier0\platform.h">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\tier0\fasttimer.h">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\client\IVEngineClient.cpp">
|
||||
@ -986,6 +998,15 @@
|
||||
<ClCompile Include="..\game\server\ai_utility.cpp">
|
||||
<Filter>sdk\game\server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\tier0\cpu.cpp">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\tier0\cputopology.cpp">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\tier0\fasttimer.cpp">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\Dedicated.def" />
|
||||
|
@ -81,7 +81,10 @@
|
||||
<ClCompile Include="..\tier0\commandline.cpp" />
|
||||
<ClCompile Include="..\tier0\cmd.cpp" />
|
||||
<ClCompile Include="..\tier0\completion.cpp" />
|
||||
<ClCompile Include="..\tier0\cpu.cpp" />
|
||||
<ClCompile Include="..\tier0\cputopology.cpp" />
|
||||
<ClCompile Include="..\tier0\cvar.cpp" />
|
||||
<ClCompile Include="..\tier0\fasttimer.cpp" />
|
||||
<ClCompile Include="..\tier0\IConVar.cpp" />
|
||||
<ClCompile Include="..\tier1\NetAdr2.cpp" />
|
||||
<ClCompile Include="..\tier2\socketcreator.cpp" />
|
||||
@ -329,9 +332,13 @@
|
||||
<ClInclude Include="..\tier0\commandline.h" />
|
||||
<ClInclude Include="..\tier0\cmd.h" />
|
||||
<ClInclude Include="..\tier0\completion.h" />
|
||||
<ClInclude Include="..\tier0\cpu.h" />
|
||||
<ClInclude Include="..\tier0\cputopology.h" />
|
||||
<ClInclude Include="..\tier0\cvar.h" />
|
||||
<ClInclude Include="..\tier0\fasttimer.h" />
|
||||
<ClInclude Include="..\tier0\IConVar.h" />
|
||||
<ClInclude Include="..\tier0\interface.h" />
|
||||
<ClInclude Include="..\tier0\platform.h" />
|
||||
<ClInclude Include="..\tier1\NetAdr2.h" />
|
||||
<ClInclude Include="..\tier2\socketcreator.h" />
|
||||
<ClInclude Include="..\vguimatsurface\MatSystemSurface.h" />
|
||||
|
@ -399,6 +399,15 @@
|
||||
<ClCompile Include="..\materialsystem\cmaterialglue.cpp">
|
||||
<Filter>sdk\materialsystem</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\tier0\cpu.cpp">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\tier0\cputopology.cpp">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\tier0\fasttimer.cpp">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\client\cdll_engine_int.h">
|
||||
@ -1145,6 +1154,18 @@
|
||||
<ClInclude Include="..\materialsystem\cmaterialglue.h">
|
||||
<Filter>sdk\materialsystem</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\tier0\fasttimer.h">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\tier0\platform.h">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\tier0\cputopology.h">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\tier0\cpu.h">
|
||||
<Filter>sdk\tier0</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Image Include="..\shared\resource\lockedserver.png">
|
||||
|
Loading…
x
Reference in New Issue
Block a user