r5sdk/r5dev/tier0/cpu.cpp
Kawe Mazidjatari 7c9a50f0d5 Add proper license to project
The project was never licensed, it only contained third party licenses.
I determined to use the Source SDK 2013 license for this, as the majority of the business logic running this product is based on Valve's (the license has zero restrictions in the scope of our goal with this project).
The licenses have to be included with any depots from now on, in the folder 'legal' placed in the root of the project folder (the location of r5apex_ds.exe).

With any new additions of third party code, the 'thirdpartylegalnotices.txt' file has to be updated accordingly.
2022-04-01 00:11:42 +02:00

565 lines
15 KiB
C++

//===== Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ======//
//
// Purpose:
//
// $NoKeywords: $
//=============================================================================//
#include "core/stdafx.h"
#include "tier0/cpu.h"
#include "tier0/cputopology.h"
#include "tier0/fasttimer.h"
/*******************************************************************************/
// Cached CPU description handed out by GetCPUInformation(); its m_Size field
// is used there as the "already filled in" marker.
static CPUInformation s_cpuInformation;
// CPUID vendor string (12 characters + NUL), lazily filled by GetProcessorVendorId().
static char s_CpuVendorID[13] = "unknown";
// Set by GetProcessorBrand() on its first call, before s_CpuBrand is populated.
bool s_bCpuBrandInitialized = false;
// Set by GetProcessorVendorId() on its first call, before s_CpuVendorID is populated.
bool s_bCpuVendorIdInitialized = false;
/*******************************************************************************/
// Register snapshot produced by a single CPUID query.
struct CpuIdResult_t
{
	unsigned long eax;
	unsigned long ebx;
	unsigned long ecx;
	unsigned long edx;

	// Clears all four registers to zero.
	void Reset(void)
	{
		eax = 0;
		ebx = 0;
		ecx = 0;
		edx = 0;
	}
};
// One row of the Intel cache-descriptor lookup tables defined in this file.
struct IntelCacheDesc_t
{
// Descriptor byte that FindIntelCacheDesc() compares against.
uint8_t nDesc;
// Cache size for that descriptor; it is copied into the m_nL*CacheSizeKb
// fields of CPUInformation, so presumably expressed in KiB.
uint16_t nCacheSize;
};
/*******************************************************************************/
// Overlays three raw CPUID results with a character buffer so that the
// registers written by GetProcessorBrand() can be read back as a string.
union CpuBrand_t
{
// Register dumps of CPUID leaves 0x80000002..0x80000004 (see GetProcessorBrand()).
CpuIdResult_t cpuid[3];
// The same bytes viewed as text. GetProcessorBrand() zeroes the whole union first.
// NOTE(review): the trailing NUL at name[48] relies on unsigned long being 4 bytes
// (3 * 16 = 48 bytes of register data) - confirm before building for LP64 targets.
char name[49];
};
// Cached brand string storage; filled lazily by GetProcessorBrand().
CpuBrand_t s_CpuBrand;
/*******************************************************************************/
// Descriptor bytes that InterpretIntelCacheDescriptors() treats as L1 cache
// entries, paired with the size stored into m_nL1CacheSizeKb.
inline static IntelCacheDesc_t s_IntelL1DataCacheDesc[] =
{
	{ 0x0A,  8 },
	{ 0x0C, 16 },
	{ 0x0D, 16 },
	{ 0x2C, 32 },
	{ 0x30, 32 },
	{ 0x60, 16 },
	{ 0x66,  8 },
	{ 0x67, 16 },
	{ 0x68, 32 }
};
// Descriptor bytes that InterpretIntelCacheDescriptors() treats as L2 cache
// entries, paired with the size stored into m_nL2CacheSizeKb.
inline static IntelCacheDesc_t s_IntelL2DataCacheDesc[] =
{
	{ 0x21, 256 },
	{ 0x39, 128 },
	{ 0x3A, 192 },
	{ 0x3B, 128 },
	{ 0x3C, 256 },
	{ 0x3D, 384 },
	{ 0x3E, 512 },
	{ 0x41, 128 },
	{ 0x42, 256 },
	{ 0x43, 512 },
	{ 0x44, 1 * 1024 },
	{ 0x45, 2 * 1024 },
	{ 0x48, 3 * 1024 },
	{ 0x4E, 6 * 1024 },
	{ 0x78, 1 * 1024 },
	{ 0x79, 128 },
	{ 0x7A, 256 },
	{ 0x7B, 512 },
	{ 0x7C, 1 * 1024 },
	{ 0x7D, 2 * 1024 },
	{ 0x7F, 512 },
	{ 0x82, 256 },
	{ 0x83, 512 },
	{ 0x84, 1 * 1024 },
	{ 0x85, 2 * 1024 },
	{ 0x86, 512 },
	{ 0x87, 1 * 1024 }
};
// Descriptor bytes that InterpretIntelCacheDescriptors() treats as L3 cache
// entries, paired with the size stored into m_nL3CacheSizeKb.
inline static IntelCacheDesc_t s_IntelL3DataCacheDesc[] =
{
	{ 0x22, 512 },
	{ 0x23, 1024 },
	{ 0x25, 2 * 1024 },
	{ 0x29, 4 * 1024 },
	{ 0x46, 4 * 1024 },
	{ 0x47, 8 * 1024 },
	// Descriptor 0x49 is intentionally not listed.
	// NOTE(review): per Intel's descriptor table it means a 4 MB L3 only on
	// Xeon MP (family 0Fh, model 06h) and a 4 MB L2 elsewhere, so it cannot be
	// mapped without a family/model check - confirm before adding it.
	{ 0x4A, 6 * 1024 },
	{ 0x4B, 8 * 1024 },
	{ 0x4C, 12 * 1024 },
	{ 0x4D, 16 * 1024 }, // Was "16 * 1014": a typo that under-reported the 16 MB cache.
	{ 0xD0, 512 },
	{ 0xD1, 1024 },
	{ 0xD2, 2048 },
	{ 0xD6, 1024 },
	{ 0xD7, 2048 },
	{ 0xD8, 4096 },
	{ 0xDC, 1536 },
	{ 0xDD, 3 * 1024 },
	{ 0xDE, 6 * 1024 },
	{ 0xE2, 2048 },
	{ 0xE3, 4096 },
	{ 0xE4, 8 * 1024 },
	{ 0xEA, 12 * 1024 },
	{ 0xEB, 18 * 1024 },
	{ 0xEC, 24 * 1024 }
};
/*******************************************************************************/
// Executes CPUID for the given leaf and stores the four result registers in 'out'.
// Always returns true.
static bool cpuid(unsigned long function, CpuIdResult_t& out)
{
	int nRegs[4]; // Filled by the intrinsic in EAX, EBX, ECX, EDX order.
	__cpuid(nRegs, static_cast<int>(function));

	out.eax = nRegs[0];
	out.ebx = nRegs[1];
	out.ecx = nRegs[2];
	out.edx = nRegs[3];

	return true;
}
// Executes CPUID for the given leaf and sub-leaf and stores the four result
// registers in 'out'.
// Returns true on success. The previous implementation returned false here,
// which made the value-returning cpuidex() overload discard every result and
// report all-zero registers (so the CPUID.4 cache enumeration never saw data).
static bool cpuidex(unsigned long function, unsigned long subfunction, CpuIdResult_t& out)
{
	int pCPUInfo[4]; // Filled by the intrinsic in EAX, EBX, ECX, EDX order.
	__cpuidex(pCPUInfo, (int)function, (int)subfunction);

	out.eax = pCPUInfo[0];
	out.ebx = pCPUInfo[1];
	out.ecx = pCPUInfo[2];
	out.edx = pCPUInfo[3];

	return true;
}
// Convenience overload: returns the CPUID result for 'function' by value.
// The registers are zeroed when the underlying query reports failure.
static CpuIdResult_t cpuid(unsigned long function)
{
	CpuIdResult_t result;
	const bool bQueried = cpuid(function, result);
	if (bQueried)
	{
		return result;
	}

	result.Reset();
	return result;
}
// Convenience overload: returns the CPUID result for 'function'/'subfunction'
// by value. The registers are zeroed when the underlying query reports failure.
static CpuIdResult_t cpuidex(unsigned long function, unsigned long subfunction)
{
	CpuIdResult_t result;
	const bool bQueried = cpuidex(function, subfunction, result);
	if (bQueried)
	{
		return result;
	}

	result.Reset();
	return result;
}
/*******************************************************************************/
// Returns true when CPUID leaf 1 reports SSE (EDX bit 25).
static bool CheckSSETechnology(void)
{
	const unsigned long nFeaturesEdx = cpuid(1).edx;
	return ((nFeaturesEdx >> 25) & 1) != 0;
}
// Returns true when CPUID leaf 1 reports SSE2 (EDX bit 26).
static bool CheckSSE2Technology(void)
{
	const unsigned long nFeaturesEdx = cpuid(1).edx;
	return ((nFeaturesEdx >> 26) & 1) != 0;
}
// Returns true when CPUID leaf 1 reports SSE3 (ECX bit 0).
bool CheckSSE3Technology(void)
{
	const unsigned long nFeaturesEcx = cpuid(1).ecx;
	return (nFeaturesEcx & 1) != 0;
}
// Returns true when CPUID leaf 1 reports SSSE3 (ECX bit 9).
// The same bit is used for detection on both Intel and AMD.
bool CheckSSSE3Technology(void)
{
	const unsigned long nFeaturesEcx = cpuid(1).ecx;
	return ((nFeaturesEcx >> 9) & 1) != 0;
}
// Returns true when CPUID leaf 1 reports SSE4.1 (ECX bit 19).
// The same bit is used for detection on both Intel and AMD.
bool CheckSSE41Technology(void)
{
	const unsigned long nFeaturesEcx = cpuid(1).ecx;
	return ((nFeaturesEcx >> 19) & 1) != 0;
}
// Returns true when CPUID leaf 1 reports SSE4.2 (ECX bit 20).
// SSE4.2 is no longer Intel-only: AMD processors report it through the same
// bit, so the former "GenuineIntel" gate (which forced false on AMD) has been
// removed. This also matches GetCPUInformation(), which reads ECX bit 20
// without looking at the vendor.
bool CheckSSE42Technology(void)
{
	return (cpuid(1).ecx & (1 << 20)) != 0; // bit 20 of ECX.
}
// Returns true when the processor is an AMD part that reports SSE4a.
// SSE4a is advertised in the *extended* feature leaf (CPUID 0x80000001, ECX
// bit 6). The previous code tested bit 6 of standard leaf 1, which is not the
// SSE4a flag.
bool CheckSSE4aTechnology(void)
{
	// SSE 4a is an AMD-only feature.
	const char* pchVendor = GetProcessorVendorId();
	if (0 != _stricmp(pchVendor, "AuthenticAMD"))
	{
		return false;
	}

	// Leaf 0x80000000 returns the highest supported extended leaf in EAX.
	if (cpuid(0x80000000UL).eax < 0x80000001UL)
	{
		return false;
	}

	return (cpuid(0x80000001UL).ecx & (1UL << 6)) != 0; // bit 6 of extended ECX.
}
// Returns true when the processor reports 3DNow! support.
// The flag lives in EDX bit 31 of extended leaf 0x80000001. The previous code
// tested EAX, which does not hold feature flags in that leaf. The mask is built
// from an unsigned constant to avoid shifting into the sign bit of an int.
static bool Check3DNowTechnology(void)
{
	// Leaf 0x80000000 returns the highest supported extended leaf in EAX.
	if (cpuid(0x80000000UL).eax > 0x80000000UL)
	{
		return (cpuid(0x80000001UL).edx & (1UL << 31)) != 0;
	}
	return false;
}
// Returns true when CPUID leaf 1 reports CMOV (EDX bit 15).
static bool CheckCMOVTechnology(void)
{
	const unsigned long nFeaturesEdx = cpuid(1).edx;
	return ((nFeaturesEdx >> 15) & 1) != 0;
}
// Returns true when the FCMOVcc instructions are available.
// There is no dedicated FCMOV feature bit: the instructions exist when both
// CMOV (EDX bit 15) and the x87 FPU (EDX bit 0) are reported by CPUID leaf 1.
// The previous code tested EDX bit 16, which is a different feature flag.
// GetCPUInformation() derives m_bFCMOV the same way.
static bool CheckFCMOVTechnology(void)
{
	const unsigned long nFeaturesEdx = cpuid(1).edx;
	const bool bFPU = (nFeaturesEdx & (1UL << 0)) != 0;
	const bool bCMOV = (nFeaturesEdx & (1UL << 15)) != 0;
	return bFPU && bCMOV;
}
// Returns true when CPUID leaf 1 reports RDTSC (EDX bit 4).
static bool CheckRDTSCTechnology(void)
{
	const unsigned long nFeaturesEdx = cpuid(1).edx;
	return ((nFeaturesEdx >> 4) & 1) != 0;
}
// Return the Processor's vendor identification string, or "Generic_x86" if it doesn't exist on this CPU.
const char* GetProcessorVendorId(void)
{
if (s_bCpuVendorIdInitialized)
{
return s_CpuVendorID;
}
s_bCpuVendorIdInitialized = true;
CpuIdResult_t cpuid0 = cpuid(0);
memset(s_CpuVendorID, 0, sizeof(s_CpuVendorID));
if (!cpuid0.eax)
{
strcpy(s_CpuVendorID, ("Generic_x86"));
}
else
{
memcpy(s_CpuVendorID + 0, &(cpuid0.ebx), sizeof(cpuid0.ebx));
memcpy(s_CpuVendorID + 4, &(cpuid0.edx), sizeof(cpuid0.edx));
memcpy(s_CpuVendorID + 8, &(cpuid0.ecx), sizeof(cpuid0.ecx));
}
return s_CpuVendorID;
}
// Returns the processor brand (marketing name) string. The string is read on
// the first call and cached in s_CpuBrand; later calls return the cache.
// An empty string is returned when the CPU does not expose extended leaves
// 0x80000002..0x80000004.
//
// The brand-string leaves are not Intel specific, so the former "GenuineIntel"
// gate (which left the brand empty on AMD processors) has been removed; the
// only requirement is that the highest extended leaf is at least 0x80000004.
const char* GetProcessorBrand(void)
{
	if (s_bCpuBrandInitialized)
	{
		return s_CpuBrand.name;
	}
	s_bCpuBrandInitialized = true;

	// Zeroing the whole union also provides the terminating NUL of 'name'.
	memset(&s_CpuBrand, 0, sizeof(s_CpuBrand));

	// Leaf 0x80000000 returns the highest supported extended leaf in EAX.
	if (cpuid(0x80000000UL).eax >= 0x80000004UL)
	{
		// Each leaf contributes 16 characters (EAX, EBX, ECX, EDX).
		s_CpuBrand.cpuid[0] = cpuid(0x80000002UL);
		s_CpuBrand.cpuid[1] = cpuid(0x80000003UL);
		s_CpuBrand.cpuid[2] = cpuid(0x80000004UL);
	}

	return s_CpuBrand.name;
}
/*******************************************************************************/
// Returns non-zero if Hyper-Threading Technology is supported on the processors and zero if not.
// If it's supported, it does not mean that it's been enabled. So we test another flag to see if it's enabled
// See Intel Processor Identification and the CPUID instruction Application Note 485.
// http://www.intel.com/Assets/PDF/appnote/241618.pdf
static bool HTSupported(void)
{
enum {
HT_BIT = 0x10000000,// EDX[28] - Bit 28 set indicates Hyper-Threading Technology is supported in hardware.
FAMILY_ID = 0x0f00, // EAX[11:8] - Bit 11 thru 8 contains family processor id.
EXT_FAMILY_ID = 0x0f00000, // EAX[23:20] - Bit 23 thru 20 contains extended family processor id.
FAMILY_ID_386 = 0x0300,
FAMILY_ID_486 = 0x0400, // EAX[8:12] - 486, 487 and overdrive.
FAMILY_ID_PENTIUM = 0x0500, // Pentium, Pentium OverDrive 60 - 200.
FAMILY_ID_PENTIUM_PRO = 0x0600, // P Pro, P II, P III, P M, Celeron M, Core Duo, Core Solo, Core2 Duo, Core2 Extreme, P D, Xeon model F,
// also 45-nm : Intel Atom, Core i7, Xeon MP ; see Intel Processor Identification and the CPUID instruction pg 20,21.
FAMILY_ID_EXTENDED = 0x0F00 // P IV, Xeon, Celeron D, P D, .
};
// This works on both newer AMD and Intel CPUs.
CpuIdResult_t cpuid1 = cpuid(1);
// Previously, we detected P4 specifically; now, we detect GenuineIntel with HT enabled in general.
// if (((cpuid1.eax & FAMILY_ID) == FAMILY_ID_EXTENDED) || (cpuid1.eax & EXT_FAMILY_ID))
// Check to see if this is an Intel Processor with HT or CMT capability , and if HT/CMT is enabled.
// ddk: This codef is actually correct: see example code at http://software.intel.com/en-us/articles/multi-core-detect/
return (cpuid1.edx & HT_BIT) != 0 && // Genuine Intel Processor with Hyper-Threading Technology implemented.
((cpuid1.ebx >> 16) & 0xFF) > 1; // Hyper-Threading OR Core Multi-Processing has been enabled.
}
// Returns the number of logical processors per physical package, or 1 when
// HTSupported() reports that multi-threading is unavailable or unused.
static uint8_t LogicalProcessorsPerPackage(void)
{
	if (HTSupported())
	{
		// EBX[23:16] of CPUID leaf 1 holds the logical processor count per package.
		const unsigned long nLeaf1Ebx = cpuid(1).ebx;
		return (uint8_t)((nLeaf1Ebx >> 16) & 0xFF);
	}

	return 1;
}
// Measure the processor clock speed by sampling the cycle count, waiting
// for some fraction of a second, then measuring the elapsed number of cycles.
// Returns the cycle delta over the measurement window scaled up by 32, i.e. an
// estimate of cycles per second.
// NOTE: this spins on QueryPerformanceCounter() for the whole window (~1/32 s).
static int64_t CalculateClockSpeed(void)
{
LARGE_INTEGER waitTime, startCount, curCount;
// NOTE(review): CCycleCount presumably wraps the CPU cycle counter - confirm in tier0/fasttimer.h.
CCycleCount start, end;
// Take 1/32 of a second for the measurement.
// QueryPerformanceFrequency() yields performance-counter ticks per second.
QueryPerformanceFrequency(&waitTime);
int scale = 5;
// ticks-per-second >> 5 == number of ticks in 1/32 s.
waitTime.QuadPart >>= scale;
QueryPerformanceCounter(&startCount);
start.Sample();
// Busy-wait until the measurement window has elapsed.
do
{
QueryPerformanceCounter(&curCount);
} while (curCount.QuadPart - startCount.QuadPart < waitTime.QuadPart);
end.Sample();
// Scale the 1/32 s cycle delta back up to a full second.
return (end.GetLongCycles() - start.GetLongCycles()) << scale;
}
static void FindIntelCacheDesc(uint8_t nDesc, const IntelCacheDesc_t* pDesc, int nDescCount, uint32_t& nCache, uint32_t& nCacheDesc)
{
for (int i = 0; i < nDescCount; ++i)
{
if (pDesc->nDesc == nDesc)
{
nCache = pDesc->nCacheSize;
nCacheDesc = nDesc;
break;
}
}
}
// Decodes one CPUID leaf 2 register: each of its four bytes is a cache
// descriptor that is looked up in the L1, L2 and L3 tables, updating the
// matching size/descriptor fields of s_cpuInformation.
// A register with bit 31 set does not carry valid descriptors and is skipped.
// See "Output of the CPUID instruction" from Intel, page 26.
static void InterpretIntelCacheDescriptors(uint32_t nPackedDesc)
{
	const bool bInvalidRegister = (nPackedDesc & 0x80000000) != 0;
	if (bInvalidRegister)
	{
		return; // This is a wrong descriptor.
	}

	// Walk the register from the least significant byte upwards.
	for (int nByte = 0; nByte < 4; ++nByte, nPackedDesc >>= 8)
	{
		const uint8_t nDesc = (uint8_t)(nPackedDesc & 0xFF);

		FindIntelCacheDesc(nDesc, s_IntelL1DataCacheDesc, ARRAYSIZE(s_IntelL1DataCacheDesc), s_cpuInformation.m_nL1CacheSizeKb, s_cpuInformation.m_nL1CacheDesc);
		FindIntelCacheDesc(nDesc, s_IntelL2DataCacheDesc, ARRAYSIZE(s_IntelL2DataCacheDesc), s_cpuInformation.m_nL2CacheSizeKb, s_cpuInformation.m_nL2CacheDesc);
		FindIntelCacheDesc(nDesc, s_IntelL3DataCacheDesc, ARRAYSIZE(s_IntelL3DataCacheDesc), s_cpuInformation.m_nL3CacheSizeKb, s_cpuInformation.m_nL3CacheDesc);
	}
}
// Returns the process-wide CPU description. The hardware is probed on the
// first call (clock speed measurement, processor counts, feature flags, cache
// sizes) and the cached structure is returned on every later call.
//
// Changes compared with the previous version:
//  - The CPUID leaf 4 cache geometry fields are decoded with their full widths
//    (ways: EBX[31:22], partitions: EBX[21:12], line size: EBX[11:0]); the old
//    masks (0x3F / 0x3F / 0xFF) truncated them.
//  - The cache size product is computed in 64 bits before conversion to KiB.
const CPUInformation& GetCPUInformation(void)
{
	CPUInformation& pi = s_cpuInformation;

	// Has the structure already been initialized and filled out?
	// m_Size doubles as the "already initialized" marker.
	if (pi.m_Size == sizeof(pi))
	{
		return pi;
	}

	// Redundant, but just in case the user somehow messes with the size.
	memset(&pi, 0x0, sizeof(pi));

	// Fill out the structure, and return it:
	pi.m_Size = sizeof(pi);

	// Grab the processor frequency:
	pi.m_Speed = CalculateClockSpeed();

	// Get the logical and physical processor counts:
	// (this per-package value is overwritten below by the count the OS reports.)
	pi.m_nLogicalProcessors = LogicalProcessorsPerPackage();

	const char* pchVendor = GetProcessorVendorId();
	const bool bAuthenticAMD = (0 == _stricmp(pchVendor, "AuthenticAMD"));
	const bool bGenuineIntel = !bAuthenticAMD && (0 == _stricmp(pchVendor, "GenuineIntel"));

	SYSTEM_INFO si;
	ZeroMemory(&si, sizeof(si));
	GetSystemInfo(&si);

	// Fixing: si.dwNumberOfProcessors is the number of logical processors according to experiments on i7, P4 and a DirectX sample (Aug'09).
	// This is contrary to MSDN documentation on GetSystemInfo().
	pi.m_nLogicalProcessors = si.dwNumberOfProcessors;

	if (bAuthenticAMD)
	{
		// Quick fix for AMD Phenom: it reports 3 logical cores and 4 physical cores;
		// No AMD CPUs by the end of 2009 have HT, so we'll override HT detection here.
		// NOTE(review): with this override m_bHT is always false on AMD, including
		// parts with SMT - confirm whether CpuTopology can be used for AMD as well.
		pi.m_nPhysicalProcessors = pi.m_nLogicalProcessors;
	}
	else
	{
		CpuTopology topo;
		pi.m_nPhysicalProcessors = topo.NumberOfSystemCores();
	}

	// Make sure I always report at least one, when running WinXP with the /ONECPU switch,
	// it likes to report 0 processors for some reason.
	if (pi.m_nPhysicalProcessors == 0 && pi.m_nLogicalProcessors == 0)
	{
		assert(!"Missing CPU detection code for this processor.");
		pi.m_nPhysicalProcessors = 1;
		pi.m_nLogicalProcessors = 1;
	}

	// Leaf 0 returns the highest supported standard leaf in EAX.
	CpuIdResult_t cpuid0 = cpuid(0);
	if (cpuid0.eax >= 1)
	{
		CpuIdResult_t cpuid1 = cpuid(1);
		uint32_t bFPU = cpuid1.edx & 1; // This should always be on on anything we support.

		// Determine Processor Features:
		pi.m_bRDTSC = (cpuid1.edx >> 4) & 1;
		pi.m_bCMOV = (cpuid1.edx >> 15) & 1;
		pi.m_bFCMOV = (pi.m_bCMOV && bFPU) ? 1 : 0;
		pi.m_bMMX = (cpuid1.edx >> 23) & 1;
		pi.m_bSSE = (cpuid1.edx >> 25) & 1;
		pi.m_bSSE2 = (cpuid1.edx >> 26) & 1;
		pi.m_bSSE3 = cpuid1.ecx & 1;
		pi.m_bSSSE3 = (cpuid1.ecx >> 9) & 1;
		pi.m_bSSE4a = CheckSSE4aTechnology();
		pi.m_bSSE41 = (cpuid1.ecx >> 19) & 1;
		pi.m_bSSE42 = (cpuid1.ecx >> 20) & 1;
		pi.m_b3DNow = Check3DNowTechnology();
		pi.m_bAVX = (cpuid1.ecx >> 28) & 1;
		pi.m_szProcessorID = (char*)GetProcessorVendorId();
		pi.m_szProcessorBrand = (char*)GetProcessorBrand();
		pi.m_bHT = (pi.m_nPhysicalProcessors < pi.m_nLogicalProcessors); //HTSupported();

		pi.m_nModel = cpuid1.eax; // Full CPU model info.
		pi.m_nFeatures[0] = cpuid1.edx; // x87+ features.
		pi.m_nFeatures[1] = cpuid1.ecx; // sse3+ features.
		pi.m_nFeatures[2] = cpuid1.ebx; // Some additional features.

		if (bGenuineIntel)
		{
			if (cpuid0.eax >= 4)
			{
				// We have CPUID.4, use it to find all the cache parameters.
				const uint32_t nCachesToQuery = 4; // Level 0 is not used.
				uint32_t nCacheSizeKiB[nCachesToQuery];
				for (uint32_t i = 0; i < nCachesToQuery; ++i)
				{
					nCacheSizeKiB[i] = 0;
				}

				// The upper bound only guards against a CPU that never reports "no more caches".
				for (unsigned long nSub = 0; nSub < 1024; ++nSub)
				{
					CpuIdResult_t cpuid4 = cpuidex(4, nSub);
					uint32_t nCacheType = cpuid4.eax & 0x1F;
					if (nCacheType == 0)
					{
						// No more caches.
						break;
					}
					if (nCacheType & 1)
					{
						// This cache includes data cache: it's either data or unified. Instruction cache type is 2.
						uint32_t nCacheLevel = (cpuid4.eax >> 5) & 7;
						if (nCacheLevel < nCachesToQuery)
						{
							// Every field is stored as "value - 1".
							const uint32_t nCacheWays = 1 + ((cpuid4.ebx >> 22) & 0x3FF);       // EBX[31:22]
							const uint32_t nCachePartitions = 1 + ((cpuid4.ebx >> 12) & 0x3FF); // EBX[21:12]
							const uint32_t nCacheLineSize = 1 + (cpuid4.ebx & 0xFFF);           // EBX[11:0]
							const uint32_t nCacheSets = 1 + cpuid4.ecx;

							// Multiply in 64 bits so that large geometries cannot wrap.
							const uint64_t nCacheSizeBytes = (uint64_t)nCacheWays * nCachePartitions * nCacheLineSize * nCacheSets;
							nCacheSizeKiB[nCacheLevel] = (uint32_t)(nCacheSizeBytes >> 10);
						}
					}
				}

				pi.m_nL1CacheSizeKb = nCacheSizeKiB[1];
				pi.m_nL2CacheSizeKb = nCacheSizeKiB[2];
				pi.m_nL3CacheSizeKb = nCacheSizeKiB[3];
			}
			else if (cpuid0.eax >= 2)
			{
				// Get the cache.
				// The low byte of EAX is the number of times leaf 2 has to be queried.
				CpuIdResult_t cpuid2 = cpuid(2);
				for (int i = (cpuid2.eax & 0xFF); i-- > 0; )
				{
					InterpretIntelCacheDescriptors(cpuid2.eax & ~0xFF);
					InterpretIntelCacheDescriptors(cpuid2.ebx);
					InterpretIntelCacheDescriptors(cpuid2.ecx);
					InterpretIntelCacheDescriptors(cpuid2.edx);
					cpuid2 = cpuid(2); // Read the next.
				}
			}
		}
	}

	// Leaf 0x80000000 returns the highest supported extended leaf in EAX.
	CpuIdResult_t cpuid0ex = cpuid(0x80000000);
	if (bAuthenticAMD)
	{
		if (cpuid0ex.eax >= 0x80000005)
		{
			CpuIdResult_t cpuid5ex = cpuid(0x80000005);
			pi.m_nL1CacheSizeKb = cpuid5ex.ecx >> 24;
			pi.m_nL1CacheDesc = cpuid5ex.ecx & 0xFFFFFF;
		}
		if (cpuid0ex.eax >= 0x80000006)
		{
			CpuIdResult_t cpuid6ex = cpuid(0x80000006);
			pi.m_nL2CacheSizeKb = cpuid6ex.ecx >> 16;
			pi.m_nL2CacheDesc = cpuid6ex.ecx & 0xFFFF;
			pi.m_nL3CacheSizeKb = (cpuid6ex.edx >> 18) * 512; // EDX[31:18] is scaled by 512 to get KiB.
			pi.m_nL3CacheDesc = cpuid6ex.edx & 0xFFFF;
		}
	}
	else if (bGenuineIntel)
	{
		if (cpuid0ex.eax >= 0x80000006)
		{
			// Make sure we got the L2 cache info right.
			pi.m_nL2CacheSizeKb = (cpuid(0x80000006).ecx >> 16);
		}
	}

	return pi;
}