r5sdk/r5dev/tier0/cpu.cpp

//===== Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ======//
//
// Purpose:
//
// $NoKeywords: $
//=============================================================================//
#include "core/stdafx.h"
#include "tier0/cpu.h"
#include "tier0/cputopology.h"
#include "tier0/fasttimer.h"
/*******************************************************************************/
static CPUInformation s_cpuInformation;
static char s_CpuVendorID[13] = "unknown";
bool s_bCpuBrandInitialized = false;
bool s_bCpuVendorIdInitialized = false;
/*******************************************************************************/
struct CpuIdResult_t
{
unsigned long eax;
unsigned long ebx;
unsigned long ecx;
unsigned long edx;
void Reset(void)
{
eax = ebx = ecx = edx = 0;
}
};
struct IntelCacheDesc_t
{
uint8_t nDesc;
uint16_t nCacheSize;
};
/*******************************************************************************/
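// The brand string is assembled from CPUID leaves 0x80000002-0x80000004; each leaf returns
// 16 bytes across EAX/EBX/ECX/EDX, 48 characters total, so the union overlays three
// CpuIdResult_t entries with a 49-byte buffer (48 characters plus a null terminator).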
union CpuBrand_t
{
CpuIdResult_t cpuid[3];
char name[49];
};
CpuBrand_t s_CpuBrand;
/*******************************************************************************/
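// Lookup tables mapping CPUID leaf 2 descriptor bytes to data/unified cache sizes in KiB,
// per Intel's "Intel Processor Identification and the CPUID Instruction" application note.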
inline static IntelCacheDesc_t s_IntelL1DataCacheDesc[] = {
{ 0xA, 8 },
{ 0xC, 16 },
{ 0xD, 16 },
{ 0x2C, 32 },
{ 0x30, 32 },
{ 0x60, 16 },
{ 0x66, 8 },
{ 0x67, 16 },
{ 0x68, 32 }
};
inline static IntelCacheDesc_t s_IntelL2DataCacheDesc[] =
{
{ 0x21, 256 },
{ 0x39, 128 },
{ 0x3a, 192 },
{ 0x3b, 128 },
{ 0x3c, 256 },
{ 0x3D, 384 },
{ 0x3E, 512 },
{ 0x41, 128 },
{ 0x42, 256 },
{ 0x43, 512 },
{ 0x44, 1024 },
{ 0x45, 2048 },
{ 0x48, 3 * 1024 },
{ 0x4e, 6 * 1024 },
{ 0x78, 1024 },
{ 0x79, 128 },
{ 0x7a, 256 },
{ 0x7b, 512 },
{ 0x7c, 1024 },
{ 0x7d, 2048 },
{ 0x7f, 512 },
{ 0x82, 256 },
{ 0x83, 512 },
{ 0x84, 1024 },
{ 0x85, 2048 },
{ 0x86, 512 },
{ 0x87, 1024 }
};
inline static IntelCacheDesc_t s_IntelL3DataCacheDesc[] = {
{ 0x22, 512 },
{ 0x23, 1024 },
{ 0x25, 2 * 1024 },
{ 0x29, 4 * 1024 },
{ 0x46, 4 * 1024 },
{ 0x47, 8 * 1024 },
{ 0x49, 4 * 1024 }, // Only valid when: family == 0x0F && model == 0x06.
{ 0x4a, 6 * 1024 },
{ 0x4b, 8 * 1024 },
{ 0x4c, 12 * 1024 },
{ 0x4d, 16 * 1024 },
{ 0xD0, 512 },
{ 0xD1, 1024 },
{ 0xD2, 2048 },
{ 0xD6, 1024 },
{ 0xD7, 2048 },
{ 0xD8, 4096 },
{ 0xDC, 1536 },
{ 0xDD, 3 * 1024 },
{ 0xDE, 6 * 1024 },
{ 0xE2, 2048 },
{ 0xE3, 4096 },
{ 0xE4, 8 * 1024 },
{ 0xEA, 12 * 1024 },
{ 0xEB, 18 * 1024 },
{ 0xEC, 24 * 1024 }
};
/*******************************************************************************/
static bool cpuid(unsigned long function, CpuIdResult_t& out)
{
int pCPUInfo[4];
__cpuid(pCPUInfo, static_cast<int>(function));
out.eax = pCPUInfo[0];
out.ebx = pCPUInfo[1];
out.ecx = pCPUInfo[2];
out.edx = pCPUInfo[3];
return true;
}
static bool cpuidex(unsigned long function, unsigned long subfunction, CpuIdResult_t& out)
{
int pCPUInfo[4];
__cpuidex(pCPUInfo, static_cast<int>(function), static_cast<int>(subfunction));
out.eax = pCPUInfo[0];
out.ebx = pCPUInfo[1];
out.ecx = pCPUInfo[2];
out.edx = pCPUInfo[3];
return true;
}
static CpuIdResult_t cpuid(unsigned long function)
{
CpuIdResult_t out;
if (!cpuid(function, out))
{
out.Reset();
}
return out;
}
static CpuIdResult_t cpuidex(unsigned long function, unsigned long subfunction)
{
CpuIdResult_t out;
if (!cpuidex(function, subfunction, out))
{
out.Reset();
}
return out;
}
/*******************************************************************************/
static bool CheckSSETechnology(void)
{
return (cpuid(1).edx & 0x2000000L) != 0; // bit 25 of EDX.
}
static bool CheckSSE2Technology(void)
{
return (cpuid(1).edx & 0x04000000) != 0; // bit 26 of EDX.
}
bool CheckSSE3Technology(void)
{
return (cpuid(1).ecx & 0x00000001) != 0; // bit 0 of ECX.
}
bool CheckSSSE3Technology(void)
{
// SSSE3 is implemented by both Intel and AMD.
// Detection is done the same way for both vendors.
return (cpuid(1).ecx & (1 << 9)) != 0; // bit 9 of ECX.
}
bool CheckSSE41Technology(void)
{
// SSE 4.1 is implemented by both Intel and AMD.
// Detection is done the same way for both vendors.
return (cpuid(1).ecx & (1 << 19)) != 0; // bit 19 of ECX.
}
bool CheckSSE42Technology(void)
{
// SSE4.2 is an Intel-only feature.
const char* pchVendor = GetProcessorVendorId();
if (0 != _stricmp(pchVendor, "GenuineIntel"))
{
return false;
}
return (cpuid(1).ecx & (1 << 20)) != 0; // bit 20 of ECX.
}
bool CheckSSE4aTechnology(void)
{
// SSE 4a is an AMD-only feature.
const char* pchVendor = GetProcessorVendorId();
if (0 != _stricmp(pchVendor, "AuthenticAMD"))
{
return false;
}
return (cpuid(0x80000001).ecx & (1 << 6)) != 0; // bit 6 of ECX (extended leaf 0x80000001).
}
static bool Check3DNowTechnology(void)
{
if (cpuid(0x80000000).eax > 0x80000000L)
{
return (cpuid(0x80000001).edx & (1 << 31)) != 0; // 3DNow! is reported in EDX bit 31 of extended leaf 0x80000001.
}
return false;
}
static bool CheckCMOVTechnology(void)
{
return (cpuid(1).edx & (1 << 15)) != 0;
}
static bool CheckFCMOVTechnology(void)
{
return (cpuid(1).edx & (1 << 16)) != 0;
}
static bool CheckRDTSCTechnology(void)
{
return (cpuid(1).edx & 0x10) != 0;
}
// Return the Processor's vendor identification string, or "Generic_x86" if it doesn't exist on this CPU.
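// CPUID leaf 0 returns the 12-character vendor string split across EBX, EDX and ECX (in that
// order), e.g. "GenuineIntel" is EBX="Genu", EDX="ineI", ECX="ntel"; the copies below preserve
// that register order.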
const char* GetProcessorVendorId(void)
{
if (s_bCpuVendorIdInitialized)
{
return s_CpuVendorID;
}
s_bCpuVendorIdInitialized = true;
CpuIdResult_t cpuid0 = cpuid(0);
memset(s_CpuVendorID, 0, sizeof(s_CpuVendorID));
if (!cpuid0.eax)
{
strcpy(s_CpuVendorID, ("Generic_x86"));
}
else
{
memcpy(s_CpuVendorID + 0, &(cpuid0.ebx), sizeof(cpuid0.ebx));
memcpy(s_CpuVendorID + 4, &(cpuid0.edx), sizeof(cpuid0.edx));
memcpy(s_CpuVendorID + 8, &(cpuid0.ecx), sizeof(cpuid0.ecx));
}
return s_CpuVendorID;
}
const char* GetProcessorBrand(bool bRemovePadding = true)
{
if (s_bCpuBrandInitialized)
{
return s_CpuBrand.name;
}
s_bCpuBrandInitialized = true;
memset(&s_CpuBrand, '\0', sizeof(s_CpuBrand));
const char* pchVendor = GetProcessorVendorId();
if (0 == _stricmp(pchVendor, "AuthenticAMD") || 0 == _stricmp(pchVendor, "GenuineIntel"))
{
// AMD/Intel brand string.
if (cpuid(0x80000000).eax >= 0x80000004)
{
s_CpuBrand.cpuid[0] = cpuid(0x80000002);
s_CpuBrand.cpuid[1] = cpuid(0x80000003);
s_CpuBrand.cpuid[2] = cpuid(0x80000004);
}
}
if (bRemovePadding)
{
for (size_t i = sizeof(s_CpuBrand.name); i-- > 0; )
{
if (s_CpuBrand.name[i] != '\0')
{
if (s_CpuBrand.name[i] != ' ')
{
if (i < (sizeof(s_CpuBrand.name) - 1))
{
s_CpuBrand.name[i + 1] = '\0';
break;
}
}
}
}
}
return s_CpuBrand.name;
}
/*******************************************************************************/
// Returns non-zero if Hyper-Threading Technology is supported on the processors and zero if not.
// If it's supported, it does not mean that it's been enabled. So we test another flag to see if it's enabled
// See Intel Processor Identification and the CPUID instruction Application Note 485.
// http://www.intel.com/Assets/PDF/appnote/241618.pdf
static bool HTSupported(void)
{
enum {
HT_BIT = 0x10000000,// EDX[28] - Bit 28 set indicates Hyper-Threading Technology is supported in hardware.
FAMILY_ID = 0x0f00, // EAX[11:8] - Bit 11 thru 8 contains family processor id.
EXT_FAMILY_ID = 0x0f00000, // EAX[23:20] - Bit 23 thru 20 contains extended family processor id.
FAMILY_ID_386 = 0x0300,
FAMILY_ID_486 = 0x0400, // EAX[11:8] - 486, 487 and overdrive.
FAMILY_ID_PENTIUM = 0x0500, // Pentium, Pentium OverDrive 60 - 200.
FAMILY_ID_PENTIUM_PRO = 0x0600, // P Pro, P II, P III, P M, Celeron M, Core Duo, Core Solo, Core2 Duo, Core2 Extreme, P D, Xeon model F,
// also 45-nm: Intel Atom, Core i7, Xeon MP; see Intel Processor Identification and the CPUID instruction, pg 20-21.
FAMILY_ID_EXTENDED = 0x0F00 // P IV, Xeon, Celeron D, P D.
};
// This works on both newer AMD and Intel CPUs.
CpuIdResult_t cpuid1 = cpuid(1);
// Previously, we detected P4 specifically; now, we detect GenuineIntel with HT enabled in general.
// if (((cpuid1.eax & FAMILY_ID) == FAMILY_ID_EXTENDED) || (cpuid1.eax & EXT_FAMILY_ID))
// Check to see if this is an Intel processor with HT or CMT capability, and if HT/CMT is enabled.
// ddk: This code is actually correct: see example code at http://software.intel.com/en-us/articles/multi-core-detect/
return (cpuid1.edx & HT_BIT) != 0 && // Genuine Intel Processor with Hyper-Threading Technology implemented.
((cpuid1.ebx >> 16) & 0xFF) > 1; // Hyper-Threading OR Core Multi-Processing has been enabled.
}
// Returns the number of logical processors per physical processor.
static uint8_t LogicalProcessorsPerPackage(void)
{
// EBX[23:16] indicate number of logical processors per package.
const unsigned NUM_LOGICAL_BITS = 0x00FF0000;
if (!HTSupported())
{
return 1;
}
return static_cast<uint8_t>(((cpuid(1).ebx & NUM_LOGICAL_BITS) >> 16));
}
// Measure the processor clock speed by sampling the cycle count, waiting
// for some fraction of a second, then measuring the elapsed number of cycles.
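// Illustrative example (hypothetical numbers): with a QPC frequency of 10,000,000 ticks/s,
// waitTime becomes 312,500 ticks (~1/32 s); if ~93,750,000 cycles elapse in that window,
// the function reports 93,750,000 << 5 = 3,000,000,000 cycles/s (3 GHz).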
static int64_t CalculateClockSpeed(void)
{
LARGE_INTEGER waitTime, startCount, curCount;
CCycleCount start, end;
// Take 1/32 of a second for the measurement.
QueryPerformanceFrequency(&waitTime);
int scale = 5;
waitTime.QuadPart >>= scale;
QueryPerformanceCounter(&startCount);
start.Sample();
do
{
QueryPerformanceCounter(&curCount);
} while (curCount.QuadPart - startCount.QuadPart < waitTime.QuadPart);
end.Sample();
return (end.GetLongCycles() - start.GetLongCycles()) << scale;
}
static void FindIntelCacheDesc(uint8_t nDesc, const IntelCacheDesc_t* pDesc, int nDescCount, uint32_t& nCache, uint32_t& nCacheDesc)
{
for (int i = 0; i < nDescCount; ++i)
{
const IntelCacheDesc_t& desc = pDesc[i];
if (desc.nDesc == nDesc)
{
nCache = desc.nCacheSize;
nCacheDesc = nDesc;
break;
}
}
}
// See "Output of the CPUID instruction" from Intel, page 26.
static void InterpretIntelCacheDescriptors(uint32_t nPackedDesc, CPUInformation& pi)
{
if (nPackedDesc & 0x80000000)
{
return; // Bit 31 set means this register does not contain valid cache descriptors.
}
for (int i = 0; i < 4; ++i)
{
uint8_t nDesc = nPackedDesc & 0xFF;
FindIntelCacheDesc(nDesc, s_IntelL1DataCacheDesc, ARRAYSIZE(s_IntelL1DataCacheDesc), pi.m_nL1CacheSizeKb, pi.m_nL1CacheDesc);
FindIntelCacheDesc(nDesc, s_IntelL2DataCacheDesc, ARRAYSIZE(s_IntelL2DataCacheDesc), pi.m_nL2CacheSizeKb, pi.m_nL2CacheDesc);
FindIntelCacheDesc(nDesc, s_IntelL3DataCacheDesc, ARRAYSIZE(s_IntelL3DataCacheDesc), pi.m_nL3CacheSizeKb, pi.m_nL3CacheDesc);
int nFamily = (pi.m_nModel >> 8) & 0xF;
int nModel = (pi.m_nModel >> 4) & 0xF;
if (nDesc == 0x49 && (nFamily != 0x0F || nModel != 0x06))
{
pi.m_nL3CacheSizeKb = 0;
pi.m_nL3CacheDesc = 0;
}
nPackedDesc >>= 8;
}
}
const CPUInformation& GetCPUInformation(void)
{
CPUInformation& pi = s_cpuInformation;
// Has the structure already been initialized and filled out?
if (pi.m_Size == sizeof(pi))
{
return pi;
}
// Redundant, but just in case the user somehow messes with the size.
memset(&pi, 0x0, sizeof(pi));
// Fill out the structure, and return it:
pi.m_Size = sizeof(pi);
// Grab the processor frequency:
pi.m_Speed = CalculateClockSpeed();
// Get the logical and physical processor counts:
pi.m_nLogicalProcessors = LogicalProcessorsPerPackage();
bool bAuthenticAMD = (0 == _stricmp(GetProcessorVendorId(), "AuthenticAMD"));
bool bGenuineIntel = !bAuthenticAMD && (0 == _stricmp(GetProcessorVendorId(), "GenuineIntel"));
SYSTEM_INFO si;
ZeroMemory(&si, sizeof(si));
GetSystemInfo(&si);
// Fixing: si.dwNumberOfProcessors is the number of logical processors according to experiments on i7, P4 and a DirectX sample (Aug'09).
// This is contrary to MSDN documentation on GetSystemInfo().
pi.m_nLogicalProcessors = uint8_t(si.dwNumberOfProcessors);
CpuTopology topo;
pi.m_nPhysicalProcessors = uint8_t(topo.NumberOfSystemCores());
// Make sure I always report at least one, when running WinXP with the /ONECPU switch,
// it likes to report 0 processors for some reason.
if (pi.m_nPhysicalProcessors == 0 && pi.m_nLogicalProcessors == 0)
{
assert(!"Missing CPU detection code for this processor.");
pi.m_nPhysicalProcessors = 1;
pi.m_nLogicalProcessors = 1;
}
CpuIdResult_t cpuid0 = cpuid(0);
if (cpuid0.eax >= 1)
{
CpuIdResult_t cpuid1 = cpuid(1);
uint32_t bFPU = cpuid1.edx & 1; // This should always be set on anything we support.
// Determine Processor Features:
pi.m_bRDTSC = (cpuid1.edx >> 4) & 1;
pi.m_bCMOV = (cpuid1.edx >> 15) & 1;
pi.m_bFCMOV = (pi.m_bCMOV && bFPU) ? 1 : 0;
pi.m_bPOPCNT = (cpuid1.ecx >> 23) & 1; // POPCNT is reported in ECX bit 23.
pi.m_bMMX = (cpuid1.edx >> 23) & 1;
pi.m_bSSE = (cpuid1.edx >> 25) & 1;
pi.m_bSSE2 = (cpuid1.edx >> 26) & 1;
pi.m_bSSE3 = cpuid1.ecx & 1;
pi.m_bSSSE3 = (cpuid1.ecx >> 9) & 1;
pi.m_bSSE4a = CheckSSE4aTechnology();
pi.m_bSSE41 = (cpuid1.ecx >> 19) & 1;
pi.m_bSSE42 = (cpuid1.ecx >> 20) & 1;
pi.m_b3DNow = Check3DNowTechnology();
pi.m_bAVX = (cpuid1.ecx >> 28) & 1;
pi.m_szProcessorID = const_cast<char*>(GetProcessorVendorId());
pi.m_szProcessorBrand = const_cast<char*>(GetProcessorBrand());
pi.m_bHT = (pi.m_nPhysicalProcessors < pi.m_nLogicalProcessors); //HTSupported();
pi.m_nModel = cpuid1.eax; // Full CPU model info.
pi.m_nFeatures[0] = cpuid1.edx; // x87+ features.
pi.m_nFeatures[1] = cpuid1.ecx; // sse3+ features.
pi.m_nFeatures[2] = cpuid1.ebx; // Some additional features.
if (bGenuineIntel)
{
if (cpuid0.eax >= 4)
{
// We have CPUID.4, use it to find all the cache parameters.
const uint32_t nCachesToQuery = 4; // Level 0 is not used.
uint32_t nCacheSizeKiB[nCachesToQuery]{};
uint32_t nCacheDesc[nCachesToQuery]{};
for (unsigned long nSub = 0; nSub < 1024; ++nSub)
{
CpuIdResult_t cpuid4 = cpuidex(4, nSub);
uint32_t nCacheType = cpuid4.eax & 0x1F;
if (nCacheType == 0)
{
// No more caches.
break;
}
if (nCacheType & 1)
{
// This cache includes data cache: it's either data or unified. Instruction cache type is 2.
uint32_t nCacheLevel = (cpuid4.eax >> 5) & 7;
if (nCacheLevel < nCachesToQuery)
{
uint32_t nCacheWays = 1 + ((cpuid4.ebx >> 22) & 0x3F);
uint32_t nCachePartitions = 1 + ((cpuid4.ebx >> 12) & 0x3F);
uint32_t nCacheLineSize = 1 + (cpuid4.ebx & 0xFF);
uint32_t nCacheSets = 1 + cpuid4.ecx;
uint32_t nCacheSizeBytes = nCacheWays * nCachePartitions * nCacheLineSize * nCacheSets;
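// Illustrative example (typical L1D values): 8 ways * 1 partition * 64-byte lines * 64 sets
// = 32768 bytes = 32 KiB.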
nCacheSizeKiB[nCacheLevel] = nCacheSizeBytes >> 10;
nCacheDesc[nCacheLevel] = 1 + cpuid4.ebx;
}
}
}
pi.m_nL1CacheSizeKb = nCacheSizeKiB[1];
pi.m_nL1CacheDesc = nCacheDesc[1];
pi.m_nL2CacheSizeKb = nCacheSizeKiB[2];
pi.m_nL2CacheDesc = nCacheDesc[2];
pi.m_nL3CacheSizeKb = nCacheSizeKiB[3];
pi.m_nL3CacheDesc = nCacheDesc[3];
}
else if (cpuid0.eax >= 2)
{
// Get the cache.
CpuIdResult_t cpuid2 = cpuid(2);
for (int i = (cpuid2.eax & 0xFF); i-- > 0; )
{
InterpretIntelCacheDescriptors(cpuid2.eax & ~0xFF, pi);
InterpretIntelCacheDescriptors(cpuid2.ebx, pi);
InterpretIntelCacheDescriptors(cpuid2.ecx, pi);
InterpretIntelCacheDescriptors(cpuid2.edx, pi);
cpuid2 = cpuid(2); // Read the next.
}
}
}
}
CpuIdResult_t cpuid0ex = cpuid(0x80000000);
if (bAuthenticAMD)
{
// TODO: add '0x8000001D' (newer AMD CPUs).
// Fall back to the code below if the value doesn't equal/exceed it.
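// Assumed field layout (per AMD's CPUID documentation): leaf 0x80000005 ECX[31:24] holds the
// L1 data cache size in KiB; leaf 0x80000006 ECX[31:16] holds the L2 size in KiB and
// EDX[31:18] the L3 size in 512 KiB units, which is why the code below scales by 512.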
if (cpuid0ex.eax >= 0x80000005)
{
CpuIdResult_t cpuid5ex = cpuid(0x80000005);
pi.m_nL1CacheSizeKb = cpuid5ex.ecx >> 24;
pi.m_nL1CacheDesc = cpuid5ex.ecx & 0xFFFFFF;
}
if (cpuid0ex.eax >= 0x80000006)
{
CpuIdResult_t cpuid6ex = cpuid(0x80000006);
pi.m_nL2CacheSizeKb = cpuid6ex.ecx >> 16;
pi.m_nL2CacheDesc = cpuid6ex.ecx & 0xFFFF;
pi.m_nL3CacheSizeKb = (cpuid6ex.edx >> 18) * 512;
pi.m_nL3CacheDesc = cpuid6ex.edx & 0xFFFF;
}
}
else if (bGenuineIntel)
{
if (cpuid0ex.eax >= 0x80000006)
{
// Make sure we got the L2 cache info right.
CpuIdResult_t cpuid6ex = cpuid(0x80000006);
pi.m_nL2CacheSizeKb = cpuid6ex.ecx >> 16;
pi.m_nL2CacheDesc = cpuid6ex.ecx & 0xFFFF;
}
}
return pi;
}
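// Example usage (illustrative sketch):
//   const CPUInformation& ci = GetCPUInformation();
//   if (ci.m_bSSE42)
//   {
//       // Take an SSE4.2-optimized code path.
//   }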