Squashed 'externals/xbyak/' changes from f72646a7..4a6fac8a

4a6fac8a update version to 5.77
801cf3fd cosmetic change of getNumCores
d397e824 fix number of cores that share LLC cache
a669e092 support non-intel-cpu visual studio
af5f422e Merge branch 'fenghaitao-guard_x86' into develop
9b98dc17 Guard x86 specific codes with "#if defined(__i386__) || defined(__x86_64__)"
dd4173e1 move some member variables input private

git-subtree-dir: externals/xbyak
git-subtree-split: 4a6fac8ade404f667b94170f713367fe7da2a852
This commit is contained in:
Lioncash 2019-03-24 14:20:49 -04:00
parent cc67312fed
commit 1957180595
5 changed files with 66 additions and 37 deletions

View File

@ -1,5 +1,5 @@
# Xbyak 5.76 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
# Xbyak 5.77 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
## Abstract
@ -392,6 +392,7 @@ modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
## History
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed

View File

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.76
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.77
-----------------------------------------------------------------------------
◎概要
@ -373,6 +373,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
2019/03/06 ver 5.77 LLCキャッシュを共有数CPU数の修整(by densamoilov)
2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)
2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元
2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す

View File

@ -113,7 +113,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x5760 /* 0xABCD = A.BC(D) */
VERSION = 0x5770 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED

View File

@ -1,4 +1,4 @@
const char *getVersionString() const { return "5.76"; }
const char *getVersionString() const { return "5.77"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }

View File

@ -9,6 +9,11 @@
*/
#include "xbyak.h"
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define XBYAK_INTEL_CPU_SPECIFIC
#endif
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
#if (_MSC_VER < 1400) && defined(XBYAK32)
static inline __declspec(naked) void __cpuid(int[4], int)
@ -47,6 +52,7 @@
#endif
#endif
#endif
#endif
namespace Xbyak { namespace util {
@ -65,6 +71,11 @@ class Cpu {
static const size_t maxTopologyLevels = 2;
unsigned int numCores_[maxTopologyLevels];
static const unsigned int maxNumberCacheLevels = 10;
unsigned int dataCacheSize_[maxNumberCacheLevels];
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
unsigned int dataCacheLevels_;
unsigned int get32bitAsBE(const char *x) const
{
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
@ -75,7 +86,7 @@ class Cpu {
}
void setFamily()
{
unsigned int data[4];
unsigned int data[4] = {};
getCpuid(1, data);
stepping = data[0] & mask(4);
model = (data[0] >> 4) & mask(4);
@ -102,7 +113,7 @@ class Cpu {
{
if ((type_ & tINTEL) == 0) return;
unsigned int data[4];
unsigned int data[4] = {};
/* CAUTION: These numbers are configuration as shipped by Intel. */
getCpuidEx(0x0, 0, data);
@ -121,9 +132,6 @@ class Cpu {
numCores_[level - 1] = extractBit(data[1], 0, 15);
}
}
if (numCores_[SmtLevel - 1] != 0) {
numCores_[CoreLevel - 1] /= numCores_[SmtLevel - 1];
}
} else {
/*
Failed to deremine num of cores without x2APIC support.
@ -143,7 +151,7 @@ class Cpu {
const unsigned int UNIFIED_CACHE = 3;
unsigned int smt_width = 0;
unsigned int logical_cores = 0;
unsigned int data[4];
unsigned int data[4] = {};
if (x2APIC_supported_) {
smt_width = numCores_[0];
@ -159,7 +167,7 @@ class Cpu {
on socket reported by leaf 11, then it is a correct number
of cores not an upperbound.
*/
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
getCpuidEx(0x4, i, data);
unsigned int cacheType = extractBit(data[0], 0, 4);
if (cacheType == NO_CACHE) break;
@ -169,15 +177,15 @@ class Cpu {
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
}
assert(actual_logical_cores != 0);
data_cache_size[data_cache_levels] =
dataCacheSize_[dataCacheLevels_] =
(extractBit(data[1], 22, 31) + 1)
* (extractBit(data[1], 12, 21) + 1)
* (extractBit(data[1], 0, 11) + 1)
* (data[2] + 1);
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
assert(smt_width != 0);
cores_sharing_data_cache[data_cache_levels] = (std::max)(actual_logical_cores / smt_width, 1u);
data_cache_levels++;
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
dataCacheLevels_++;
}
}
}
@ -191,28 +199,25 @@ public:
int displayFamily; // family + extFamily
int displayModel; // model + extModel
// may I move these members into private?
static const unsigned int maxNumberCacheLevels = 10;
unsigned int data_cache_size[maxNumberCacheLevels];
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
unsigned int data_cache_levels;
unsigned int getNumCores(IntelCpuTopologyLevel level) {
if (level != SmtLevel && level != CoreLevel) throw Error(ERR_BAD_PARAMETER);
if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
return numCores_[level - 1];
switch (level) {
case SmtLevel: return numCores_[level - 1];
case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1];
default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
}
}
unsigned int getDataCacheLevels() const { return data_cache_levels; }
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
unsigned int getCoresSharingDataCache(unsigned int i) const
{
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
return cores_sharing_data_cache[i];
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
return coresSharignDataCache_[i];
}
unsigned int getDataCacheSize(unsigned int i) const
{
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
return data_cache_size[i];
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
return dataCacheSize_[i];
}
/*
@ -220,30 +225,45 @@ public:
*/
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
{
#ifdef _MSC_VER
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
__cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
#else
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
#else
(void)eaxIn;
(void)data;
#endif
}
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
{
#ifdef _MSC_VER
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
#else
#else
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
#endif
#else
(void)eaxIn;
(void)ecxIn;
(void)data;
#endif
}
static inline uint64 getXfeature()
{
#ifdef _MSC_VER
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
return _xgetbv(0);
#else
#else
unsigned int eax, edx;
// xgetvb is not support on gcc 4.2
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64)edx << 32) | eax;
#endif
#else
return 0;
#endif
}
typedef uint64 Type;
@ -316,9 +336,11 @@ public:
: type_(NONE)
, x2APIC_supported_(false)
, numCores_()
, data_cache_levels(0)
, dataCacheSize_()
, coresSharignDataCache_()
, dataCacheLevels_(0)
{
unsigned int data[4];
unsigned int data[4] = {};
const unsigned int& EAX = data[0];
const unsigned int& EBX = data[1];
const unsigned int& ECX = data[2];
@ -427,12 +449,17 @@ class Clock {
public:
static inline uint64 getRdtsc()
{
#ifdef _MSC_VER
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
return __rdtsc();
#else
#else
unsigned int eax, edx;
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
return ((uint64)edx << 32) | eax;
#endif
#else
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
return 0;
#endif
}
Clock()