externals/xbyak: Update xbyak to 5.76

Keeps the external library up to date.
This commit is contained in:
Lioncash 2019-02-08 12:58:53 -05:00
commit ab4b82167f
No known key found for this signature in database
GPG Key ID: 4E3C3CC1031BA9C7
12 changed files with 287 additions and 87 deletions

View File

@ -1,5 +1,5 @@
# Xbyak 5.73 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
# Xbyak 5.76 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
## Abstract
@ -392,6 +392,11 @@ modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
## History
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
* 2018/Sep/04 ver 5.71 L() returns a new label instance

View File

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.73
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.76
-----------------------------------------------------------------------------
◎概要
@ -373,6 +373,11 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)
2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元
2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す
2018/10/21 ver 5.74 RegRip +/intの形をサポート Xbyak::CastToを削除
2018/10/15 util::StackFrameでmovの代わりにpush/popを使う
2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加

View File

@ -198,7 +198,7 @@ int main(int argc, char *argv[])
Brainfuck bf(ifs);
if (mode == 0) {
static int stack[128 * 1024];
bf.getCode<void (*)(void*, void*, int *)>()(Xbyak::CastTo<void*>(putchar), Xbyak::CastTo<void*>(getchar), stack);
bf.getCode<void (*)(const void*, const void*, int *)>()(reinterpret_cast<const void*>(putchar), reinterpret_cast<const void*>(getchar), stack);
} else {
dump(bf.getCode(), bf.getSize());
}

View File

@ -32,7 +32,7 @@ struct Code : Xbyak::CodeGenerator {
inline int add(int a, int b)
{
return Xbyak::CastTo<int (*)(int,int)>(buf)(a, b);
return reinterpret_cast<int (*)(int, int)>(buf)(a, b);
}
int main()

View File

@ -77,7 +77,7 @@ public:
#ifdef XBYAK_VARIADIC_TEMPLATE
call(atoi);
#else
call(Xbyak::CastTo<void*>(atoi));
call(reinterpret_cast<const void*>(atoi));
#endif
add(esp, 4);
#endif
@ -96,7 +96,7 @@ public:
mov(rax, (size_t)atoi);
jmp(rax);
#else
jmp(Xbyak::CastTo<void*>(atoi));
jmp(reinterpret_cast<const void*>(atoi));
#endif
}
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
@ -171,8 +171,9 @@ int main()
return 1;
}
int (*func)(int) = s.getCode<int (*)(int)>();
if (Xbyak::CastTo<uint8*>(func) != p) {
fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo<uint8*>(func));
const uint8 *funcp = reinterpret_cast<const uint8*>(func);
if (funcp != p) {
fprintf(stderr, "internal error %p %p\n", p, funcp);
return 1;
}
printf("0 + ... + %d = %d\n", 100, func(100));

View File

@ -104,9 +104,12 @@ void putCPUinfo()
Core i7-3930K 6 2D
*/
cpu.putFamily();
if (!cpu.has(Cpu::tINTEL)) return;
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
}
printf("SmtLevel =%u\n", cpu.getNumCores(Xbyak::util::SmtLevel));
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
}
int main()

View File

@ -204,7 +204,7 @@ public:
push(reg[r]);
push('A' + r);
push((int)str);
call(Xbyak::CastTo<void*>(printf));
call(reinterpret_cast<const void*>(printf));
add(esp, 4 * 4);
pop(ecx);
pop(edx);

View File

@ -1016,9 +1016,9 @@ struct GetAddressCode1 : Xbyak::CodeGenerator {
};
struct CodeLabelTable : Xbyak::CodeGenerator {
static const int ret0 = 3;
static const int ret1 = 5;
static const int ret2 = 8;
enum { ret0 = 3 };
enum { ret1 = 5 };
enum { ret2 = 8 };
CodeLabelTable()
{
using namespace Xbyak;
@ -1225,3 +1225,48 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
code.setProtectModeRW();
}
#endif
struct ReleaseTestCode : Xbyak::CodeGenerator {
ReleaseTestCode(Label& L1, Label& L2, Label& L3)
{
L(L1);
jmp(L1);
L(L2);
jmp(L3); // not assigned
}
};
/*
code must unlink label if code is destroyed
*/
CYBOZU_TEST_AUTO(release_label_after_code)
{
puts("---");
{
Label L1, L2, L3, L4, L5;
{
ReleaseTestCode code(L1, L2, L3);
CYBOZU_TEST_ASSERT(L1.getId() > 0);
CYBOZU_TEST_ASSERT(L1.getAddress() != 0);
CYBOZU_TEST_ASSERT(L2.getId() > 0);
CYBOZU_TEST_ASSERT(L2.getAddress() != 0);
CYBOZU_TEST_ASSERT(L3.getId() > 0);
CYBOZU_TEST_ASSERT(L3.getAddress() == 0); // L3 is not assigned
code.assignL(L4, L1);
L5 = L1;
printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId());
}
puts("code is released");
CYBOZU_TEST_ASSERT(L1.getId() == 0);
CYBOZU_TEST_ASSERT(L1.getAddress() == 0);
CYBOZU_TEST_ASSERT(L2.getId() == 0);
CYBOZU_TEST_ASSERT(L2.getAddress() == 0);
// CYBOZU_TEST_ASSERT(L3.getId() == 0); // L3 is not assigned so not cleared
CYBOZU_TEST_ASSERT(L3.getAddress() == 0);
CYBOZU_TEST_ASSERT(L4.getId() == 0);
CYBOZU_TEST_ASSERT(L4.getAddress() == 0);
CYBOZU_TEST_ASSERT(L5.getId() == 0);
CYBOZU_TEST_ASSERT(L5.getAddress() == 0);
printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId());
}
}

View File

@ -129,6 +129,55 @@ struct Code : public Xbyak::CodeGenerator {
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
/*
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
*/
void gen13()
{
StackFrame sf(this, 1, 13);
for (int i = 0; i < 13; i++) {
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
}
mov(rax, sf.t[0]);
for (int i = 1; i < 13; i++) {
add(rax, sf.t[i]);
}
}
/*
same as gen13
*/
void gen14()
{
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
Pack t = sf.t;
t.append(rcx);
t.append(rdx);
for (int i = 0; i < 13; i++) {
mov(t[i], ptr[sf.p[0] + i * 8]);
}
mov(rax, t[0]);
for (int i = 1; i < 13; i++) {
add(rax, t[i]);
}
}
/*
return (1 << 15) - 1;
*/
void gen15()
{
StackFrame sf(this, 0, 14, 8);
Pack t = sf.t;
t.append(rax);
for (int i = 0; i < 15; i++) {
mov(t[i], 1 << i);
}
mov(qword[rsp], 0);
for (int i = 0; i < 15; i++) {
add(ptr[rsp], t[i]);
}
mov(rax, ptr[rsp]);
}
};
struct Code2 : Xbyak::CodeGenerator {
@ -152,8 +201,14 @@ struct Code2 : Xbyak::CodeGenerator {
add(rax, sf.p[i]);
}
}
void gen2(int pNum, int tNum, int stackSizeByte)
{
StackFrame sf(this, pNum, tNum, stackSizeByte);
mov(rax, rsp);
}
};
static int errNum = 0;
void check(int x, int y)
{
@ -167,19 +222,19 @@ void verify(const Xbyak::uint8 *f, int pNum)
{
switch (pNum) {
case 0:
check(1, Xbyak::CastTo<int (*)()>(f)());
check(1, reinterpret_cast<int (*)()>(f)());
return;
case 1:
check(11, Xbyak::CastTo<int (*)(int)>(f)(10));
check(11, reinterpret_cast<int (*)(int)>(f)(10));
return;
case 2:
check(111, Xbyak::CastTo<int (*)(int, int)>(f)(10, 100));
check(111, reinterpret_cast<int (*)(int, int)>(f)(10, 100));
return;
case 3:
check(1111, Xbyak::CastTo<int (*)(int, int, int)>(f)(10, 100, 1000));
check(1111, reinterpret_cast<int (*)(int, int, int)>(f)(10, 100, 1000));
return;
case 4:
check(11111, Xbyak::CastTo<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
check(11111, reinterpret_cast<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
return;
default:
printf("ERR pNum=%d\n", pNum);
@ -212,6 +267,15 @@ void testAll()
const Xbyak::uint8 *f = code.getCurr();
code.gen(pNum, tNum | opt, stackSize);
verify(f, pNum);
/*
check rsp is 16-byte aligned if stackSize > 0
*/
if (stackSize > 0) {
Code2 c2;
c2.gen2(pNum, tNum | opt, stackSize);
uint64_t addr = c2.getCode<uint64_t (*)()>()();
check(addr % 16, 0);
}
}
}
}
@ -268,6 +332,20 @@ void testPartial()
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen12();
check(24, f12(3, 5, 7, 9));
{
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
code.gen13();
check(91, f13(tbl));
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
code.gen14();
check(91, f14(tbl));
}
int (*f15)() = code.getCurr<int (*)()>();
code.gen15();
check((1 << 15) - 1, f15());
}
void put(const Xbyak::util::Pack& p)

View File

@ -40,6 +40,8 @@
// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
#include <unordered_set>
#define XBYAK_STD_UNORDERED_SET std::unordered_set
#include <unordered_map>
#define XBYAK_STD_UNORDERED_MAP std::unordered_map
#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
@ -49,16 +51,22 @@
libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
*/
#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
#include <tr1/unordered_set>
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
#include <tr1/unordered_map>
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
#include <unordered_set>
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
#include <unordered_map>
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
#else
#include <set>
#define XBYAK_STD_UNORDERED_SET std::set
#include <map>
#define XBYAK_STD_UNORDERED_MAP std::map
#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
@ -105,7 +113,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x5730 /* 0xABCD = A.BC(D) */
VERSION = 0x5760 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -178,7 +186,8 @@ enum {
ERR_INVALID_ZERO,
ERR_INVALID_RIP_IN_AUTO_GROW,
ERR_INVALID_MIB_ADDRESS,
ERR_INTERNAL
ERR_INTERNAL,
ERR_X2APIC_IS_NOT_SUPPORTED
};
class Error : public std::exception {
@ -240,6 +249,7 @@ public:
"invalid rip in AutoGrow",
"invalid mib address",
"internal error",
"x2APIC is not supported"
};
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
return errTbl[err_];
@ -617,6 +627,12 @@ struct RegRip {
const Label* label_;
bool isAddr_;
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
friend const RegRip operator+(const RegRip& r, int disp) {
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
}
friend const RegRip operator-(const RegRip& r, int disp) {
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
}
friend const RegRip operator+(const RegRip& r, sint64 disp) {
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
}
@ -919,10 +935,10 @@ public:
void dq(uint64 code) { db(code, 8); }
const uint8 *getCode() const { return top_; }
template<class F>
const F getCode() const { return CastTo<F>(top_); }
const F getCode() const { return reinterpret_cast<F>(top_); }
const uint8 *getCurr() const { return &top_[size_]; }
template<class F>
const F getCurr() const { return CastTo<F>(&top_[size_]); }
const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
size_t getSize() const { return size_; }
void setSize(size_t size)
{
@ -1128,6 +1144,7 @@ public:
Label(const Label& rhs);
Label& operator=(const Label& rhs);
~Label();
void clear() { mgr = 0; id = 0; }
int getId() const { return id; }
const uint8 *getAddress() const;
@ -1166,6 +1183,7 @@ class LabelManager {
};
typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
CodeArray *base_;
// global : stateList_.front(), local : stateList_.back()
@ -1173,6 +1191,7 @@ class LabelManager {
mutable int labelId_;
ClabelDefList clabelDefList_;
ClabelUndefList clabelUndefList_;
LabelPtrList labelPtrList_;
int getId(const Label& label) const
{
@ -1221,9 +1240,14 @@ class LabelManager {
return true;
}
friend class Label;
void incRefCount(int id) { clabelDefList_[id].refCount++; }
void decRefCount(int id)
void incRefCount(int id, Label *label)
{
clabelDefList_[id].refCount++;
labelPtrList_.insert(label);
}
void decRefCount(int id, Label *label)
{
labelPtrList_.erase(label);
ClabelDefList::iterator i = clabelDefList_.find(id);
if (i == clabelDefList_.end()) return;
if (i->second.refCount == 1) {
@ -1242,11 +1266,23 @@ class LabelManager {
#endif
return !list.empty();
}
// detach all labels linked to LabelManager
void resetLabelPtrList()
{
for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
(*i)->clear();
}
labelPtrList_.clear();
}
public:
LabelManager()
{
reset();
}
~LabelManager()
{
resetLabelPtrList();
}
void reset()
{
base_ = 0;
@ -1256,6 +1292,7 @@ public:
stateList_.push_back(SlabelState());
clabelDefList_.clear();
clabelUndefList_.clear();
resetLabelPtrList();
}
void enterLocal()
{
@ -1288,10 +1325,11 @@ public:
SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
define_inner(st.defList, st.undefList, label, base_->getSize());
}
void defineClabel(const Label& label)
void defineClabel(Label& label)
{
define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
label.mgr = this;
labelPtrList_.insert(&label);
}
void assign(Label& dst, const Label& src)
{
@ -1299,6 +1337,7 @@ public:
if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
dst.mgr = this;
labelPtrList_.insert(&dst);
}
bool getOffset(size_t *offset, std::string& label) const
{
@ -1346,19 +1385,19 @@ inline Label::Label(const Label& rhs)
{
id = rhs.id;
mgr = rhs.mgr;
if (mgr) mgr->incRefCount(id);
if (mgr) mgr->incRefCount(id, this);
}
inline Label& Label::operator=(const Label& rhs)
{
if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
id = rhs.id;
mgr = rhs.mgr;
if (mgr) mgr->incRefCount(id);
if (mgr) mgr->incRefCount(id, this);
return *this;
}
inline Label::~Label()
{
if (id && mgr) mgr->decRefCount(id);
if (id && mgr) mgr->decRefCount(id, this);
}
inline const uint8* Label::getAddress() const
{
@ -2162,7 +2201,7 @@ public:
const Segment es, cs, ss, ds, fs, gs;
#endif
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
void L(const Label& label) { labelMgr_.defineClabel(label); }
void L(Label& label) { labelMgr_.defineClabel(label); }
Label L() { Label label; L(label); return label; }
void inLocalLabel() { labelMgr_.enterLocal(); }
void outLocalLabel() { labelMgr_.leaveLocal(); }
@ -2194,7 +2233,7 @@ public:
// call(function pointer)
#ifdef XBYAK_VARIADIC_TEMPLATE
template<class Ret, class... Params>
void call(Ret(*func)(Params...)) { call(CastTo<const void*>(func)); }
void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
#endif
void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }

View File

@ -1,4 +1,4 @@
const char *getVersionString() const { return "5.73"; }
const char *getVersionString() const { return "5.76"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }

View File

@ -50,11 +50,21 @@
namespace Xbyak { namespace util {
typedef enum {
SmtLevel = 1,
CoreLevel = 2
} IntelCpuTopologyLevel;
/**
CPU detection class
*/
class Cpu {
uint64 type_;
//system topology
bool x2APIC_supported_;
static const size_t maxTopologyLevels = 2;
unsigned int numCores_[maxTopologyLevels];
unsigned int get32bitAsBE(const char *x) const
{
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
@ -88,6 +98,42 @@ class Cpu {
{
return (val >> base) & ((1u << (end - base)) - 1);
}
void setNumCores()
{
if ((type_ & tINTEL) == 0) return;
unsigned int data[4];
/* CAUTION: These numbers are configuration as shipped by Intel. */
getCpuidEx(0x0, 0, data);
if (data[0] >= 0xB) {
/*
if leaf 11 exists(x2APIC is supported),
we use it to get the number of smt cores and cores on socket
leaf 0xB can be zeroed-out by a hypervisor
*/
x2APIC_supported_ = true;
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
getCpuidEx(0xB, i, data);
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
if (level == SmtLevel || level == CoreLevel) {
numCores_[level - 1] = extractBit(data[1], 0, 15);
}
}
if (numCores_[SmtLevel - 1] != 0) {
numCores_[CoreLevel - 1] /= numCores_[SmtLevel - 1];
}
} else {
/*
Failed to deremine num of cores without x2APIC support.
TODO: USE initial APIC ID to determine ncores.
*/
numCores_[SmtLevel - 1] = 0;
numCores_[CoreLevel - 1] = 0;
}
}
void setCacheHierarchy()
{
if ((type_ & tINTEL) == 0) return;
@ -96,21 +142,12 @@ class Cpu {
// const unsigned int INSTRUCTION_CACHE = 2;
const unsigned int UNIFIED_CACHE = 3;
unsigned int smt_width = 0;
unsigned int n_cores = 0;
unsigned int logical_cores = 0;
unsigned int data[4];
/*
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
If x2APIC is supported, these are the only correct numbers.
leaf 0xB can be zeroed-out by a hypervisor
*/
getCpuidEx(0x0, 0, data);
if (data[0] >= 0xB) {
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
smt_width = data[1] & 0x7FFF;
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
n_cores = data[1] & 0x7FFF;
if (x2APIC_supported_) {
smt_width = numCores_[0];
logical_cores = numCores_[1];
}
/*
@ -118,7 +155,7 @@ class Cpu {
the first level of data cache is not shared (which is the
case for every existing architecture) and use this to
determine the SMT width for arch not supporting leaf 11.
when leaf 4 reports a number of core less than n_cores
when leaf 4 reports a number of core less than numCores_
on socket reported by leaf 11, then it is a correct number
of cores not an upperbound.
*/
@ -127,19 +164,19 @@ class Cpu {
unsigned int cacheType = extractBit(data[0], 0, 4);
if (cacheType == NO_CACHE) break;
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
if (n_cores != 0) { // true only if leaf 0xB is supported and valid
nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
}
assert(nb_logical_cores != 0);
assert(actual_logical_cores != 0);
data_cache_size[data_cache_levels] =
(extractBit(data[1], 22, 31) + 1)
* (extractBit(data[1], 12, 21) + 1)
* (extractBit(data[1], 0, 11) + 1)
* (data[2] + 1);
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
assert(smt_width != 0);
cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u);
cores_sharing_data_cache[data_cache_levels] = (std::max)(actual_logical_cores / smt_width, 1u);
data_cache_levels++;
}
}
@ -160,6 +197,12 @@ public:
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
unsigned int data_cache_levels;
unsigned int getNumCores(IntelCpuTopologyLevel level) {
if (level != SmtLevel && level != CoreLevel) throw Error(ERR_BAD_PARAMETER);
if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
return numCores_[level - 1];
}
unsigned int getDataCacheLevels() const { return data_cache_levels; }
unsigned int getCoresSharingDataCache(unsigned int i) const
{
@ -271,6 +314,8 @@ public:
Cpu()
: type_(NONE)
, x2APIC_supported_(false)
, numCores_()
, data_cache_levels(0)
{
unsigned int data[4];
@ -363,6 +408,7 @@ public:
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
}
setFamily();
setNumCores();
setCacheHierarchy();
}
void putFamily() const
@ -416,7 +462,7 @@ const int UseRCX = 1 << 6;
const int UseRDX = 1 << 7;
class Pack {
static const size_t maxTblNum = 10;
static const size_t maxTblNum = 15;
const Xbyak::Reg64 *tbl_[maxTblNum];
size_t n_;
public:
@ -476,7 +522,7 @@ public:
const Xbyak::Reg64& operator[](size_t n) const
{
if (n >= n_) {
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
throw Error(ERR_BAD_PARAMETER);
}
return *tbl_[n];
@ -518,6 +564,7 @@ class StackFrame {
static const int rcxPos = 3;
static const int rdxPos = 2;
#endif
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
Xbyak::CodeGenerator *code_;
int pNum_;
int tNum_;
@ -527,7 +574,7 @@ class StackFrame {
int P_;
bool makeEpilog_;
Xbyak::Reg64 pTbl_[4];
Xbyak::Reg64 tTbl_[10];
Xbyak::Reg64 tTbl_[maxRegNum];
Pack p_;
Pack t_;
StackFrame(const StackFrame&);
@ -539,7 +586,7 @@ public:
make stack frame
@param sf [in] this
@param pNum [in] num of function parameter(0 <= pNum <= 4)
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
@param stackSizeByte [in] local stack size
@param makeEpilog [in] automatically call close() if true
@ -566,27 +613,17 @@ public:
using namespace Xbyak;
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
const Reg64& _rsp = code->rsp;
const AddressFrame& _ptr = code->ptr;
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
const int *tbl = getOrderTbl() + noSaveNum;
P_ = saveNum_ + (stackSizeByte + 7) / 8;
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
for (int i = 0; i < saveNum_; i++) {
code->push(Reg64(tbl[i]));
}
P_ = (stackSizeByte + 7) / 8;
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
P_ *= 8;
if (P_ > 0) code->sub(_rsp, P_);
#ifdef XBYAK64_WIN
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
}
for (int i = 4; i < saveNum_; i++) {
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
}
#else
for (int i = 0; i < saveNum_; i++) {
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
}
#endif
int pos = 0;
for (int i = 0; i < pNum; i++) {
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
@ -607,21 +644,11 @@ public:
{
using namespace Xbyak;
const Reg64& _rsp = code_->rsp;
const AddressFrame& _ptr = code_->ptr;
const int *tbl = getOrderTbl() + noSaveNum;
#ifdef XBYAK64_WIN
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
}
for (int i = 4; i < saveNum_; i++) {
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
}
#else
for (int i = 0; i < saveNum_; i++) {
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
}
#endif
if (P_ > 0) code_->add(_rsp, P_);
for (int i = 0; i < saveNum_; i++) {
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
}
if (callRet) code_->ret();
}
@ -633,9 +660,6 @@ public:
} catch (std::exception& e) {
printf("ERR:StackFrame %s\n", e.what());
exit(1);
} catch (...) {
printf("ERR:StackFrame otherwise\n");
exit(1);
}
}
private:
@ -654,7 +678,7 @@ private:
}
int getRegIdx(int& pos) const
{
assert(pos < 14);
assert(pos < maxRegNum);
using namespace Xbyak;
const int *tbl = getOrderTbl();
int r = tbl[pos++];