externals/xbyak: Update xbyak to 5.76
Keeps the external library up to date.
This commit is contained in:
commit
ab4b82167f
7
externals/xbyak/readme.md
vendored
7
externals/xbyak/readme.md
vendored
@ -1,5 +1,5 @@
|
||||
|
||||
# Xbyak 5.73 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
# Xbyak 5.76 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
|
||||
## Abstract
|
||||
|
||||
@ -392,6 +392,11 @@ modified new BSD License
|
||||
http://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
## History
|
||||
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
||||
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
||||
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
|
||||
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
||||
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
|
||||
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
||||
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
||||
|
7
externals/xbyak/readme.txt
vendored
7
externals/xbyak/readme.txt
vendored
@ -1,5 +1,5 @@
|
||||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.73
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.76
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
@ -373,6 +373,11 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)
|
||||
2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元
|
||||
2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す
|
||||
2018/10/21 ver 5.74 RegRip +/intの形をサポート Xbyak::CastToを削除
|
||||
2018/10/15 util::StackFrameでmovの代わりにpush/popを使う
|
||||
2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整
|
||||
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加
|
||||
|
2
externals/xbyak/sample/bf.cpp
vendored
2
externals/xbyak/sample/bf.cpp
vendored
@ -198,7 +198,7 @@ int main(int argc, char *argv[])
|
||||
Brainfuck bf(ifs);
|
||||
if (mode == 0) {
|
||||
static int stack[128 * 1024];
|
||||
bf.getCode<void (*)(void*, void*, int *)>()(Xbyak::CastTo<void*>(putchar), Xbyak::CastTo<void*>(getchar), stack);
|
||||
bf.getCode<void (*)(const void*, const void*, int *)>()(reinterpret_cast<const void*>(putchar), reinterpret_cast<const void*>(getchar), stack);
|
||||
} else {
|
||||
dump(bf.getCode(), bf.getSize());
|
||||
}
|
||||
|
2
externals/xbyak/sample/static_buf.cpp
vendored
2
externals/xbyak/sample/static_buf.cpp
vendored
@ -32,7 +32,7 @@ struct Code : Xbyak::CodeGenerator {
|
||||
|
||||
inline int add(int a, int b)
|
||||
{
|
||||
return Xbyak::CastTo<int (*)(int,int)>(buf)(a, b);
|
||||
return reinterpret_cast<int (*)(int, int)>(buf)(a, b);
|
||||
}
|
||||
|
||||
int main()
|
||||
|
9
externals/xbyak/sample/test0.cpp
vendored
9
externals/xbyak/sample/test0.cpp
vendored
@ -77,7 +77,7 @@ public:
|
||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||
call(atoi);
|
||||
#else
|
||||
call(Xbyak::CastTo<void*>(atoi));
|
||||
call(reinterpret_cast<const void*>(atoi));
|
||||
#endif
|
||||
add(esp, 4);
|
||||
#endif
|
||||
@ -96,7 +96,7 @@ public:
|
||||
mov(rax, (size_t)atoi);
|
||||
jmp(rax);
|
||||
#else
|
||||
jmp(Xbyak::CastTo<void*>(atoi));
|
||||
jmp(reinterpret_cast<const void*>(atoi));
|
||||
#endif
|
||||
}
|
||||
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
|
||||
@ -171,8 +171,9 @@ int main()
|
||||
return 1;
|
||||
}
|
||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||
if (Xbyak::CastTo<uint8*>(func) != p) {
|
||||
fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo<uint8*>(func));
|
||||
const uint8 *funcp = reinterpret_cast<const uint8*>(func);
|
||||
if (funcp != p) {
|
||||
fprintf(stderr, "internal error %p %p\n", p, funcp);
|
||||
return 1;
|
||||
}
|
||||
printf("0 + ... + %d = %d\n", 100, func(100));
|
||||
|
3
externals/xbyak/sample/test_util.cpp
vendored
3
externals/xbyak/sample/test_util.cpp
vendored
@ -104,9 +104,12 @@ void putCPUinfo()
|
||||
Core i7-3930K 6 2D
|
||||
*/
|
||||
cpu.putFamily();
|
||||
if (!cpu.has(Cpu::tINTEL)) return;
|
||||
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
||||
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
||||
}
|
||||
printf("SmtLevel =%u\n", cpu.getNumCores(Xbyak::util::SmtLevel));
|
||||
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
|
||||
}
|
||||
|
||||
int main()
|
||||
|
2
externals/xbyak/sample/toyvm.cpp
vendored
2
externals/xbyak/sample/toyvm.cpp
vendored
@ -204,7 +204,7 @@ public:
|
||||
push(reg[r]);
|
||||
push('A' + r);
|
||||
push((int)str);
|
||||
call(Xbyak::CastTo<void*>(printf));
|
||||
call(reinterpret_cast<const void*>(printf));
|
||||
add(esp, 4 * 4);
|
||||
pop(ecx);
|
||||
pop(edx);
|
||||
|
51
externals/xbyak/test/jmp.cpp
vendored
51
externals/xbyak/test/jmp.cpp
vendored
@ -1016,9 +1016,9 @@ struct GetAddressCode1 : Xbyak::CodeGenerator {
|
||||
};
|
||||
|
||||
struct CodeLabelTable : Xbyak::CodeGenerator {
|
||||
static const int ret0 = 3;
|
||||
static const int ret1 = 5;
|
||||
static const int ret2 = 8;
|
||||
enum { ret0 = 3 };
|
||||
enum { ret1 = 5 };
|
||||
enum { ret2 = 8 };
|
||||
CodeLabelTable()
|
||||
{
|
||||
using namespace Xbyak;
|
||||
@ -1225,3 +1225,48 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
|
||||
code.setProtectModeRW();
|
||||
}
|
||||
#endif
|
||||
|
||||
struct ReleaseTestCode : Xbyak::CodeGenerator {
|
||||
ReleaseTestCode(Label& L1, Label& L2, Label& L3)
|
||||
{
|
||||
L(L1);
|
||||
jmp(L1);
|
||||
L(L2);
|
||||
jmp(L3); // not assigned
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
code must unlink label if code is destroyed
|
||||
*/
|
||||
CYBOZU_TEST_AUTO(release_label_after_code)
|
||||
{
|
||||
puts("---");
|
||||
{
|
||||
Label L1, L2, L3, L4, L5;
|
||||
{
|
||||
ReleaseTestCode code(L1, L2, L3);
|
||||
CYBOZU_TEST_ASSERT(L1.getId() > 0);
|
||||
CYBOZU_TEST_ASSERT(L1.getAddress() != 0);
|
||||
CYBOZU_TEST_ASSERT(L2.getId() > 0);
|
||||
CYBOZU_TEST_ASSERT(L2.getAddress() != 0);
|
||||
CYBOZU_TEST_ASSERT(L3.getId() > 0);
|
||||
CYBOZU_TEST_ASSERT(L3.getAddress() == 0); // L3 is not assigned
|
||||
code.assignL(L4, L1);
|
||||
L5 = L1;
|
||||
printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId());
|
||||
}
|
||||
puts("code is released");
|
||||
CYBOZU_TEST_ASSERT(L1.getId() == 0);
|
||||
CYBOZU_TEST_ASSERT(L1.getAddress() == 0);
|
||||
CYBOZU_TEST_ASSERT(L2.getId() == 0);
|
||||
CYBOZU_TEST_ASSERT(L2.getAddress() == 0);
|
||||
// CYBOZU_TEST_ASSERT(L3.getId() == 0); // L3 is not assigned so not cleared
|
||||
CYBOZU_TEST_ASSERT(L3.getAddress() == 0);
|
||||
CYBOZU_TEST_ASSERT(L4.getId() == 0);
|
||||
CYBOZU_TEST_ASSERT(L4.getAddress() == 0);
|
||||
CYBOZU_TEST_ASSERT(L5.getId() == 0);
|
||||
CYBOZU_TEST_ASSERT(L5.getAddress() == 0);
|
||||
printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId());
|
||||
}
|
||||
}
|
||||
|
88
externals/xbyak/test/sf_test.cpp
vendored
88
externals/xbyak/test/sf_test.cpp
vendored
@ -129,6 +129,55 @@ struct Code : public Xbyak::CodeGenerator {
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
/*
|
||||
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
|
||||
*/
|
||||
void gen13()
|
||||
{
|
||||
StackFrame sf(this, 1, 13);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, sf.t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, sf.t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
same as gen13
|
||||
*/
|
||||
void gen14()
|
||||
{
|
||||
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
|
||||
Pack t = sf.t;
|
||||
t.append(rcx);
|
||||
t.append(rdx);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
return (1 << 15) - 1;
|
||||
*/
|
||||
void gen15()
|
||||
{
|
||||
StackFrame sf(this, 0, 14, 8);
|
||||
Pack t = sf.t;
|
||||
t.append(rax);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
mov(t[i], 1 << i);
|
||||
}
|
||||
mov(qword[rsp], 0);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
add(ptr[rsp], t[i]);
|
||||
}
|
||||
mov(rax, ptr[rsp]);
|
||||
}
|
||||
};
|
||||
|
||||
struct Code2 : Xbyak::CodeGenerator {
|
||||
@ -152,8 +201,14 @@ struct Code2 : Xbyak::CodeGenerator {
|
||||
add(rax, sf.p[i]);
|
||||
}
|
||||
}
|
||||
void gen2(int pNum, int tNum, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||
mov(rax, rsp);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static int errNum = 0;
|
||||
void check(int x, int y)
|
||||
{
|
||||
@ -167,19 +222,19 @@ void verify(const Xbyak::uint8 *f, int pNum)
|
||||
{
|
||||
switch (pNum) {
|
||||
case 0:
|
||||
check(1, Xbyak::CastTo<int (*)()>(f)());
|
||||
check(1, reinterpret_cast<int (*)()>(f)());
|
||||
return;
|
||||
case 1:
|
||||
check(11, Xbyak::CastTo<int (*)(int)>(f)(10));
|
||||
check(11, reinterpret_cast<int (*)(int)>(f)(10));
|
||||
return;
|
||||
case 2:
|
||||
check(111, Xbyak::CastTo<int (*)(int, int)>(f)(10, 100));
|
||||
check(111, reinterpret_cast<int (*)(int, int)>(f)(10, 100));
|
||||
return;
|
||||
case 3:
|
||||
check(1111, Xbyak::CastTo<int (*)(int, int, int)>(f)(10, 100, 1000));
|
||||
check(1111, reinterpret_cast<int (*)(int, int, int)>(f)(10, 100, 1000));
|
||||
return;
|
||||
case 4:
|
||||
check(11111, Xbyak::CastTo<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
|
||||
check(11111, reinterpret_cast<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
|
||||
return;
|
||||
default:
|
||||
printf("ERR pNum=%d\n", pNum);
|
||||
@ -212,6 +267,15 @@ void testAll()
|
||||
const Xbyak::uint8 *f = code.getCurr();
|
||||
code.gen(pNum, tNum | opt, stackSize);
|
||||
verify(f, pNum);
|
||||
/*
|
||||
check rsp is 16-byte aligned if stackSize > 0
|
||||
*/
|
||||
if (stackSize > 0) {
|
||||
Code2 c2;
|
||||
c2.gen2(pNum, tNum | opt, stackSize);
|
||||
uint64_t addr = c2.getCode<uint64_t (*)()>()();
|
||||
check(addr % 16, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -268,6 +332,20 @@ void testPartial()
|
||||
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen12();
|
||||
check(24, f12(3, 5, 7, 9));
|
||||
|
||||
{
|
||||
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
|
||||
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||
code.gen13();
|
||||
check(91, f13(tbl));
|
||||
|
||||
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||
code.gen14();
|
||||
check(91, f14(tbl));
|
||||
}
|
||||
int (*f15)() = code.getCurr<int (*)()>();
|
||||
code.gen15();
|
||||
check((1 << 15) - 1, f15());
|
||||
}
|
||||
|
||||
void put(const Xbyak::util::Pack& p)
|
||||
|
63
externals/xbyak/xbyak/xbyak.h
vendored
63
externals/xbyak/xbyak/xbyak.h
vendored
@ -40,6 +40,8 @@
|
||||
// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
|
||||
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
|
||||
((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
|
||||
#include <unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::unordered_set
|
||||
#include <unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
|
||||
@ -49,16 +51,22 @@
|
||||
libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
|
||||
*/
|
||||
#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
|
||||
#include <tr1/unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||
#include <tr1/unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
|
||||
#include <unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||
#include <unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||
|
||||
#else
|
||||
#include <set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::set
|
||||
#include <map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
|
||||
@ -105,7 +113,7 @@ namespace Xbyak {
|
||||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x5730 /* 0xABCD = A.BC(D) */
|
||||
VERSION = 0x5760 /* 0xABCD = A.BC(D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
@ -178,7 +186,8 @@ enum {
|
||||
ERR_INVALID_ZERO,
|
||||
ERR_INVALID_RIP_IN_AUTO_GROW,
|
||||
ERR_INVALID_MIB_ADDRESS,
|
||||
ERR_INTERNAL
|
||||
ERR_INTERNAL,
|
||||
ERR_X2APIC_IS_NOT_SUPPORTED
|
||||
};
|
||||
|
||||
class Error : public std::exception {
|
||||
@ -240,6 +249,7 @@ public:
|
||||
"invalid rip in AutoGrow",
|
||||
"invalid mib address",
|
||||
"internal error",
|
||||
"x2APIC is not supported"
|
||||
};
|
||||
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
|
||||
return errTbl[err_];
|
||||
@ -617,6 +627,12 @@ struct RegRip {
|
||||
const Label* label_;
|
||||
bool isAddr_;
|
||||
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
|
||||
friend const RegRip operator+(const RegRip& r, int disp) {
|
||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||
}
|
||||
friend const RegRip operator-(const RegRip& r, int disp) {
|
||||
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
|
||||
}
|
||||
friend const RegRip operator+(const RegRip& r, sint64 disp) {
|
||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||
}
|
||||
@ -919,10 +935,10 @@ public:
|
||||
void dq(uint64 code) { db(code, 8); }
|
||||
const uint8 *getCode() const { return top_; }
|
||||
template<class F>
|
||||
const F getCode() const { return CastTo<F>(top_); }
|
||||
const F getCode() const { return reinterpret_cast<F>(top_); }
|
||||
const uint8 *getCurr() const { return &top_[size_]; }
|
||||
template<class F>
|
||||
const F getCurr() const { return CastTo<F>(&top_[size_]); }
|
||||
const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
|
||||
size_t getSize() const { return size_; }
|
||||
void setSize(size_t size)
|
||||
{
|
||||
@ -1128,6 +1144,7 @@ public:
|
||||
Label(const Label& rhs);
|
||||
Label& operator=(const Label& rhs);
|
||||
~Label();
|
||||
void clear() { mgr = 0; id = 0; }
|
||||
int getId() const { return id; }
|
||||
const uint8 *getAddress() const;
|
||||
|
||||
@ -1166,6 +1183,7 @@ class LabelManager {
|
||||
};
|
||||
typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
|
||||
typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
|
||||
typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
|
||||
|
||||
CodeArray *base_;
|
||||
// global : stateList_.front(), local : stateList_.back()
|
||||
@ -1173,6 +1191,7 @@ class LabelManager {
|
||||
mutable int labelId_;
|
||||
ClabelDefList clabelDefList_;
|
||||
ClabelUndefList clabelUndefList_;
|
||||
LabelPtrList labelPtrList_;
|
||||
|
||||
int getId(const Label& label) const
|
||||
{
|
||||
@ -1221,9 +1240,14 @@ class LabelManager {
|
||||
return true;
|
||||
}
|
||||
friend class Label;
|
||||
void incRefCount(int id) { clabelDefList_[id].refCount++; }
|
||||
void decRefCount(int id)
|
||||
void incRefCount(int id, Label *label)
|
||||
{
|
||||
clabelDefList_[id].refCount++;
|
||||
labelPtrList_.insert(label);
|
||||
}
|
||||
void decRefCount(int id, Label *label)
|
||||
{
|
||||
labelPtrList_.erase(label);
|
||||
ClabelDefList::iterator i = clabelDefList_.find(id);
|
||||
if (i == clabelDefList_.end()) return;
|
||||
if (i->second.refCount == 1) {
|
||||
@ -1242,11 +1266,23 @@ class LabelManager {
|
||||
#endif
|
||||
return !list.empty();
|
||||
}
|
||||
// detach all labels linked to LabelManager
|
||||
void resetLabelPtrList()
|
||||
{
|
||||
for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
|
||||
(*i)->clear();
|
||||
}
|
||||
labelPtrList_.clear();
|
||||
}
|
||||
public:
|
||||
LabelManager()
|
||||
{
|
||||
reset();
|
||||
}
|
||||
~LabelManager()
|
||||
{
|
||||
resetLabelPtrList();
|
||||
}
|
||||
void reset()
|
||||
{
|
||||
base_ = 0;
|
||||
@ -1256,6 +1292,7 @@ public:
|
||||
stateList_.push_back(SlabelState());
|
||||
clabelDefList_.clear();
|
||||
clabelUndefList_.clear();
|
||||
resetLabelPtrList();
|
||||
}
|
||||
void enterLocal()
|
||||
{
|
||||
@ -1288,10 +1325,11 @@ public:
|
||||
SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
|
||||
define_inner(st.defList, st.undefList, label, base_->getSize());
|
||||
}
|
||||
void defineClabel(const Label& label)
|
||||
void defineClabel(Label& label)
|
||||
{
|
||||
define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
|
||||
label.mgr = this;
|
||||
labelPtrList_.insert(&label);
|
||||
}
|
||||
void assign(Label& dst, const Label& src)
|
||||
{
|
||||
@ -1299,6 +1337,7 @@ public:
|
||||
if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
|
||||
define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
|
||||
dst.mgr = this;
|
||||
labelPtrList_.insert(&dst);
|
||||
}
|
||||
bool getOffset(size_t *offset, std::string& label) const
|
||||
{
|
||||
@ -1346,19 +1385,19 @@ inline Label::Label(const Label& rhs)
|
||||
{
|
||||
id = rhs.id;
|
||||
mgr = rhs.mgr;
|
||||
if (mgr) mgr->incRefCount(id);
|
||||
if (mgr) mgr->incRefCount(id, this);
|
||||
}
|
||||
inline Label& Label::operator=(const Label& rhs)
|
||||
{
|
||||
if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
|
||||
id = rhs.id;
|
||||
mgr = rhs.mgr;
|
||||
if (mgr) mgr->incRefCount(id);
|
||||
if (mgr) mgr->incRefCount(id, this);
|
||||
return *this;
|
||||
}
|
||||
inline Label::~Label()
|
||||
{
|
||||
if (id && mgr) mgr->decRefCount(id);
|
||||
if (id && mgr) mgr->decRefCount(id, this);
|
||||
}
|
||||
inline const uint8* Label::getAddress() const
|
||||
{
|
||||
@ -2162,7 +2201,7 @@ public:
|
||||
const Segment es, cs, ss, ds, fs, gs;
|
||||
#endif
|
||||
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
||||
void L(const Label& label) { labelMgr_.defineClabel(label); }
|
||||
void L(Label& label) { labelMgr_.defineClabel(label); }
|
||||
Label L() { Label label; L(label); return label; }
|
||||
void inLocalLabel() { labelMgr_.enterLocal(); }
|
||||
void outLocalLabel() { labelMgr_.leaveLocal(); }
|
||||
@ -2194,7 +2233,7 @@ public:
|
||||
// call(function pointer)
|
||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||
template<class Ret, class... Params>
|
||||
void call(Ret(*func)(Params...)) { call(CastTo<const void*>(func)); }
|
||||
void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
|
||||
#endif
|
||||
void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
|
||||
|
||||
|
2
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
2
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
@ -1,4 +1,4 @@
|
||||
const char *getVersionString() const { return "5.73"; }
|
||||
const char *getVersionString() const { return "5.76"; }
|
||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||
|
138
externals/xbyak/xbyak/xbyak_util.h
vendored
138
externals/xbyak/xbyak/xbyak_util.h
vendored
@ -50,11 +50,21 @@
|
||||
|
||||
namespace Xbyak { namespace util {
|
||||
|
||||
typedef enum {
|
||||
SmtLevel = 1,
|
||||
CoreLevel = 2
|
||||
} IntelCpuTopologyLevel;
|
||||
|
||||
/**
|
||||
CPU detection class
|
||||
*/
|
||||
class Cpu {
|
||||
uint64 type_;
|
||||
//system topology
|
||||
bool x2APIC_supported_;
|
||||
static const size_t maxTopologyLevels = 2;
|
||||
unsigned int numCores_[maxTopologyLevels];
|
||||
|
||||
unsigned int get32bitAsBE(const char *x) const
|
||||
{
|
||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||
@ -88,6 +98,42 @@ class Cpu {
|
||||
{
|
||||
return (val >> base) & ((1u << (end - base)) - 1);
|
||||
}
|
||||
void setNumCores()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
|
||||
unsigned int data[4];
|
||||
|
||||
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
/*
|
||||
if leaf 11 exists(x2APIC is supported),
|
||||
we use it to get the number of smt cores and cores on socket
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
x2APIC_supported_ = true;
|
||||
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
|
||||
getCpuidEx(0xB, i, data);
|
||||
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
||||
if (level == SmtLevel || level == CoreLevel) {
|
||||
numCores_[level - 1] = extractBit(data[1], 0, 15);
|
||||
}
|
||||
}
|
||||
if (numCores_[SmtLevel - 1] != 0) {
|
||||
numCores_[CoreLevel - 1] /= numCores_[SmtLevel - 1];
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
Failed to deremine num of cores without x2APIC support.
|
||||
TODO: USE initial APIC ID to determine ncores.
|
||||
*/
|
||||
numCores_[SmtLevel - 1] = 0;
|
||||
numCores_[CoreLevel - 1] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
void setCacheHierarchy()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
@ -96,21 +142,12 @@ class Cpu {
|
||||
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||
const unsigned int UNIFIED_CACHE = 3;
|
||||
unsigned int smt_width = 0;
|
||||
unsigned int n_cores = 0;
|
||||
unsigned int logical_cores = 0;
|
||||
unsigned int data[4];
|
||||
|
||||
/*
|
||||
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
|
||||
If x2APIC is supported, these are the only correct numbers.
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
|
||||
smt_width = data[1] & 0x7FFF;
|
||||
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
|
||||
n_cores = data[1] & 0x7FFF;
|
||||
if (x2APIC_supported_) {
|
||||
smt_width = numCores_[0];
|
||||
logical_cores = numCores_[1];
|
||||
}
|
||||
|
||||
/*
|
||||
@ -118,7 +155,7 @@ class Cpu {
|
||||
the first level of data cache is not shared (which is the
|
||||
case for every existing architecture) and use this to
|
||||
determine the SMT width for arch not supporting leaf 11.
|
||||
when leaf 4 reports a number of core less than n_cores
|
||||
when leaf 4 reports a number of core less than numCores_
|
||||
on socket reported by leaf 11, then it is a correct number
|
||||
of cores not an upperbound.
|
||||
*/
|
||||
@ -127,19 +164,19 @@ class Cpu {
|
||||
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||
if (cacheType == NO_CACHE) break;
|
||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||
unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (n_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
|
||||
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||
}
|
||||
assert(nb_logical_cores != 0);
|
||||
assert(actual_logical_cores != 0);
|
||||
data_cache_size[data_cache_levels] =
|
||||
(extractBit(data[1], 22, 31) + 1)
|
||||
* (extractBit(data[1], 12, 21) + 1)
|
||||
* (extractBit(data[1], 0, 11) + 1)
|
||||
* (data[2] + 1);
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||
assert(smt_width != 0);
|
||||
cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u);
|
||||
cores_sharing_data_cache[data_cache_levels] = (std::max)(actual_logical_cores / smt_width, 1u);
|
||||
data_cache_levels++;
|
||||
}
|
||||
}
|
||||
@ -160,6 +197,12 @@ public:
|
||||
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
|
||||
unsigned int data_cache_levels;
|
||||
|
||||
unsigned int getNumCores(IntelCpuTopologyLevel level) {
|
||||
if (level != SmtLevel && level != CoreLevel) throw Error(ERR_BAD_PARAMETER);
|
||||
if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
|
||||
return numCores_[level - 1];
|
||||
}
|
||||
|
||||
unsigned int getDataCacheLevels() const { return data_cache_levels; }
|
||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||
{
|
||||
@ -271,6 +314,8 @@ public:
|
||||
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
, x2APIC_supported_(false)
|
||||
, numCores_()
|
||||
, data_cache_levels(0)
|
||||
{
|
||||
unsigned int data[4];
|
||||
@ -363,6 +408,7 @@ public:
|
||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
}
|
||||
setFamily();
|
||||
setNumCores();
|
||||
setCacheHierarchy();
|
||||
}
|
||||
void putFamily() const
|
||||
@ -416,7 +462,7 @@ const int UseRCX = 1 << 6;
|
||||
const int UseRDX = 1 << 7;
|
||||
|
||||
class Pack {
|
||||
static const size_t maxTblNum = 10;
|
||||
static const size_t maxTblNum = 15;
|
||||
const Xbyak::Reg64 *tbl_[maxTblNum];
|
||||
size_t n_;
|
||||
public:
|
||||
@ -476,7 +522,7 @@ public:
|
||||
const Xbyak::Reg64& operator[](size_t n) const
|
||||
{
|
||||
if (n >= n_) {
|
||||
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
|
||||
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
}
|
||||
return *tbl_[n];
|
||||
@ -518,6 +564,7 @@ class StackFrame {
|
||||
static const int rcxPos = 3;
|
||||
static const int rdxPos = 2;
|
||||
#endif
|
||||
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
|
||||
Xbyak::CodeGenerator *code_;
|
||||
int pNum_;
|
||||
int tNum_;
|
||||
@ -527,7 +574,7 @@ class StackFrame {
|
||||
int P_;
|
||||
bool makeEpilog_;
|
||||
Xbyak::Reg64 pTbl_[4];
|
||||
Xbyak::Reg64 tTbl_[10];
|
||||
Xbyak::Reg64 tTbl_[maxRegNum];
|
||||
Pack p_;
|
||||
Pack t_;
|
||||
StackFrame(const StackFrame&);
|
||||
@ -539,7 +586,7 @@ public:
|
||||
make stack frame
|
||||
@param sf [in] this
|
||||
@param pNum [in] num of function parameter(0 <= pNum <= 4)
|
||||
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
|
||||
@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
|
||||
@param stackSizeByte [in] local stack size
|
||||
@param makeEpilog [in] automatically call close() if true
|
||||
|
||||
@ -566,27 +613,17 @@ public:
|
||||
using namespace Xbyak;
|
||||
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
|
||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||
if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
|
||||
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
|
||||
const Reg64& _rsp = code->rsp;
|
||||
const AddressFrame& _ptr = code->ptr;
|
||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
P_ = saveNum_ + (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->push(Reg64(tbl[i]));
|
||||
}
|
||||
P_ = (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
P_ *= 8;
|
||||
if (P_ > 0) code->sub(_rsp, P_);
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#endif
|
||||
int pos = 0;
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
|
||||
@ -607,21 +644,11 @@ public:
|
||||
{
|
||||
using namespace Xbyak;
|
||||
const Reg64& _rsp = code_->rsp;
|
||||
const AddressFrame& _ptr = code_->ptr;
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#endif
|
||||
if (P_ > 0) code_->add(_rsp, P_);
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
|
||||
}
|
||||
|
||||
if (callRet) code_->ret();
|
||||
}
|
||||
@ -633,9 +660,6 @@ public:
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:StackFrame %s\n", e.what());
|
||||
exit(1);
|
||||
} catch (...) {
|
||||
printf("ERR:StackFrame otherwise\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
private:
|
||||
@ -654,7 +678,7 @@ private:
|
||||
}
|
||||
int getRegIdx(int& pos) const
|
||||
{
|
||||
assert(pos < 14);
|
||||
assert(pos < maxRegNum);
|
||||
using namespace Xbyak;
|
||||
const int *tbl = getOrderTbl();
|
||||
int r = tbl[pos++];
|
||||
|
Loading…
x
Reference in New Issue
Block a user