diff --git a/readme.md b/readme.md index 52f2d265..3b869796 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@ -# Xbyak 5.73 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +# Xbyak 5.76 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ## Abstract @@ -392,6 +392,11 @@ modified new BSD License http://opensource.org/licenses/BSD-3-Clause ## History +* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel +* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility +* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when mgr is destroyed +* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed +* 2018/Oct/15 util::StackFrame uses push/pop instead of mov * 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8) * 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) * 2018/Sep/04 ver 5.71 L() returns a new label instance diff --git a/readme.txt b/readme.txt index 7bdde284..e5042d5b 100644 --- a/readme.txt +++ b/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.73 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.76 ----------------------------------------------------------------------------- ◎概要 @@ -373,6 +373,11 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から ----------------------------------------------------------------------------- ◎履歴 +2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel) +2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元 +2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す +2018/10/21 ver 5.74 RegRip +/- intの形をサポート Xbyak::CastToを削除 +2018/10/15 util::StackFrameでmovの代わりにpush/popを使う 2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整 2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) 2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加 diff --git a/sample/bf.cpp b/sample/bf.cpp index 6968920d..20a0fd96 100644 --- 
a/sample/bf.cpp +++ b/sample/bf.cpp @@ -198,7 +198,7 @@ int main(int argc, char *argv[]) Brainfuck bf(ifs); if (mode == 0) { static int stack[128 * 1024]; - bf.getCode()(Xbyak::CastTo(putchar), Xbyak::CastTo(getchar), stack); + bf.getCode()(reinterpret_cast(putchar), reinterpret_cast(getchar), stack); } else { dump(bf.getCode(), bf.getSize()); } diff --git a/sample/static_buf.cpp b/sample/static_buf.cpp index 7cf8038d..0a8ff571 100644 --- a/sample/static_buf.cpp +++ b/sample/static_buf.cpp @@ -32,7 +32,7 @@ struct Code : Xbyak::CodeGenerator { inline int add(int a, int b) { - return Xbyak::CastTo(buf)(a, b); + return reinterpret_cast(buf)(a, b); } int main() diff --git a/sample/test0.cpp b/sample/test0.cpp index cd19e484..5a4d91ba 100644 --- a/sample/test0.cpp +++ b/sample/test0.cpp @@ -77,7 +77,7 @@ public: #ifdef XBYAK_VARIADIC_TEMPLATE call(atoi); #else - call(Xbyak::CastTo(atoi)); + call(reinterpret_cast(atoi)); #endif add(esp, 4); #endif @@ -96,7 +96,7 @@ public: mov(rax, (size_t)atoi); jmp(rax); #else - jmp(Xbyak::CastTo(atoi)); + jmp(reinterpret_cast(atoi)); #endif } int (*get() const)(const char *) { return getCode(); } @@ -171,8 +171,9 @@ int main() return 1; } int (*func)(int) = s.getCode(); - if (Xbyak::CastTo(func) != p) { - fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo(func)); + const uint8 *funcp = reinterpret_cast(func); + if (funcp != p) { + fprintf(stderr, "internal error %p %p\n", p, funcp); return 1; } printf("0 + ... 
+ %d = %d\n", 100, func(100)); diff --git a/sample/test_util.cpp b/sample/test_util.cpp index 9b199353..d75a5e06 100644 --- a/sample/test_util.cpp +++ b/sample/test_util.cpp @@ -104,9 +104,12 @@ void putCPUinfo() Core i7-3930K 6 2D */ cpu.putFamily(); + if (!cpu.has(Cpu::tINTEL)) return; for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) { printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i)); } + printf("SmtLevel =%u\n", cpu.getNumCores(Xbyak::util::SmtLevel)); + printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel)); } int main() diff --git a/sample/toyvm.cpp b/sample/toyvm.cpp index 4dedad47..cd869ea3 100644 --- a/sample/toyvm.cpp +++ b/sample/toyvm.cpp @@ -204,7 +204,7 @@ public: push(reg[r]); push('A' + r); push((int)str); - call(Xbyak::CastTo(printf)); + call(reinterpret_cast(printf)); add(esp, 4 * 4); pop(ecx); pop(edx); diff --git a/test/jmp.cpp b/test/jmp.cpp index 6a3b4616..9fe8ff69 100644 --- a/test/jmp.cpp +++ b/test/jmp.cpp @@ -1016,9 +1016,9 @@ struct GetAddressCode1 : Xbyak::CodeGenerator { }; struct CodeLabelTable : Xbyak::CodeGenerator { - static const int ret0 = 3; - static const int ret1 = 5; - static const int ret2 = 8; + enum { ret0 = 3 }; + enum { ret1 = 5 }; + enum { ret2 = 8 }; CodeLabelTable() { using namespace Xbyak; @@ -1225,3 +1225,48 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf) code.setProtectModeRW(); } #endif + +struct ReleaseTestCode : Xbyak::CodeGenerator { + ReleaseTestCode(Label& L1, Label& L2, Label& L3) + { + L(L1); + jmp(L1); + L(L2); + jmp(L3); // not assigned + } +}; + +/* + code must unlink label if code is destroyed +*/ +CYBOZU_TEST_AUTO(release_label_after_code) +{ + puts("---"); + { + Label L1, L2, L3, L4, L5; + { + ReleaseTestCode code(L1, L2, L3); + CYBOZU_TEST_ASSERT(L1.getId() > 0); + CYBOZU_TEST_ASSERT(L1.getAddress() != 0); + CYBOZU_TEST_ASSERT(L2.getId() > 0); + CYBOZU_TEST_ASSERT(L2.getAddress() != 0); + 
CYBOZU_TEST_ASSERT(L3.getId() > 0); + CYBOZU_TEST_ASSERT(L3.getAddress() == 0); // L3 is not assigned + code.assignL(L4, L1); + L5 = L1; + printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId()); + } + puts("code is released"); + CYBOZU_TEST_ASSERT(L1.getId() == 0); + CYBOZU_TEST_ASSERT(L1.getAddress() == 0); + CYBOZU_TEST_ASSERT(L2.getId() == 0); + CYBOZU_TEST_ASSERT(L2.getAddress() == 0); +// CYBOZU_TEST_ASSERT(L3.getId() == 0); // L3 is not assigned so not cleared + CYBOZU_TEST_ASSERT(L3.getAddress() == 0); + CYBOZU_TEST_ASSERT(L4.getId() == 0); + CYBOZU_TEST_ASSERT(L4.getAddress() == 0); + CYBOZU_TEST_ASSERT(L5.getId() == 0); + CYBOZU_TEST_ASSERT(L5.getAddress() == 0); + printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId()); + } +} diff --git a/test/sf_test.cpp b/test/sf_test.cpp index 84a903f0..286ecd1a 100644 --- a/test/sf_test.cpp +++ b/test/sf_test.cpp @@ -129,6 +129,55 @@ struct Code : public Xbyak::CodeGenerator { add(rax, sf.p[2]); add(rax, sf.p[3]); } + + /* + int64_t f(const int64_t a[13]) { return sum-of-a[]; } + */ + void gen13() + { + StackFrame sf(this, 1, 13); + for (int i = 0; i < 13; i++) { + mov(sf.t[i], ptr[sf.p[0] + i * 8]); + } + mov(rax, sf.t[0]); + for (int i = 1; i < 13; i++) { + add(rax, sf.t[i]); + } + } + /* + same as gen13 + */ + void gen14() + { + StackFrame sf(this, 1, 11 | UseRCX | UseRDX); + Pack t = sf.t; + t.append(rcx); + t.append(rdx); + for (int i = 0; i < 13; i++) { + mov(t[i], ptr[sf.p[0] + i * 8]); + } + mov(rax, t[0]); + for (int i = 1; i < 13; i++) { + add(rax, t[i]); + } + } + /* + return (1 << 15) - 1; + */ + void gen15() + { + StackFrame sf(this, 0, 14, 8); + Pack t = sf.t; + t.append(rax); + for (int i = 0; i < 15; i++) { + mov(t[i], 1 << i); + } + mov(qword[rsp], 0); + for (int i = 0; i < 15; i++) { + add(ptr[rsp], t[i]); + } + mov(rax, ptr[rsp]); + } }; struct Code2 : Xbyak::CodeGenerator { @@ -152,8 +201,14 @@ struct Code2 : Xbyak::CodeGenerator 
{ add(rax, sf.p[i]); } } + void gen2(int pNum, int tNum, int stackSizeByte) + { + StackFrame sf(this, pNum, tNum, stackSizeByte); + mov(rax, rsp); + } }; + static int errNum = 0; void check(int x, int y) { @@ -167,19 +222,19 @@ void verify(const Xbyak::uint8 *f, int pNum) { switch (pNum) { case 0: - check(1, Xbyak::CastTo(f)()); + check(1, reinterpret_cast(f)()); return; case 1: - check(11, Xbyak::CastTo(f)(10)); + check(11, reinterpret_cast(f)(10)); return; case 2: - check(111, Xbyak::CastTo(f)(10, 100)); + check(111, reinterpret_cast(f)(10, 100)); return; case 3: - check(1111, Xbyak::CastTo(f)(10, 100, 1000)); + check(1111, reinterpret_cast(f)(10, 100, 1000)); return; case 4: - check(11111, Xbyak::CastTo(f)(10, 100, 1000, 10000)); + check(11111, reinterpret_cast(f)(10, 100, 1000, 10000)); return; default: printf("ERR pNum=%d\n", pNum); @@ -212,6 +267,15 @@ void testAll() const Xbyak::uint8 *f = code.getCurr(); code.gen(pNum, tNum | opt, stackSize); verify(f, pNum); + /* + check rsp is 16-byte aligned if stackSize > 0 + */ + if (stackSize > 0) { + Code2 c2; + c2.gen2(pNum, tNum | opt, stackSize); + uint64_t addr = c2.getCode()(); + check(addr % 16, 0); + } } } } @@ -268,6 +332,20 @@ void testPartial() int (*f12)(int, int, int, int) = code.getCurr(); code.gen12(); check(24, f12(3, 5, 7, 9)); + + { + int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }; + int64_t (*f13)(const int64_t*) = code.getCurr(); + code.gen13(); + check(91, f13(tbl)); + + int64_t (*f14)(const int64_t*) = code.getCurr(); + code.gen14(); + check(91, f14(tbl)); + } + int (*f15)() = code.getCurr(); + code.gen15(); + check((1 << 15) - 1, f15()); } void put(const Xbyak::util::Pack& p) diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 42974e38..336e9b38 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -40,6 +40,8 @@ // This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft. 
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\ ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__))) + #include + #define XBYAK_STD_UNORDERED_SET std::unordered_set #include #define XBYAK_STD_UNORDERED_MAP std::unordered_map #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap @@ -49,16 +51,22 @@ libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version). */ #elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__) + #include + #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set #include #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap #elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600) + #include + #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set #include #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap #else + #include + #define XBYAK_STD_UNORDERED_SET std::set #include #define XBYAK_STD_UNORDERED_MAP std::map #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap @@ -105,7 +113,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5730 /* 0xABCD = A.BC(D) */ + VERSION = 0x5760 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -178,7 +186,8 @@ enum { ERR_INVALID_ZERO, ERR_INVALID_RIP_IN_AUTO_GROW, ERR_INVALID_MIB_ADDRESS, - ERR_INTERNAL + ERR_INTERNAL, + ERR_X2APIC_IS_NOT_SUPPORTED }; class Error : public std::exception { @@ -240,6 +249,7 @@ public: "invalid rip in AutoGrow", "invalid mib address", "internal error", + "x2APIC is not supported" }; assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl)); return errTbl[err_]; @@ -617,6 +627,12 @@ struct RegRip { const Label* label_; bool isAddr_; explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {} + friend const 
RegRip operator+(const RegRip& r, int disp) { + return RegRip(r.disp_ + disp, r.label_, r.isAddr_); + } + friend const RegRip operator-(const RegRip& r, int disp) { + return RegRip(r.disp_ - disp, r.label_, r.isAddr_); + } friend const RegRip operator+(const RegRip& r, sint64 disp) { return RegRip(r.disp_ + disp, r.label_, r.isAddr_); } @@ -919,10 +935,10 @@ public: void dq(uint64 code) { db(code, 8); } const uint8 *getCode() const { return top_; } template - const F getCode() const { return CastTo(top_); } + const F getCode() const { return reinterpret_cast(top_); } const uint8 *getCurr() const { return &top_[size_]; } template - const F getCurr() const { return CastTo(&top_[size_]); } + const F getCurr() const { return reinterpret_cast(&top_[size_]); } size_t getSize() const { return size_; } void setSize(size_t size) { @@ -1128,6 +1144,7 @@ public: Label(const Label& rhs); Label& operator=(const Label& rhs); ~Label(); + void clear() { mgr = 0; id = 0; } int getId() const { return id; } const uint8 *getAddress() const; @@ -1166,6 +1183,7 @@ class LabelManager { }; typedef XBYAK_STD_UNORDERED_MAP ClabelDefList; typedef XBYAK_STD_UNORDERED_MULTIMAP ClabelUndefList; + typedef XBYAK_STD_UNORDERED_SET LabelPtrList; CodeArray *base_; // global : stateList_.front(), local : stateList_.back() @@ -1173,6 +1191,7 @@ class LabelManager { mutable int labelId_; ClabelDefList clabelDefList_; ClabelUndefList clabelUndefList_; + LabelPtrList labelPtrList_; int getId(const Label& label) const { @@ -1221,9 +1240,14 @@ class LabelManager { return true; } friend class Label; - void incRefCount(int id) { clabelDefList_[id].refCount++; } - void decRefCount(int id) + void incRefCount(int id, Label *label) { + clabelDefList_[id].refCount++; + labelPtrList_.insert(label); + } + void decRefCount(int id, Label *label) + { + labelPtrList_.erase(label); ClabelDefList::iterator i = clabelDefList_.find(id); if (i == clabelDefList_.end()) return; if (i->second.refCount == 1) { @@ -1242,11 
+1266,23 @@ class LabelManager { #endif return !list.empty(); } + // detach all labels linked to LabelManager + void resetLabelPtrList() + { + for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) { + (*i)->clear(); + } + labelPtrList_.clear(); + } public: LabelManager() { reset(); } + ~LabelManager() + { + resetLabelPtrList(); + } void reset() { base_ = 0; @@ -1256,6 +1292,7 @@ public: stateList_.push_back(SlabelState()); clabelDefList_.clear(); clabelUndefList_.clear(); + resetLabelPtrList(); } void enterLocal() { @@ -1288,10 +1325,11 @@ public: SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); define_inner(st.defList, st.undefList, label, base_->getSize()); } - void defineClabel(const Label& label) + void defineClabel(Label& label) { define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize()); label.mgr = this; + labelPtrList_.insert(&label); } void assign(Label& dst, const Label& src) { @@ -1299,6 +1337,7 @@ public: if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L); define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset); dst.mgr = this; + labelPtrList_.insert(&dst); } bool getOffset(size_t *offset, std::string& label) const { @@ -1346,19 +1385,19 @@ inline Label::Label(const Label& rhs) { id = rhs.id; mgr = rhs.mgr; - if (mgr) mgr->incRefCount(id); + if (mgr) mgr->incRefCount(id, this); } inline Label& Label::operator=(const Label& rhs) { if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L); id = rhs.id; mgr = rhs.mgr; - if (mgr) mgr->incRefCount(id); + if (mgr) mgr->incRefCount(id, this); return *this; } inline Label::~Label() { - if (id && mgr) mgr->decRefCount(id); + if (id && mgr) mgr->decRefCount(id, this); } inline const uint8* Label::getAddress() const { @@ -2162,7 +2201,7 @@ public: const Segment es, cs, ss, ds, fs, gs; #endif void L(const std::string& label) { labelMgr_.defineSlabel(label); } - void L(const Label& label) { 
labelMgr_.defineClabel(label); } + void L(Label& label) { labelMgr_.defineClabel(label); } Label L() { Label label; L(label); return label; } void inLocalLabel() { labelMgr_.enterLocal(); } void outLocalLabel() { labelMgr_.leaveLocal(); } @@ -2194,7 +2233,7 @@ public: // call(function pointer) #ifdef XBYAK_VARIADIC_TEMPLATE template - void call(Ret(*func)(Params...)) { call(CastTo(func)); } + void call(Ret(*func)(Params...)) { call(reinterpret_cast(func)); } #endif void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); } diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 92a31f9a..774b8c4e 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "5.73"; } +const char *getVersionString() const { return "5.76"; } void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index 0f6aada0..b8b37c51 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -50,11 +50,21 @@ namespace Xbyak { namespace util { +typedef enum { + SmtLevel = 1, + CoreLevel = 2 +} IntelCpuTopologyLevel; + /** CPU detection class */ class Cpu { uint64 type_; + //system topology + bool x2APIC_supported_; + static const size_t maxTopologyLevels = 2; + unsigned int numCores_[maxTopologyLevels]; + unsigned int get32bitAsBE(const char *x) const { return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24); @@ -88,6 +98,42 @@ class Cpu { { return (val >> base) & ((1u << (end - base)) - 1); } + void setNumCores() + { + if ((type_ & tINTEL) == 0) return; + + unsigned int data[4]; + + /* CAUTION: These numbers are configuration as shipped by Intel. 
*/ + getCpuidEx(0x0, 0, data); + if (data[0] >= 0xB) { + /* + if leaf 11 exists(x2APIC is supported), + we use it to get the number of smt cores and cores on socket + + leaf 0xB can be zeroed-out by a hypervisor + */ + x2APIC_supported_ = true; + for (unsigned int i = 0; i < maxTopologyLevels; i++) { + getCpuidEx(0xB, i, data); + IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15); + if (level == SmtLevel || level == CoreLevel) { + numCores_[level - 1] = extractBit(data[1], 0, 15); + } + } + if (numCores_[SmtLevel - 1] != 0) { + numCores_[CoreLevel - 1] /= numCores_[SmtLevel - 1]; + } + } else { + /* + Failed to determine num of cores without x2APIC support. + TODO: USE initial APIC ID to determine ncores. + */ + numCores_[SmtLevel - 1] = 0; + numCores_[CoreLevel - 1] = 0; + } + + } void setCacheHierarchy() { if ((type_ & tINTEL) == 0) return; @@ -96,21 +142,12 @@ class Cpu { // const unsigned int INSTRUCTION_CACHE = 2; const unsigned int UNIFIED_CACHE = 3; unsigned int smt_width = 0; - unsigned int n_cores = 0; + unsigned int logical_cores = 0; unsigned int data[4]; - /* - if leaf 11 exists, we use it to get the number of smt cores and cores on socket - If x2APIC is supported, these are the only correct numbers. - - leaf 0xB can be zeroed-out by a hypervisor - */ - getCpuidEx(0x0, 0, data); - if (data[0] >= 0xB) { - getCpuidEx(0xB, 0, data); // CPUID for SMT Level - smt_width = data[1] & 0x7FFF; - getCpuidEx(0xB, 1, data); // CPUID for CORE Level - n_cores = data[1] & 0x7FFF; + if (x2APIC_supported_) { + smt_width = numCores_[0]; + logical_cores = numCores_[1]; } /* @@ -118,7 +155,7 @@ class Cpu { the first level of data cache is not shared (which is the case for every existing architecture) and use this to determine the SMT width for arch not supporting leaf 11. 
- when leaf 4 reports a number of core less than n_cores + when leaf 4 reports a number of core less than numCores_ on socket reported by leaf 11, then it is a correct number of cores not an upperbound. */ @@ -127,19 +164,19 @@ class Cpu { unsigned int cacheType = extractBit(data[0], 0, 4); if (cacheType == NO_CACHE) break; if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) { - unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1; - if (n_cores != 0) { // true only if leaf 0xB is supported and valid - nb_logical_cores = (std::min)(nb_logical_cores, n_cores); + unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1; + if (logical_cores != 0) { // true only if leaf 0xB is supported and valid + actual_logical_cores = (std::min)(actual_logical_cores, logical_cores); } - assert(nb_logical_cores != 0); + assert(actual_logical_cores != 0); data_cache_size[data_cache_levels] = (extractBit(data[1], 22, 31) + 1) * (extractBit(data[1], 12, 21) + 1) * (extractBit(data[1], 0, 11) + 1) * (data[2] + 1); - if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores; + if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores; assert(smt_width != 0); - cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u); + cores_sharing_data_cache[data_cache_levels] = (std::max)(actual_logical_cores / smt_width, 1u); data_cache_levels++; } } @@ -160,6 +197,12 @@ public: unsigned int cores_sharing_data_cache[maxNumberCacheLevels]; unsigned int data_cache_levels; + unsigned int getNumCores(IntelCpuTopologyLevel level) { + if (level != SmtLevel && level != CoreLevel) throw Error(ERR_BAD_PARAMETER); + if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED); + return numCores_[level - 1]; + } + unsigned int getDataCacheLevels() const { return data_cache_levels; } unsigned int getCoresSharingDataCache(unsigned int i) const { @@ -271,6 +314,8 @@ public: Cpu() : type_(NONE) + , 
x2APIC_supported_(false) + , numCores_() , data_cache_levels(0) { unsigned int data[4]; @@ -363,6 +408,7 @@ public: if (ECX & (1U << 0)) type_ |= tPREFETCHWT1; } setFamily(); + setNumCores(); setCacheHierarchy(); } void putFamily() const @@ -416,7 +462,7 @@ const int UseRCX = 1 << 6; const int UseRDX = 1 << 7; class Pack { - static const size_t maxTblNum = 10; + static const size_t maxTblNum = 15; const Xbyak::Reg64 *tbl_[maxTblNum]; size_t n_; public: @@ -476,7 +522,7 @@ public: const Xbyak::Reg64& operator[](size_t n) const { if (n >= n_) { - fprintf(stderr, "ERR Pack bad n=%d\n", (int)n); + fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_); throw Error(ERR_BAD_PARAMETER); } return *tbl_[n]; @@ -518,6 +564,7 @@ class StackFrame { static const int rcxPos = 3; static const int rdxPos = 2; #endif + static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax Xbyak::CodeGenerator *code_; int pNum_; int tNum_; @@ -527,7 +574,7 @@ class StackFrame { int P_; bool makeEpilog_; Xbyak::Reg64 pTbl_[4]; - Xbyak::Reg64 tTbl_[10]; + Xbyak::Reg64 tTbl_[maxRegNum]; Pack p_; Pack t_; StackFrame(const StackFrame&); @@ -539,7 +586,7 @@ public: make stack frame @param sf [in] this @param pNum [in] num of function parameter(0 <= pNum <= 4) - @param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX) + @param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14 @param stackSizeByte [in] local stack size @param makeEpilog [in] automatically call close() if true @@ -566,27 +613,17 @@ public: using namespace Xbyak; if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM); const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 
1 : 0); - if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM); + if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM); const Reg64& _rsp = code->rsp; - const AddressFrame& _ptr = code->ptr; saveNum_ = (std::max)(0, allRegNum - noSaveNum); const int *tbl = getOrderTbl() + noSaveNum; - P_ = saveNum_ + (stackSizeByte + 7) / 8; - if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment + for (int i = 0; i < saveNum_; i++) { + code->push(Reg64(tbl[i])); + } + P_ = (stackSizeByte + 7) / 8; + if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment P_ *= 8; if (P_ > 0) code->sub(_rsp, P_); -#ifdef XBYAK64_WIN - for (int i = 0; i < (std::min)(saveNum_, 4); i++) { - code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i])); - } - for (int i = 4; i < saveNum_; i++) { - code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); - } -#else - for (int i = 0; i < saveNum_; i++) { - code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); - } -#endif int pos = 0; for (int i = 0; i < pNum; i++) { pTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); @@ -607,21 +644,11 @@ public: { using namespace Xbyak; const Reg64& _rsp = code_->rsp; - const AddressFrame& _ptr = code_->ptr; const int *tbl = getOrderTbl() + noSaveNum; -#ifdef XBYAK64_WIN - for (int i = 0; i < (std::min)(saveNum_, 4); i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]); - } - for (int i = 4; i < saveNum_; i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]); - } -#else - for (int i = 0; i < saveNum_; i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]); - } -#endif if (P_ > 0) code_->add(_rsp, P_); + for (int i = 0; i < saveNum_; i++) { + code_->pop(Reg64(tbl[saveNum_ - 1 - i])); + } if (callRet) code_->ret(); } @@ -633,9 +660,6 @@ public: } catch (std::exception& e) { printf("ERR:StackFrame %s\n", e.what()); exit(1); - } catch (...) 
{ - printf("ERR:StackFrame otherwise\n"); - exit(1); } } private: @@ -654,7 +678,7 @@ private: } int getRegIdx(int& pos) const { - assert(pos < 14); + assert(pos < maxRegNum); using namespace Xbyak; const int *tbl = getOrderTbl(); int r = tbl[pos++];