Update Xbyak to 5.71

Merge commit 'f7c26e9f7ace572f440b80b0e71625295755c38b'
This commit is contained in:
Lioncash 2018-09-08 16:52:55 -04:00
commit d17599af40
No known key found for this signature in database
GPG Key ID: 4E3C3CC1031BA9C7
9 changed files with 505 additions and 241 deletions

View File

@ -1,107 +1,121 @@
Xbyak 5.67 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
=============
# Xbyak 5.71 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
Abstract
-------------
## Abstract
This is a header file which enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic.
Feature
-------------
header file only
you can use Xbyak's functions at once if xbyak.h is included.
## Feature
* header file only
* Intel/MASM like syntax
* fully support AVX-512
### Supported Instructions Sets
**Note**: Xbyak uses and(), or(), xor(), not() functions, so `-fno-operator-names` option is necessary for gcc/clang.
MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(*partial*)/AVX/AVX2/FMA/VEX-encoded GPR/AVX-512
Or define `XBYAK_NO_OP_NAMES` before including `xbyak.h` and use and_(), or_(), xor_(), not_() instead of them.
and_(), or_(), xor_(), not_() are always available.
`XBYAK_NO_OP_NAMES` will be defined by default in a future version.
### Supported OS
* Windows Xp, Vista, Windows 7(32bit, 64bit)
* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit)
* Linux(32bit, 64bit)
* Intel Mac OSX
* Intel macOS
### Supported Compilers
* Visual Studio C++ VC2012 or later
* gcc 4.7 or later
* clang 3.3
* cygwin gcc 4.5.3
* icc 7.2
Almost all C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin.
>Note: Xbyak uses and(), or(), xor(), not() functions, so "-fno-operator-names" option is required on gcc.
Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_() instead of them.
and_(), or_(), xor_(), not_() are always available.
## Install
Install
-------------
The following files are necessary. Please add the path to your compile directories.
The following files are necessary. Please add the path to your compile directory.
* xbyak.h
* xbyak_mnemonic.h
* xbyak_util.h
Linux:
```
make install
```
make install
These files are copied into `/usr/local/include/xbyak`.
These files are copied into /usr/local/include/xbyak
## How to use it
New Feature
-------------
Inherit `Xbyak::CodeGenerator` class and make the class method.
```
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
Add support for AVX-512 instruction set.
struct Code : Xbyak::CodeGenerator {
Code(int x)
{
mov(eax, x);
ret();
}
};
```
Make an instance of the class and get the function
pointer by calling `getCode()` and call it.
```
Code c(5);
int (*f)() = c.getCode<int (*)()>();
printf("ret=%d\n", f()); // ret = 5
```
Syntax
-------------
Make Xbyak::CodeGenerator and make the class method and get the function
pointer by calling cgetCode() and casting the return value.
NASM Xbyak
mov eax, ebx --> mov(eax, ebx);
inc ecx inc(ecx);
ret --> ret();
### Addressing
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
[rip + 32bit disp] ; x64 only
NASM Xbyak
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
test byte [esp], 4 --> test (byte [esp], 4);
How to use Selector(Segment Register)
>Note: Segment class is not derived from Operand.
## Syntax
Similar to MASM/NASM syntax with parentheses.
```
mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]);
NASM Xbyak
mov eax, ebx --> mov(eax, ebx);
inc ecx inc(ecx);
ret --> ret();
```
## Addressing
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
otherwise use `ptr`.
```
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
[rip + 32bit disp] ; x64 only
NASM Xbyak
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
test byte [esp], 4 --> test(byte [esp], 4);
inc qword [rax] --> inc(qword [rax]);
```
**Note**: `qword`, `dword`, `word` and `byte` are member variables, so don't use a name such as `dword` as an unsigned int type.
### How to use Selector (Segment Register)
```
mov eax, [fs:eax] --> putSeg(fs);
mov(eax, ptr [eax]);
mov ax, cs --> mov(ax, cs);
```
**Note**: Segment class is not derived from `Operand`.
>you can use ptr for almost memory access unless you specify the size of memory.
## AVX
>dword, word and byte are member variables, then don't use dword as unsigned int, for example.
### AVX
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
vgatherdpd(xmm1, ptr [ebp+123+xmm2*4], xmm3);
*Remark*
The omitted destination syntax as the following ss disabled.
```
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
```
define `XBYAK_ENABLE_OMITTED_OPERAND` if you use it for backward compatibility.
**Note**:
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
But the newer version will not support it.
```
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
```
### AVX-512
## AVX-512
```
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
@ -130,97 +144,122 @@ vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5)
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
```
Remark
* k1, ..., k7 are new opmask registers.
### Remark
* `k1`, ..., `k7` are opmask registers.
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
* `k4 | k3` is different from `k3 | k4`.
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
* specify xword/yword/zword(_b) for m128/m256/m512 if necessary.
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
### Label
## Label
Two kinds of Label are supported. (String literal and Label class).
L("L1");
jmp ("L1");
### String literal
```
L("L1");
jmp("L1");
jmp ("L2");
...
a few mnemonics(8-bit displacement jmp)
...
L("L2");
jmp("L2");
...
a few mnemonics (8-bit displacement jmp)
...
L("L2");
jmp ("L3", T_NEAR);
...
a lot of mnemonics(32-bit displacement jmp)
...
L("L3");
jmp("L3", T_NEAR);
...
a lot of mnemonics (32-bit displacement jmp)
...
L("L3");
```
>Call hasUndefinedLabel() to verify your code has no undefined label.
> you can use a label for immediate value of mov like as mov (eax, "L2");
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
* You can use a label as an immediate value of mov, e.g. `mov(eax, "L2")`.
#### 1. support @@, @f, @b like MASM
### Support `@@`, `@f`, `@b` like MASM
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
L("@@"); // <B>
jmp("@b"); // jmp to <B>
mov(eax, "@b");
jmp(eax); // jmp to <B>
```
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
L("@@"); // <B>
jmp("@b"); // jmp to <B>
mov(eax, "@b");
jmp(eax); // jmp to <B>
```
#### 2. localization of label by calling inLocalLabel(), outLocallabel().
### Local label
labels begining of period between inLocalLabel() and outLocalLabel()
are dealed with local label.
inLocalLabel() and outLocalLabel() can be nested.
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
are treated as a local label.
`inLocalLabel()` and `outLocalLabel()` can be nested.
void func1()
{
inLocalLabel();
L(".lp"); // <A> ; local label
...
jmp(".lp"); // jmpt to <A>
L("aaa"); // global label
outLocalLabel();
}
```
void func1()
{
inLocalLabel();
L(".lp"); // <A> ; local label
...
jmp(".lp"); // jmp to <A>
L("aaa"); // global label <C>
outLocalLabel();
void func2()
{
inLocalLabel();
L(".lp"); // <B> ; local label
func1();
jmp(".lp"); // jmp to <B>
inLocalLabel();
}
inLocalLabel();
L(".lp"); // <B> ; local label
func1();
jmp(".lp"); // jmp to <B>
outLocalLabel();
jmp("aaa"); // jmp to <C>
}
```
### Label class
L() and jxx() functions support a new Label class.
`L()` and `jxx()` support Label class.
Label label1, label2;
L(label1);
...
jmp(label1);
...
jmp(label2);
...
L(label2);
```
Xbyak::Label label1, label2;
L(label1);
...
jmp(label1);
...
jmp(label2);
...
L(label2);
```
Moreover, assignL(dstLabel, srcLabel) method binds dstLabel with srcLabel.
Use `putL` for jmp table
```
Label labelTbl, L0, L1, L2;
mov(rax, labelTbl);
// rdx is an index of jump table
jmp(ptr [rax + rdx * sizeof(void*)]);
L(labelTbl);
putL(L0);
putL(L1);
putL(L2);
L(L0);
....
L(L1);
....
```
Label label1, label2;
L(label1);
...
jmp(label2);
...
assignL(label2, label1); // label2 <= label1
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
The above jmp opecode jumps label1.
```
Label label2;
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
...
jmp(label2); // label2 is not determined here
...
assignL(label2, label1); // label2 <- label1
```
The `jmp` in the above code jumps to label1 assigned by `assignL`.
* Restriction:
* srcLabel must be used in L().
* dstLabel must not be used in L().
**Note**:
* srcLabel must be used in `L()`.
* dstLabel must not be used in `L()`.
Label::getAddress() returns the address specified by the label instance and 0 if not specified.
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
```
// not AutoGrow mode
Label label;
@ -229,7 +268,7 @@ L(label);
assert(label.getAddress() == getCurr());
```
### Rip
### Rip ; relative addressing
```
Label label;
mov(eax, ptr [rip + label]); // eax = 4
@ -243,92 +282,119 @@ int x;
...
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
```
### Code size
The default max code size is 4096 bytes. Please set it in constructor of CodeGenerator() if you want to use large size.
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
: CodeGenerator(8192)
{
}
...
};
## Code size
The default max code size is 4096 bytes.
Specify the size in constructor of `CodeGenerator()` if necessary.
### use user allocated memory
```
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
: CodeGenerator(8192)
{
}
...
};
```
## User allocated memory
You can make jit code on prepared memory.
class Sample : public Xbyak::CodeGenerator {
public:
Sample(void *userPtr, size_t size)
: Xbyak::CodeGenerator(size, userPtr)
{
...
}
};
Call `setProtectModeRE` yourself to change the memory mode when using the prepared memory.
const size_t codeSize = 1024;
uint8 buf[codeSize + 16];
// get 16-byte aligned address
uint8 *p = Xbyak::CodeArray::getAlignedAddress(buf);
// append executable attribute to the memory
Xbyak::CodeArray::protect(p, codeSize, true);
// construct your jit code on the memory
Sample s(p, codeSize);
>See *sample/test0.cpp*
AutoGrow
-------------
Under `AutoGrow` mode, Xbyak extends memory automatically if necessary.
Call ready() before calling getCode() to calc address of jmp.
```
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
{
...
}
};
alignas(4096) uint8_t buf[8192]; // C++11 or later
struct Code : Xbyak::CodeGenerator {
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
{
mov(rax, 123);
ret();
}
};
int main()
{
Code c;
c.ready(); // Don't forget to call this function
c.setProtectModeRE(); // set memory to Read/Exec
printf("%d\n", c.getCode<int(*)()>()());
}
```
>Don't use the address returned by getCurr() before calling ready().
>It may be invalid address.
>RESTRICTION : rip addressing is not supported in AutoGrow
Macro
-------------
**Note**: See [sample/test0.cpp](sample/test0.cpp).
### AutoGrow
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
```
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
{
...
}
};
Code c;
// generate code for jit
c.ready(); // mode = Read/Write/Exec
```
**Note**:
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be invalid address.
### Read/Exec mode
By default, Xbyak sets the memory to Read/Write/Exec mode to run jit code.
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
call `setProtectModeRE()` after generating jit code.
```
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
{
mov(eax, 123);
ret();
}
};
Code c;
c.setProtectModeRE();
...
```
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
See [protect-re.cpp](sample/protect-re.cpp).
## Macro
* **XBYAK32** is defined on 32bit.
* **XBYAK64** is defined on 64bit.
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
* define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names`
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(duplicated in the future)
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future)
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
Sample
-------------
## Sample
* test0.cpp ; tiny sample of Xbyak(x86, x64)
* quantize.cpp ; JIT optimized quantization by fast division(x86 only)
* calc.cpp ; assemble and estimate a given polynomial(x86, x64)
* bf.cpp ; JIT brainfuck(x86, x64)
* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64)
* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64)
License
-------------
## License
modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
History
-------------
## History
* 2018/Sep/04 ver 5.71 L() returns a new label instance
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
* 2018/Jul/26 ver 5.661 support mingw64
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
@ -392,8 +458,7 @@ History
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm).
support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
@ -453,8 +518,6 @@ History
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
* 2007/Jan/4 first version
Author
-------------
## Author
MITSUNARI Shigeo(herumi@nifty.com)

View File

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.67
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.71
-----------------------------------------------------------------------------
◎概要
@ -245,8 +245,8 @@ void func2()
更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。
Label label1, label2;
L(label1);
Label label2;
Label label1 = L(); // Label label1; L(label1);と同じ意味
...
jmp(label2);
...
@ -309,6 +309,41 @@ bool CodeArray::protect(const void *addr, size_t size, bool canExec);
*/
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
・read/execモード
デフォルトのCodeGeneratorはコンストラクト時にJIT用の領域をread/write/execモードに設定して利用します。
コード生成時はread/writeでコード実行時にはread/execにしたい場合、次のようにしてください。
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE) // JIT領域をread/writeのままコード生成
{
mov(eax, 123);
ret();
}
};
Code c;
c.setProtectModeRE(); // read/execモードに変更
// JIT領域を実行
AutoGrowの場合はready()の代わりにreadyRE()を呼んでください。
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeのままコード生成
{
mov(eax, 123);
ret();
}
};
Code c;
c.readyRE(); // read/execモードに変更
// JIT領域を実行
setProtectModeRW()を呼ぶと領域が元のread/writeモードに戻ります。
その他詳細は各種サンプルを参照してください。
-----------------------------------------------------------------------------
◎マクロ
@ -338,6 +373,9 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
2018/09/04 ver 5.71 新しいlabelインスタンスを返すL()を追加
2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加
2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday)
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
2018/07/26 ver 5.661 mingw64対応
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加

View File

@ -10,12 +10,6 @@
#endif
class Brainfuck : public Xbyak::CodeGenerator {
private:
enum Direction { B, F };
std::string toStr(int labelNo, Direction dir)
{
return Xbyak::Label::toStr(labelNo) + (dir == B ? 'B' : 'F');
}
public:
int getContinuousChar(std::istream& is, char c)
{
@ -67,8 +61,7 @@ public:
mov(pGetchar, rsi); // getchar
mov(stack, rdx); // stack
#endif
int labelNo = 0;
std::stack<int> keepLabelNo;
std::stack<Label> labelF, labelB;
char c;
while (is >> c) {
switch (c) {
@ -116,17 +109,22 @@ public:
mov(cur, eax);
break;
case '[':
L(toStr(labelNo, B));
mov(eax, cur);
test(eax, eax);
jz(toStr(labelNo, F), T_NEAR);
keepLabelNo.push(labelNo++);
{
Label B = L();
labelB.push(B);
mov(eax, cur);
test(eax, eax);
Label F;
jz(F, T_NEAR);
labelF.push(F);
}
break;
case ']':
{
int no = keepLabelNo.top(); keepLabelNo.pop();
jmp(toStr(no, B));
L(toStr(no, F));
Label B = labelB.top(); labelB.pop();
jmp(B);
Label F = labelF.top(); labelF.pop();
L(F);
}
break;
default:

70
externals/xbyak/sample/protect-re.cpp vendored Normal file
View File

@ -0,0 +1,70 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
/*
	Fixed-size (4096 bytes) generator constructed with DontSetProtectRWE,
	so the protection mode of its buffer is left for the caller to set.
	The generated function simply returns 123.
*/
struct Code1 : Xbyak::CodeGenerator {
	Code1() : CodeGenerator(4096, Xbyak::DontSetProtectRWE)
	{
		mov(eax, 123);
		ret();
	}
	// emit one more byte (0) into the code buffer, i.e. write to the JIT area
	void update() { db(0); }
};
void test1(bool updateCode)
{
Code1 c;
c.setProtectModeRE();
if (updateCode) c.update(); // segmentation fault
int (*f)() = c.getCode<int (*)()>();
printf("f=%d\n", f());
c.setProtectModeRW();
c.update();
puts("ok");
}
/*
	AutoGrow variant of the sample generator (initial size 4096 bytes).
	The generated function simply returns 123.
*/
struct Code2 : Xbyak::CodeGenerator {
	Code2() : CodeGenerator(4096, Xbyak::AutoGrow)
	{
		mov(eax, 123);
		ret();
	}
	// emit one more byte (0) into the code buffer, i.e. write to the JIT area
	void update() { db(0); }
};
void test2(bool updateCode)
{
Code2 c;
c.readyRE();
if (updateCode) c.update(); // segmentation fault
int (*f)() = c.getCode<int (*)()>();
printf("f=%d\n", f());
c.setProtectModeRW();
c.update();
puts("ok");
}
int main(int argc, char *argv[])
{
if (argc < 2) {
fprintf(stderr, "%s <testNum> [update]\n", argv[0]);
return 0;
}
bool update = argc == 3;
int n = atoi(argv[1]);
printf("n=%d update=%d\n", n, update);
switch (n) {
case 1: test1(update); break;
case 2: test2(update); break;
default: fprintf(stderr, "no test %d\n", n); break;
}
}

View File

@ -889,6 +889,34 @@ CYBOZU_TEST_AUTO(testNewLabel)
}
}
/*
	Check the L() overload that returns the label it defines.
	Expected trace of the generated function:
	eax = 0 -> inc (1) -> call sub: inc (2) -> back to loopTop -> eax != 0 -> done: inc (3)
*/
CYBOZU_TEST_AUTO(returnLabel)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			xor_(eax, eax);
			Label loopTop = L(); // label bound to the current position
			test(eax, eax);
			Label done;
			jnz(done); // taken on the second pass, when eax is non-zero
			inc(eax); // 1
			Label sub;
			call(sub);
			jmp(loopTop);
		L(sub);
			inc(eax); // 2
			ret();
		L(done);
			inc(eax); // 3
			ret();
		}
	};
	typedef int (*Func)();
	Code generator;
	Func func = generator.getCode<Func>();
	int result = func();
	CYBOZU_TEST_EQUAL(result, 3);
}
CYBOZU_TEST_AUTO(testAssign)
{
struct Code : Xbyak::CodeGenerator {
@ -987,6 +1015,52 @@ struct GetAddressCode1 : Xbyak::CodeGenerator {
}
};
struct CodeLabelTable : Xbyak::CodeGenerator {
static const int ret0 = 3;
static const int ret1 = 5;
static const int ret2 = 8;
CodeLabelTable()
{
using namespace Xbyak;
#ifdef XBYAK64_WIN
const Reg64& p0 = rcx;
const Reg64& a = rax;
#elif defined (XBYAK64_GCC)
const Reg64& p0 = rdi;
const Reg64& a = rax;
#else
const Reg32& p0 = edx;
const Reg32& a = eax;
mov(edx, ptr [esp + 4]);
#endif
Label labelTbl, L0, L1, L2;
mov(a, labelTbl);
jmp(ptr [a + p0 * sizeof(void*)]);
L(labelTbl);
putL(L0);
putL(L1);
putL(L2);
L(L0);
mov(a, ret0);
ret();
L(L1);
mov(a, ret1);
ret();
L(L2);
mov(a, ret2);
ret();
}
};
// each table index must dispatch to the matching retN constant of CodeLabelTable
CYBOZU_TEST_AUTO(LabelTable)
{
	typedef int (*Func)(int);
	CodeLabelTable code;
	Func func = code.getCode<Func>();
	CYBOZU_TEST_EQUAL(func(0), code.ret0);
	CYBOZU_TEST_EQUAL(func(1), code.ret1);
	CYBOZU_TEST_EQUAL(func(2), code.ret2);
}
CYBOZU_TEST_AUTO(getAddress1)
{
GetAddressCode1 c;
@ -1143,11 +1217,11 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
ret();
}
} code;
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RE);
code.setProtectModeRE();
code.getCode<void (*)()>()();
CYBOZU_TEST_EQUAL(*x0, 123);
CYBOZU_TEST_EQUAL(*x1, 456);
CYBOZU_TEST_EQUAL(buf[8], 99);
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RW);
code.setProtectModeRW();
}
#endif

View File

@ -352,7 +352,8 @@ class Test {
case VM32Y_K:
return isXbyak_ ? "ptr [64+ymm13*2+r13] | k6" : "[64+ymm13*2+r13]{k6}";
case VM32Z_K:
return isXbyak_ ? "ptr [64+zmm13*2+r13] | k6" : "[64+zmm13*2+r13]{k6}";
if (idx & 1) return isXbyak_ ? "ptr [64+zmm10*8+r9] | k6" : "[64+zmm10*8+r9]{k6}";
return isXbyak_ ? "ptr [64+zmm30*2+r13] | k6" : "[64+zmm30*2+r13]{k6}";
case VM32Z:
return isXbyak_ ? "ptr [64+zmm13*2+rcx]" : "[64+zmm13*2+rcx]";
case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";

View File

@ -40,8 +40,8 @@ struct Code : Xbyak::CodeGenerator {
cmpss(xmm0, ptr[rip + label], 0);
test(dword[rip + label], 33);
bt(dword[rip + label ], 3);
vblendpd(xmm0, dword[rip + label], 3);
vpalignr(xmm0, qword[rip + label], 4);
vblendpd(xmm0, xmm0, dword[rip + label], 3);
vpalignr(xmm0, xmm0, qword[rip + label], 4);
vextractf128(dword[rip + label], ymm3, 12);
vperm2i128(ymm0, ymm1, qword[rip + label], 13);
vcvtps2ph(ptr[rip + label], xmm2, 44);

View File

@ -105,7 +105,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x5670 /* 0xABCD = A.BC(D) */
VERSION = 0x5710 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -786,6 +786,7 @@ inline RegExp operator-(const RegExp& e, size_t disp)
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
void *const AutoGrow = (void*)1; //-V566
void *const DontSetProtectRWE = (void*)2; //-V566
class CodeArray {
enum Type {
@ -825,6 +826,7 @@ protected:
size_t size_;
bool isCalledCalcJmpAddress_;
bool useProtect() const { return alloc_->useProtect(); }
/*
allocate new memory and copy old data to the new area
*/
@ -848,7 +850,6 @@ protected:
uint64 disp = i->getVal(top_);
rewrite(i->codeOffset, disp, i->jmpSize);
}
if (alloc_->useProtect() && !protect(top_, size_, PROTECT_RWE)) throw Error(ERR_CANT_PROTECT);
isCalledCalcJmpAddress_ = true;
}
public:
@ -858,7 +859,7 @@ public:
PROTECT_RE = 2 // read/exec
};
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
, maxSize_(maxSize)
, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
@ -866,7 +867,7 @@ public:
, isCalledCalcJmpAddress_(false)
{
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, PROTECT_RWE)) {
if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
alloc_->free(top_);
throw Error(ERR_CANT_PROTECT);
}
@ -874,10 +875,19 @@ public:
virtual ~CodeArray()
{
if (isAllocType()) {
if (alloc_->useProtect()) protect(top_, maxSize_, PROTECT_RW);
if (useProtect()) setProtectModeRW(false);
alloc_->free(top_);
}
}
/*
	change the protection mode of the code buffer (top_, maxSize_ bytes)
	@param mode [in] mode passed to protect()
	@param throwException [in] if true, throw Error(ERR_CANT_PROTECT) on failure
	@return true if protect() succeeded, false if it failed and throwException is false
*/
bool setProtectMode(ProtectMode mode, bool throwException = true)
{
	if (protect(top_, maxSize_, mode)) return true;
	if (!throwException) return false;
	throw Error(ERR_CANT_PROTECT);
}
// shortcut for setProtectMode(PROTECT_RE, ...) ; read/exec
bool setProtectModeRE(bool throwException = true)
{
	return setProtectMode(PROTECT_RE, throwException);
}
// shortcut for setProtectMode(PROTECT_RW, ...)
bool setProtectModeRW(bool throwException = true)
{
	return setProtectMode(PROTECT_RW, throwException);
}
void resetSize()
{
size_ = 0;
@ -995,6 +1005,9 @@ public:
size_t pageSize = sysconf(_SC_PAGESIZE);
size_t iaddr = reinterpret_cast<size_t>(addr);
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
#ifndef NDEBUG
if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
#endif
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
#else
return true;
@ -1500,7 +1513,7 @@ private:
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
return v;
}
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0)
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
{
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
int w = (type & T_EW1) ? 1 : 0;
@ -1543,7 +1556,7 @@ private:
}
}
}
bool Vp = !(v ? v->isExtIdx2() : 0);
bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
db(0x62);
@ -1935,9 +1948,10 @@ private:
const Address& addr = op2.getAddress();
const RegExp& regExp = addr.getRegExp();
const Reg& base = regExp.getBase();
const Reg& index = regExp.getIndex();
if (BIT == 64 && addr.is32bit()) db(0x67);
int disp8N = 0;
bool x = regExp.getIndex().isExtIdx();
bool x = index.isExtIdx();
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
int aaa = addr.getOpmaskIdx();
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
@ -1946,8 +1960,8 @@ private:
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
b = true;
}
int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0;
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
int VL = regExp.isVsib() ? index.getBit() : 0;
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
} else {
vex(r, base, p1, type, code, x);
}
@ -2148,6 +2162,7 @@ public:
#endif
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
void L(const Label& label) { labelMgr_.defineClabel(label); }
// create a Label, define it with L(const Label&) and return it
Label L()
{
	Label newLabel;
	L(newLabel);
	return newLabel;
}
void inLocalLabel() { labelMgr_.enterLocal(); }
void outLocalLabel() { labelMgr_.leaveLocal(); }
/*
@ -2436,11 +2451,16 @@ public:
MUST call ready() to complete generating code if you use AutoGrow mode.
It is not necessary for the other mode if hasUndefinedLabel() is true.
*/
void ready()
void ready(ProtectMode mode = PROTECT_RWE)
{
if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
if (isAutoGrow()) calcJmpAddress();
if (isAutoGrow()) {
calcJmpAddress();
if (useProtect()) setProtectMode(mode);
}
}
// ready() with PROTECT_RE (read/exec) requested as the protection mode
void readyRE()
{
	ready(PROTECT_RE);
}
#ifdef XBYAK_TEST
void dump(bool doClear = true)
{

View File

@ -1,4 +1,4 @@
const char *getVersionString() const { return "5.67"; }
const char *getVersionString() const { return "5.71"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }