a64_emitter: Fix ABI push and pop

SachinVin 2019-07-18 16:32:50 +01:00
parent ddc8b7f932
commit 0ce4fa4480
2 changed files with 74 additions and 62 deletions

View File

@@ -1840,62 +1840,74 @@ bool ARM64XEmitter::MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2) {
     return element;
 }
 
-void ARM64XEmitter::ABI_PushRegisters(std::bitset<32> registers) {
-    auto num_regs = registers.count();
-    s32 stack_size = static_cast<s32>((num_regs + (num_regs & 1)) * 8);
-    unsigned int it = 0;
+void ARM64XEmitter::ABI_PushRegisters(u32 registers) {
+    int num_regs = Common::BitCount(registers);
+    int stack_size = (num_regs + (num_regs & 1)) * 8;
+    int it = 0;
+    std::array<ARM64Reg, 32> gpr{};
 
     if (!num_regs)
         return;
 
-    // 8 byte per register, but 16 byte alignment, so we may have to padd one
-    // register. Only update the SP on the last write to avoid the dependency
-    // between those stores.
+    for (int i = 0; i < 32; ++i) {
+        if (Common::Bit(i, registers)) {
+            gpr[it++] = static_cast<ARM64Reg>(X0 + i);
+        }
+    }
 
-    // The first push must adjust the SP, else a context switch may invalidate
-    // everything below SP.
+    // 8 byte per register, but 16 byte alignment, so we may have to padd one register.
+    // Only update the SP on the last write to avoid the dependency between those stores.
+    // The first push must adjust the SP, else a context switch may invalidate everything below SP.
+    it = 0;
     if (num_regs & 1) {
-        STR(INDEX_PRE, static_cast<ARM64Reg>(X0 + registers[it++]), SP, -stack_size);
+        STR(INDEX_PRE, gpr[0], SP, -stack_size);
+        it++;
     } else {
-        STP(INDEX_PRE, static_cast<ARM64Reg>(X0 + registers[it]),
-            static_cast<ARM64Reg>(X0 + registers[it + 1]), SP, -stack_size);
+        STP(INDEX_PRE, gpr[0], gpr[1], SP, -stack_size);
         it += 2;
     }
 
-    // Fast store for all other registers, this is always an even number.
-    for (unsigned int i = 0; i < (num_regs - 1) / 2; i++) {
-        STP(INDEX_SIGNED, static_cast<ARM64Reg>(X0 + registers[it]),
-            static_cast<ARM64Reg>(X0 + registers[it + 1]), SP, 16 * (i + 1));
+    // Fast store for all other registers, this is always an even number.
+    for (int i = 0; i < (num_regs - 1) / 2; i++) {
+        STP(INDEX_SIGNED, gpr[it], gpr[it + 1], SP, 16 * (i + 1));
         it += 2;
     }
 
-    ASSERT_MSG(it == registers.count(), "%s registers don't match.", __func__);
+    ASSERT_MSG(it == num_regs, "%s registers don't match.", __func__);
 }
 
-void ARM64XEmitter::ABI_PopRegisters(std::bitset<32> registers) {
-    u32 num_regs = static_cast<u32>(registers.count());
+void ARM64XEmitter::ABI_PopRegisters(u32 registers) {
+    u8 num_regs = static_cast<u8>(Common::BitCount(registers));
     int stack_size = (num_regs + (num_regs & 1)) * 8;
-    unsigned int it = 0;
+    int it = 0;
+    std::array<ARM64Reg, 32> gpr{};
 
     if (!num_regs)
         return;
 
-    // We must adjust the SP in the end, so load the first (two) registers at
-    // least.
-    ARM64Reg first = static_cast<ARM64Reg>(X0 + registers[it++]);
-    ARM64Reg second = static_cast<ARM64Reg>(0);
+    for (int i = 0; i < 32; ++i) {
+        if (Common::Bit(i, registers)) {
+            gpr[it++] = static_cast<ARM64Reg>(X0 + i);
+        }
+    }
+    it = 0;
+
+    // We must adjust the SP in the end, so load the first (two) registers at least.
+    ARM64Reg first = gpr[it++];
+    ARM64Reg second = INVALID_REG;
     if (!(num_regs & 1))
-        second = static_cast<ARM64Reg>(X0 + registers[it++]);
+        second = gpr[it++];
 
-    // 8 byte per register, but 16 byte alignment, so we may have to padd one
-    // register. Only update the SP on the last load to avoid the dependency
-    // between those loads.
+    // 8 byte per register, but 16 byte alignment, so we may have to padd one register.
+    // Only update the SP on the last load to avoid the dependency between those loads.
 
-    // Fast load for all but the first (two) registers, this is always an even
-    // number.
-    for (unsigned int i = 0; i < (num_regs - 1) / 2; i++) {
-        LDP(INDEX_SIGNED, static_cast<ARM64Reg>(X0 + registers[it]),
-            static_cast<ARM64Reg>(X0 + registers[it + 1]), SP, 16 * (i + 1));
+    // Fast load for all but the first (two) registers, this is always an even number.
+    for (int i = 0; i < (num_regs - 1) / 2; ++i) {
+        LDP(INDEX_SIGNED, gpr[it], gpr[it + 1], SP, 16 * (i + 1));
         it += 2;
     }
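
A note on the helpers: the rewritten code leans on Common::Bit and Common::BitCount, whose definitions are not part of this diff. Judging only from the call sites, Common::Bit(i, registers) and Common::BitCount(registers), they are presumably a test-bit predicate and a popcount. A minimal sketch under that assumption:

    #include <cstddef>
    #include <cstdint>

    namespace Common {
    // Assumed semantics: return bit `pos` of `value` (inferred from Common::Bit(i, registers)).
    constexpr bool Bit(std::size_t pos, std::uint32_t value) {
        return ((value >> pos) & 1) != 0;
    }

    // Assumed semantics: number of set bits in `value`, i.e. a plain popcount
    // (inferred from Common::BitCount(registers)).
    constexpr int BitCount(std::uint32_t value) {
        int count = 0;
        for (; value != 0; value &= value - 1) // clear the lowest set bit each iteration
            ++count;
        return count;
    }
    } // namespace Common
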
@@ -1905,7 +1917,7 @@ void ARM64XEmitter::ABI_PopRegisters(std::bitset<32> registers) {
     else
         LDP(INDEX_POST, first, second, SP, stack_size);
 
-    ASSERT_MSG(it == registers.count(), "%s registers don't match.", __func__);
+    ASSERT_MSG(it == num_regs, "%s registers don't match.", __func__);
 }
 
 // Float Emitter
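
The bug being fixed is worth spelling out: with std::bitset<32>, registers[it] yields the bit's value (0 or 1), not the index of a set register, so static_cast<ARM64Reg>(X0 + registers[it]) always produced X0 or X1 regardless of which registers the mask named. The rewrite first expands the mask into the gpr array and indexes that instead. A sketch of the resulting layout for a hypothetical odd-sized set {X19, X20, X21}:

    // Hypothetical call: mask bits 19..21 set.
    emitter.ABI_PushRegisters((1u << 19) | (1u << 20) | (1u << 21));
    // num_regs = 3, stack_size = (3 + 1) * 8 = 32 (16-byte aligned, one slot of padding).
    // Emits, roughly:
    //     STR X19, [SP, #-32]!    // first store pre-decrements SP by the whole frame
    //     STP X20, X21, [SP, #16] // remaining pair stored without touching SP again
    // The matching ABI_PopRegisters with the same mask emits, roughly:
    //     LDP X20, X21, [SP, #16]
    //     LDR X19, [SP], #32      // last load post-increments SP back (odd-count path)
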
@@ -3334,15 +3346,15 @@ void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift) {
     EncodeModImm(Q, op, cmode, 0, Rd, imm);
 }
 
-void ARM64FloatEmitter::ABI_PushRegisters(std::bitset<32> registers, ARM64Reg tmp) {
+void ARM64FloatEmitter::ABI_PushRegisters(u32 registers, ARM64Reg tmp) {
     bool bundled_loadstore = false;
 
     for (int i = 0; i < 32; ++i) {
-        if (!registers[i])
+        if (!Common::Bit(i, registers))
             continue;
 
         int count = 0;
-        while (++count < 4 && (i + count) < 32 && registers[i + count]) {
+        while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) {
         }
 
         if (count > 1) {
             bundled_loadstore = true;
@@ -3351,28 +3363,28 @@ void ARM64FloatEmitter::ABI_PushRegisters(std::bitset<32> registers, ARM64Reg tmp) {
     }
 
     if (bundled_loadstore && tmp != INVALID_REG) {
-        u32 num_regs = static_cast<u32>(registers.count());
+        int num_regs = Common::BitCount(registers);
         m_emit->SUB(SP, SP, num_regs * 16);
         m_emit->ADD(tmp, SP, 0);
 
         std::vector<ARM64Reg> island_regs;
         for (int i = 0; i < 32; ++i) {
-            if (!registers[i])
+            if (!Common::Bit(i, registers))
                 continue;
 
-            u8 count = 0;
+            int count = 0;
 
             // 0 = true
             // 1 < 4 && registers[i + 1] true!
             // 2 < 4 && registers[i + 2] true!
             // 3 < 4 && registers[i + 3] true!
            // 4 < 4 && registers[i + 4] false!
-            while (++count < 4 && (i + count) < 32 && registers[i + count]) {
+            while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) {
             }
 
             if (count == 1)
-                island_regs.push_back(static_cast<ARM64Reg>(Q0 + i));
+                island_regs.push_back((ARM64Reg)(Q0 + i));
             else
-                ST1(64, count, INDEX_POST, static_cast<ARM64Reg>(Q0 + i), tmp);
+                ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp);
 
             i += count - 1;
         }
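
The while loop above counts runs of up to four consecutive Q registers so each run can be stored with a single ST1; isolated registers ("islands") are collected and handled separately. A self-contained sketch of the same scan over an example mask, with the helper semantics assumed as in the note above:

    #include <cstdint>
    #include <cstdio>

    // Assumed to match Common::Bit: test bit `pos` of `mask`.
    static bool Bit(int pos, std::uint32_t mask) {
        return ((mask >> pos) & 1) != 0;
    }

    int main() {
        // Example mask: Q0-Q2 set (a run of three), Q5 set (an island).
        const std::uint32_t registers = 0b100111;
        for (int i = 0; i < 32; ++i) {
            if (!Bit(i, registers))
                continue;
            // Count up to four consecutive set bits starting at i,
            // exactly like the while loop in the diff.
            int count = 0;
            while (++count < 4 && (i + count) < 32 && Bit(i + count, registers)) {
            }
            if (count == 1)
                std::printf("Q%d is an island -> pair/STR path\n", i);
            else
                std::printf("Q%d..Q%d: bundle of %d -> one ST1\n", i, i + count - 1, count);
            i += count - 1; // skip the registers already covered by this bundle
        }
    }
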
@@ -3390,28 +3402,29 @@ void ARM64FloatEmitter::ABI_PushRegisters(std::bitset<32> registers, ARM64Reg tmp) {
             STR(128, INDEX_POST, pair_regs[0], tmp, 16);
     } else {
         std::vector<ARM64Reg> pair_regs;
-        for (size_t i = 0; i < registers.count(); ++i) {
-            auto it = registers[i];
-            pair_regs.push_back(static_cast<ARM64Reg>(Q0 + it));
-            if (pair_regs.size() == 2) {
-                STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32);
-                pair_regs.clear();
+        for (int i = 0; i < 32; ++i) {
+            if (Common::Bit(i, registers)) {
+                pair_regs.push_back((ARM64Reg)(Q0 + i));
+                if (pair_regs.size() == 2) {
+                    STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32);
+                    pair_regs.clear();
+                }
             }
         }
 
         if (pair_regs.size())
             STR(128, INDEX_PRE, pair_regs[0], SP, -16);
     }
 }
 
-void ARM64FloatEmitter::ABI_PopRegisters(std::bitset<32> registers, ARM64Reg tmp) {
+void ARM64FloatEmitter::ABI_PopRegisters(u32 registers, ARM64Reg tmp) {
     bool bundled_loadstore = false;
-    auto num_regs = registers.count();
+    int num_regs = Common::BitCount(registers);
 
     for (int i = 0; i < 32; ++i) {
-        if (!registers[i])
+        if (!Common::Bit(i, registers))
             continue;
 
         int count = 0;
-        while (++count < 4 && (i + count) < 32 && registers[i + count]) {
+        while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) {
         }
 
         if (count > 1) {
             bundled_loadstore = true;
@@ -3420,15 +3433,14 @@ void ARM64FloatEmitter::ABI_PopRegisters(std::bitset<32> registers, ARM64Reg tmp) {
     }
 
     if (bundled_loadstore && tmp != INVALID_REG) {
-        // The temporary register is only used to indicate that we can use this code
-        // path
+        // The temporary register is only used to indicate that we can use this code path
         std::vector<ARM64Reg> island_regs;
         for (int i = 0; i < 32; ++i) {
-            if (!registers[i])
+            if (!Common::Bit(i, registers))
                 continue;
 
-            u8 count = 0;
-            while (++count < 4 && (i + count) < 32 && registers[i + count]) {
+            int count = 0;
+            while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) {
             }
 
             if (count == 1)
@@ -3454,7 +3466,7 @@ void ARM64FloatEmitter::ABI_PopRegisters(std::bitset<32> registers, ARM64Reg tmp) {
         bool odd = num_regs % 2;
         std::vector<ARM64Reg> pair_regs;
         for (int i = 31; i >= 0; --i) {
-            if (!registers[i])
+            if (!Common::Bit(i, registers))
                 continue;
 
             if (odd) {
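
For the vector paths, the tmp argument is what selects the bundled ST1/LD1 code path; as the comment in the pop hunk notes, it is otherwise unused there. A hypothetical caller-side sketch, with the emitter name, register set, and scratch register chosen purely for illustration:

    // Hypothetical usage: save/restore Q8-Q15 (a run of eight -> two ST1 bundles of four).
    const u32 saved_fprs = 0xFF00; // bits 8..15 -> Q8..Q15
    fp_emitter.ABI_PushRegisters(saved_fprs, X8); // tmp given: bundled path
    // ... emitted code that clobbers Q8-Q15 ...
    fp_emitter.ABI_PopRegisters(saved_fprs, X8);
    // With tmp left as INVALID_REG, the same mask would go through the STP/LDP pair path.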

View File

@@ -855,8 +855,8 @@ public:
     bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
 
     // ABI related
-    void ABI_PushRegisters(std::bitset<32> registers);
-    void ABI_PopRegisters(std::bitset<32> registers);
+    void ABI_PushRegisters(u32 registers);
+    void ABI_PopRegisters(u32 registers);
 
     // Utility to generate a call to a std::function object.
     //
@@ -1062,8 +1062,8 @@ public:
     void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
 
     // ABI related
-    void ABI_PushRegisters(std::bitset<32> registers, ARM64Reg tmp = INVALID_REG);
-    void ABI_PopRegisters(std::bitset<32> registers, ARM64Reg tmp = INVALID_REG);
+    void ABI_PushRegisters(u32 registers, ARM64Reg tmp = INVALID_REG);
+    void ABI_PopRegisters(u32 registers, ARM64Reg tmp = INVALID_REG);
 
 private:
     ARM64XEmitter* m_emit;
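
With the signatures switched from std::bitset<32> to u32 on both emitters, callers now build the mask with plain shifts, one bit per register index (bit i selects Xi or Qi, matching the X0 + i and Q0 + i arithmetic in the implementation). A hypothetical caller:

    // Hypothetical caller-side mask building (the register choice is illustrative only):
    u32 regs_to_save = 0;
    regs_to_save |= 1u << 19; // X19
    regs_to_save |= 1u << 20; // X20
    regs_to_save |= 1u << 30; // X30 (LR)
    code.ABI_PushRegisters(regs_to_save);
    // ... emit the body ...
    code.ABI_PopRegisters(regs_to_save);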