diff --git a/src/backend/A64/emitter/a64_emitter.cpp b/src/backend/A64/emitter/a64_emitter.cpp index 15287e5e..b3f25823 100644 --- a/src/backend/A64/emitter/a64_emitter.cpp +++ b/src/backend/A64/emitter/a64_emitter.cpp @@ -1840,62 +1840,74 @@ bool ARM64XEmitter::MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2) { return element; } -void ARM64XEmitter::ABI_PushRegisters(std::bitset<32> registers) { - auto num_regs = registers.count(); - s32 stack_size = static_cast<s32>((num_regs + (num_regs & 1)) * 8); - unsigned int it = 0; +void ARM64XEmitter::ABI_PushRegisters(u32 registers) { + int num_regs = Common::BitCount(registers); + int stack_size = (num_regs + (num_regs & 1)) * 8; + int it = 0; + + std::array<ARM64Reg, 32> gpr{}; if (!num_regs) return; - // 8 byte per register, but 16 byte alignment, so we may have to padd one - // register. Only update the SP on the last write to avoid the dependency - // between those stores. + for (int i = 0; i < 32; ++i) { + if (Common::Bit(i, registers)) { + gpr[it++] = static_cast<ARM64Reg>(X0 + i); + } + } - // The first push must adjust the SP, else a context switch may invalidate - // everything below SP. + // 8 byte per register, but 16 byte alignment, so we may have to padd one register. + // Only update the SP on the last write to avoid the dependency between those stores. + + // The first push must adjust the SP, else a context switch may invalidate everything below SP. + + it = 0; if (num_regs & 1) { - STR(INDEX_PRE, static_cast<ARM64Reg>(X0 + registers[it++]), SP, -stack_size); + STR(INDEX_PRE, gpr[0], SP, -stack_size); + it++; } else { - STP(INDEX_PRE, static_cast<ARM64Reg>(X0 + registers[it]), - static_cast<ARM64Reg>(X0 + registers[it + 1]), SP, -stack_size); + STP(INDEX_PRE, gpr[0], gpr[1], SP, -stack_size); it += 2; } // Fast store for all other registers, this is always an even number. - // Fast store for all other registers, this is always an even number. 
- for (unsigned int i = 0; i < (num_regs - 1) / 2; i++) { - STP(INDEX_SIGNED, static_cast<ARM64Reg>(X0 + registers[it]), - static_cast<ARM64Reg>(X0 + registers[it + 1]), SP, 16 * (i + 1)); + for (int i = 0; i < (num_regs - 1) / 2; i++) { + STP(INDEX_SIGNED, gpr[it], gpr[it + 1], SP, 16 * (i + 1)); it += 2; } - ASSERT_MSG(it == registers.count(), "%s registers don't match.", __func__); + + ASSERT_MSG(it == num_regs, "%s registers don't match.", __func__); } -void ARM64XEmitter::ABI_PopRegisters(std::bitset<32> registers) { - u32 num_regs = static_cast<u32>(registers.count()); +void ARM64XEmitter::ABI_PopRegisters(u32 registers) { + u8 num_regs = static_cast<u8>(Common::BitCount(registers)); int stack_size = (num_regs + (num_regs & 1)) * 8; - unsigned int it = 0; + int it = 0; + + std::array<ARM64Reg, 32> gpr{}; if (!num_regs) return; - // We must adjust the SP in the end, so load the first (two) registers at - // least. - ARM64Reg first = static_cast<ARM64Reg>(X0 + registers[it++]); - ARM64Reg second = static_cast<ARM64Reg>(0); + for (int i = 0; i < 32; ++i) { + if (Common::Bit(i, registers)) { + gpr[it++] = static_cast<ARM64Reg>(X0 + i); + } + } + it = 0; + // We must adjust the SP in the end, so load the first (two) registers at least. + ARM64Reg first = gpr[it++]; + ARM64Reg second = INVALID_REG; if (!(num_regs & 1)) - second = static_cast<ARM64Reg>(X0 + registers[it++]); + second = gpr[it++]; - // 8 byte per register, but 16 byte alignment, so we may have to padd one - // register. Only update the SP on the last load to avoid the dependency - // between those loads. + // 8 byte per register, but 16 byte alignment, so we may have to padd one register. + // Only update the SP on the last load to avoid the dependency between those loads. - // Fast load for all but the first (two) registers, this is always an even - // number. 
- for (unsigned int i = 0; i < (num_regs - 1) / 2; i++) { - LDP(INDEX_SIGNED, static_cast<ARM64Reg>(X0 + registers[it]), - static_cast<ARM64Reg>(X0 + registers[it + 1]), SP, 16 * (i + 1)); + // Fast load for all but the first (two) registers, this is always an even number. + + for (int i = 0; i < (num_regs - 1) / 2; ++i) { + LDP(INDEX_SIGNED, gpr[it], gpr[it + 1], SP, 16 * (i + 1)); it += 2; } @@ -1905,7 +1917,7 @@ void ARM64XEmitter::ABI_PopRegisters(std::bitset<32> registers) { else LDP(INDEX_POST, first, second, SP, stack_size); - ASSERT_MSG(it == registers.count(), "%s registers don't match.", __func__); + ASSERT_MSG(it == num_regs, "%s registers don't match.", __func__); } // Float Emitter @@ -3334,15 +3346,15 @@ void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift) { EncodeModImm(Q, op, cmode, 0, Rd, imm); } -void ARM64FloatEmitter::ABI_PushRegisters(std::bitset<32> registers, ARM64Reg tmp) { +void ARM64FloatEmitter::ABI_PushRegisters(u32 registers, ARM64Reg tmp) { bool bundled_loadstore = false; for (int i = 0; i < 32; ++i) { - if (!registers[i]) + if (!Common::Bit(i, registers)) continue; int count = 0; - while (++count < 4 && (i + count) < 32 && registers[i + count]) { + while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) { } if (count > 1) { bundled_loadstore = true; @@ -3351,28 +3363,28 @@ void ARM64FloatEmitter::ABI_PushRegisters(std::bitset<32> registers, ARM64Reg tm } if (bundled_loadstore && tmp != INVALID_REG) { - u32 num_regs = static_cast<u32>(registers.count()); + int num_regs = Common::BitCount(registers); m_emit->SUB(SP, SP, num_regs * 16); m_emit->ADD(tmp, SP, 0); std::vector<ARM64Reg> island_regs; for (int i = 0; i < 32; ++i) { - if (!registers[i]) + if (!Common::Bit(i, registers)) continue; - u8 count = 0; + int count = 0; // 0 = true // 1 < 4 && registers[i + 1] true! // 2 < 4 && registers[i + 2] true! // 3 < 4 && registers[i + 3] true! // 4 < 4 && registers[i + 4] false! 
- while (++count < 4 && (i + count) < 32 && registers[i + count]) { + while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) { } if (count == 1) - island_regs.push_back(static_cast<ARM64Reg>(Q0 + i)); + island_regs.push_back((ARM64Reg)(Q0 + i)); else - ST1(64, count, INDEX_POST, static_cast<ARM64Reg>(Q0 + i), tmp); + ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp); i += count - 1; } @@ -3390,28 +3402,29 @@ void ARM64FloatEmitter::ABI_PushRegisters(std::bitset<32> registers, ARM64Reg tm STR(128, INDEX_POST, pair_regs[0], tmp, 16); } else { std::vector<ARM64Reg> pair_regs; - for (size_t i = 0; i < registers.count(); ++i) { - auto it = registers[i]; - pair_regs.push_back(static_cast<ARM64Reg>(Q0 + it)); - if (pair_regs.size() == 2) { - STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32); - pair_regs.clear(); + for (int i = 0; i < 32; ++i) { + if (Common::Bit(i, registers)) { + pair_regs.push_back((ARM64Reg)(Q0 + i)); + if (pair_regs.size() == 2) { + STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32); + pair_regs.clear(); + } } } if (pair_regs.size()) STR(128, INDEX_PRE, pair_regs[0], SP, -16); } } -void ARM64FloatEmitter::ABI_PopRegisters(std::bitset<32> registers, ARM64Reg tmp) { +void ARM64FloatEmitter::ABI_PopRegisters(u32 registers, ARM64Reg tmp) { bool bundled_loadstore = false; - auto num_regs = registers.count(); + int num_regs = Common::BitCount(registers); for (int i = 0; i < 32; ++i) { - if (!registers[i]) + if (!Common::Bit(i, registers)) continue; int count = 0; - while (++count < 4 && (i + count) < 32 && registers[i + count]) { + while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) { } if (count > 1) { bundled_loadstore = true; @@ -3420,15 +3433,14 @@ void ARM64FloatEmitter::ABI_PopRegisters(std::bitset<32> registers, ARM64Reg tmp } if (bundled_loadstore && tmp != INVALID_REG) { - // The temporary register is only used to indicate that we can use this code - // path + // The temporary register is only used to indicate that we can use this code path std::vector<ARM64Reg> island_regs; for (int i = 0; i < 32; ++i) { - if (!registers[i]) + if (!Common::Bit(i, registers)) continue; u8 count = 0; - while (++count < 4 && (i + count) < 32 && registers[i + count]) { + while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) { } if (count == 1) @@ -3454,7 +3466,7 @@ void ARM64FloatEmitter::ABI_PopRegisters(std::bitset<32> registers, ARM64Reg tmp bool odd = num_regs % 2; std::vector<ARM64Reg> pair_regs; for (int i = 31; i >= 0; --i) { - if (!registers[i]) + if (!Common::Bit(i, registers)) continue; if (odd) { diff --git a/src/backend/A64/emitter/a64_emitter.h b/src/backend/A64/emitter/a64_emitter.h index 0fd2668e..66a4303b 100644 --- a/src/backend/A64/emitter/a64_emitter.h +++ b/src/backend/A64/emitter/a64_emitter.h @@ -855,8 +855,8 @@ public: bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm); // ABI related - void ABI_PushRegisters(std::bitset<32> registers); - void ABI_PopRegisters(std::bitset<32> registers); + void ABI_PushRegisters(u32 registers); + void ABI_PopRegisters(u32 registers); // Utility to generate a call to a std::function object. // @@ -1062,8 +1062,8 @@ public: void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG); // ABI related - void ABI_PushRegisters(std::bitset<32> registers, ARM64Reg tmp = INVALID_REG); - void ABI_PopRegisters(std::bitset<32> registers, ARM64Reg tmp = INVALID_REG); + void ABI_PushRegisters(u32 registers, ARM64Reg tmp = INVALID_REG); + void ABI_PopRegisters(u32 registers, ARM64Reg tmp = INVALID_REG); private: ARM64XEmitter* m_emit;