backend/A64: Use X26 for storing remaining cycles.

This commit is contained in:
SachinVin 2019-12-14 18:15:02 +05:30
parent b6733a089a
commit 9c74e334b1
4 changed files with 14 additions and 22 deletions

View File

@@ -695,13 +695,10 @@ void A32EmitA64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitA64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitA64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr); ctx.reg_alloc.HostCall(nullptr);
// Use an unused HostCall register
ARM64Reg cycles_remaining = X9;
code.SwitchFpscrOnExit(); code.SwitchFpscrOnExit();
code.LDR(INDEX_UNSIGNED, code.ABI_PARAM2, X28, offsetof(A32JitState, cycles_to_run)); code.LDR(INDEX_UNSIGNED, code.ABI_PARAM2, X28, offsetof(A32JitState, cycles_to_run));
code.LDR(INDEX_UNSIGNED, cycles_remaining, X28, offsetof(A32JitState, cycles_remaining)); code.SUB(code.ABI_PARAM2, code.ABI_PARAM2, X26);
code.SUB(code.ABI_PARAM2, code.ABI_PARAM2, cycles_remaining);
Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks).EmitCall(code);
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
@@ -710,7 +707,7 @@ void A32EmitA64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
Devirtualize<&A32::UserCallbacks::CallSVC>(config.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::CallSVC>(config.callbacks).EmitCall(code);
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks).EmitCall(code);
code.STR(INDEX_UNSIGNED, code.ABI_RETURN, X28, offsetof(A32JitState, cycles_to_run)); code.STR(INDEX_UNSIGNED, code.ABI_RETURN, X28, offsetof(A32JitState, cycles_to_run));
code.STR(INDEX_UNSIGNED, code.ABI_RETURN, X28, offsetof(A32JitState, cycles_remaining)); code.MOV(X26, code.ABI_RETURN);
code.SwitchFpscrOnEntry(); code.SwitchFpscrOnEntry();
} }
@@ -1427,8 +1424,7 @@ void A32EmitA64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) { void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
EmitSetUpperLocationDescriptor(terminal.next, initial_location); EmitSetUpperLocationDescriptor(terminal.next, initial_location);
code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, offsetof(A32JitState, cycles_remaining)); code.CMP(X26, ZR);
code.CMP(code.ABI_SCRATCH1, ZR);
patch_information[terminal.next].jg.emplace_back(code.GetCodePtr()); patch_information[terminal.next].jg.emplace_back(code.GetCodePtr());
if (auto next_bb = GetBasicBlock(terminal.next)) { if (auto next_bb = GetBasicBlock(terminal.next)) {

View File

@@ -155,15 +155,15 @@ void BlockOfCode::GenRunCode() {
MOV(Arm64Gen::X28, ABI_PARAM1); MOV(Arm64Gen::X28, ABI_PARAM1);
MOVI2R(Arm64Gen::X27, cb.value_in_X27); MOVI2R(Arm64Gen::X27, cb.value_in_X27);
MOV(Arm64Gen::X26, ABI_PARAM2); // save temporarily in non-volatile register MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this); cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); MOV(Arm64Gen::X26, ABI_RETURN);
SwitchFpscrOnEntry(); SwitchFpscrOnEntry();
BR(Arm64Gen::X26); BR(Arm64Gen::X25);
AlignCode16(); AlignCode16();
run_code = (RunCodeFuncType) GetWritableCodePtr(); run_code = (RunCodeFuncType) GetWritableCodePtr();
@@ -179,7 +179,7 @@ void BlockOfCode::GenRunCode() {
cb.GetTicksRemaining->EmitCall(*this); cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); MOV(Arm64Gen::X26, ABI_RETURN);
enter_fpscr_then_loop = GetCodePtr(); enter_fpscr_then_loop = GetCodePtr();
SwitchFpscrOnEntry(); SwitchFpscrOnEntry();
@@ -191,8 +191,7 @@ void BlockOfCode::GenRunCode() {
// Return from run code variants // Return from run code variants
const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){ const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){
if (!force_return) { if (!force_return) {
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); CMP(Arm64Gen::X26, Arm64Gen::ZR);
CMP(ABI_SCRATCH1, Arm64Gen::ZR);
B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop); B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop);
} }
@@ -202,8 +201,7 @@ void BlockOfCode::GenRunCode() {
cb.AddTicks->EmitCall(*this, [this](RegList param) { cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run); LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); SUB(param[0], param[0], Arm64Gen::X26);
SUBS(param[0], param[0], ABI_SCRATCH1);
}); });
ABI_PopCalleeSaveRegistersAndAdjustStack(*this); ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
@@ -248,13 +246,12 @@ void BlockOfCode::SwitchFpscrOnExit() {
void BlockOfCode::UpdateTicks() { void BlockOfCode::UpdateTicks() {
cb.AddTicks->EmitCall(*this, [this](RegList param) { cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run); LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); SUB(param[0], param[0], Arm64Gen::X26);
SUBS(param[0], param[0], ABI_SCRATCH1);
}); });
cb.GetTicksRemaining->EmitCall(*this); cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); MOV(Arm64Gen::X26, ABI_RETURN);
} }
void BlockOfCode::LookupBlock() { void BlockOfCode::LookupBlock() {

View File

@@ -149,9 +149,7 @@ void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
void EmitA64::EmitAddCycles(size_t cycles) { void EmitA64::EmitAddCycles(size_t cycles) {
ASSERT(cycles < std::numeric_limits<u32>::max()); ASSERT(cycles < std::numeric_limits<u32>::max());
code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining); code.SUBI2R(X26, X26, static_cast<u32>(cycles));
code.SUBI2R(code.ABI_SCRATCH1, code.ABI_SCRATCH1, static_cast<u32>(cycles));
code.STR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining);
} }
FixupBranch EmitA64::EmitCond(IR::Cond cond) { FixupBranch EmitA64::EmitCond(IR::Cond cond) {

View File

@@ -131,13 +131,14 @@ inline size_t HostLocBitWidth(HostLoc loc) {
using HostLocList = std::initializer_list<HostLoc>; using HostLocList = std::initializer_list<HostLoc>;
// X18 may be reserved.(Windows and iOS) // X18 may be reserved.(Windows and iOS)
// X26 holds the cycle counter
// X27 contains an emulated memory relate pointer // X27 contains an emulated memory relate pointer
// X28 used for holding the JitState. // X28 used for holding the JitState.
// X30 is the link register. // X30 is the link register.
// In order of desireablity based first on ABI // In order of desireablity based first on ABI
constexpr HostLocList any_gpr = { constexpr HostLocList any_gpr = {
HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23, HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
HostLoc::X24, HostLoc::X25, HostLoc::X26, HostLoc::X24, HostLoc::X25,
HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12, HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12,
HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17, HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,