From 8e1f543bfb2f70213fbd5535e470ddba911d007d Mon Sep 17 00:00:00 2001 From: SachinVin Date: Sat, 14 Dec 2019 18:15:02 +0530 Subject: [PATCH] backend/A64: Use X26 for storing remaining cycles. --- src/backend/A64/a32_emit_a64.cpp | 10 +++------- src/backend/A64/block_of_code.cpp | 19 ++++++++----------- src/backend/A64/emit_a64.cpp | 4 +--- src/backend/A64/hostloc.h | 3 ++- 4 files changed, 14 insertions(+), 22 deletions(-) diff --git a/src/backend/A64/a32_emit_a64.cpp b/src/backend/A64/a32_emit_a64.cpp index 8e0d35d7..93fb7f34 100644 --- a/src/backend/A64/a32_emit_a64.cpp +++ b/src/backend/A64/a32_emit_a64.cpp @@ -695,13 +695,10 @@ void A32EmitA64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitA64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(nullptr); - // Use an unused HostCall register - ARM64Reg cycles_remaining = X9; code.SwitchFpscrOnExit(); code.LDR(INDEX_UNSIGNED, code.ABI_PARAM2, X28, offsetof(A32JitState, cycles_to_run)); - code.LDR(INDEX_UNSIGNED, cycles_remaining, X28, offsetof(A32JitState, cycles_remaining)); - code.SUB(code.ABI_PARAM2, code.ABI_PARAM2, cycles_remaining); + code.SUB(code.ABI_PARAM2, code.ABI_PARAM2, X26); Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks).EmitCall(code); ctx.reg_alloc.EndOfAllocScope(); @@ -710,7 +707,7 @@ void A32EmitA64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { Devirtualize<&A32::UserCallbacks::CallSVC>(config.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks).EmitCall(code); code.STR(INDEX_UNSIGNED, code.ABI_RETURN, X28, offsetof(A32JitState, cycles_to_run)); - code.STR(INDEX_UNSIGNED, code.ABI_RETURN, X28, offsetof(A32JitState, cycles_remaining)); + code.MOV(X26, code.ABI_RETURN); code.SwitchFpscrOnEntry(); } @@ -1427,8 +1424,7 @@ void A32EmitA64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, 
IR::LocationDescriptor initial_location) { EmitSetUpperLocationDescriptor(terminal.next, initial_location); - code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, offsetof(A32JitState, cycles_remaining)); - code.CMP(code.ABI_SCRATCH1, ZR); + code.CMP(X26, ZR); patch_information[terminal.next].jg.emplace_back(code.GetCodePtr()); if (auto next_bb = GetBasicBlock(terminal.next)) { diff --git a/src/backend/A64/block_of_code.cpp b/src/backend/A64/block_of_code.cpp index cb20736a..d6670beb 100644 --- a/src/backend/A64/block_of_code.cpp +++ b/src/backend/A64/block_of_code.cpp @@ -155,15 +155,15 @@ void BlockOfCode::GenRunCode() { MOV(Arm64Gen::X28, ABI_PARAM1); MOVI2R(Arm64Gen::X27, cb.value_in_X27); - MOV(Arm64Gen::X26, ABI_PARAM2); // save temporarily in non-volatile register + MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register cb.GetTicksRemaining->EmitCall(*this); STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); - STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); + MOV(Arm64Gen::X26, ABI_RETURN); SwitchFpscrOnEntry(); - BR(Arm64Gen::X26); + BR(Arm64Gen::X25); AlignCode16(); run_code = (RunCodeFuncType) GetWritableCodePtr(); @@ -179,7 +179,7 @@ void BlockOfCode::GenRunCode() { cb.GetTicksRemaining->EmitCall(*this); STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); - STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); + MOV(Arm64Gen::X26, ABI_RETURN); enter_fpscr_then_loop = GetCodePtr(); SwitchFpscrOnEntry(); @@ -191,8 +191,7 @@ void BlockOfCode::GenRunCode() { // Return from run code variants const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){ if (!force_return) { - LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); - CMP(ABI_SCRATCH1, Arm64Gen::ZR); + CMP(Arm64Gen::X26, Arm64Gen::ZR); 
B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop); } @@ -202,8 +201,7 @@ void BlockOfCode::GenRunCode() { cb.AddTicks->EmitCall(*this, [this](RegList param) { LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run); - LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); - SUBS(param[0], param[0], ABI_SCRATCH1); + SUB(param[0], param[0], Arm64Gen::X26); }); ABI_PopCalleeSaveRegistersAndAdjustStack(*this); @@ -248,13 +246,12 @@ void BlockOfCode::SwitchFpscrOnExit() { void BlockOfCode::UpdateTicks() { cb.AddTicks->EmitCall(*this, [this](RegList param) { LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run); - LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_cycles_remaining); - SUBS(param[0], param[0], ABI_SCRATCH1); + SUB(param[0], param[0], Arm64Gen::X26); }); cb.GetTicksRemaining->EmitCall(*this); STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run); - STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_remaining); + MOV(Arm64Gen::X26, ABI_RETURN); } void BlockOfCode::LookupBlock() { diff --git a/src/backend/A64/emit_a64.cpp b/src/backend/A64/emit_a64.cpp index 6349b458..604ab060 100644 --- a/src/backend/A64/emit_a64.cpp +++ b/src/backend/A64/emit_a64.cpp @@ -149,9 +149,7 @@ void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) { void EmitA64::EmitAddCycles(size_t cycles) { ASSERT(cycles < std::numeric_limits<u32>::max()); - code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining); - code.SUBI2R(code.ABI_SCRATCH1, code.ABI_SCRATCH1, static_cast<u32>(cycles)); - code.STR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, code.GetJitStateInfo().offsetof_cycles_remaining); + code.SUBI2R(X26, X26, static_cast<u32>(cycles)); } FixupBranch EmitA64::EmitCond(IR::Cond cond) { diff --git a/src/backend/A64/hostloc.h b/src/backend/A64/hostloc.h index 
36a40f0d..7183d0a8 100644 --- a/src/backend/A64/hostloc.h +++ b/src/backend/A64/hostloc.h @@ -131,13 +131,14 @@ inline size_t HostLocBitWidth(HostLoc loc) { using HostLocList = std::initializer_list<HostLoc>; // X18 may be reserved.(Windows and iOS) +// X26 holds the cycle counter // X27 contains an emulated memory relate pointer // X28 used for holding the JitState. // X30 is the link register. // In order of desireablity based first on ABI constexpr HostLocList any_gpr = { HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23, - HostLoc::X24, HostLoc::X25, HostLoc::X26, + HostLoc::X24, HostLoc::X25, HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12, HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,