diff --git a/src/dynarmic/backend/A64/a32_emit_a64.cpp b/src/dynarmic/backend/A64/a32_emit_a64.cpp
index b2e80389..d22fcce4 100644
--- a/src/dynarmic/backend/A64/a32_emit_a64.cpp
+++ b/src/dynarmic/backend/A64/a32_emit_a64.cpp
@@ -1419,15 +1419,28 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
         code.ReturnFromRunCode();
         return;
     }
+
+    if (config.enable_cycle_counting) {
+        code.CMP(X26, ZR);
 
-    code.CMP(X26, ZR);
-
-    patch_information[terminal.next].jg.emplace_back(code.GetCodePtr());
-    if (auto next_bb = GetBasicBlock(terminal.next)) {
-        EmitPatchJg(terminal.next, next_bb->entrypoint);
+        patch_information[terminal.next].jg.emplace_back(code.GetCodePtr());
+        if (const auto next_bb = GetBasicBlock(terminal.next)) {
+            EmitPatchJg(terminal.next, next_bb->entrypoint);
+        } else {
+            EmitPatchJg(terminal.next);
+        }
     } else {
-        EmitPatchJg(terminal.next);
+        code.LDR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, halt_reason));
+        code.CMP(code.ABI_SCRATCH1, ZR);
+
+        patch_information[terminal.next].jz.emplace_back(code.GetCodePtr());
+        if (const auto next_bb = GetBasicBlock(terminal.next)) {
+            EmitPatchJz(terminal.next, next_bb->entrypoint);
+        } else {
+            EmitPatchJz(terminal.next);
+        }
     }
+
     FixupBranch dest = code.B();
 
     code.SwitchToFarCode();
@@ -1495,7 +1508,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
 }
 
 void A32EmitA64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
-    code.LDRB(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, halt_requested));
+    code.LDR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, halt_reason));
     // Conditional branch only gives +/- 1MB of branch distance
     FixupBranch zero = code.CBZ(DecodeReg(code.ABI_SCRATCH1));
     code.B(code.GetForceReturnFromRunCodeAddress());
@@ -1529,6 +1542,32 @@ void A32EmitA64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
     code.EnsurePatchLocationSize(patch_location, 24);
 }
 
+void A32EmitA64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
+    const CodePtr patch_location = code.GetCodePtr();
+
+    auto long_branch_gt = [this](CodePtr ptr){
+        const s64 distance = reinterpret_cast<s64>(ptr) - reinterpret_cast<s64>(code.GetCodePtr());
+
+        if((distance >> 2) >= -0x40000 && (distance >> 2) <= 0x3FFFF) {
+            code.B(CC_EQ, ptr);
+            return;
+        }
+
+        FixupBranch cc_neq = code.B(CC_NEQ);
+        code.B(ptr);
+        code.SetJumpTarget(cc_neq);
+    };
+
+    if (target_code_ptr) {
+        long_branch_gt(target_code_ptr);
+    } else {
+        code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), A32::LocationDescriptor{target_desc}.PC());
+        code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, MJitStateReg(A32::Reg::PC));
+        long_branch_gt(code.GetReturnFromRunCodeAddress());
+    }
+    code.EnsurePatchLocationSize(patch_location, 24);
+}
+
 void A32EmitA64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
     const CodePtr patch_location = code.GetCodePtr();
     if (target_code_ptr) {
diff --git a/src/dynarmic/backend/A64/a32_emit_a64.h b/src/dynarmic/backend/A64/a32_emit_a64.h
index 7ea92b0b..981921b6 100644
--- a/src/dynarmic/backend/A64/a32_emit_a64.h
+++ b/src/dynarmic/backend/A64/a32_emit_a64.h
@@ -131,6 +131,7 @@ protected:
     // Patching
     void Unpatch(const IR::LocationDescriptor& target_desc) override;
     void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
+    void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
     void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
     void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) override;
 };
diff --git a/src/dynarmic/backend/A64/a32_interface.cpp b/src/dynarmic/backend/A64/a32_interface.cpp
index 8cc85a0d..adea12c2 100644
--- a/src/dynarmic/backend/A64/a32_interface.cpp
+++ b/src/dynarmic/backend/A64/a32_interface.cpp
@@ -60,7 +60,7 @@ struct Jit::Impl {
     boost::icl::interval_set<u32> invalid_cache_ranges;
     bool invalidate_entire_cache = false;
 
-    void Execute() {
+    HaltReason Execute() {
         const CodePtr current_codeptr = [this]{
             // RSB optimization
             const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
@@ -72,11 +72,15 @@ struct Jit::Impl {
             return GetCurrentBlock();
         }();
 
-        block_of_code.RunCode(&jit_state, current_codeptr);
+        return block_of_code.RunCode(&jit_state, current_codeptr);
     }
 
-    void Step() {
-        block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
+    HaltReason Step() {
+        return block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
+    }
+
+    void HaltExecution(HaltReason hr) {
+        Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
     }
 
     std::string Disassemble(const IR::LocationDescriptor& descriptor) {
@@ -117,7 +121,7 @@ struct Jit::Impl {
 
     void RequestCacheInvalidation() {
         if (jit_interface->is_executing) {
-            jit_state.halt_requested = true;
+            HaltExecution(HaltReason::CacheInvalidation);
             return;
         }
 
@@ -155,8 +159,8 @@ private:
             PerformCacheInvalidation();
         }
 
-        IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); }, {config.define_unpredictable_behaviour, config.hook_hint_instructions});
+        IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, config.callbacks, {config.arch_version, config.define_unpredictable_behaviour, config.hook_hint_instructions});
         if (config.enable_optimizations) {
             Optimization::A32GetSetElimination(ir_block);
             Optimization::DeadCodeElimination(ir_block);
             Optimization::A32ConstantMemoryReads(ir_block, config.callbacks);
@@ -173,28 +177,28 @@ Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, std::move(config
 
 Jit::~Jit() = default;
 
-void Jit::Run() {
+HaltReason Jit::Run() {
     ASSERT(!is_executing);
     is_executing = true;
     SCOPE_EXIT { this->is_executing = false; };
 
-    impl->jit_state.halt_requested = false;
-
-    impl->Execute();
+    const HaltReason hr = impl->Execute();
 
     impl->PerformCacheInvalidation();
+
+    return hr;
 }
 
-void Jit::Step() {
+HaltReason Jit::Step() {
     ASSERT(!is_executing);
     is_executing = true;
     SCOPE_EXIT { this->is_executing = false; };
 
-    impl->jit_state.halt_requested = true;
-
-    impl->Step();
+    const HaltReason hr = impl->Step();
 
     impl->PerformCacheInvalidation();
+
+    return hr;
 }
 
 void Jit::ClearCache() {
@@ -212,8 +216,8 @@ void Jit::Reset() {
     impl->jit_state = {};
 }
 
-void Jit::HaltExecution() {
-    impl->jit_state.halt_requested = true;
+void Jit::HaltExecution(HaltReason hr) {
+    impl->HaltExecution(hr);
 }
 
 std::array<u32, 16>& Jit::Regs() {
diff --git a/src/dynarmic/backend/A64/a32_jitstate.h b/src/dynarmic/backend/A64/a32_jitstate.h
index d2c8980f..953594c6 100644
--- a/src/dynarmic/backend/A64/a32_jitstate.h
+++ b/src/dynarmic/backend/A64/a32_jitstate.h
@@ -48,8 +48,8 @@ struct A32JitState {
     u64 guest_fpsr = 0;
     u64 save_host_FPCR = 0;
     s64 cycles_to_run = 0;
-    s64 cycles_remaining = 0;
-    bool halt_requested = false;
+    u64 cycles_remaining = 0;
+    volatile u32 halt_reason = 0;
     bool check_bit = false;
 
     // Exclusive state
diff --git a/src/dynarmic/backend/A64/block_of_code.cpp b/src/dynarmic/backend/A64/block_of_code.cpp
index f65ac5cf..6c7ae039 100644
--- a/src/dynarmic/backend/A64/block_of_code.cpp
+++ b/src/dynarmic/backend/A64/block_of_code.cpp
@@ -131,12 +131,12 @@ size_t BlockOfCode::SpaceRemaining() const {
     return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
 }
 
-void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
-    run_code(jit_state, code_ptr);
+HaltReason BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
+    return run_code(jit_state, code_ptr);
 }
 
-void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
-    step_code(jit_state, code_ptr);
+HaltReason BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
+    return step_code(jit_state, code_ptr);
 }
 
 void BlockOfCode::ReturnFromRunCode(bool fpscr_already_exited) {
@@ -155,6 +155,7 @@ void BlockOfCode::ForceReturnFromRunCode(bool fpscr_already_exited) {
 
 void BlockOfCode::GenRunCode() {
     const u8* loop, *enter_fpscr_then_loop;
+    std::vector<Arm64Gen::FixupBranch> return_to_caller_fpscr_already_exited;
 
     AlignCode16();
     run_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
@@ -173,6 +174,9 @@
     STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
     MOV(Arm64Gen::X26, ABI_RETURN);
 
+    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_halt_reason);
+    return_to_caller_fpscr_already_exited.push_back(CBNZ(ABI_SCRATCH1));
+
     SwitchFpscrOnEntry();
     BR(Arm64Gen::X25);
 
@@ -181,9 +185,14 @@
     ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
 
     MOV(Arm64Gen::X28, ABI_PARAM1);
-
+
     MOVI2R(Arm64Gen::X26, 1);
-    STR(Arm64Gen::INDEX_UNSIGNED, Arm64Gen::X26, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
+    STR(Arm64Gen::INDEX_UNSIGNED, Arm64Gen::X26, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
+
+    LDR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(ABI_SCRATCH1), Arm64Gen::X28, jsi.offsetof_halt_reason);
+    return_to_caller_fpscr_already_exited.push_back(CBNZ(ABI_SCRATCH1));
+    ORRI2R(ABI_SCRATCH1, ABI_SCRATCH1, static_cast<u32>(HaltReason::Step));
+    STR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(ABI_SCRATCH1), Arm64Gen::X28, jsi.offsetof_halt_reason);
 
     SwitchFpscrOnEntry();
     BR(ABI_PARAM2);
@@ -192,24 +201,31 @@
     SwitchFpscrOnEntry();
     loop = GetCodePtr();
     cb.LookupBlock->EmitCall(*this);
-    BR(ABI_RETURN);
+    BR(ABI_RETURN);
 
     // Return from run code variants
-    const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){
+    const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return) {
+        Arm64Gen::FixupBranch return_to_caller;
         if (!force_return) {
+            LDR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(ABI_SCRATCH1), Arm64Gen::X28, jsi.offsetof_halt_reason);
+            return_to_caller = CBNZ(ABI_SCRATCH1);
             CMP(Arm64Gen::X26, Arm64Gen::ZR);
             B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop);
+            SetJumpTarget(return_to_caller);
         }
 
         if (!fpscr_already_exited) {
             SwitchFpscrOnExit();
         }
-
         cb.AddTicks->EmitCall(*this, [this](RegList param) {
             LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
             SUB(param[0], param[0], Arm64Gen::X26);
         });
 
+        LDR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(ABI_RETURN), Arm64Gen::X28, jsi.offsetof_halt_reason);
+        // TODO: lock a mutex
+        STR(Arm64Gen::INDEX_UNSIGNED, Arm64Gen::WZR, Arm64Gen::X28, jsi.offsetof_halt_reason);
+
         ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
         RET();
     };
@@ -224,6 +240,8 @@
     emit_return_from_run_code(false, true);
 
     return_from_run_code[FPSCR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16();
+    for (const auto& jump_target : return_to_caller_fpscr_already_exited)
+        SetJumpTarget(jump_target);
     emit_return_from_run_code(true, true);
 
     PerfMapRegister(run_code, GetCodePtr(), "dynarmic_dispatcher");
diff --git a/src/dynarmic/backend/A64/block_of_code.h b/src/dynarmic/backend/A64/block_of_code.h
index e6417339..af6b7935 100644
--- a/src/dynarmic/backend/A64/block_of_code.h
+++ b/src/dynarmic/backend/A64/block_of_code.h
@@ -49,9 +49,9 @@ public:
     size_t SpaceRemaining() const;
 
     /// Runs emulated code from code_ptr.
-    void RunCode(void* jit_state, CodePtr code_ptr) const;
+    HaltReason RunCode(void* jit_state, CodePtr code_ptr) const;
     /// Runs emulated code from code_ptr for a single cycle.
-    void StepCode(void* jit_state, CodePtr code_ptr) const;
+    HaltReason StepCode(void* jit_state, CodePtr code_ptr) const;
     /// Code emitter: Returns to dispatcher
     void ReturnFromRunCode(bool fpscr_already_exited = false);
     /// Code emitter: Returns to dispatcher, forces return to host
@@ -135,7 +135,7 @@ private:
     CodePtr near_code_ptr;
     CodePtr far_code_ptr;
 
-    using RunCodeFuncType = void(*)(void*, CodePtr);
+    using RunCodeFuncType = HaltReason (*)(void*, CodePtr);
     RunCodeFuncType run_code = nullptr;
     RunCodeFuncType step_code = nullptr;
     static constexpr size_t FPSCR_ALREADY_EXITED = 1 << 0;
diff --git a/src/dynarmic/backend/A64/emit_a64.cpp b/src/dynarmic/backend/A64/emit_a64.cpp
index dab04b59..9ff9138e 100644
--- a/src/dynarmic/backend/A64/emit_a64.cpp
+++ b/src/dynarmic/backend/A64/emit_a64.cpp
@@ -242,6 +242,12 @@ void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
         code.FlushIcache();
     }
 
+    for (CodePtr location : patch_info.jz) {
+        code.SetCodePtr(location);
+        EmitPatchJz(desc, bb);
+        code.FlushIcache();
+    }
+
     for (CodePtr location : patch_info.jmp) {
         code.SetCodePtr(location);
         EmitPatchJmp(desc, bb);
diff --git a/src/dynarmic/backend/A64/emit_a64.h b/src/dynarmic/backend/A64/emit_a64.h
index 29c864d0..854fd543 100644
--- a/src/dynarmic/backend/A64/emit_a64.h
+++ b/src/dynarmic/backend/A64/emit_a64.h
@@ -107,12 +107,14 @@ protected:
     // Patching
     struct PatchInformation {
         std::vector<CodePtr> jg;
+        std::vector<CodePtr> jz;
         std::vector<CodePtr> jmp;
        std::vector<CodePtr> mov_x0;
     };
     void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
     virtual void Unpatch(const IR::LocationDescriptor& target_desc);
     virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
+    virtual void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
     virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
     virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0;
 
diff --git a/src/dynarmic/backend/A64/jitstate_info.h b/src/dynarmic/backend/A64/jitstate_info.h
index 63336d79..1df3a5bb 100644
--- a/src/dynarmic/backend/A64/jitstate_info.h
+++ b/src/dynarmic/backend/A64/jitstate_info.h
@@ -25,6 +25,7 @@ struct JitStateInfo {
         , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
         , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
         , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
+        , offsetof_halt_reason(offsetof(JitStateType, halt_reason))
     {}
 
     const size_t offsetof_cycles_remaining;
@@ -39,6 +40,7 @@ struct JitStateInfo {
     const size_t offsetof_cpsr_nzcv;
     const size_t offsetof_fpsr_exc;
     const size_t offsetof_fpsr_qc;
+    const size_t offsetof_halt_reason;
 };
 
 } // namespace Dynarmic::BackendA64
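
Caller-side note (not part of the patch): after these changes Run() and Step() report why emulation stopped instead of returning void, and HaltExecution() takes the reason to raise. The sketch below is illustrative only; it assumes the upstream-style public HaltReason enum and its Has() helper are available in this fork, and the jit/user_config objects are placeholders from the embedding project.

    // Hypothetical frontend snippet. Only HaltReason::Step and
    // HaltReason::CacheInvalidation are actually referenced by this patch.
    Dynarmic::A32::Jit jit{user_config};

    const Dynarmic::HaltReason hr = jit.Step();  // previously returned void
    if (Dynarmic::Has(hr, Dynarmic::HaltReason::Step)) {
        // Stopped at the single-step boundary set by the step_code stub in GenRunCode.
    }

    // Any other thread can now interrupt a running Jit with a specific reason:
    //     jit.HaltExecution(Dynarmic::HaltReason::CacheInvalidation);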