From 5b23e5b52e70f9a271a782eceb2ce0853cd07de9 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 27 Nov 2017 20:29:19 +0000 Subject: [PATCH 1/9] emit_x64: Make RSB a stack --- src/backend_x64/emit_x64.cpp | 68 ++++++++++++++++++++---------------- src/backend_x64/emit_x64.h | 1 + src/backend_x64/jitstate.h | 1 + 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 703836a4..6d63ee76 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -483,41 +483,39 @@ void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], value); } -void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { +void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, u64 target_hash) { using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); - ASSERT(args[0].IsImmediate()); - u64 unique_hash_of_target = args[0].GetImmediateU64(); - - auto iter = block_descriptors.find(unique_hash_of_target); + auto iter = block_descriptors.find(target_hash); CodePtr target_code_ptr = iter != block_descriptors.end() ? 
iter->second.entrypoint : code->GetReturnFromRunCodeAddress(); - Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); + code->mov(index_reg.cvt32(), dword[r15 + offsetof(JitState, rsb_ptr)]); - code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]); - code->add(index_reg, 1); - code->and_(index_reg, u32(JitState::RSBSize - 1)); + code->mov(loc_desc_reg, target_hash); - code->mov(loc_desc_reg, unique_hash_of_target); - - patch_information[unique_hash_of_target].mov_rcx.emplace_back(code->getCurr()); + patch_information[target_hash].mov_rcx.emplace_back(code->getCurr()); EmitPatchMovRcx(target_code_ptr); - Xbyak::Label label; - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->je(label, code->T_SHORT); - } + code->mov(qword[r15 + index_reg * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + code->mov(qword[r15 + index_reg * 8 + offsetof(JitState, rsb_codeptrs)], rcx); - code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); - code->L(label); + code->add(index_reg.cvt32(), 1); + code->and_(index_reg.cvt32(), u32(JitState::RSBPtrMask)); + code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg.cvt32()); +} + +void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); + ASSERT(args[0].IsImmediate()); + u64 unique_hash_of_target = args[0].GetImmediateU64(); + + reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); + Xbyak::Reg64 index_reg = reg_alloc.ScratchGpr(); + + 
PushRSBHelper(loc_desc_reg, index_reg, unique_hash_of_target); } void EmitX64::EmitGetCarryFromOp(RegAlloc&, IR::Block&, IR::Inst*) { @@ -3412,9 +3410,16 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor } else { EmitPatchJg(terminal.next); } + Xbyak::Label dest; + code->jmp(dest, Xbyak::CodeGenerator::T_NEAR); + code->SwitchToFarCode(); + code->align(16); + code->L(dest); code->mov(MJitStateReg(Arm::Reg::PC), terminal.next.PC()); - code->ForceReturnFromRunCode(); // TODO: Check cycles, Properly do a link + PushRSBHelper(rax, rbx, terminal.next.UniqueHash()); + code->ForceReturnFromRunCode(); + code->SwitchToNearCode(); } void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) { @@ -3455,12 +3460,13 @@ void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) { code->shl(rbx, 32); code->or_(rbx, rcx); - code->mov(rax, reinterpret_cast(code->GetReturnFromRunCodeAddress())); - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); - } - + code->mov(eax, dword[r15 + offsetof(JitState, rsb_ptr)]); + code->sub(eax, 1); + code->and_(eax, u32(JitState::RSBPtrMask)); + code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], eax); + code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + rax * sizeof(u64)]); + code->jne(code->GetReturnFromRunCodeAddress()); + code->mov(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + rax * sizeof(u64)]); code->jmp(rax); } diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h index 1c847aab..6d7d4865 100644 --- a/src/backend_x64/emit_x64.h +++ b/src/backend_x64/emit_x64.h @@ -73,6 +73,7 @@ private: // Helpers void EmitAddCycles(size_t cycles); void EmitCondPrelude(const IR::Block& block); + void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, 
Xbyak::Reg64 index_reg, u64 target_hash); // Terminal instruction emitters void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location); diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index a809e794..b765d8d1 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -46,6 +46,7 @@ struct JitState { u32 exclusive_address = 0; static constexpr size_t RSBSize = 8; // MUST be a power of 2. + static constexpr size_t RSBPtrMask = RSBSize - 1; u32 rsb_ptr = 0; std::array rsb_location_descriptors; std::array rsb_codeptrs; From e1daadff8185ad64ee9adfcdc269d6c22c5fd2c3 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 2 Dec 2017 13:55:04 +0000 Subject: [PATCH 2/9] jit_state: Hide cpsr implementation --- include/dynarmic/dynarmic.h | 4 +-- src/backend_x64/emit_x64.cpp | 29 +++++++++++++------- src/backend_x64/interface_x64.cpp | 12 ++++----- src/backend_x64/jitstate.cpp | 38 +++++++++++++++++++++++++++ src/backend_x64/jitstate.h | 5 +++- tests/arm/fuzz_arm.cpp | 16 +++++------ tests/arm/fuzz_thumb.cpp | 4 +-- tests/arm/test_thumb_instructions.cpp | 16 +++++------ 8 files changed, 87 insertions(+), 37 deletions(-) diff --git a/include/dynarmic/dynarmic.h b/include/dynarmic/dynarmic.h index bfe2ecf7..d5b82886 100644 --- a/include/dynarmic/dynarmic.h +++ b/include/dynarmic/dynarmic.h @@ -63,12 +63,12 @@ public: const std::array& ExtRegs() const; /// View and modify CPSR. - std::uint32_t& Cpsr(); std::uint32_t Cpsr() const; + void SetCpsr(std::uint32_t value); /// View and modify FPSCR. std::uint32_t Fpscr() const; - void SetFpscr(std::uint32_t value) const; + void SetFpscr(std::uint32_t value); /** * Returns true if Jit::Run was called but hasn't returned yet. 
diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 6d63ee76..11d9b1a6 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -62,7 +62,7 @@ static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) { static Xbyak::Address MJitStateCpsr() { using namespace Xbyak::util; - return dword[r15 + offsetof(JitState, Cpsr)]; + return dword[r15 + offsetof(JitState, CPSR)]; } static void EraseInstruction(IR::Block& block, IR::Inst* inst) { @@ -196,16 +196,25 @@ void EmitX64::EmitSetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Ins code->movsd(MJitStateExtReg(reg), source); } +static u32 GetCpsrImpl(JitState* jit_state) { + return jit_state->Cpsr(); +} + void EmitX64::EmitGetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr()); - reg_alloc.DefineValue(inst, result); + reg_alloc.HostCall(inst); + code->mov(code->ABI_PARAM1, code->r15); + code->CallFunction(&GetCpsrImpl); +} + +static void SetCpsrImpl(u32 value, JitState* jit_state) { + jit_state->SetCpsr(value); } void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 arg = reg_alloc.UseGpr(args[0]).cvt32(); - code->mov(MJitStateCpsr(), arg); + reg_alloc.HostCall(nullptr, args[0]); + code->mov(code->ABI_PARAM2, code->r15); + code->CallFunction(&SetCpsrImpl); } void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { @@ -404,9 +413,9 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } else { using Xbyak::util::ptr; - Xbyak::Reg64 new_pc = reg_alloc.UseScratchGpr(arg); - Xbyak::Reg64 tmp1 = reg_alloc.ScratchGpr(); - Xbyak::Reg64 tmp2 = reg_alloc.ScratchGpr(); + Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 tmp1 = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 tmp2 = reg_alloc.ScratchGpr().cvt32(); code->mov(tmp1, 
MJitStateCpsr()); code->mov(tmp2, tmp1); @@ -415,7 +424,7 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { code->test(new_pc, u32(1)); code->cmove(tmp1, tmp2); // CPSR.T = pc & 1 code->mov(MJitStateCpsr(), tmp1); - code->lea(tmp2, ptr[new_pc + new_pc * 1]); + code->lea(tmp2, ptr[new_pc.cvt64() + new_pc.cvt64() * 1]); code->or_(tmp2, u32(0xFFFFFFFC)); // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC code->and_(new_pc, tmp2); code->mov(MJitStateReg(Arm::Reg::PC), new_pc); diff --git a/src/backend_x64/interface_x64.cpp b/src/backend_x64/interface_x64.cpp index 67d8e33e..ad991a36 100644 --- a/src/backend_x64/interface_x64.cpp +++ b/src/backend_x64/interface_x64.cpp @@ -127,7 +127,7 @@ private: JitState& jit_state = this_.jit_state; u32 pc = jit_state.Reg[15]; - Arm::PSR cpsr{jit_state.Cpsr}; + Arm::PSR cpsr{jit_state.Cpsr()}; Arm::FPSCR fpscr{jit_state.FPSCR_mode}; IR::LocationDescriptor descriptor{pc, cpsr, fpscr}; @@ -205,19 +205,19 @@ const std::array& Jit::ExtRegs() const { return impl->jit_state.ExtReg; } -u32& Jit::Cpsr() { - return impl->jit_state.Cpsr; +u32 Jit::Cpsr() const { + return impl->jit_state.Cpsr(); } -u32 Jit::Cpsr() const { - return impl->jit_state.Cpsr; +void Jit::SetCpsr(u32 value) { + return impl->jit_state.SetCpsr(value); } u32 Jit::Fpscr() const { return impl->jit_state.Fpscr(); } -void Jit::SetFpscr(u32 value) const { +void Jit::SetFpscr(u32 value) { return impl->jit_state.SetFpscr(value); } diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp index 1d01c4a2..379eafb5 100644 --- a/src/backend_x64/jitstate.cpp +++ b/src/backend_x64/jitstate.cpp @@ -14,6 +14,44 @@ namespace Dynarmic { namespace BackendX64 { +/** + * CPSR Bits + * ========= + * + * ARM CPSR flags + * -------------- + * N bit 31 Negative flag + * Z bit 30 Zero flag + * C bit 29 Carry flag + * V bit 28 oVerflow flag + * Q bit 27 Saturation flag + * J bit 24 Jazelle instruction set flag + * GE bits 16-19 Greater than or Equal flags + * 
E bit 9 Data Endianness flag + * A bit 8 Disable imprecise Aborts + * I bit 7 Disable IRQ interrupts + * F bit 6 Disable FIQ interrupts + * T bit 5 Thumb instruction set flag + * M bits 0-4 Processor Mode bits + * + * x64 LAHF+SETO flags + * ------------------- + * SF bit 15 Sign flag + * ZF bit 14 Zero flag + * AF bit 12 Auxiliary flag + * PF bit 10 Parity flag + * CF bit 8 Carry flag + * OF bit 0 Overflow flag + */ + +u32 JitState::Cpsr() const { + return CPSR; +} + +void JitState::SetCpsr(u32 cpsr) { + CPSR = cpsr; +} + void JitState::ResetRSB() { rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull); rsb_codeptrs.fill(0); diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index b765d8d1..44ffd356 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -25,10 +25,13 @@ constexpr size_t SpillCount = 64; struct JitState { JitState() { ResetRSB(); } - u32 Cpsr = 0; std::array Reg{}; // Current register file. // TODO: Mode-specific register sets unimplemented. + u32 CPSR = 0; + u32 Cpsr() const; + void SetCpsr(u32 cpsr); + alignas(u64) std::array ExtReg{}; // Extension registers. std::array Spill{}; // Spill. 
diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp index c8eefece..2458d947 100644 --- a/tests/arm/fuzz_arm.cpp +++ b/tests/arm/fuzz_arm.cpp @@ -115,7 +115,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit, void*) { jit->Regs() = interp_state.Reg; jit->ExtRegs() = interp_state.ExtReg; - jit->Cpsr() = interp_state.Cpsr; + jit->SetCpsr(interp_state.Cpsr); jit->SetFpscr(interp_state.VFP[VFP_FPSCR]); } @@ -233,7 +233,7 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe interp.ExtReg = initial_extregs; interp.VFP[VFP_FPSCR] = initial_fpscr; jit.Reset(); - jit.Cpsr() = initial_cpsr; + jit.SetCpsr(initial_cpsr); jit.Regs() = initial_regs; jit.ExtRegs() = initial_extregs; jit.SetFpscr(initial_fpscr); @@ -369,7 +369,7 @@ TEST_CASE( "arm: Optimization Failure (Randomized test case)", "[arm]" ) { 0x6973b6bb, 0x267ea626, 0x69debf49, 0x8f976895, 0x4ecd2d0d, 0xcf89b8c7, 0xb6713f85, 0x15e2aa5, 0xcd14336a, 0xafca0f3e, 0xace2efd9, 0x68fb82cd, 0x775447c0, 0xc9e1f8cd, 0xebe0e626, 0x0 }; - jit.Cpsr() = 0x000001d0; // User-mode + jit.SetCpsr(0x000001d0); // User-mode jit.Run(6); @@ -407,7 +407,7 @@ TEST_CASE( "arm: shsax r11, sp, r9 (Edge-case)", "[arm]" ) { 0x3a3b8b18, 0x96156555, 0xffef039f, 0xafb946f2, 0x2030a69a, 0xafe09b2a, 0x896823c8, 0xabde0ded, 0x9825d6a6, 0x17498000, 0x999d2c95, 0x8b812a59, 0x209bdb58, 0x2f7fb1d4, 0x0f378107, 0x00000000 }; - jit.Cpsr() = 0x000001d0; // User-mode + jit.SetCpsr(0x000001d0); // User-mode jit.Run(2); @@ -443,7 +443,7 @@ TEST_CASE( "arm: uasx (Edge-case)", "[arm]" ) { jit.Regs()[4] = 0x8ed38f4c; jit.Regs()[5] = 0x0000261d; jit.Regs()[15] = 0x00000000; - jit.Cpsr() = 0x000001d0; // User-mode + jit.SetCpsr(0x000001d0); // User-mode jit.Run(2); @@ -472,7 +472,7 @@ static void RunVfpTests(u32 instr, std::vector tests) { for (const auto& test : tests) { jit.Regs()[15] = 0; - jit.Cpsr() = 0x000001d0; + jit.SetCpsr(0x000001d0); jit.ExtRegs()[4] = test.a; jit.ExtRegs()[6] = test.b; 
jit.SetFpscr(test.initial_fpscr); @@ -1106,7 +1106,7 @@ TEST_CASE( "SMUAD", "[JitX64]" ) { 0, 0, 0, 0, 0, 0, 0, 0, }; - jit.Cpsr() = 0x000001d0; // User-mode + jit.SetCpsr(0x000001d0); // User-mode jit.Run(6); @@ -1225,7 +1225,7 @@ TEST_CASE("arm: Test InvalidateCacheRange", "[arm]") { code_mem[3] = 0xeafffffe; // b +#0 (infinite loop) jit.Regs() = {}; - jit.Cpsr() = 0x000001d0; // User-mode + jit.SetCpsr(0x000001d0); // User-mode jit.Run(4); diff --git a/tests/arm/fuzz_thumb.cpp b/tests/arm/fuzz_thumb.cpp index 1fec6b2c..e4305c7b 100644 --- a/tests/arm/fuzz_thumb.cpp +++ b/tests/arm/fuzz_thumb.cpp @@ -107,7 +107,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit, void*) { interp_state.Reg[15] &= T ? 0xFFFFFFFE : 0xFFFFFFFC; jit->Regs() = interp_state.Reg; - jit->Cpsr() = interp_state.Cpsr; + jit->SetCpsr(interp_state.Cpsr); } static void Fail() { @@ -204,7 +204,7 @@ void FuzzJitThumb(const size_t instruction_count, const size_t instructions_to_e interp.Cpsr = 0x000001F0; interp.Reg = initial_regs; - jit.Cpsr() = 0x000001F0; + jit.SetCpsr(0x000001F0); jit.Regs() = initial_regs; std::generate_n(code_mem.begin(), instruction_count, instruction_generator); diff --git a/tests/arm/test_thumb_instructions.cpp b/tests/arm/test_thumb_instructions.cpp index 64375b42..ec4a94d3 100644 --- a/tests/arm/test_thumb_instructions.cpp +++ b/tests/arm/test_thumb_instructions.cpp @@ -43,7 +43,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit, void*) { InterpreterMainLoop(&interp_state); jit->Regs() = interp_state.Reg; - jit->Cpsr() = interp_state.Cpsr; + jit->SetCpsr(interp_state.Cpsr); } static void AddTicks(u64) {} @@ -66,7 +66,7 @@ TEST_CASE( "thumb: lsls r0, r1, #2", "[thumb]" ) { jit.Regs()[0] = 1; jit.Regs()[1] = 2; jit.Regs()[15] = 0; // PC = 0 - jit.Cpsr() = 0x00000030; // Thumb, User-mode + jit.SetCpsr(0x00000030); // Thumb, User-mode jit.Run(1); @@ -85,7 +85,7 @@ TEST_CASE( "thumb: lsls r0, r1, #31", "[thumb]" ) { jit.Regs()[0] = 1; jit.Regs()[1] = 
0xFFFFFFFF; jit.Regs()[15] = 0; // PC = 0 - jit.Cpsr() = 0x00000030; // Thumb, User-mode + jit.SetCpsr(0x00000030); // Thumb, User-mode jit.Run(1); @@ -103,7 +103,7 @@ TEST_CASE( "thumb: revsh r4, r3", "[thumb]" ) { jit.Regs()[3] = 0x12345678; jit.Regs()[15] = 0; // PC = 0 - jit.Cpsr() = 0x00000030; // Thumb, User-mode + jit.SetCpsr(0x00000030); // Thumb, User-mode jit.Run(1); @@ -121,7 +121,7 @@ TEST_CASE( "thumb: ldr r3, [r3, #28]", "[thumb]" ) { jit.Regs()[3] = 0x12345678; jit.Regs()[15] = 0; // PC = 0 - jit.Cpsr() = 0x00000030; // Thumb, User-mode + jit.SetCpsr(0x00000030); // Thumb, User-mode jit.Run(1); @@ -137,7 +137,7 @@ TEST_CASE( "thumb: blx +#67712", "[thumb]" ) { code_mem[2] = 0xE7FE; // b +#0 jit.Regs()[15] = 0; // PC = 0 - jit.Cpsr() = 0x00000030; // Thumb, User-mode + jit.SetCpsr(0x00000030); // Thumb, User-mode jit.Run(1); @@ -153,7 +153,7 @@ TEST_CASE( "thumb: bl +#234584", "[thumb]" ) { code_mem[2] = 0xE7FE; // b +#0 jit.Regs()[15] = 0; // PC = 0 - jit.Cpsr() = 0x00000030; // Thumb, User-mode + jit.SetCpsr(0x00000030); // Thumb, User-mode jit.Run(1); @@ -169,7 +169,7 @@ TEST_CASE( "thumb: bl -#42", "[thumb]" ) { code_mem[2] = 0xE7FE; // b +#0 jit.Regs()[15] = 0; // PC = 0 - jit.Cpsr() = 0x00000030; // Thumb, User-mode + jit.SetCpsr(0x00000030); // Thumb, User-mode jit.Run(1); From 809ca5fcc202bfbfcd086380a647cd9e16fd5a19 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 2 Dec 2017 14:03:54 +0000 Subject: [PATCH 3/9] jit_state: Split off CPSR.GE --- src/backend_x64/emit_x64.cpp | 118 ++++++++++++++--------------------- src/backend_x64/jitstate.cpp | 21 ++++++- src/backend_x64/jitstate.h | 3 +- 3 files changed, 69 insertions(+), 73 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 11d9b1a6..f07735dc 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -60,9 +60,9 @@ static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) { ASSERT_MSG(false, "Should never happen."); } -static 
Xbyak::Address MJitStateCpsr() { +static Xbyak::Address MJitStateCpsr_other() { using namespace Xbyak::util; - return dword[r15 + offsetof(JitState, CPSR)]; + return dword[r15 + offsetof(JitState, CPSR_other)]; } static void EraseInstruction(IR::Block& block, IR::Inst* inst) { @@ -219,7 +219,7 @@ void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr()); + code->mov(result, MJitStateCpsr_other()); code->shr(result, 31); reg_alloc.DefineValue(inst, result); } @@ -230,22 +230,22 @@ void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr(), flag_mask); + code->or_(MJitStateCpsr_other(), flag_mask); } else { - code->and_(MJitStateCpsr(), ~flag_mask); + code->and_(MJitStateCpsr_other(), ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr(), ~flag_mask); - code->or_(MJitStateCpsr(), to_store); + code->and_(MJitStateCpsr_other(), ~flag_mask); + code->or_(MJitStateCpsr_other(), to_store); } } void EmitX64::EmitGetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr()); + code->mov(result, MJitStateCpsr_other()); code->shr(result, 30); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -257,22 +257,22 @@ void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr(), flag_mask); + code->or_(MJitStateCpsr_other(), flag_mask); } else { - code->and_(MJitStateCpsr(), ~flag_mask); + 
code->and_(MJitStateCpsr_other(), ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr(), ~flag_mask); - code->or_(MJitStateCpsr(), to_store); + code->and_(MJitStateCpsr_other(), ~flag_mask); + code->or_(MJitStateCpsr_other(), to_store); } } void EmitX64::EmitGetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr()); + code->mov(result, MJitStateCpsr_other()); code->shr(result, 29); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -284,22 +284,22 @@ void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr(), flag_mask); + code->or_(MJitStateCpsr_other(), flag_mask); } else { - code->and_(MJitStateCpsr(), ~flag_mask); + code->and_(MJitStateCpsr_other(), ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr(), ~flag_mask); - code->or_(MJitStateCpsr(), to_store); + code->and_(MJitStateCpsr_other(), ~flag_mask); + code->or_(MJitStateCpsr_other(), to_store); } } void EmitX64::EmitGetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr()); + code->mov(result, MJitStateCpsr_other()); code->shr(result, 28); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -311,16 +311,16 @@ void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr(), flag_mask); + code->or_(MJitStateCpsr_other(), flag_mask); } else { - code->and_(MJitStateCpsr(), ~flag_mask); + code->and_(MJitStateCpsr_other(), 
~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr(), ~flag_mask); - code->or_(MJitStateCpsr(), to_store); + code->and_(MJitStateCpsr_other(), ~flag_mask); + code->or_(MJitStateCpsr_other(), to_store); } } @@ -330,58 +330,36 @@ void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) - code->or_(MJitStateCpsr(), flag_mask); + code->or_(MJitStateCpsr_other(), flag_mask); } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->or_(MJitStateCpsr(), to_store); + code->or_(MJitStateCpsr_other(), to_store); } } void EmitX64::EmitGetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 tmp; - - if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { - tmp = reg_alloc.ScratchGpr().cvt32(); - code->mov(tmp, 0x01010101); - } - code->mov(result, MJitStateCpsr()); - code->shr(result, 16); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { - code->pdep(result, result, tmp); - } else { - code->and_(result, 0xF); - code->imul(result, result, 0x00204081); - code->and_(result, 0x01010101); - } - code->imul(result, result, 0xFF); + using namespace Xbyak::util; + Xbyak::Xmm result = reg_alloc.ScratchXmm(); + code->movd(result, dword[r15 + offsetof(JitState, CPSR_ge)]); reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - constexpr size_t flag_bit = 16; - constexpr u32 flag_mask = 0xFu << flag_bit; + using namespace Xbyak::util; + auto args = reg_alloc.GetArgumentInfo(inst); ASSERT(!args[0].IsImmediate()); - Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); - - if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { - Xbyak::Reg32 tmp = 
reg_alloc.ScratchGpr().cvt32(); - code->mov(tmp, 0x80808080); - code->pext(to_store, to_store, tmp); + if (args[0].IsInXmm()) { + Xbyak::Xmm to_store = reg_alloc.UseXmm(args[0]); + code->movd(dword[r15 + offsetof(JitState, CPSR_ge)], to_store); } else { - code->and_(to_store, 0x80808080); - code->imul(to_store, to_store, 0x00204081); - code->shr(to_store, 28); + Xbyak::Reg32 to_store = reg_alloc.UseGpr(args[0]).cvt32(); + code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], to_store); } - - code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr(), ~flag_mask); - code->or_(MJitStateCpsr(), to_store); } void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { @@ -404,11 +382,11 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { if (Common::Bit<0>(new_pc)) { new_pc &= 0xFFFFFFFE; code->mov(MJitStateReg(Arm::Reg::PC), new_pc); - code->or_(MJitStateCpsr(), T_bit); + code->or_(MJitStateCpsr_other(), T_bit); } else { new_pc &= 0xFFFFFFFC; code->mov(MJitStateReg(Arm::Reg::PC), new_pc); - code->and_(MJitStateCpsr(), ~T_bit); + code->and_(MJitStateCpsr_other(), ~T_bit); } } else { using Xbyak::util::ptr; @@ -417,13 +395,13 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 tmp1 = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 tmp2 = reg_alloc.ScratchGpr().cvt32(); - code->mov(tmp1, MJitStateCpsr()); + code->mov(tmp1, MJitStateCpsr_other()); code->mov(tmp2, tmp1); code->and_(tmp2, u32(~T_bit)); // CPSR.T = 0 code->or_(tmp1, u32(T_bit)); // CPSR.T = 1 code->test(new_pc, u32(1)); code->cmove(tmp1, tmp2); // CPSR.T = pc & 1 - code->mov(MJitStateCpsr(), tmp1); + code->mov(MJitStateCpsr_other(), tmp1); code->lea(tmp2, ptr[new_pc.cvt64() + new_pc.cvt64() * 1]); code->or_(tmp2, u32(0xFFFFFFFC)); // tmp2 = pc & 1 ? 
0xFFFFFFFE : 0xFFFFFFFC code->and_(new_pc, tmp2); @@ -3248,7 +3226,7 @@ static Xbyak::Label EmitCond(BlockOfCode* code, Arm::Cond cond) { Xbyak::Label label; const Xbyak::Reg32 cpsr = eax; - code->mov(cpsr, MJitStateCpsr()); + code->mov(cpsr, MJitStateCpsr_other()); constexpr size_t n_shift = 31; constexpr size_t z_shift = 30; @@ -3398,16 +3376,16 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor if (terminal.next.TFlag() != initial_location.TFlag()) { if (terminal.next.TFlag()) { - code->or_(MJitStateCpsr(), u32(1 << 5)); + code->or_(MJitStateCpsr_other(), u32(1 << 5)); } else { - code->and_(MJitStateCpsr(), u32(~(1 << 5))); + code->and_(MJitStateCpsr_other(), u32(~(1 << 5))); } } if (terminal.next.EFlag() != initial_location.EFlag()) { if (terminal.next.EFlag()) { - code->or_(MJitStateCpsr(), u32(1 << 9)); + code->or_(MJitStateCpsr_other(), u32(1 << 9)); } else { - code->and_(MJitStateCpsr(), u32(~(1 << 9))); + code->and_(MJitStateCpsr_other(), u32(~(1 << 9))); } } @@ -3436,16 +3414,16 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescrip if (terminal.next.TFlag() != initial_location.TFlag()) { if (terminal.next.TFlag()) { - code->or_(MJitStateCpsr(), u32(1 << 5)); + code->or_(MJitStateCpsr_other(), u32(1 << 5)); } else { - code->and_(MJitStateCpsr(), u32(~(1 << 5))); + code->and_(MJitStateCpsr_other(), u32(~(1 << 5))); } } if (terminal.next.EFlag() != initial_location.EFlag()) { if (terminal.next.EFlag()) { - code->or_(MJitStateCpsr(), u32(1 << 9)); + code->or_(MJitStateCpsr_other(), u32(1 << 9)); } else { - code->and_(MJitStateCpsr(), u32(~(1 << 9))); + code->and_(MJitStateCpsr_other(), u32(~(1 << 9))); } } @@ -3461,7 +3439,7 @@ void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) { using namespace Xbyak::util; // This calculation has to match up with IREmitter::PushRSB - code->mov(ebx, MJitStateCpsr()); + code->mov(ebx, MJitStateCpsr_other()); code->mov(ecx, 
MJitStateReg(Arm::Reg::PC)); code->and_(ebx, u32((1 << 5) | (1 << 9))); code->shr(ebx, 2); diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp index 379eafb5..35649ffa 100644 --- a/src/backend_x64/jitstate.cpp +++ b/src/backend_x64/jitstate.cpp @@ -45,11 +45,28 @@ namespace BackendX64 { */ u32 JitState::Cpsr() const { - return CPSR; + u32 cpsr = 0; + + // GE flags + cpsr |= Common::Bit<31>(CPSR_ge) ? 1 << 19 : 0; + cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0; + cpsr |= Common::Bit<15>(CPSR_ge) ? 1 << 17 : 0; + cpsr |= Common::Bit<7>(CPSR_ge) ? 1 << 16 : 0; + // Other flags + cpsr |= CPSR_other; + + return cpsr; } void JitState::SetCpsr(u32 cpsr) { - CPSR = cpsr; + // GE flags + CPSR_ge = 0; + CPSR_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0; + CPSR_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0; + CPSR_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0; + CPSR_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0; + // Other flags + CPSR_other = cpsr & 0xFFF0FFFF; } void JitState::ResetRSB() { diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index 44ffd356..befa1d38 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -28,7 +28,8 @@ struct JitState { std::array Reg{}; // Current register file. // TODO: Mode-specific register sets unimplemented. - u32 CPSR = 0; + u32 CPSR_other = 0; + u32 CPSR_ge = 0; u32 Cpsr() const; void SetCpsr(u32 cpsr); From 2e6eda226cfc4061fb34c723cc8248e15f12030e Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 2 Dec 2017 15:24:10 +0000 Subject: [PATCH 4/9] jit_state: Split off CPSR.{E,T} This allows us to improve code-emission for PopRSBHint. We also improve code emission for other terminals at the same time. 
--- docs/ReturnStackBufferOptimization.md | 16 ++-- src/backend_x64/emit_x64.cpp | 105 ++++++++++++-------------- src/backend_x64/jitstate.cpp | 9 ++- src/backend_x64/jitstate.h | 1 + src/frontend/ir/location_descriptor.h | 8 +- 5 files changed, 67 insertions(+), 72 deletions(-) diff --git a/docs/ReturnStackBufferOptimization.md b/docs/ReturnStackBufferOptimization.md index caf2f423..e5298cad 100644 --- a/docs/ReturnStackBufferOptimization.md +++ b/docs/ReturnStackBufferOptimization.md @@ -26,10 +26,10 @@ computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block. u64 LocationDescriptor::UniqueHash() const { // This value MUST BE UNIQUE. // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc); - u64 fpscr_u64 = u64(fpscr.Value()) << 32; - u64 t_u64 = cpsr.T() ? (1ull << 35) : 0; - u64 e_u64 = cpsr.E() ? (1ull << 39) : 0; + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 1 : 0; + u64 e_u64 = cpsr.E() ? 2 : 0; return pc_u64 | fpscr_u64 | t_u64 | e_u64; } @@ -120,12 +120,10 @@ To check if a predicition is in the RSB, we linearly scan the RSB. 
using namespace Xbyak::util; // This calculation has to match up with IREmitter::PushRSB - code->mov(ebx, MJitStateCpsr()); code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->and_(ebx, u32((1 << 5) | (1 << 9))); - code->shr(ebx, 2); - code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]); - code->shl(rbx, 32); + code->shl(rcx, 32); + code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]); code->or_(rbx, rcx); code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index f07735dc..c991a739 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -362,12 +362,12 @@ void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } } -void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { +void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + using namespace Xbyak::util; + auto args = reg_alloc.GetArgumentInfo(inst); auto& arg = args[0]; - const u32 T_bit = 1 << 5; - // Pseudocode: // if (new_pc & 1) { // new_pc &= 0xFFFFFFFE; @@ -376,36 +376,41 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { // new_pc &= 0xFFFFFFFC; // cpsr.T = false; // } + // We rely on the fact we disallow EFlag from changing within a block. if (arg.IsImmediate()) { u32 new_pc = arg.GetImmediateU32(); - if (Common::Bit<0>(new_pc)) { - new_pc &= 0xFFFFFFFE; - code->mov(MJitStateReg(Arm::Reg::PC), new_pc); - code->or_(MJitStateCpsr_other(), T_bit); - } else { - new_pc &= 0xFFFFFFFC; - code->mov(MJitStateReg(Arm::Reg::PC), new_pc); - code->and_(MJitStateCpsr_other(), ~T_bit); - } + u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC; + u32 et = 0; + et |= block.Location().EFlag() ? 2 : 0; + et |= Common::Bit<0>(new_pc) ? 
1 : 0; + + code->mov(MJitStateReg(Arm::Reg::PC), new_pc & mask); + code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et); } else { - using Xbyak::util::ptr; + if (block.Location().EFlag()) { + Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 et = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32(); - Xbyak::Reg32 tmp1 = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 tmp2 = reg_alloc.ScratchGpr().cvt32(); + code->mov(mask, new_pc); + code->and_(mask, 1); + code->lea(et, ptr[mask.cvt64() + 2]); + code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et); + code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC + code->and_(new_pc, mask); + code->mov(MJitStateReg(Arm::Reg::PC), new_pc); + } else { + Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32(); + Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32(); - code->mov(tmp1, MJitStateCpsr_other()); - code->mov(tmp2, tmp1); - code->and_(tmp2, u32(~T_bit)); // CPSR.T = 0 - code->or_(tmp1, u32(T_bit)); // CPSR.T = 1 - code->test(new_pc, u32(1)); - code->cmove(tmp1, tmp2); // CPSR.T = pc & 1 - code->mov(MJitStateCpsr_other(), tmp1); - code->lea(tmp2, ptr[new_pc.cvt64() + new_pc.cvt64() * 1]); - code->or_(tmp2, u32(0xFFFFFFFC)); // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC - code->and_(new_pc, tmp2); - code->mov(MJitStateReg(Arm::Reg::PC), new_pc); + code->mov(mask, new_pc); + code->and_(mask, 1); + code->mov(dword[r15 + offsetof(JitState, CPSR_et)], mask); + code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 
0xFFFFFFFE : 0xFFFFFFFC + code->and_(new_pc, mask); + code->mov(MJitStateReg(Arm::Reg::PC), new_pc); + } } } @@ -3371,22 +3376,18 @@ void EmitX64::EmitTerminal(IR::Term::ReturnToDispatch, IR::LocationDescriptor) { code->ReturnFromRunCode(); } +static u32 CalculateCpsr_et(const IR::LocationDescriptor& desc) { + u32 et = 0; + et |= desc.EFlag() ? 2 : 0; + et |= desc.TFlag() ? 1 : 0; + return et; +} + void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) { using namespace Xbyak::util; - if (terminal.next.TFlag() != initial_location.TFlag()) { - if (terminal.next.TFlag()) { - code->or_(MJitStateCpsr_other(), u32(1 << 5)); - } else { - code->and_(MJitStateCpsr_other(), u32(~(1 << 5))); - } - } - if (terminal.next.EFlag() != initial_location.EFlag()) { - if (terminal.next.EFlag()) { - code->or_(MJitStateCpsr_other(), u32(1 << 9)); - } else { - code->and_(MJitStateCpsr_other(), u32(~(1 << 9))); - } + if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { + code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next)); } code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0); @@ -3412,19 +3413,8 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) { using namespace Xbyak::util; - if (terminal.next.TFlag() != initial_location.TFlag()) { - if (terminal.next.TFlag()) { - code->or_(MJitStateCpsr_other(), u32(1 << 5)); - } else { - code->and_(MJitStateCpsr_other(), u32(~(1 << 5))); - } - } - if (terminal.next.EFlag() != initial_location.EFlag()) { - if (terminal.next.EFlag()) { - code->or_(MJitStateCpsr_other(), u32(1 << 9)); - } else { - code->and_(MJitStateCpsr_other(), u32(~(1 << 9))); - } + if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { + code->mov(dword[r15 + offsetof(JitState, CPSR_et)], 
CalculateCpsr_et(terminal.next)); } patch_information[terminal.next.UniqueHash()].jmp.emplace_back(code->getCurr()); @@ -3439,12 +3429,11 @@ void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) { using namespace Xbyak::util; // This calculation has to match up with IREmitter::PushRSB - code->mov(ebx, MJitStateCpsr_other()); + // TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et. code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->and_(ebx, u32((1 << 5) | (1 << 9))); - code->shr(ebx, 2); - code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]); - code->shl(rbx, 32); + code->shl(rcx, 32); + code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]); code->or_(rbx, rcx); code->mov(eax, dword[r15 + offsetof(JitState, rsb_ptr)]); diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp index 35649ffa..715f77a8 100644 --- a/src/backend_x64/jitstate.cpp +++ b/src/backend_x64/jitstate.cpp @@ -52,6 +52,9 @@ u32 JitState::Cpsr() const { cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0; cpsr |= Common::Bit<15>(CPSR_ge) ? 1 << 17 : 0; cpsr |= Common::Bit<7>(CPSR_ge) ? 1 << 16 : 0; + // E flag, T flag + cpsr |= Common::Bit<1>(CPSR_et) ? 1 << 9 : 0; + cpsr |= Common::Bit<0>(CPSR_et) ? 1 << 5 : 0; // Other flags cpsr |= CPSR_other; @@ -65,8 +68,12 @@ void JitState::SetCpsr(u32 cpsr) { CPSR_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0; CPSR_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0; CPSR_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0; + // E flag, T flag + CPSR_et = 0; + CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0; + CPSR_et |= Common::Bit<5>(cpsr) ? 
1 : 0; // Other flags - CPSR_other = cpsr & 0xFFF0FFFF; + CPSR_other = cpsr & 0xFFF0FDDF; } void JitState::ResetRSB() { diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index befa1d38..911cdb2b 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -30,6 +30,7 @@ struct JitState { u32 CPSR_other = 0; u32 CPSR_ge = 0; + u32 CPSR_et = 0; u32 Cpsr() const; void SetCpsr(u32 cpsr); diff --git a/src/frontend/ir/location_descriptor.h b/src/frontend/ir/location_descriptor.h index 5eaf913f..cf4f3ff2 100644 --- a/src/frontend/ir/location_descriptor.h +++ b/src/frontend/ir/location_descriptor.h @@ -75,10 +75,10 @@ public: u64 UniqueHash() const { // This value MUST BE UNIQUE. // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc); - u64 fpscr_u64 = u64(fpscr.Value()) << 32; - u64 t_u64 = cpsr.T() ? (1ull << 35) : 0; - u64 e_u64 = cpsr.E() ? (1ull << 39) : 0; + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 1 : 0; + u64 e_u64 = cpsr.E() ? 
2 : 0; return pc_u64 | fpscr_u64 | t_u64 | e_u64; } From cfdc8d882f93c4e3537e9c5cc43a6075a585dfc1 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 2 Dec 2017 15:54:48 +0000 Subject: [PATCH 5/9] jit_state: Split off CPSR.Q --- src/backend_x64/emit_x64.cpp | 11 +++++------ src/backend_x64/jitstate.cpp | 6 +++++- src/backend_x64/jitstate.h | 1 + 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index c991a739..085bce5a 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -325,17 +325,16 @@ void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - constexpr size_t flag_bit = 27; - constexpr u32 flag_mask = 1u << flag_bit; + using namespace Xbyak::util; + auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) - code->or_(MJitStateCpsr_other(), flag_mask); + code->mov(dword[r15 + offsetof(JitState, CPSR_q)], 1); } else { - Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg8 to_store = reg_alloc.UseGpr(args[0]).cvt8(); - code->shl(to_store, flag_bit); - code->or_(MJitStateCpsr_other(), to_store); + code->or_(code->byte[r15 + offsetof(JitState, CPSR_q)], to_store); } } diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp index 715f77a8..9898a3d8 100644 --- a/src/backend_x64/jitstate.cpp +++ b/src/backend_x64/jitstate.cpp @@ -47,6 +47,8 @@ namespace BackendX64 { u32 JitState::Cpsr() const { u32 cpsr = 0; + // Q flag + cpsr |= CPSR_q ? 1 << 27 : 0; // GE flags cpsr |= Common::Bit<31>(CPSR_ge) ? 1 << 19 : 0; cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0; @@ -62,6 +64,8 @@ u32 JitState::Cpsr() const { } void JitState::SetCpsr(u32 cpsr) { + // Q flag + CPSR_q = Common::Bit<27>(cpsr) ? 1 : 0; // GE flags CPSR_ge = 0; CPSR_ge |= Common::Bit<19>(cpsr) ? 
0xFF000000 : 0; @@ -73,7 +77,7 @@ void JitState::SetCpsr(u32 cpsr) { CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0; CPSR_et |= Common::Bit<5>(cpsr) ? 1 : 0; // Other flags - CPSR_other = cpsr & 0xFFF0FDDF; + CPSR_other = cpsr & 0xF7F0FDDF; } void JitState::ResetRSB() { diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index 911cdb2b..3695b8e8 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -31,6 +31,7 @@ struct JitState { u32 CPSR_other = 0; u32 CPSR_ge = 0; u32 CPSR_et = 0; + u32 CPSR_q = 0; u32 Cpsr() const; void SetCpsr(u32 cpsr); From 976a098bf626aa8a6f441a7ed551e4277a77cebd Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 9 Dec 2017 15:42:47 +0000 Subject: [PATCH 6/9] jit_state: Split off CPSR.NZCV --- src/backend_x64/emit_x64.cpp | 151 ++++++++++-------- src/backend_x64/jitstate.cpp | 13 +- src/backend_x64/jitstate.h | 4 +- src/frontend/ir/ir_emitter.cpp | 12 ++ src/frontend/ir/ir_emitter.h | 3 + src/frontend/ir/microinstruction.cpp | 3 + src/frontend/ir/opcodes.inc | 3 + .../translate/translate_arm/coprocessor.cpp | 3 +- .../translate_arm/status_register_access.cpp | 31 ++-- src/frontend/translate/translate_arm/vfp2.cpp | 3 +- tests/arm/fuzz_arm.cpp | 34 +++- tests/arm/fuzz_thumb.cpp | 16 +- 12 files changed, 182 insertions(+), 94 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 085bce5a..dc111a23 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -29,6 +29,8 @@ namespace Dynarmic { namespace BackendX64 { +using namespace Xbyak::util; + constexpr u64 f32_negative_zero = 0x80000000u; constexpr u64 f32_nan = 0x7fc00000u; constexpr u64 f32_non_sign_mask = 0x7fffffffu; @@ -43,12 +45,10 @@ constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double static Xbyak::Address MJitStateReg(Arm::Reg reg) { - using namespace Xbyak::util; return dword[r15 + offsetof(JitState, 
Reg) + sizeof(u32) * static_cast(reg)]; } static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) { - using namespace Xbyak::util; if (Arm::IsSingleExtReg(reg)) { size_t index = static_cast(reg) - static_cast(Arm::ExtReg::S0); return dword[r15 + offsetof(JitState, ExtReg) + sizeof(u32) * index]; @@ -60,11 +60,6 @@ static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) { ASSERT_MSG(false, "Should never happen."); } -static Xbyak::Address MJitStateCpsr_other() { - using namespace Xbyak::util; - return dword[r15 + offsetof(JitState, CPSR_other)]; -} - static void EraseInstruction(IR::Block& block, IR::Inst* inst) { block.Instructions().erase(inst); inst->Invalidate(); @@ -217,9 +212,40 @@ void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { code->CallFunction(&SetCpsrImpl); } +void EmitX64::EmitSetCpsrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = args[0].GetImmediateU32(); + + code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], u32(imm & 0xF0000000)); + } else { + Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + + code->and_(a, 0xF0000000); + code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], a); + } +} + +void EmitX64::EmitSetCpsrNZCVQ(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = args[0].GetImmediateU32(); + + code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], u32(imm & 0xF0000000)); + code->mov(code->byte[r15 + offsetof(JitState, CPSR_q)], u8((imm & 0x08000000) != 0 ? 
1 : 0)); + } else { + Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + + code->bt(a, 27); + code->setc(code->byte[r15 + offsetof(JitState, CPSR_q)]); + code->and_(a, 0xF0000000); + code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], a); + } +} + void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr_other()); + code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]); code->shr(result, 31); reg_alloc.DefineValue(inst, result); } @@ -230,22 +256,22 @@ void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr_other(), flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(MJitStateCpsr_other(), ~flag_mask); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr_other(), ~flag_mask); - code->or_(MJitStateCpsr_other(), to_store); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store); } } void EmitX64::EmitGetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr_other()); + code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]); code->shr(result, 30); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -257,22 +283,22 @@ void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr_other(), flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], 
flag_mask); } else { - code->and_(MJitStateCpsr_other(), ~flag_mask); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr_other(), ~flag_mask); - code->or_(MJitStateCpsr_other(), to_store); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store); } } void EmitX64::EmitGetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr_other()); + code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]); code->shr(result, 29); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -284,22 +310,22 @@ void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr_other(), flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(MJitStateCpsr_other(), ~flag_mask); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr_other(), ~flag_mask); - code->or_(MJitStateCpsr_other(), to_store); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store); } } void EmitX64::EmitGetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr_other()); + code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]); code->shr(result, 28); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -311,22 +337,20 @@ void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, 
IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr_other(), flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(MJitStateCpsr_other(), ~flag_mask); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr_other(), ~flag_mask); - code->or_(MJitStateCpsr_other(), to_store); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store); } } void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) @@ -339,16 +363,12 @@ void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - Xbyak::Xmm result = reg_alloc.ScratchXmm(); code->movd(result, dword[r15 + offsetof(JitState, CPSR_ge)]); reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); ASSERT(!args[0].IsImmediate()); @@ -361,9 +381,39 @@ void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } } -void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - using namespace Xbyak::util; +void EmitX64::EmitSetGEFlagsCompressed(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = args[0].GetImmediateU32(); + u32 ge = 0; + ge |= Common::Bit<19>(imm) ? 
0xFF000000 : 0; + ge |= Common::Bit<18>(imm) ? 0x00FF0000 : 0; + ge |= Common::Bit<17>(imm) ? 0x0000FF00 : 0; + ge |= Common::Bit<16>(imm) ? 0x000000FF : 0; + code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], ge); + } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { + Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32(); + + code->mov(b, 0x01010101); + code->shr(a, 16); + code->pdep(a, a, b); + code->imul(a, a, 0xFF); + code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], a); + } else { + Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + + code->shr(a, 16); + code->and_(a, 0xF); + code->imul(a, a, 0x00204081); + code->and_(a, 0x01010101); + code->imul(a, a, 0xFF); + code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], a); + } +} + +void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); auto& arg = args[0]; @@ -414,8 +464,6 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* ins } void EmitX64::EmitCallSupervisor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - reg_alloc.HostCall(nullptr); code->SwitchMxcsrOnExit(); @@ -458,16 +506,12 @@ void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, dword[r15 + offsetof(JitState, FPSCR_nzcv)]); reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 value = reg_alloc.UseGpr(args[0]).cvt32(); @@ -2140,7 +2184,6 @@ void EmitX64::EmitPackedSelect(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) } static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm 
xmm_value, Xbyak::Reg32 gpr_scratch) { - using namespace Xbyak::util; Xbyak::Label end; // We need to report back whether we've found a denormal on input. @@ -2157,7 +2200,6 @@ static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R } static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { - using namespace Xbyak::util; Xbyak::Label end; auto mask = code->MConst(f64_non_sign_mask); @@ -2176,7 +2218,6 @@ static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R } static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { - using namespace Xbyak::util; Xbyak::Label end; code->movd(gpr_scratch, xmm_value); @@ -2190,7 +2231,6 @@ static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 } static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { - using namespace Xbyak::util; Xbyak::Label end; auto mask = code->MConst(f64_non_sign_mask); @@ -2430,7 +2470,6 @@ static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) { reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl Xbyak::Reg32 nzcv = reg_alloc.ScratchGpr().cvt32(); - using namespace Xbyak::util; code->mov(nzcv, 0x28630000); code->sete(cl); @@ -2749,14 +2788,10 @@ void EmitX64::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) void EmitX64::EmitClearExclusive(RegAlloc&, IR::Block&, IR::Inst*) { - using namespace Xbyak::util; - code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0)); } void EmitX64::EmitSetExclusive(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); Xbyak::Reg32 address = reg_alloc.UseGpr(args[0]).cvt32(); @@ -2775,7 +2810,6 @@ static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, U return; } - using namespace 
Xbyak::util; reg_alloc.UseScratch(args[0], ABI_PARAM1); @@ -2829,7 +2863,6 @@ static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, return; } - using namespace Xbyak::util; reg_alloc.ScratchGpr({ABI_RETURN}); reg_alloc.UseScratch(args[0], ABI_PARAM1); @@ -2916,7 +2949,6 @@ static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* ins Xbyak::Reg32 passed = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers. - using namespace Xbyak::util; Xbyak::Label end; code->mov(passed, u32(1)); @@ -3219,18 +3251,15 @@ void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in } void EmitX64::EmitAddCycles(size_t cycles) { - using namespace Xbyak::util; ASSERT(cycles < std::numeric_limits::max()); code->sub(qword[r15 + offsetof(JitState, cycles_remaining)], static_cast(cycles)); } static Xbyak::Label EmitCond(BlockOfCode* code, Arm::Cond cond) { - using namespace Xbyak::util; - Xbyak::Label label; const Xbyak::Reg32 cpsr = eax; - code->mov(cpsr, MJitStateCpsr_other()); + code->mov(cpsr, dword[r15 + offsetof(JitState, CPSR_nzcv)]); constexpr size_t n_shift = 31; constexpr size_t z_shift = 30; @@ -3383,8 +3412,6 @@ static u32 CalculateCpsr_et(const IR::LocationDescriptor& desc) { } void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; - if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next)); } @@ -3410,8 +3437,6 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor } void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; - if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { code->mov(dword[r15 + offsetof(JitState, CPSR_et)], 
CalculateCpsr_et(terminal.next)); } @@ -3425,8 +3450,6 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescrip } void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) { - using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB // TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et. code->mov(ecx, MJitStateReg(Arm::Reg::PC)); @@ -3453,8 +3476,6 @@ void EmitX64::EmitTerminal(IR::Term::If terminal, IR::LocationDescriptor initial } void EmitX64::EmitTerminal(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; - code->cmp(code->byte[r15 + offsetof(JitState, halt_requested)], u8(0)); code->jne(code->GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location); diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp index 9898a3d8..7101c5ce 100644 --- a/src/backend_x64/jitstate.cpp +++ b/src/backend_x64/jitstate.cpp @@ -45,8 +45,15 @@ namespace BackendX64 { */ u32 JitState::Cpsr() const { + ASSERT((CPSR_nzcv & ~0xF0000000) == 0); + ASSERT((CPSR_q & ~1) == 0); + ASSERT((CPSR_et & ~3) == 0); + ASSERT((CPSR_jaifm & ~0x010001DF) == 0); + u32 cpsr = 0; + // NZCV flags + cpsr |= CPSR_nzcv; // Q flag cpsr |= CPSR_q ? 1 << 27 : 0; // GE flags @@ -58,12 +65,14 @@ u32 JitState::Cpsr() const { cpsr |= Common::Bit<1>(CPSR_et) ? 1 << 9 : 0; cpsr |= Common::Bit<0>(CPSR_et) ? 1 << 5 : 0; // Other flags - cpsr |= CPSR_other; + cpsr |= CPSR_jaifm; return cpsr; } void JitState::SetCpsr(u32 cpsr) { + // NZCV flags + CPSR_nzcv = cpsr & 0xF0000000; // Q flag CPSR_q = Common::Bit<27>(cpsr) ? 1 : 0; // GE flags @@ -77,7 +86,7 @@ void JitState::SetCpsr(u32 cpsr) { CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0; CPSR_et |= Common::Bit<5>(cpsr) ? 
1 : 0; // Other flags - CPSR_other = cpsr & 0xF7F0FDDF; + CPSR_jaifm = cpsr & 0x07F0FDDF; } void JitState::ResetRSB() { diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index 3695b8e8..2916c26d 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -28,10 +28,12 @@ struct JitState { std::array Reg{}; // Current register file. // TODO: Mode-specific register sets unimplemented. - u32 CPSR_other = 0; u32 CPSR_ge = 0; u32 CPSR_et = 0; u32 CPSR_q = 0; + u32 CPSR_nzcv = 0; + u32 CPSR_jaifm = 0; + u32 Cpsr() const; void SetCpsr(u32 cpsr); diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 5c8ad05e..7250e5b8 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -117,6 +117,14 @@ void IREmitter::SetCpsr(const Value& value) { Inst(Opcode::SetCpsr, {value}); } +void IREmitter::SetCpsrNZCV(const Value& value) { + Inst(Opcode::SetCpsrNZCV, {value}); +} + +void IREmitter::SetCpsrNZCVQ(const Value& value) { + Inst(Opcode::SetCpsrNZCVQ, {value}); +} + Value IREmitter::GetCFlag() { return Inst(Opcode::GetCFlag, {}); } @@ -149,6 +157,10 @@ void IREmitter::SetGEFlags(const Value& value) { Inst(Opcode::SetGEFlags, {value}); } +void IREmitter::SetGEFlagsCompressed(const Value& value) { + Inst(Opcode::SetGEFlagsCompressed, {value}); +} + Value IREmitter::GetFpscr() { return Inst(Opcode::GetFpscr, {}); } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index a6391bd3..2bda008d 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -84,6 +84,8 @@ public: Value GetCpsr(); void SetCpsr(const Value& value); + void SetCpsrNZCV(const Value& value); + void SetCpsrNZCVQ(const Value& value); Value GetCFlag(); void SetNFlag(const Value& value); void SetZFlag(const Value& value); @@ -92,6 +94,7 @@ public: void OrQFlag(const Value& value); Value GetGEFlags(); void SetGEFlags(const Value& value); + void SetGEFlagsCompressed(const Value& value); Value 
GetFpscr(); void SetFpscr(const Value& new_fpscr); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index 7f7a42a8..9882752a 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -112,12 +112,15 @@ bool Inst::ReadsFromCPSR() const { bool Inst::WritesToCPSR() const { switch (op) { case Opcode::SetCpsr: + case Opcode::SetCpsrNZCV: + case Opcode::SetCpsrNZCVQ: case Opcode::SetNFlag: case Opcode::SetZFlag: case Opcode::SetCFlag: case Opcode::SetVFlag: case Opcode::OrQFlag: case Opcode::SetGEFlags: + case Opcode::SetGEFlagsCompressed: return true; default: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index dc1b594d..6c41782e 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -13,6 +13,8 @@ OPCODE(SetExtendedRegister32, T::Void, T::ExtRegRef, T::F32 OPCODE(SetExtendedRegister64, T::Void, T::ExtRegRef, T::F64 ) OPCODE(GetCpsr, T::U32, ) OPCODE(SetCpsr, T::Void, T::U32 ) +OPCODE(SetCpsrNZCV, T::Void, T::U32 ) +OPCODE(SetCpsrNZCVQ, T::Void, T::U32 ) OPCODE(GetNFlag, T::U1, ) OPCODE(SetNFlag, T::Void, T::U1 ) OPCODE(GetZFlag, T::U1, ) @@ -24,6 +26,7 @@ OPCODE(SetVFlag, T::Void, T::U1 OPCODE(OrQFlag, T::Void, T::U1 ) OPCODE(GetGEFlags, T::U32, ) OPCODE(SetGEFlags, T::Void, T::U32 ) +OPCODE(SetGEFlagsCompressed, T::Void, T::U32 ) OPCODE(BXWritePC, T::Void, T::U32 ) OPCODE(CallSupervisor, T::Void, T::U32 ) OPCODE(GetFpscr, T::U32, ) diff --git a/src/frontend/translate/translate_arm/coprocessor.cpp b/src/frontend/translate/translate_arm/coprocessor.cpp index 60728e69..5f5e7a09 100644 --- a/src/frontend/translate/translate_arm/coprocessor.cpp +++ b/src/frontend/translate/translate_arm/coprocessor.cpp @@ -92,9 +92,8 @@ bool ArmTranslatorVisitor::arm_MRC(Cond cond, size_t opc1, CoprocReg CRn, Reg t, if (t != Reg::PC) { ir.SetRegister(t, word); } else { - auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(0x0FFFFFFF)); auto new_cpsr_nzcv = ir.And(word, 
ir.Imm32(0xF0000000)); - ir.SetCpsr(ir.Or(old_cpsr, new_cpsr_nzcv)); + ir.SetCpsrNZCV(new_cpsr_nzcv); } } return true; diff --git a/src/frontend/translate/translate_arm/status_register_access.cpp b/src/frontend/translate/translate_arm/status_register_access.cpp index f737f5eb..327b31c5 100644 --- a/src/frontend/translate/translate_arm/status_register_access.cpp +++ b/src/frontend/translate/translate_arm/status_register_access.cpp @@ -6,6 +6,8 @@ #include "translate_arm.h" +#include "common/bit_util.h" + namespace Dynarmic { namespace Arm { @@ -30,14 +32,12 @@ bool ArmTranslatorVisitor::arm_MSR_imm(Cond cond, int mask, int rotate, Imm8 imm ASSERT_MSG(write_nzcvq || write_g, "Decode error"); // MSR , # if (ConditionPassed(cond)) { - u32 cpsr_mask = 0; - if (write_nzcvq) - cpsr_mask |= 0xF8000000; - if (write_g) - cpsr_mask |= 0x000F0000; - auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(~cpsr_mask)); - auto new_cpsr = ir.Imm32(imm32 & cpsr_mask); - ir.SetCpsr(ir.Or(old_cpsr, new_cpsr)); + if (write_nzcvq) { + ir.SetCpsrNZCVQ(ir.Imm32(imm32 & 0xF8000000)); + } + if (write_g) { + ir.SetGEFlagsCompressed(ir.Imm32(imm32 & 0x000F0000)); + } } return true; } @@ -51,14 +51,13 @@ bool ArmTranslatorVisitor::arm_MSR_reg(Cond cond, int mask, Reg n) { return UnpredictableInstruction(); // MSR , # if (ConditionPassed(cond)) { - u32 cpsr_mask = 0; - if (write_nzcvq) - cpsr_mask |= 0xF8000000; - if (write_g) - cpsr_mask |= 0x000F0000; - auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(~cpsr_mask)); - auto new_cpsr = ir.And(ir.GetRegister(n), ir.Imm32(cpsr_mask)); - ir.SetCpsr(ir.Or(old_cpsr, new_cpsr)); + auto value = ir.GetRegister(n); + if (write_nzcvq){ + ir.SetCpsrNZCVQ(ir.And(value, ir.Imm32(0xF8000000))); + } + if (write_g){ + ir.SetGEFlagsCompressed(ir.And(value, ir.Imm32(0x000F0000))); + } } return true; } diff --git a/src/frontend/translate/translate_arm/vfp2.cpp b/src/frontend/translate/translate_arm/vfp2.cpp index 1f6d3285..4fa36606 100644 --- 
a/src/frontend/translate/translate_arm/vfp2.cpp +++ b/src/frontend/translate/translate_arm/vfp2.cpp @@ -543,8 +543,7 @@ bool ArmTranslatorVisitor::vfp2_VMRS(Cond cond, Reg t) { if (t == Reg::R15) { // This encodes ASPR_nzcv access auto nzcv = ir.GetFpscrNZCV(); - auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(0x0FFFFFFF)); - ir.SetCpsr(ir.Or(nzcv, old_cpsr)); + ir.SetCpsrNZCV(nzcv); } else { ir.SetRegister(t, ir.GetFpscr()); } diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp index 2458d947..74547148 100644 --- a/tests/arm/fuzz_arm.cpp +++ b/tests/arm/fuzz_arm.cpp @@ -196,7 +196,7 @@ static bool DoesBehaviorMatch(const ARMul_State& interp, const Dynarmic::Jit& ji return interp.Reg == jit.Regs() && interp.ExtReg == jit.ExtRegs() && interp.Cpsr == jit.Cpsr() - && interp.VFP[VFP_FPSCR] == jit.Fpscr() + //&& interp.VFP[VFP_FPSCR] == jit.Fpscr() && interp_write_records == jit_write_records; } @@ -1155,6 +1155,38 @@ TEST_CASE("Test ARM misc instructions", "[JitX64]") { } } +TEST_CASE("Test ARM MSR instructions", "[JitX64]") { + const auto is_msr_valid = [](u32 instr) -> bool { + return Bits<18, 19>(instr) != 0; + }; + + const auto is_msr_reg_valid = [&is_msr_valid](u32 instr) -> bool { + return is_msr_valid(instr) && Bits<0, 3>(instr) != 15; + }; + + const auto is_mrs_valid = [&](u32 inst) -> bool { + return Bits<12, 15>(inst) != 15; + }; + + const std::array instructions = {{ + InstructionGenerator("cccc00110010mm001111rrrrvvvvvvvv", is_msr_valid), // MSR (imm) + InstructionGenerator("cccc00010010mm00111100000000nnnn", is_msr_reg_valid), // MSR (reg) + InstructionGenerator("cccc000100001111dddd000000000000", is_mrs_valid), // MRS + }}; + + SECTION("Ones") { + FuzzJitArm(1, 2, 10000, [&instructions]() -> u32 { + return instructions[RandInt(0, instructions.size() - 1)].Generate(); + }); + } + + SECTION("Fives") { + FuzzJitArm(5, 6, 10000, [&instructions]() -> u32 { + return instructions[RandInt(0, instructions.size() - 1)].Generate(); + }); + } +} + 
TEST_CASE("Fuzz ARM saturated add/sub instructions", "[JitX64]") { auto is_valid = [](u32 inst) -> bool { // R15 as Rd, Rn, or Rm is UNPREDICTABLE diff --git a/tests/arm/fuzz_thumb.cpp b/tests/arm/fuzz_thumb.cpp index e4305c7b..519ce013 100644 --- a/tests/arm/fuzz_thumb.cpp +++ b/tests/arm/fuzz_thumb.cpp @@ -258,11 +258,17 @@ void FuzzJitThumb(const size_t instruction_count, const size_t instructions_to_e Dynarmic::Arm::PSR cpsr; cpsr.T(true); - Dynarmic::IR::Block ir_block = Dynarmic::Arm::Translate({0, cpsr, Dynarmic::Arm::FPSCR{}}, MemoryReadCode); - Dynarmic::Optimization::GetSetElimination(ir_block); - Dynarmic::Optimization::DeadCodeElimination(ir_block); - Dynarmic::Optimization::VerificationPass(ir_block); - printf("\n\nIR:\n%s", Dynarmic::IR::DumpBlock(ir_block).c_str()); + size_t num_insts = 0; + while (num_insts < instructions_to_execute_count) { + Dynarmic::IR::LocationDescriptor descriptor = {u32(num_insts * 4), cpsr, Dynarmic::Arm::FPSCR{}}; + Dynarmic::IR::Block ir_block = Dynarmic::Arm::Translate(descriptor, &MemoryReadCode); + Dynarmic::Optimization::GetSetElimination(ir_block); + Dynarmic::Optimization::DeadCodeElimination(ir_block); + Dynarmic::Optimization::VerificationPass(ir_block); + printf("\n\nIR:\n%s", Dynarmic::IR::DumpBlock(ir_block).c_str()); + printf("\n\nx86_64:\n%s", jit.Disassemble(descriptor).c_str()); + num_insts += ir_block.CycleCount(); + } #ifdef _MSC_VER __debugbreak(); From c823ecf52488ac6d94728e65afc29a93298b8b0b Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 3 Dec 2017 18:25:40 +0000 Subject: [PATCH 7/9] interface: Allow saving and storing of contexts --- include/dynarmic/context.h | 44 +++++++++++ include/dynarmic/dynarmic.h | 6 ++ src/backend_x64/interface_x64.cpp | 89 ++++++++++++++++++++++ src/frontend/translate/translate_thumb.cpp | 2 +- 4 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 include/dynarmic/context.h diff --git a/include/dynarmic/context.h b/include/dynarmic/context.h new file mode 
100644 index 00000000..8276a8ae --- /dev/null +++ b/include/dynarmic/context.h @@ -0,0 +1,44 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include +#include + +namespace Dynarmic { + +struct Context { +public: + Context(); + ~Context(); + Context(const Context&); + Context(Context&&); + Context& operator=(const Context&); + Context& operator=(Context&&); + + /// View and modify registers. + std::array& Regs(); + const std::array& Regs() const; + std::array& ExtRegs(); + const std::array& ExtRegs() const; + + /// View and modify CPSR. + std::uint32_t Cpsr() const; + void SetCpsr(std::uint32_t value); + + /// View and modify FPSCR. + std::uint32_t Fpscr() const; + void SetFpscr(std::uint32_t value); + +private: + friend class Jit; + struct Impl; + std::unique_ptr impl; +}; + +} // namespace Dynarmic diff --git a/include/dynarmic/dynarmic.h b/include/dynarmic/dynarmic.h index d5b82886..1bfcc2a0 100644 --- a/include/dynarmic/dynarmic.h +++ b/include/dynarmic/dynarmic.h @@ -15,6 +15,8 @@ namespace Dynarmic { +struct Context; + namespace IR { class LocationDescriptor; } @@ -70,6 +72,10 @@ public: std::uint32_t Fpscr() const; void SetFpscr(std::uint32_t value); + Context SaveContext() const; + void SaveContext(Context&) const; + void LoadContext(const Context&); + /** * Returns true if Jit::Run was called but hasn't returned yet. * i.e.: We're in a callback. 
diff --git a/src/backend_x64/interface_x64.cpp b/src/backend_x64/interface_x64.cpp index ad991a36..945391fe 100644 --- a/src/backend_x64/interface_x64.cpp +++ b/src/backend_x64/interface_x64.cpp @@ -20,6 +20,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/scope_exit.h" +#include "dynarmic/context.h" #include "dynarmic/dynarmic.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/location_descriptor.h" @@ -45,6 +46,7 @@ struct Jit::Impl { const UserCallbacks callbacks; // Requests made during execution to invalidate the cache are queued up here. + size_t invalid_cache_generation = 0; boost::icl::interval_set invalid_cache_ranges; bool invalidate_entire_cache = false; @@ -98,6 +100,7 @@ struct Jit::Impl { invalid_cache_ranges.clear(); invalidate_entire_cache = false; + invalid_cache_generation++; return; } @@ -108,6 +111,7 @@ struct Jit::Impl { jit_state.ResetRSB(); emitter.InvalidateCacheRanges(invalid_cache_ranges); invalid_cache_ranges.clear(); + invalid_cache_generation++; } void RequestCacheInvalidation() { @@ -221,6 +225,91 @@ void Jit::SetFpscr(u32 value) { return impl->jit_state.SetFpscr(value); } +Context Jit::SaveContext() const { + Context ctx; + SaveContext(ctx); + return ctx; +} + +struct Context::Impl { + JitState jit_state; + size_t invalid_cache_generation; +}; + +Context::Context() : impl(std::make_unique()) { impl->jit_state.ResetRSB(); } +Context::~Context() = default; +Context::Context(const Context& ctx) : impl(std::make_unique(*ctx.impl)) {} +Context::Context(Context&& ctx) : impl(std::move(ctx.impl)) {} +Context& Context::operator=(const Context& ctx) { + *impl = *ctx.impl; + return *this; +} +Context& Context::operator=(Context&& ctx) { + impl = std::move(ctx.impl); + return *this; +} + +std::array& Context::Regs() { + return impl->jit_state.Reg; +} +const std::array& Context::Regs() const { + return impl->jit_state.Reg; +} +std::array& Context::ExtRegs() { + return impl->jit_state.ExtReg; +} +const 
std::array& Context::ExtRegs() const { + return impl->jit_state.ExtReg; +} + +/// View and modify CPSR. +std::uint32_t Context::Cpsr() const { + return impl->jit_state.Cpsr(); +} +void Context::SetCpsr(std::uint32_t value) { + impl->jit_state.SetCpsr(value); +} + +/// View and modify FPSCR. +std::uint32_t Context::Fpscr() const { + return impl->jit_state.Fpscr(); +} +void Context::SetFpscr(std::uint32_t value) { + return impl->jit_state.SetFpscr(value); +} + +void TransferJitState(JitState& dest, const JitState& src, bool reset_rsb) { + dest.CPSR_ge = src.CPSR_ge; + dest.CPSR_et = src.CPSR_et; + dest.CPSR_q = src.CPSR_q; + dest.CPSR_nzcv = src.CPSR_nzcv; + dest.CPSR_jaifm = src.CPSR_jaifm; + dest.Reg = src.Reg; + dest.ExtReg = src.ExtReg; + dest.guest_MXCSR = src.guest_MXCSR; + dest.FPSCR_IDC = src.FPSCR_IDC; + dest.FPSCR_UFC = src.FPSCR_UFC; + dest.FPSCR_mode = src.FPSCR_mode; + dest.FPSCR_nzcv = src.FPSCR_nzcv; + if (reset_rsb) { + dest.ResetRSB(); + } else { + dest.rsb_ptr = src.rsb_ptr; + dest.rsb_location_descriptors = src.rsb_location_descriptors; + dest.rsb_codeptrs = src.rsb_codeptrs; + } +} + +void Jit::SaveContext(Context& ctx) const { + TransferJitState(ctx.impl->jit_state, impl->jit_state, false); + ctx.impl->invalid_cache_generation = impl->invalid_cache_generation; +} + +void Jit::LoadContext(const Context& ctx) { + bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation; + TransferJitState(impl->jit_state, ctx.impl->jit_state, reset_rsb); +} + std::string Jit::Disassemble(const IR::LocationDescriptor& descriptor) { return impl->Disassemble(descriptor); } diff --git a/src/frontend/translate/translate_thumb.cpp b/src/frontend/translate/translate_thumb.cpp index 6a173274..1062d436 100644 --- a/src/frontend/translate/translate_thumb.cpp +++ b/src/frontend/translate/translate_thumb.cpp @@ -682,7 +682,7 @@ struct ThumbTranslatorVisitor final { ir.LoadWritePC(data); address = ir.Add(address, ir.Imm32(4)); 
ir.SetRegister(Reg::SP, address); - ir.SetTerm(IR::Term::ReturnToDispatch{}); + ir.SetTerm(IR::Term::PopRSBHint{}); return false; } else { ir.SetRegister(Reg::SP, address); From dd07033dce2630680ea9f6aa5e63d57469071bc6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 12 Dec 2017 14:19:48 +0000 Subject: [PATCH 8/9] emit_x64: Optimize code emitted by EmitGetCpsr --- src/backend_x64/emit_x64.cpp | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index dc111a23..b8324e44 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -196,9 +196,35 @@ static u32 GetCpsrImpl(JitState* jit_state) { } void EmitX64::EmitGetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(inst); - code->mov(code->ABI_PARAM1, code->r15); - code->CallFunction(&GetCpsrImpl); + if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 c = reg_alloc.ScratchGpr().cvt32(); + + code->mov(c, dword[r15 + offsetof(JitState, CPSR_ge)]); + // Here we observe that CPSR_q and CPSR_nzcv are right next to each other in memory, + // so we load them both at the same time with one 64-bit read. This allows us to + // extract all of their bits together at once with one pext. 
+ code->mov(result.cvt64(), qword[r15 + offsetof(JitState, CPSR_q)]); + code->mov(b.cvt64(), 0xF000000000000001ull); + code->pext(result.cvt64(), result.cvt64(), b.cvt64()); + code->mov(b, 0x80808080); + code->pext(c.cvt64(), c.cvt64(), b.cvt64()); + code->shl(result, 27); + code->shl(c, 16); + code->or_(result, c); + code->mov(b, 0x00000220); + code->mov(c, dword[r15 + offsetof(JitState, CPSR_et)]); + code->pdep(c.cvt64(), c.cvt64(), b.cvt64()); + code->or_(result, dword[r15 + offsetof(JitState, CPSR_jaifm)]); + code->or_(result, c); + + reg_alloc.DefineValue(inst, result); + } else { + reg_alloc.HostCall(inst); + code->mov(code->ABI_PARAM1, code->r15); + code->CallFunction(&GetCpsrImpl); + } } static void SetCpsrImpl(u32 value, JitState* jit_state) { From d1d4705364031512cb89333aebc00b8d75a2f732 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 12 Dec 2017 14:18:21 +0000 Subject: [PATCH 9/9] Add re-entry prediction to avoid std::unordered_map lookups --- src/backend_x64/block_of_code.cpp | 17 ++++++++++++++++- src/backend_x64/block_of_code.h | 2 ++ src/backend_x64/jitstate.cpp | 4 ++++ src/backend_x64/jitstate.h | 2 ++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/backend_x64/block_of_code.cpp b/src/backend_x64/block_of_code.cpp index 27a34f51..a1a7cdd6 100644 --- a/src/backend_x64/block_of_code.cpp +++ b/src/backend_x64/block_of_code.cpp @@ -82,7 +82,14 @@ void BlockOfCode::RunCode(JitState* jit_state, size_t cycles_to_run) const { jit_state->cycles_to_run = cycles_to_run; jit_state->cycles_remaining = cycles_to_run; - run_code(jit_state); + + u32 new_rsb_ptr = (jit_state->rsb_ptr - 1) & JitState::RSBPtrMask; + if (jit_state->GetUniqueHash() == jit_state->rsb_location_descriptors[new_rsb_ptr]) { + jit_state->rsb_ptr = new_rsb_ptr; + run_code_from(jit_state, jit_state->rsb_codeptrs[new_rsb_ptr]); + } else { + run_code(jit_state); + } } void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) { @@ -102,6 +109,14 @@ void 
BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) { void BlockOfCode::GenRunCode() { Xbyak::Label loop, enter_mxcsr_then_loop; + align(); + run_code_from = getCurr(); + + ABI_PushCalleeSaveRegistersAndAdjustStack(this); + mov(r15, ABI_PARAM1); + SwitchMxcsrOnEntry(); + jmp(ABI_PARAM2); + align(); run_code = getCurr(); diff --git a/src/backend_x64/block_of_code.h b/src/backend_x64/block_of_code.h index d8cbe7ef..9df949ef 100644 --- a/src/backend_x64/block_of_code.h +++ b/src/backend_x64/block_of_code.h @@ -138,7 +138,9 @@ private: CodePtr far_code_ptr; using RunCodeFuncType = void(*)(JitState*); + using RunCodeFromFuncType = void(*)(JitState*, u64); RunCodeFuncType run_code = nullptr; + RunCodeFromFuncType run_code_from = nullptr; static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0; static constexpr size_t FORCE_RETURN = 1 << 1; std::array return_from_run_code; diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp index 7101c5ce..b4b76223 100644 --- a/src/backend_x64/jitstate.cpp +++ b/src/backend_x64/jitstate.cpp @@ -199,5 +199,9 @@ void JitState::SetFpscr(u32 FPSCR) { } } +u64 JitState::GetUniqueHash() const { + return CPSR_et | FPSCR_mode | (static_cast(Reg[15]) << 32); +} + } // namespace BackendX64 } // namespace Dynarmic diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index 2916c26d..ea968432 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -67,6 +67,8 @@ struct JitState { u32 old_FPSCR = 0; u32 Fpscr() const; void SetFpscr(u32 FPSCR); + + u64 GetUniqueHash() const; }; #ifdef _MSC_VER