From 1ee3f3d9e6baa2ab5d214d942cbdbdacb35eede2 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 27 Jul 2018 12:42:10 +0100 Subject: [PATCH] Implement perfmap --- src/CMakeLists.txt | 2 + src/backend/x64/a32_emit_x64.cpp | 29 +++++++--- src/backend/x64/a32_emit_x64.h | 3 ++ src/backend/x64/a64_emit_x64.cpp | 25 ++++++--- src/backend/x64/a64_emit_x64.h | 3 ++ src/backend/x64/block_of_code.cpp | 3 ++ src/backend/x64/emit_x64.cpp | 12 +++++ src/backend/x64/emit_x64.h | 3 ++ src/backend/x64/perf_map.cpp | 89 +++++++++++++++++++++++++++++++ src/backend/x64/perf_map.h | 27 ++++++++++ 10 files changed, 184 insertions(+), 12 deletions(-) create mode 100644 src/backend/x64/perf_map.cpp create mode 100644 src/backend/x64/perf_map.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c3bf2e8d..be0c4a27 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -244,6 +244,8 @@ if (ARCHITECTURE_x86_64) backend/x64/hostloc.h backend/x64/jitstate_info.h backend/x64/oparg.h + backend/x64/perf_map.cpp + backend/x64/perf_map.h backend/x64/reg_alloc.cpp backend/x64/reg_alloc.h ) diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp index 1d041312..aaadde84 100644 --- a/src/backend/x64/a32_emit_x64.cpp +++ b/src/backend/x64/a32_emit_x64.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -18,6 +19,7 @@ #include "backend/x64/block_of_code.h" #include "backend/x64/devirtualize.h" #include "backend/x64/emit_x64.h" +#include "backend/x64/perf_map.h" #include "common/address_range.h" #include "common/assert.h" #include "common/bit_util.h" @@ -132,17 +134,15 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { EmitX64::EmitTerminal(block.GetTerminal(), block.Location()); code.int3(); - const A32::LocationDescriptor descriptor{block.Location()}; - Patch(descriptor, entrypoint); - const size_t size = static_cast(code.getCurr() - entrypoint); + + const A32::LocationDescriptor descriptor{block.Location()}; const 
A32::LocationDescriptor end_location{block.EndLocation()}; + const auto range = boost::icl::discrete_interval::closed(descriptor.PC(), end_location.PC() - 1); - A32EmitX64::BlockDescriptor block_desc{entrypoint, size}; - block_descriptors.emplace(descriptor.UniqueHash(), block_desc); block_ranges.AddRange(range, descriptor); - return block_desc; + return RegisterBlock(descriptor, entrypoint, size); } void A32EmitX64::ClearCache() { @@ -161,6 +161,7 @@ void A32EmitX64::GenMemoryAccessors() { Devirtualize<&A32::UserCallbacks::MemoryRead8>(config.callbacks).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); code.ret(); + PerfMapRegister(read_memory_8, code.getCurr(), "a32_read_memory_8"); code.align(); read_memory_16 = code.getCurr(); @@ -168,6 +169,7 @@ void A32EmitX64::GenMemoryAccessors() { Devirtualize<&A32::UserCallbacks::MemoryRead16>(config.callbacks).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); code.ret(); + PerfMapRegister(read_memory_16, code.getCurr(), "a32_read_memory_16"); code.align(); read_memory_32 = code.getCurr(); @@ -175,6 +177,7 @@ void A32EmitX64::GenMemoryAccessors() { Devirtualize<&A32::UserCallbacks::MemoryRead32>(config.callbacks).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); code.ret(); + PerfMapRegister(read_memory_32, code.getCurr(), "a32_read_memory_32"); code.align(); read_memory_64 = code.getCurr(); @@ -182,6 +185,7 @@ void A32EmitX64::GenMemoryAccessors() { Devirtualize<&A32::UserCallbacks::MemoryRead64>(config.callbacks).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); code.ret(); + PerfMapRegister(read_memory_64, code.getCurr(), "a32_read_memory_64"); code.align(); write_memory_8 = code.getCurr(); @@ -189,6 +193,7 @@ void A32EmitX64::GenMemoryAccessors() { Devirtualize<&A32::UserCallbacks::MemoryWrite8>(config.callbacks).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, 
ABI_RETURN); code.ret(); + PerfMapRegister(write_memory_8, code.getCurr(), "a32_write_memory_8"); code.align(); write_memory_16 = code.getCurr(); @@ -196,6 +201,7 @@ void A32EmitX64::GenMemoryAccessors() { Devirtualize<&A32::UserCallbacks::MemoryWrite16>(config.callbacks).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); code.ret(); + PerfMapRegister(write_memory_16, code.getCurr(), "a32_write_memory_16"); code.align(); write_memory_32 = code.getCurr(); @@ -203,6 +209,7 @@ void A32EmitX64::GenMemoryAccessors() { Devirtualize<&A32::UserCallbacks::MemoryWrite32>(config.callbacks).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); code.ret(); + PerfMapRegister(write_memory_32, code.getCurr(), "a32_write_memory_32"); code.align(); write_memory_64 = code.getCurr(); @@ -210,6 +217,7 @@ void A32EmitX64::GenMemoryAccessors() { Devirtualize<&A32::UserCallbacks::MemoryWrite64>(config.callbacks).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN); code.ret(); + PerfMapRegister(write_memory_64, code.getCurr(), "a32_write_memory_64"); } void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) { @@ -1141,6 +1149,15 @@ void A32EmitX64::EmitA32CoprocStoreWords(A32EmitContext& ctx, IR::Inst* inst) { CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]); } +std::string A32EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const { + const A32::LocationDescriptor descriptor{ir_descriptor}; + return fmt::format("a32_{}{:08X}_{}_fpcr{:08X}", + descriptor.TFlag() ? "t" : "a", + descriptor.PC(), + descriptor.EFlag() ? 
"be" : "le", + descriptor.FPSCR().Value()); +} + void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) { ASSERT_MSG(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag(), "Unimplemented"); ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented"); diff --git a/src/backend/x64/a32_emit_x64.h b/src/backend/x64/a32_emit_x64.h index 96772f95..c559fecb 100644 --- a/src/backend/x64/a32_emit_x64.h +++ b/src/backend/x64/a32_emit_x64.h @@ -68,6 +68,9 @@ protected: #undef A32OPC #undef A64OPC + // Helpers + std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override; + // Terminal instruction emitters void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override; void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override; diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp index bc711e9d..12610147 100644 --- a/src/backend/x64/a64_emit_x64.cpp +++ b/src/backend/x64/a64_emit_x64.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include "backend/x64/a64_emit_x64.h" @@ -15,6 +16,7 @@ #include "backend/x64/block_of_code.h" #include "backend/x64/devirtualize.h" #include "backend/x64/emit_x64.h" +#include "backend/x64/perf_map.h" #include "common/address_range.h" #include "common/assert.h" #include "common/bit_util.h" @@ -118,17 +120,15 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) { EmitX64::EmitTerminal(block.GetTerminal(), block.Location()); code.int3(); - const A64::LocationDescriptor descriptor{block.Location()}; - Patch(descriptor, entrypoint); - const size_t size = static_cast(code.getCurr() - entrypoint); + + const A64::LocationDescriptor descriptor{block.Location()}; const A64::LocationDescriptor end_location{block.EndLocation()}; + 
const auto range = boost::icl::discrete_interval::closed(descriptor.PC(), end_location.PC() - 1); - A64EmitX64::BlockDescriptor block_desc{entrypoint, size}; - block_descriptors.emplace(descriptor.UniqueHash(), block_desc); block_ranges.AddRange(range, descriptor); - return block_desc; + return RegisterBlock(descriptor, entrypoint, size); } void A64EmitX64::ClearCache() { @@ -166,6 +166,7 @@ void A64EmitX64::GenMemory128Accessors() { code.add(rsp, 8); #endif code.ret(); + PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128"); code.align(); memory_write_128 = code.getCurr(); @@ -189,6 +190,7 @@ void A64EmitX64::GenMemory128Accessors() { code.add(rsp, 8); #endif code.ret(); + PerfMapRegister(memory_write_128, code.getCurr(), "a64_memory_write_128"); /* range starts at the write accessor's own entry point, not the read accessor's */ } void A64EmitX64::GenFastmemFallbacks() { @@ -224,6 +226,7 @@ void A64EmitX64::GenFastmemFallbacks() { } ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx)); code.ret(); + PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128"); code.align(); write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr(); @@ -237,6 +240,7 @@ void A64EmitX64::GenFastmemFallbacks() { code.call(memory_write_128); ABI_PopCallerSaveRegistersAndAdjustStack(code); code.ret(); + PerfMapRegister(write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_write_fallback_128"); if (value_idx == 4 || value_idx == 15) { continue; @@ -255,6 +259,7 @@ void A64EmitX64::GenFastmemFallbacks() { } ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); code.ret(); + PerfMapRegister(read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_read_fallback_{}", bitsize)); } for (auto& [bitsize, callback] : write_callbacks) { @@ -279,6 +284,7 @@ void A64EmitX64::GenFastmemFallbacks() { callback.EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code);
code.ret(); + PerfMapRegister(write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_write_fallback_{}", bitsize)); } } } @@ -999,6 +1005,13 @@ void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* i EmitExclusiveWrite(ctx, inst, 128); } +std::string A64EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const { + const A64::LocationDescriptor descriptor{ir_descriptor}; + return fmt::format("a64_{:016X}_fpcr{:08X}", + descriptor.PC(), + descriptor.FPCR().Value()); +} + void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) { code.SwitchMxcsrOnExit(); Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, diff --git a/src/backend/x64/a64_emit_x64.h b/src/backend/x64/a64_emit_x64.h index 1ceb2bd6..5b9a6178 100644 --- a/src/backend/x64/a64_emit_x64.h +++ b/src/backend/x64/a64_emit_x64.h @@ -74,6 +74,9 @@ protected: #undef A32OPC #undef A64OPC + // Helpers + std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override; + // Terminal instruction emitters void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override; void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override; diff --git a/src/backend/x64/block_of_code.cpp b/src/backend/x64/block_of_code.cpp index bb78beeb..b49d9392 100644 --- a/src/backend/x64/block_of_code.cpp +++ b/src/backend/x64/block_of_code.cpp @@ -13,6 +13,7 @@ #include "backend/x64/a32_jitstate.h" #include "backend/x64/abi.h" #include "backend/x64/block_of_code.h" +#include "backend/x64/perf_map.h" #include "common/assert.h" #ifdef _WIN32 @@ -223,6 +224,8 @@ void BlockOfCode::GenRunCode() { align(); return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr(); emit_return_from_run_code(true, true); + + PerfMapRegister(run_code_from, getCurr(), 
"dynarmic_dispatcher"); } void BlockOfCode::SwitchMxcsrOnEntry() { diff --git a/src/backend/x64/emit_x64.cpp b/src/backend/x64/emit_x64.cpp index 41f3316a..e25258c2 100644 --- a/src/backend/x64/emit_x64.cpp +++ b/src/backend/x64/emit_x64.cpp @@ -9,6 +9,7 @@ #include "backend/x64/block_of_code.h" #include "backend/x64/emit_x64.h" +#include "backend/x64/perf_map.h" #include "common/assert.h" #include "common/bit_util.h" #include "common/common_types.h" @@ -286,6 +287,15 @@ void EmitX64::EmitCondPrelude(const IR::Block& block) { code.L(pass); } +EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) { + PerfMapRegister(entrypoint, code.getCurr(), LocationDescriptorToFriendlyName(descriptor)); + Patch(descriptor, entrypoint); + + BlockDescriptor block_desc{entrypoint, size}; + block_descriptors.emplace(descriptor.Value(), block_desc); + return block_desc; +} + void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) { Common::VisitVariant(terminal, [this, &initial_location](auto x) { using T = std::decay_t; @@ -326,6 +336,8 @@ void EmitX64::Unpatch(const IR::LocationDescriptor& desc) { void EmitX64::ClearCache() { block_descriptors.clear(); patch_information.clear(); + + PerfMapClear(); } void EmitX64::InvalidateBasicBlocks(const std::unordered_set& locations) { diff --git a/src/backend/x64/emit_x64.h b/src/backend/x64/emit_x64.h index 80b89f56..c0d329c4 100644 --- a/src/backend/x64/emit_x64.h +++ b/src/backend/x64/emit_x64.h @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include #include @@ -85,9 +86,11 @@ protected: #undef A64OPC // Helpers + virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0; void EmitAddCycles(size_t cycles); Xbyak::Label EmitCond(IR::Cond cond); void EmitCondPrelude(const IR::Block& block); + BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, 
size_t size); void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target); // Terminal instruction emitters diff --git a/src/backend/x64/perf_map.cpp b/src/backend/x64/perf_map.cpp new file mode 100644 index 00000000..beaf8b83 --- /dev/null +++ b/src/backend/x64/perf_map.cpp @@ -0,0 +1,89 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include +#include + +#ifdef __linux__ + +#include +#include +#include +#include +#include + +#include + +#include "common/common_types.h" + +namespace Dynarmic::BackendX64 { + +namespace { +std::mutex mutex; +std::FILE* file = nullptr; + +void OpenFile() { + const char* perf_dir = std::getenv("PERF_BUILDID_DIR"); + if (!perf_dir) { + file = nullptr; + return; + } + + const pid_t pid = getpid(); + const std::string filename = fmt::format("{:s}/perf-{:d}.map", perf_dir, pid); + + file = std::fopen(filename.c_str(), "w"); + if (!file) { + return; + } + + std::setvbuf(file, nullptr, _IONBF, 0); +} +} // anonymous namespace + +namespace detail { +void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) { + std::lock_guard guard{mutex}; + + if (!file) { + OpenFile(); + if (!file) { + return; + } + } + + const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast(start), reinterpret_cast(end) - reinterpret_cast(start), friendly_name); + std::fwrite(line.data(), sizeof *line.data(), line.size(), file); +} +} // namespace detail + +void PerfMapClear() { + std::lock_guard guard{mutex}; + + if (!file) { + return; + } + + std::fclose(file); + file = nullptr; + OpenFile(); +} + +} // namespace Dynarmic::BackendX64 + +#else + +namespace Dynarmic::BackendX64 { + +namespace detail { +void PerfMapRegister(const void*, const void*, const std::string&) {} +} // namespace detail + 
+void PerfMapClear() {} + +} // namespace Dynarmic::BackendX64 + +#endif diff --git a/src/backend/x64/perf_map.h b/src/backend/x64/perf_map.h new file mode 100644 index 00000000..80f933fb --- /dev/null +++ b/src/backend/x64/perf_map.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include + +#include "common/cast_util.h" + +namespace Dynarmic::BackendX64 { + +namespace detail { +void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name); +} // namespace detail + +template +void PerfMapRegister(T start, const void* end, const std::string& friendly_name) { + detail::PerfMapRegister(Common::BitCast(start), end, friendly_name); +} + +void PerfMapClear(); + +} // namespace Dynarmic::BackendX64