From f627440bb354f7cf3f5f9dd2392f205052d34c64 Mon Sep 17 00:00:00 2001 From: SachinVin Date: Sun, 31 Jan 2021 22:08:51 +0530 Subject: [PATCH] disk_shader : non separable --- src/android/app/src/main/jni/config.cpp | 2 + src/android/app/src/main/jni/native.cpp | 16 +++ .../renderer_opengl/gl_shader_disk_cache.cpp | 111 +++++++++++++++--- .../renderer_opengl/gl_shader_disk_cache.h | 21 +++- .../renderer_opengl/gl_shader_manager.cpp | 95 +++++++++++++-- .../renderer_opengl/gl_shader_util.cpp | 1 + 6 files changed, 213 insertions(+), 33 deletions(-) diff --git a/src/android/app/src/main/jni/config.cpp b/src/android/app/src/main/jni/config.cpp index aa9966c13..11fd3491e 100644 --- a/src/android/app/src/main/jni/config.cpp +++ b/src/android/app/src/main/jni/config.cpp @@ -123,6 +123,8 @@ void Config::ReadValues() { Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); Settings::values.resolution_factor = static_cast(sdl2_config->GetInteger("Renderer", "resolution_factor", 1)); + Settings::values.use_disk_shader_cache = + sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", true); Settings::values.use_vsync_new = sdl2_config->GetBoolean("Renderer", "use_vsync_new", true); // Work around to map Android setting for enabling the frame limiter to the format Citra expects diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index b87d2ac7d..c9401620f 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -190,6 +190,22 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) { is_running = true; pause_emulation = false; + std::unique_ptr cpu_context{window->CreateSharedContext()}; + if (Settings::values.use_asynchronous_gpu_emulation) { + cpu_context->MakeCurrent(); + } + + system.Renderer().Rasterizer()->LoadDiskResources( + !is_running, [](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { + 
if(value%10 == 0 || value + 1 == total){ + LOG_INFO(Frontend, "Shader cache Stage {}: {}/{}", stage, value + 1, total); + } + }); + + if (Settings::values.use_asynchronous_gpu_emulation) { + cpu_context->DoneCurrent(); + } + SCOPE_EXIT({ TryShutdown(); }); // Audio stretching on Android is only useful with lower framerates, disable it when fullspeed diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 58730d6ee..7cc11223b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -106,7 +106,7 @@ ShaderDiskCache::ShaderDiskCache(bool separable) : separable{separable} {} std::optional> ShaderDiskCache::LoadTransferable() { const bool has_title_id = GetProgramID() != 0; - if (!Settings::values.use_hw_shader || !Settings::values.shaders_accurate_mul || + if (!Settings::values.use_hw_shader || !Settings::values.use_disk_shader_cache || !has_title_id) { return std::nullopt; } @@ -171,7 +171,7 @@ std::optional> ShaderDiskCache::LoadTransferable } std::pair, ShaderDumpsMap> -ShaderDiskCache::LoadPrecompiled() { +ShaderDiskCache::LoadPrecompiled(bool compressed) { if (!IsUsable()) return {}; @@ -182,7 +182,7 @@ ShaderDiskCache::LoadPrecompiled() { return {}; } - const auto result = LoadPrecompiledFile(file); + const auto result = LoadPrecompiledFile(file, compressed); if (!result) { LOG_INFO(Render_OpenGL, "Failed to load precompiled cache for game with title id={} - removing", @@ -195,12 +195,17 @@ ShaderDiskCache::LoadPrecompiled() { } std::optional, ShaderDumpsMap>> -ShaderDiskCache::LoadPrecompiledFile(FileUtil::IOFile& file) { +ShaderDiskCache::LoadPrecompiledFile(FileUtil::IOFile& file, bool compressed) { // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector compressed(file.GetSize()); - file.ReadBytes(compressed.data(), compressed.size()); - const std::vector decompressed 
= Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); + std::vector precompiled_file(file.GetSize()); + file.ReadBytes(precompiled_file.data(), precompiled_file.size()); + if (compressed) { + const std::vector decompressed = Common::Compression::DecompressDataZSTD(precompiled_file); + SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); + } else { + SaveArrayToPrecompiled(precompiled_file.data(), precompiled_file.size()); + } + decompressed_precompiled_cache_offset = 0; ShaderCacheVersionHash file_hash{}; @@ -293,9 +298,26 @@ std::optional ShaderDiskCache::LoadDecompiledEntry() return entry; } -bool ShaderDiskCache::SaveDecompiledFile(u64 unique_identifier, - const ShaderDecompiler::ProgramResult& result, - bool sanitize_mul) { +void ShaderDiskCache::SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier, + const ShaderDecompiler::ProgramResult& result, + bool sanitize_mul) { + if (!IsUsable()) + return; + + if (file.WriteObject(static_cast(PrecompiledEntryKind::Decompiled)) != 1 || + file.WriteObject(unique_identifier) != 1 || + file.WriteObject(sanitize_mul) != 1 || + file.WriteObject(static_cast(result.code.size())) != 1 || + file.WriteArray(result.code.data(), result.code.size()) != result.code.size()) { + LOG_ERROR(Render_OpenGL, "Failed to save decompiled cache entry - removing"); + file.Close(); + InvalidatePrecompiled(); + } +} + +bool ShaderDiskCache::SaveDecompiledToCache(u64 unique_identifier, + const ShaderDecompiler::ProgramResult& result, + bool sanitize_mul) { if (!SaveObjectToPrecompiled(static_cast(PrecompiledEntryKind::Decompiled)) || !SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(sanitize_mul) || !SaveObjectToPrecompiled(static_cast(result.code.size())) || @@ -315,7 +337,7 @@ void ShaderDiskCache::InvalidateAll() { } void ShaderDiskCache::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file + // Clear virtual 
precompiled cache file decompressed_precompiled_cache.resize(0); if (!FileUtil::Delete(GetPrecompiledPath())) { @@ -351,11 +373,11 @@ void ShaderDiskCache::SaveDecompiled(u64 unique_identifier, if (!IsUsable()) return; - if (decompressed_precompiled_cache.size() == 0) { + if (decompressed_precompiled_cache.empty()) { SavePrecompiledHeaderToVirtualPrecompiledCache(); } - if (!SaveDecompiledFile(unique_identifier, code, sanitize_mul)) { + if (!SaveDecompiledToCache(unique_identifier, code, sanitize_mul)) { LOG_ERROR(Render_OpenGL, "Failed to save decompiled entry to the precompiled file - removing"); InvalidatePrecompiled(); @@ -373,6 +395,9 @@ void ShaderDiskCache::SaveDump(u64 unique_identifier, GLuint program) { GLint binary_length{}; glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); + if (!binary_length) + return; + GLenum binary_format{}; std::vector binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); @@ -389,6 +414,40 @@ void ShaderDiskCache::SaveDump(u64 unique_identifier, GLuint program) { } } +void ShaderDiskCache::SaveDumpToFile(u64 unique_identifier, GLuint program, bool sanitize_mul) { + if (!IsUsable()) + return; + + FileUtil::IOFile file = AppendPrecompiledFile(); + if (!file.IsOpen()) + return; + + GLint binary_length{}; + glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); + if (!binary_length) + return; + + GLenum binary_format{}; + std::vector binary(binary_length); + glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); + + if (file.WriteObject(static_cast(PrecompiledEntryKind::Dump)) != 1 || + file.WriteObject(unique_identifier) != 1 || + file.WriteObject(static_cast(binary_format)) != 1 || + file.WriteObject(static_cast(binary_length)) != 1 || + file.WriteArray(binary.data(), binary.size()) != binary.size()) { + LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", + unique_identifier); + 
file.Close(); // Release the handle so the stale cache file can be deleted + InvalidatePrecompiled(); + return; + } + + // The empty decompiled entry only records the accurate multiplication setting of this dump; + // a better way is probably to store that flag in the dump entry header + SaveDecompiledToFile(file, unique_identifier, {}, sanitize_mul); +} + bool ShaderDiskCache::IsUsable() const { return tried_to_load && Settings::values.use_disk_shader_cache; } @@ -416,6 +475,30 @@ FileUtil::IOFile ShaderDiskCache::AppendTransferableFile() { return file; } +FileUtil::IOFile ShaderDiskCache::AppendPrecompiledFile() { + if (!EnsureDirectories()) + return {}; + + const auto precompiled_path{GetPrecompiledPath()}; + const bool existed = FileUtil::Exists(precompiled_path); + + FileUtil::IOFile file(precompiled_path, "ab"); + if (!file.IsOpen()) { + LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path); + return {}; + } + if (!existed || file.GetSize() == 0) { + // If the file is new or empty, write the cache version hash first + const auto hash{GetShaderCacheVersionHash()}; + if (file.WriteArray(hash.data(), hash.size()) != hash.size()) { + LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", + precompiled_path); + return {}; + } + } + return file; +} + void ShaderDiskCache::SavePrecompiledHeaderToVirtualPrecompiledCache() { const auto hash{GetShaderCacheVersionHash()}; if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index a3743d1a0..1f7ce96b4 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -97,7 +97,7 @@ public: std::optional> LoadTransferable(); /// Loads current game's precompiled cache. Invalidates on failure. - std::pair LoadPrecompiled(); + std::pair LoadPrecompiled(bool compressed); /// Removes the transferable (and precompiled) cache file. 
void InvalidateAll(); @@ -115,21 +115,28 @@ public: /// Saves a dump entry to the precompiled file. Does not check for collisions. void SaveDump(u64 unique_identifier, GLuint program); + /// Appends a dump entry plus its sanitize_mul marker to the on-disk precompiled file. + void SaveDumpToFile(u64 unique_identifier, GLuint program, bool sanitize_mul); + /// Serializes virtual precompiled shader cache file to real file void SaveVirtualPrecompiledFile(); private: /// Loads the transferable cache. Returns empty on failure. std::optional> LoadPrecompiledFile( - FileUtil::IOFile& file); + FileUtil::IOFile& file, bool compressed); /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on /// failure. std::optional LoadDecompiledEntry(); - /// Saves a decompiled entry to the passed file. Returns true on success. - bool SaveDecompiledFile(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, - bool sanitize_mul); + /// Writes a decompiled entry to the passed file. Invalidates the cache on failure. + void SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, + bool sanitize_mul); + + /// Saves a decompiled entry to the virtual precompiled cache. Returns true on success. 
+ bool SaveDecompiledToCache(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, + bool sanitize_mul); /// Returns if the cache can be used bool IsUsable() const; @@ -197,7 +204,7 @@ private: return LoadArrayFromPrecompiled(&object, 1); } - // Stores whole precompiled cache which will be read from or saved to the precompiled chache + // Stores whole precompiled cache which will be read from or saved to the precompiled cache // file std::vector decompressed_precompiled_cache; // Stores the current offset of the precompiled cache file for IO purposes @@ -213,6 +220,8 @@ private: u64 program_id{}; std::string title_id; + + FileUtil::IOFile AppendPrecompiledFile(); }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8d554ffc1..acd392032 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -26,7 +26,7 @@ static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) } static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, - const std::set& supported_formats) { + const std::set& supported_formats, bool separable) { if (supported_formats.find(dump.binary_format) == supported_formats.end()) { LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); @@ -35,7 +35,9 @@ static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, auto shader = OGLProgram(); shader.handle = glCreateProgram(); - glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + if (separable) { + glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + } glProgramBinary(shader.handle, dump.binary_format, dump.binary.data(), static_cast(dump.binary.size())); @@ -323,6 +325,10 @@ public: GLuint gs = 0; GLuint fs = 0; + std::size_t vs_hash = 0; + std::size_t gs_hash = 0; + std::size_t fs_hash = 0; + + bool 
operator==(const ShaderTuple& rhs) const { return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); } @@ -331,6 +337,14 @@ public: return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs); } + std::size_t GetConfigHash() const { + std::size_t hash = 0; + boost::hash_combine(hash, vs_hash); + boost::hash_combine(hash, gs_hash); + boost::hash_combine(hash, fs_hash); + return hash; + } + struct Hash { std::size_t operator()(const ShaderTuple& tuple) const { std::size_t hash = 0; @@ -354,6 +368,7 @@ public: FragmentShaders fragment_shaders; std::unordered_map program_cache; + std::unordered_map disk_program_cache; OGLPipeline pipeline; ShaderDiskCache disk_cache; }; @@ -370,6 +385,8 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, if (handle == 0) return false; impl->current.vs = handle; + impl->current.vs_hash = config.Hash(); + // Save VS to the disk cache if its a new shader if (result) { auto& disk_cache = impl->disk_cache; @@ -386,22 +403,26 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, void ShaderProgramManager::UseTrivialVertexShader() { impl->current.vs = impl->trivial_vertex_shader.Get(); + impl->current.vs_hash = 0; } void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) { PicaFixedGSConfig gs_config(regs); auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config); impl->current.gs = handle; + impl->current.gs_hash = gs_config.Hash(); } void ShaderProgramManager::UseTrivialGeometryShader() { impl->current.gs = 0; + impl->current.gs_hash = 0; } void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) { PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs); auto [handle, result] = impl->fragment_shaders.Get(config); impl->current.fs = handle; + impl->current.fs_hash = config.Hash(); // Save FS to the disk cache if its a new shader if (result) { auto& disk_cache = impl->disk_cache; @@ -431,7 +452,16 @@ void 
ShaderProgramManager::ApplyTo(OpenGLState& state) { } else { OGLProgram& cached_program = impl->program_cache[impl->current]; if (cached_program.handle == 0) { - cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs}); + const u64 unique_identifier = impl->current.GetConfigHash(); + const auto disk_program = impl->disk_program_cache.find(unique_identifier); + if (disk_program != impl->disk_program_cache.end()) { + cached_program = std::move(disk_program->second); + impl->disk_program_cache.erase(disk_program); + } else { + cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs}); + auto& disk_cache = impl->disk_cache; + disk_cache.SaveDumpToFile(unique_identifier, cached_program.handle, VideoCore::g_hw_shader_accurate_mul); + } SetShaderUniformBlockBindings(cached_program.handle); SetShaderSamplerBindings(cached_program.handle); } @@ -444,7 +474,7 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, if (!impl->separable) { - LOG_ERROR(Render_OpenGL, - "Cannot load disk cache as separate shader programs are unsupported!"); - return; + LOG_INFO(Render_OpenGL, + "Separate shader programs are unsupported, loading non-separable program cache"); + // Fall through: in this mode the precompiled cache holds whole linked programs } auto& disk_cache = impl->disk_cache; const auto transferable = disk_cache.LoadTransferable(); @@ -453,7 +483,9 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, } const auto& raws = *transferable; - auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); + // Load uncompressed precompiled file for non-separable shaders. + // Precompiled file for separable shaders is compressed. 
+ auto [decompiled, dumps] = disk_cache.LoadPrecompiled(impl->separable); if (stop_loading) { return; @@ -499,13 +531,14 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, OGLProgram shader; if (dump != dump_map.end() && decomp != decompiled_map.end()) { - // Only load this shader if its sanitize_mul setting matches - if (decomp->second.sanitize_mul == VideoCore::g_hw_shader_accurate_mul) { + // Only load the vertex shader if its sanitize_mul setting matches + if (raw.GetProgramType() == ProgramType::VS && + decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { continue; } // If the shader is dumped, attempt to load it - shader = GeneratePrecompiledProgram(dump->second, supported_formats); + shader = GeneratePrecompiledProgram(dump->second, supported_formats, impl->separable); if (shader.handle == 0) { // If any shader failed, stop trying to compile, delete the cache, and start // loading from raws @@ -527,7 +560,7 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, } else { // Unsupported shader type got stored somehow so nuke the cache - LOG_CRITICAL(Frontend, "failed to load raw programtype {}", + LOG_CRITICAL(Frontend, "failed to load raw ProgramType {}", raw.GetProgramType()); compilation_failed = true; return; @@ -543,10 +576,46 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, } }; - LoadPrecompiledWorker(0, raws.size(), raws, decompiled, dumps); + const auto LoadPrecompiledProgram = + [&](const ShaderDecompiledMap& decompiled_map, const ShaderDumpsMap& dump_map) { + std::size_t i{0}; + for (const auto& dump : dump_map) { + if (stop_loading) { + break; + } + const u64 unique_identifier{dump.first}; + const auto decomp{decompiled_map.find(unique_identifier)}; + // Skip dumps without a decompiled entry or with a mismatched sanitize_mul setting + if (decomp == decompiled_map.end() || + decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { + continue; + } + + // If the shader program is dumped, 
attempt to load it + OGLProgram shader = + GeneratePrecompiledProgram(dump.second, supported_formats, impl->separable); + if (shader.handle != 0) { + impl->disk_program_cache.insert({unique_identifier, std::move(shader)}); + } else { + LOG_ERROR(Frontend, "Failed to link Precompiled program!"); + compilation_failed = true; + break; + } + if (callback) { + callback(VideoCore::LoadCallbackStage::Decompile, ++i, dump_map.size()); + } + } + }; + + if (impl->separable) { + LoadPrecompiledWorker(0, raws.size(), raws, decompiled, dumps); + } else { + LoadPrecompiledProgram(decompiled, dumps); + } if (compilation_failed) { // Invalidate the precompiled cache if a shader dumped shader was rejected + impl->disk_program_cache.clear(); disk_cache.InvalidatePrecompiled(); dumps.clear(); precompiled_cache_altered = true; @@ -599,8 +668,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, compilation_failed = true; return; } - // If this is a new shader, add it the precompiled cache - if (result) { + // If this is a new separable shader, add it to the precompiled cache + if (impl->separable && result) { disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul); disk_cache.SaveDump(unique_identifier, handle); precompiled_cache_altered = true; @@ -621,6 +690,6 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, if (precompiled_cache_altered) { disk_cache.SaveVirtualPrecompiledFile(); } -} // namespace OpenGL +} } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a51ad443b..036cd49a3 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -84,6 +84,7 @@ GLuint LoadProgram(bool separable_program, const std::vector& shaders) { glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); } + glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, 
GL_TRUE); glLinkProgram(program_id); // Check the program