From dc27252352a27f466c65e9008b3d56a2540ce224 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 22 May 2020 20:53:27 -0300
Subject: [PATCH 1/4] shader_cache: Implement a generic shader cache

Implement a generic shader cache for fast lookups and invalidations.
Invalidations are cheap but expensive when a shader is invalidated.

Use two mutexes instead of one to avoid locking invalidations for
lookups and vice versa. When a shader has to be removed, lookups are
locked as expected.
---
 src/video_core/CMakeLists.txt |   1 +
 src/video_core/shader_cache.h | 228 ++++++++++++++++++++++++++++++++++
 2 files changed, 229 insertions(+)
 create mode 100644 src/video_core/shader_cache.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2bf8d68ce0..cf1885f92b 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -93,6 +93,7 @@ add_library(video_core STATIC
     renderer_opengl/utils.h
     sampler_cache.cpp
     sampler_cache.h
+    shader_cache.h
     shader/decode/arithmetic.cpp
     shader/decode/arithmetic_immediate.cpp
     shader/decode/bfe.cpp
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 0000000000..a23c238868
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,228 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class T>
+class ShaderCache {
+    static constexpr u64 PAGE_SHIFT = 14;
+
+    struct Entry {
+        VAddr addr_start;
+        VAddr addr_end;
+        T* data;
+
+        bool is_memory_marked = true;
+
+        constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
+            return start < addr_end && addr_start < end;
+        }
+    };
+
+public:
+    virtual ~ShaderCache() = default;
+
+    /// @brief Removes shaders inside a given region
+    /// @note Checks for ranges
+    /// @param addr Start address of the invalidation
+    /// @param size Number of bytes of the invalidation
+    void InvalidateRegion(VAddr addr, std::size_t size) {
+        std::scoped_lock lock{invalidation_mutex};
+        InvalidatePagesInRegion(addr, size);
+        RemovePendingShaders();
+    }
+
+    /// @brief Unmarks a memory region as cached and marks it for removal
+    /// @param addr Start address of the CPU write operation
+    /// @param size Number of bytes of the CPU write operation
+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::lock_guard lock{invalidation_mutex};
+        InvalidatePagesInRegion(addr, size);
+    }
+
+    /// @brief Flushes delayed removal operations
+    void SyncGuestHost() {
+        std::scoped_lock lock{invalidation_mutex};
+        RemovePendingShaders();
+    }
+
+    /// @brief Tries to obtain a cached shader starting in a given address
+    /// @note Doesn't check for ranges, the given address has to be the start of the shader
+    /// @param addr Start address of the shader, this doesn't cache for region
+    /// @return Pointer to a valid shader, nullptr when nothing is found
+    T* TryGet(VAddr addr) const {
+        std::scoped_lock lock{lookup_mutex};
+
+        const auto it = lookup_cache.find(addr);
+        if (it == lookup_cache.end()) {
+            return nullptr;
+        }
+        return it->second->data;
+    }
+
+protected:
+    explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+
+    /// @brief Register in the cache a given entry
+    /// @param data Shader to store in the cache
+    /// @param addr Start address of the shader that will be registered
+    /// @param size Size in bytes of the shader
+    void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
+        std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+        const VAddr addr_end = addr + size;
+        Entry* const entry = NewEntry(addr, addr_end, data.get());
+
+        const u64 page_end = addr_end >> PAGE_SHIFT;
+        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+            invalidation_cache[page].push_back(entry);
+        }
+
+        storage.push_back(std::move(data));
+
+        rasterizer.UpdatePagesCachedCount(addr, size, 1);
+    }
+
+    /// @brief Called when a shader is going to be removed
+    /// @param shader Shader that will be removed
+    /// @pre invalidation_cache is locked
+    /// @pre lookup_mutex is locked
+    virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
+
+private:
+    /// @brief Invalidate pages in a given region
+    /// @pre invalidation_mutex is locked
+    void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
+        const VAddr addr_end = addr + size;
+        const u64 page_end = addr_end >> PAGE_SHIFT;
+        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+            const auto it = invalidation_cache.find(page);
+            if (it == invalidation_cache.end()) {
+                continue;
+            }
+
+            std::vector<Entry*>& entries = it->second;
+            InvalidatePageEntries(entries, addr, addr_end);
+
+            // If there's nothing else in this page, remove it to avoid overpopulating the hash map.
+            if (entries.empty()) {
+                invalidation_cache.erase(it);
+            }
+        }
+    }
+
+    /// @brief Remove shaders marked for deletion
+    /// @pre invalidation_mutex is locked
+    void RemovePendingShaders() {
+        if (marked_for_removal.empty()) {
+            return;
+        }
+        std::scoped_lock lock{lookup_mutex};
+
+        std::vector<T*> removed_shaders;
+        removed_shaders.reserve(marked_for_removal.size());
+
+        for (Entry* const entry : marked_for_removal) {
+            if (lookup_cache.erase(entry->addr_start) > 0) {
+                removed_shaders.push_back(entry->data);
+            }
+        }
+        marked_for_removal.clear();
+
+        if (!removed_shaders.empty()) {
+            RemoveShadersFromStorage(std::move(removed_shaders));
+        }
+    }
+
+    /// @brief Invalidates entries in a given range for the passed page
+    /// @param entries         Vector of entries in the page, it will be modified on overlaps
+    /// @param addr            Start address of the invalidation
+    /// @param addr_end        Non-inclusive end address of the invalidation
+    /// @pre invalidation_mutex is locked
+    void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+        auto it = entries.begin();
+        while (it != entries.end()) {
+            Entry* const entry = *it;
+            if (!entry->Overlaps(addr, addr_end)) {
+                ++it;
+                continue;
+            }
+            UnmarkMemory(entry);
+            marked_for_removal.push_back(entry);
+
+            it = entries.erase(it);
+        }
+    }
+
+    /// @brief Unmarks an entry from the rasterizer cache
+    /// @param entry Entry to unmark from memory
+    void UnmarkMemory(Entry* entry) {
+        if (!entry->is_memory_marked) {
+            return;
+        }
+        entry->is_memory_marked = false;
+
+        const VAddr addr = entry->addr_start;
+        const std::size_t size = entry->addr_end - addr;
+        rasterizer.UpdatePagesCachedCount(addr, size, -1);
+    }
+
+    /// @brief Removes a vector of shaders from a list
+    /// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
+    /// @pre invalidation_mutex is locked
+    /// @pre lookup_mutex is locked
+    void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
+        // Remove duplicates
+        std::sort(removed_shaders.begin(), removed_shaders.end());
+        removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
+                              removed_shaders.end());
+
+        // Now that there are no duplicates, we can notify removals
+        for (T* const shader : removed_shaders) {
+            OnShaderRemoval(shader);
+        }
+
+        // Remove them from the cache
+        const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
+            return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
+                   removed_shaders.end();
+        };
+        storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
+    }
+
+    /// @brief Creates a new entry in the lookup cache and returns its pointer
+    /// @pre lookup_mutex is locked
+    Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
+        auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
+        Entry* const entry_pointer = entry.get();
+
+        lookup_cache.emplace(addr, std::move(entry));
+        return entry_pointer;
+    }
+
+    VideoCore::RasterizerInterface& rasterizer;
+
+    mutable std::mutex lookup_mutex;
+    std::mutex invalidation_mutex;
+
+    std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
+    std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
+    std::vector<std::unique_ptr<T>> storage;
+    std::vector<Entry*> marked_for_removal;
+};
+
+} // namespace VideoCommon

From b96f65b62b143544716f66a65e93d2dcb141ee2b Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 22 May 2020 20:55:38 -0300
Subject: [PATCH 2/4] gl_shader_cache: Use generic shader cache

Trivially port the generic shader cache to OpenGL.
---
 .../renderer_opengl/gl_rasterizer.cpp         | 19 ++--
 .../renderer_opengl/gl_rasterizer.h           | 16 ++--
 .../renderer_opengl/gl_shader_cache.cpp       | 87 +++++++++----------
 .../renderer_opengl/gl_shader_cache.h         | 51 +++++------
 4 files changed, 80 insertions(+), 93 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 55e79aaf65..909ca9e0e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,6 +30,7 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/shader_cache.h"
 
 namespace OpenGL {
 
@@ -282,7 +283,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             continue;
         }
 
-        Shader shader{shader_cache.GetStageProgram(program)};
+        Shader* const shader = shader_cache.GetStageProgram(program);
 
         if (device.UseAssemblyShaders()) {
             // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
@@ -842,7 +843,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     return true;
 }
 
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
     static constexpr std::array PARAMETER_LUT = {
         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
         GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
@@ -872,7 +873,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
     }
 }
 
-void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
     MICROPROFILE_SCOPE(OpenGL_UBO);
     const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
     const auto& entries = kernel->GetEntries();
@@ -941,7 +942,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
     }
 }
 
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
     auto& gpu{system.GPU()};
     auto& memory_manager{gpu.MemoryManager()};
     const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
@@ -956,7 +957,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
     }
 }
 
-void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
     auto& gpu{system.GPU()};
     auto& memory_manager{gpu.MemoryManager()};
     const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
@@ -979,7 +980,7 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
                       static_cast<GLsizeiptr>(size));
 }
 
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
     const auto& maxwell3d = system.GPU().Maxwell3D();
     u32 binding = device.GetBaseBindings(stage_index).sampler;
@@ -992,7 +993,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
     }
 }
 
-void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
     const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
@@ -1021,7 +1022,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
     }
 }
 
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
     const auto& maxwell3d = system.GPU().Maxwell3D();
     u32 binding = device.GetBaseBindings(stage_index).image;
     for (const auto& entry : shader->GetEntries().images) {
@@ -1031,7 +1032,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
     }
 }
 
-void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
+void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
     const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
     for (const auto& entry : shader->GetEntries().images) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f5dc56a0e2..1f8d45eac6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -100,10 +100,10 @@ private:
     void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
 
     /// Configures the current constbuffers to use for the draw command.
-    void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
+    void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
 
     /// Configures the current constbuffers to use for the kernel invocation.
-    void SetupComputeConstBuffers(const Shader& kernel);
+    void SetupComputeConstBuffers(Shader* kernel);
 
     /// Configures a constant buffer.
     void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
@@ -111,30 +111,30 @@ private:
                           std::size_t unified_offset);
 
     /// Configures the current global memory entries to use for the draw command.
-    void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
+    void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
 
     /// Configures the current global memory entries to use for the kernel invocation.
-    void SetupComputeGlobalMemory(const Shader& kernel);
+    void SetupComputeGlobalMemory(Shader* kernel);
 
     /// Configures a constant buffer.
     void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
                            std::size_t size);
 
     /// Configures the current textures to use for the draw command.
-    void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
+    void SetupDrawTextures(std::size_t stage_index, Shader* shader);
 
     /// Configures the textures used in a compute shader.
-    void SetupComputeTextures(const Shader& kernel);
+    void SetupComputeTextures(Shader* kernel);
 
     /// Configures a texture.
     void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
                       const SamplerEntry& entry);
 
     /// Configures images in a graphics shader.
-    void SetupDrawImages(std::size_t stage_index, const Shader& shader);
+    void SetupDrawImages(std::size_t stage_index, Shader* shader);
 
     /// Configures images in a compute shader.
-    void SetupComputeImages(const Shader& shader);
+    void SetupComputeImages(Shader* shader);
 
     /// Configures an image.
     void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a991ca64a7..7c6797e02f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -29,6 +29,7 @@
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
 
 namespace OpenGL {
 
@@ -194,12 +195,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
 
 } // Anonymous namespace
 
-CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
-                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
-                           ShaderEntries entries, ProgramSharedPtr program_)
-    : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
-      size_in_bytes{size_in_bytes}, program{std::move(program_)} {
-    // Assign either the assembly program or source program. We can't have both.
+Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
+               ProgramSharedPtr program)
+    : registry{std::move(registry)}, entries{std::move(entries)}, program{std::move(program)} {
     handle = program->assembly_program.handle;
     if (handle == 0) {
         handle = program->source_program.handle;
@@ -207,16 +205,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
     ASSERT(handle != 0);
 }
 
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
 
-GLuint CachedShader::GetHandle() const {
+GLuint Shader::GetHandle() const {
     DEBUG_ASSERT(registry->IsConsistent());
     return handle;
 }
 
-Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
-                                           Maxwell::ShaderProgram program_type, ProgramCode code,
-                                           ProgramCode code_b) {
+std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
+                                                      Maxwell::ShaderProgram program_type,
+                                                      ProgramCode code, ProgramCode code_b) {
     const auto shader_type = GetShaderType(program_type);
     const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
@@ -241,12 +239,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
-                         MakeEntries(params.device, ir, shader_type), std::move(program)));
+    return std::unique_ptr<Shader>(new Shader(
+        std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
 }
 
-Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
+std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
+                                                       ProgramCode code) {
     const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
     auto& engine = params.system.GPU().KeplerCompute();
@@ -266,23 +264,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
-                         MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
+    return std::unique_ptr<Shader>(new Shader(std::move(registry),
+                                              MakeEntries(params.device, ir, ShaderType::Compute),
+                                              std::move(program)));
 }
 
-Shader CachedShader::CreateFromCache(const ShaderParameters& params,
-                                     const PrecompiledShader& precompiled_shader,
-                                     std::size_t size_in_bytes) {
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
-                         precompiled_shader.entries, precompiled_shader.program));
+std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
+                                                const PrecompiledShader& precompiled_shader) {
+    return std::unique_ptr<Shader>(new Shader(
+        precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
 }
 
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                      Core::Frontend::EmuWindow& emu_window, const Device& device)
-    : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
-      disk_cache{system} {}
+    : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
+      emu_window{emu_window}, device{device}, disk_cache{system} {}
+
+ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
 
 void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                                       const VideoCore::DiskResourceLoadCallback& callback) {
@@ -436,7 +434,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
     return program;
 }
 
-Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
+Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
         return last_shaders[static_cast<std::size_t>(program)];
     }
@@ -446,8 +444,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
     // Look up shader in the cache based on address
     const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
-    Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
-    if (shader) {
+    if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
         return last_shaders[static_cast<std::size_t>(program)] = shader;
     }
 
@@ -468,30 +465,29 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     const ShaderParameters params{system,    disk_cache, device,
                                   *cpu_addr, host_ptr,   unique_identifier};
 
+    std::unique_ptr<Shader> shader;
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
-        shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
-                                                     std::move(code_b));
+        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
     } else {
-        const std::size_t size_in_bytes = code.size() * sizeof(u64);
-        shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+        shader = Shader::CreateFromCache(params, found->second);
     }
 
+    Shader* const result = shader.get();
     if (cpu_addr) {
-        Register(shader);
+        Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64));
     } else {
-        null_shader = shader;
+        null_shader = std::move(shader);
     }
 
-    return last_shaders[static_cast<std::size_t>(program)] = shader;
+    return last_shaders[static_cast<std::size_t>(program)] = result;
 }
 
-Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
+Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     auto& memory_manager{system.GPU().MemoryManager()};
     const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
 
-    auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
-    if (kernel) {
+    if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
         return kernel;
     }
 
@@ -503,20 +499,21 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     const ShaderParameters params{system,    disk_cache, device,
                                   *cpu_addr, host_ptr,   unique_identifier};
 
+    std::unique_ptr<Shader> kernel;
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
-        kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
+        kernel = Shader::CreateKernelFromMemory(params, std::move(code));
     } else {
-        const std::size_t size_in_bytes = code.size() * sizeof(u64);
-        kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+        kernel = Shader::CreateFromCache(params, found->second);
     }
 
+    Shader* const result = kernel.get();
     if (cpu_addr) {
-        Register(kernel);
+        Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64));
     } else {
-        null_kernel = kernel;
+        null_kernel = std::move(kernel);
     }
-    return kernel;
+    return result;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b2ae8d7f97..6848f13884 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,12 +18,12 @@
 
 #include "common/common_types.h"
 #include "video_core/engines/shader_type.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
 
 namespace Core {
 class System;
@@ -35,12 +35,10 @@ class EmuWindow;
 
 namespace OpenGL {
 
-class CachedShader;
 class Device;
 class RasterizerOpenGL;
 struct UnspecializedShader;
 
-using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ProgramHandle {
@@ -64,62 +62,53 @@ struct ShaderParameters {
     u64 unique_identifier;
 };
 
-class CachedShader final : public RasterizerCacheObject {
+class Shader final {
 public:
-    ~CachedShader();
+    ~Shader();
 
     /// Gets the GL program handle for the shader
     GLuint GetHandle() const;
 
-    /// Returns the size in bytes of the shader
-    std::size_t GetSizeInBytes() const override {
-        return size_in_bytes;
-    }
-
     /// Gets the shader entries for the shader
     const ShaderEntries& GetEntries() const {
         return entries;
     }
 
-    static Shader CreateStageFromMemory(const ShaderParameters& params,
-                                        Maxwell::ShaderProgram program_type,
-                                        ProgramCode program_code, ProgramCode program_code_b);
-    static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
+    static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
+                                                         Maxwell::ShaderProgram program_type,
+                                                         ProgramCode program_code,
+                                                         ProgramCode program_code_b);
+    static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
+                                                          ProgramCode code);
 
-    static Shader CreateFromCache(const ShaderParameters& params,
-                                  const PrecompiledShader& precompiled_shader,
-                                  std::size_t size_in_bytes);
+    static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
+                                                   const PrecompiledShader& precompiled_shader);
 
 private:
-    explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
-                          std::shared_ptr<VideoCommon::Shader::Registry> registry,
-                          ShaderEntries entries, ProgramSharedPtr program);
+    explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
+                    ProgramSharedPtr program);
 
     std::shared_ptr<VideoCommon::Shader::Registry> registry;
     ShaderEntries entries;
-    std::size_t size_in_bytes = 0;
     ProgramSharedPtr program;
     GLuint handle = 0;
 };
 
-class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
+class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
 public:
     explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                Core::Frontend::EmuWindow& emu_window, const Device& device);
+    ~ShaderCacheOpenGL() override;
 
     /// Loads disk cache for the current game
     void LoadDiskCache(const std::atomic_bool& stop_loading,
                        const VideoCore::DiskResourceLoadCallback& callback);
 
     /// Gets the current specified shader stage program
-    Shader GetStageProgram(Maxwell::ShaderProgram program);
+    Shader* GetStageProgram(Maxwell::ShaderProgram program);
 
     /// Gets a compute kernel in the passed address
-    Shader GetComputeKernel(GPUVAddr code_addr);
-
-protected:
-    // We do not have to flush this cache as things in it are never modified by us.
-    void FlushObjectInner(const Shader& object) override {}
+    Shader* GetComputeKernel(GPUVAddr code_addr);
 
 private:
     ProgramSharedPtr GeneratePrecompiledProgram(
@@ -132,10 +121,10 @@ private:
     ShaderDiskCacheOpenGL disk_cache;
     std::unordered_map<u64, PrecompiledShader> runtime_cache;
 
-    Shader null_shader{};
-    Shader null_kernel{};
+    std::unique_ptr<Shader> null_shader;
+    std::unique_ptr<Shader> null_kernel;
 
-    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
 };
 
 } // namespace OpenGL

From 678f95e4f824740899c31a94c6aa6adaa634e699 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 22 May 2020 21:01:36 -0300
Subject: [PATCH 3/4] vk_pipeline_cache: Use generic shader cache

Trivial port the generic shader cache to Vulkan.
---
 .../renderer_opengl/gl_shader_cache.cpp       |  6 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 65 ++++++++++---------
 .../renderer_vulkan/vk_pipeline_cache.h       | 33 ++++------
 .../renderer_vulkan/vk_rasterizer.cpp         |  7 +-
 .../renderer_vulkan/vk_rasterizer.h           |  2 +-
 5 files changed, 55 insertions(+), 58 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7c6797e02f..c28486b1db 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -195,9 +195,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
 
 } // Anonymous namespace
 
-Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
-               ProgramSharedPtr program)
-    : registry{std::move(registry)}, entries{std::move(entries)}, program{std::move(program)} {
+Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
+               ProgramSharedPtr program_)
+    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
     handle = program->assembly_program.handle;
     if (handle == 0) {
         handle = program->source_program.handle;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 65a1c62454..20cbeb671e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,7 @@
 #include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
+#include "video_core/shader_cache.h"
 
 namespace Vulkan {
 
@@ -130,19 +131,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
     return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 
-CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
-                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
-                           u32 main_offset)
-    : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
+Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+               VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
+    : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
       registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
                                                            compiler_settings, registry},
       entries{GenerateShaderEntries(shader_ir)} {}
 
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
 
-Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
-    Core::System& system, Tegra::Engines::ShaderType stage) {
-    if (stage == Tegra::Engines::ShaderType::Compute) {
+Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
+                                                              Tegra::Engines::ShaderType stage) {
+    if (stage == ShaderType::Compute) {
         return system.GPU().KeplerCompute();
     } else {
         return system.GPU().Maxwell3D();
@@ -154,16 +154,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
                                  VKDescriptorPool& descriptor_pool,
                                  VKUpdateDescriptorQueue& update_descriptor_queue,
                                  VKRenderPassCache& renderpass_cache)
-    : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
-      descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
-      renderpass_cache{renderpass_cache} {}
+    : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
+      scheduler{scheduler}, descriptor_pool{descriptor_pool},
+      update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
 
 VKPipelineCache::~VKPipelineCache() = default;
 
-std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
     const auto& gpu = system.GPU().Maxwell3D();
 
-    std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+    std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
 
@@ -176,24 +176,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
         const GPUVAddr program_addr{GetShaderAddress(system, program)};
         const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
         ASSERT(cpu_addr);
-        auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
-        if (!shader) {
+
+        Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
+        if (!result) {
             const auto host_ptr{memory_manager.GetPointer(program_addr)};
 
             // No shader found - create a new one
             constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
-            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
+            const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
             ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+            const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+            auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
+                                                   stage_offset);
+            result = shader.get();
 
-            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
-                                                    std::move(code), stage_offset);
             if (cpu_addr) {
-                Register(shader);
+                Register(std::move(shader), *cpu_addr, size_in_bytes);
             } else {
-                null_shader = shader;
+                null_shader = std::move(shader);
             }
         }
-        shaders[index] = std::move(shader);
+        shaders[index] = result;
     }
     return last_shaders = shaders;
 }
@@ -234,19 +238,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
     ASSERT(cpu_addr);
 
-    auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
+    Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
     if (!shader) {
         // No shader found - create a new one
         const auto host_ptr = memory_manager.GetPointer(program_addr);
 
         ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
-        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
-                                                program_addr, *cpu_addr, std::move(code),
-                                                KERNEL_MAIN_OFFSET);
+        const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+        auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
+                                                    std::move(code), KERNEL_MAIN_OFFSET);
+        shader = shader_info.get();
+
         if (cpu_addr) {
-            Register(shader);
+            Register(std::move(shader_info), *cpu_addr, size_in_bytes);
         } else {
-            null_kernel = shader;
+            null_kernel = std::move(shader_info);
         }
     }
 
@@ -262,7 +269,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     return *entry;
 }
 
-void VKPipelineCache::Unregister(const Shader& shader) {
+void VKPipelineCache::OnShaderRemoval(Shader* shader) {
     bool finished = false;
     const auto Finish = [&] {
         // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -294,8 +301,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
         Finish();
         it = compute_cache.erase(it);
     }
-
-    RasterizerCache::Unregister(shader);
 }
 
 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fefb..0a36e51129 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,7 +17,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -26,6 +25,7 @@
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
 
 namespace Core {
 class System;
@@ -41,8 +41,6 @@ class VKFence;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
-class CachedShader;
-using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct GraphicsPipelineCacheKey {
@@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
 
 namespace Vulkan {
 
-class CachedShader final : public RasterizerCacheObject {
+class Shader {
 public:
-    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-                          VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
-                          u32 main_offset);
-    ~CachedShader();
+    explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+                    VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
+    ~Shader();
 
     GPUVAddr GetGpuAddr() const {
         return gpu_addr;
     }
 
-    std::size_t GetSizeInBytes() const override {
-        return program_code.size() * sizeof(u64);
-    }
-
     VideoCommon::Shader::ShaderIR& GetIR() {
         return shader_ir;
     }
@@ -144,25 +137,23 @@ private:
     ShaderEntries entries;
 };
 
-class VKPipelineCache final : public RasterizerCache<Shader> {
+class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
 public:
     explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
                              const VKDevice& device, VKScheduler& scheduler,
                              VKDescriptorPool& descriptor_pool,
                              VKUpdateDescriptorQueue& update_descriptor_queue,
                              VKRenderPassCache& renderpass_cache);
-    ~VKPipelineCache();
+    ~VKPipelineCache() override;
 
-    std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+    std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 
     VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
 protected:
-    void Unregister(const Shader& shader) override;
-
-    void FlushObjectInner(const Shader& object) override {}
+    void OnShaderRemoval(Shader* shader) final;
 
 private:
     std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -175,10 +166,10 @@ private:
     VKUpdateDescriptorQueue& update_descriptor_queue;
     VKRenderPassCache& renderpass_cache;
 
-    Shader null_shader{};
-    Shader null_kernel{};
+    std::unique_ptr<Shader> null_shader;
+    std::unique_ptr<Shader> null_kernel;
 
-    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
 
     GraphicsPipelineCacheKey last_graphics_key;
     VKGraphicsPipeline* last_graphics_pipeline = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 86328237ea..ffea9ee36b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,6 +38,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader_cache.h"
 
 namespace Vulkan {
 
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
 }
 
 std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
-    const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
     std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
     for (std::size_t i = 0; i < std::size(addresses); ++i) {
         addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -775,12 +776,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
 }
 
 void RasterizerVulkan::SetupShaderDescriptors(
-    const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
     texture_cache.GuardSamplers(true);
 
     for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
         // Skip VertexA stage
-        const auto& shader = shaders[stage + 1];
+        Shader* const shader = shaders[stage + 1];
         if (!shader) {
             continue;
         }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0ed0e48c67..ef77c36222 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -168,7 +168,7 @@ private:
                                  bool is_indexed, bool is_instanced);
 
     /// Setup descriptors in the graphics pipeline.
-    void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+    void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
 
     void SetupImageTransitions(Texceptions texceptions,
                                const std::array<View, Maxwell::NumRenderTargets>& color_attachments,

From abcea1bb188cb4db0ee7e27bea26d6458c881c2d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 22 May 2020 21:03:57 -0300
Subject: [PATCH 4/4] rasterizer_cache: Remove files and includes

The rasterizer cache is no longer used. Each cache has its own generic
implementation optimized for the cached data.
---
 src/video_core/CMakeLists.txt                 |   2 -
 src/video_core/rasterizer_cache.cpp           |   7 -
 src/video_core/rasterizer_cache.h             | 253 ------------------
 .../renderer_opengl/gl_buffer_cache.h         |   1 -
 .../renderer_opengl/gl_rasterizer.h           |   1 -
 .../renderer_vulkan/vk_buffer_cache.h         |   1 -
 .../renderer_vulkan/vk_pipeline_cache.cpp     |   7 +-
 7 files changed, 3 insertions(+), 269 deletions(-)
 delete mode 100644 src/video_core/rasterizer_cache.cpp
 delete mode 100644 src/video_core/rasterizer_cache.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index cf1885f92b..39d5d84014 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -49,8 +49,6 @@ add_library(video_core STATIC
     query_cache.h
     rasterizer_accelerated.cpp
     rasterizer_accelerated.h
-    rasterizer_cache.cpp
-    rasterizer_cache.h
     rasterizer_interface.h
     renderer_base.cpp
     renderer_base.h
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
deleted file mode 100644
index 093b2cdf4e..0000000000
--- a/src/video_core/rasterizer_cache.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/rasterizer_cache.h"
-
-RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
deleted file mode 100644
index 096ee337cc..0000000000
--- a/src/video_core/rasterizer_cache.h
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <mutex>
-#include <set>
-#include <unordered_map>
-
-#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range_core.hpp>
-
-#include "common/common_types.h"
-#include "core/settings.h"
-#include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
-
-class RasterizerCacheObject {
-public:
-    explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
-
-    virtual ~RasterizerCacheObject();
-
-    VAddr GetCpuAddr() const {
-        return cpu_addr;
-    }
-
-    /// Gets the size of the shader in guest memory, required for cache management
-    virtual std::size_t GetSizeInBytes() const = 0;
-
-    /// Sets whether the cached object should be considered registered
-    void SetIsRegistered(bool registered) {
-        is_registered = registered;
-    }
-
-    /// Returns true if the cached object is registered
-    bool IsRegistered() const {
-        return is_registered;
-    }
-
-    /// Returns true if the cached object is dirty
-    bool IsDirty() const {
-        return is_dirty;
-    }
-
-    /// Returns ticks from when this cached object was last modified
-    u64 GetLastModifiedTicks() const {
-        return last_modified_ticks;
-    }
-
-    /// Marks an object as recently modified, used to specify whether it is clean or dirty
-    template <class T>
-    void MarkAsModified(bool dirty, T& cache) {
-        is_dirty = dirty;
-        last_modified_ticks = cache.GetModifiedTicks();
-    }
-
-    void SetMemoryMarked(bool is_memory_marked_) {
-        is_memory_marked = is_memory_marked_;
-    }
-
-    bool IsMemoryMarked() const {
-        return is_memory_marked;
-    }
-
-    void SetSyncPending(bool is_sync_pending_) {
-        is_sync_pending = is_sync_pending_;
-    }
-
-    bool IsSyncPending() const {
-        return is_sync_pending;
-    }
-
-private:
-    bool is_registered{};      ///< Whether the object is currently registered with the cache
-    bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
-    bool is_memory_marked{};   ///< Whether the object is marking rasterizer memory.
-    bool is_sync_pending{};    ///< Whether the object is pending deletion.
-    u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
-    VAddr cpu_addr{};          ///< Cpu address memory, unique from emulated virtual address space
-};
-
-template <class T>
-class RasterizerCache : NonCopyable {
-    friend class RasterizerCacheObject;
-
-public:
-    explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
-
-    /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
-
-        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
-        for (auto& object : objects) {
-            FlushObject(object);
-        }
-    }
-
-    /// Mark the specified region as being invalidated
-    void InvalidateRegion(VAddr addr, u64 size) {
-        std::lock_guard lock{mutex};
-
-        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
-        for (auto& object : objects) {
-            if (!object->IsRegistered()) {
-                // Skip duplicates
-                continue;
-            }
-            Unregister(object);
-        }
-    }
-
-    void OnCPUWrite(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
-
-        for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
-            if (object->IsRegistered()) {
-                UnmarkMemory(object);
-                object->SetSyncPending(true);
-                marked_for_unregister.emplace_back(object);
-            }
-        }
-    }
-
-    void SyncGuestHost() {
-        std::lock_guard lock{mutex};
-
-        for (const auto& object : marked_for_unregister) {
-            if (object->IsRegistered()) {
-                object->SetSyncPending(false);
-                Unregister(object);
-            }
-        }
-        marked_for_unregister.clear();
-    }
-
-    /// Invalidates everything in the cache
-    void InvalidateAll() {
-        std::lock_guard lock{mutex};
-
-        while (interval_cache.begin() != interval_cache.end()) {
-            Unregister(*interval_cache.begin()->second.begin());
-        }
-    }
-
-protected:
-    /// Tries to get an object from the cache with the specified cache address
-    T TryGet(VAddr addr) const {
-        const auto iter = map_cache.find(addr);
-        if (iter != map_cache.end())
-            return iter->second;
-        return nullptr;
-    }
-
-    /// Register an object into the cache
-    virtual void Register(const T& object) {
-        std::lock_guard lock{mutex};
-
-        object->SetIsRegistered(true);
-        interval_cache.add({GetInterval(object), ObjectSet{object}});
-        map_cache.insert({object->GetCpuAddr(), object});
-        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
-        object->SetMemoryMarked(true);
-    }
-
-    /// Unregisters an object from the cache
-    virtual void Unregister(const T& object) {
-        std::lock_guard lock{mutex};
-
-        UnmarkMemory(object);
-        object->SetIsRegistered(false);
-        if (object->IsSyncPending()) {
-            marked_for_unregister.remove(object);
-            object->SetSyncPending(false);
-        }
-        const VAddr addr = object->GetCpuAddr();
-        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
-        map_cache.erase(addr);
-    }
-
-    void UnmarkMemory(const T& object) {
-        if (!object->IsMemoryMarked()) {
-            return;
-        }
-        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
-        object->SetMemoryMarked(false);
-    }
-
-    /// Returns a ticks counter used for tracking when cached objects were last modified
-    u64 GetModifiedTicks() {
-        std::lock_guard lock{mutex};
-
-        return ++modified_ticks;
-    }
-
-    virtual void FlushObjectInner(const T& object) = 0;
-
-    /// Flushes the specified object, updating appropriate cache state as needed
-    void FlushObject(const T& object) {
-        std::lock_guard lock{mutex};
-
-        if (!object->IsDirty()) {
-            return;
-        }
-        FlushObjectInner(object);
-        object->MarkAsModified(false, *this);
-    }
-
-    std::recursive_mutex mutex;
-
-private:
-    /// Returns a list of cached objects from the specified memory region, ordered by access time
-    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
-        if (size == 0) {
-            return {};
-        }
-
-        std::vector<T> objects;
-        const ObjectInterval interval{addr, addr + size};
-        for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
-            for (auto& cached_object : pair.second) {
-                if (!cached_object) {
-                    continue;
-                }
-                objects.push_back(cached_object);
-            }
-        }
-
-        std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
-            return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
-        });
-
-        return objects;
-    }
-
-    using ObjectSet = std::set<T>;
-    using ObjectCache = std::unordered_map<VAddr, T>;
-    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
-    using ObjectInterval = typename IntervalCache::interval_type;
-
-    static auto GetInterval(const T& object) {
-        return ObjectInterval::right_open(object->GetCpuAddr(),
-                                          object->GetCpuAddr() + object->GetSizeInBytes());
-    }
-
-    ObjectCache map_cache;
-    IntervalCache interval_cache; ///< Cache of objects
-    u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
-    VideoCore::RasterizerInterface& rasterizer;
-    std::list<T> marked_for_unregister;
-};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a9e86cfc7c..679b9b1d73 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1f8d45eac6..208a4485b1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,7 +19,6 @@
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_accelerated.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d0..65cb3c8ad0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,7 +8,6 @@
 
 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 20cbeb671e..150d86b621 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -335,12 +335,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
         }
 
         const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
-        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
-        const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
-        ASSERT(shader);
+        const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+        Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
 
         const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
-        const auto program_type = GetShaderType(program_enum);
+        const ShaderType program_type = GetShaderType(program_enum);
         const auto& entries = shader->GetEntries();
         program[stage] = {
             Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),