From 49b15af05437e665088105bd4873be478ad12b4b Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 9 Sep 2018 19:01:21 -0400
Subject: [PATCH] gl_rasterizer: Implement multiple color attachments.

---
 src/video_core/engines/maxwell_3d.h           |  22 +++-
 .../renderer_opengl/gl_rasterizer.cpp         | 116 +++++++-----------
 .../renderer_opengl/gl_rasterizer.h           |  12 +-
 .../renderer_opengl/gl_rasterizer_cache.cpp   |  74 ++++-------
 .../renderer_opengl/gl_rasterizer_cache.h     |  11 +-
 5 files changed, 99 insertions(+), 136 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index f59d01738d..d3be900a4a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -533,7 +533,11 @@ public:
                 u32 stencil_back_mask;
                 u32 stencil_back_func_mask;
 
-                INSERT_PADDING_WORDS(0x20);
+                INSERT_PADDING_WORDS(0x13);
+
+                u32 rt_separate_frag_data;
+
+                INSERT_PADDING_WORDS(0xC);
 
                 struct {
                     u32 address_high;
@@ -557,7 +561,22 @@ public:
                 struct {
                     union {
                         BitField<0, 4, u32> count;
+                        BitField<4, 3, u32> map_0;
+                        BitField<7, 3, u32> map_1;
+                        BitField<10, 3, u32> map_2;
+                        BitField<13, 3, u32> map_3;
+                        BitField<16, 3, u32> map_4;
+                        BitField<19, 3, u32> map_5;
+                        BitField<22, 3, u32> map_6;
+                        BitField<25, 3, u32> map_7;
                     };
+
+                    u32 GetMap(size_t index) const {
+                        const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
+                                                                     map_4, map_5, map_6, map_7};
+                        ASSERT(index < maps.size());
+                        return maps[index];
+                    }
                 } rt_control;
 
                 INSERT_PADDING_WORDS(0x2);
@@ -968,6 +987,7 @@ ASSERT_REG_POSITION(clear_stencil, 0x368);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
 ASSERT_REG_POSITION(rt_control, 0x487);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e6d6917fa6..c7e2c877c6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -294,17 +294,10 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
         cached_pages.add({pages_interval, delta});
 }
 
-std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
-                                                                    bool using_depth_fb,
-                                                                    bool preserve_contents) {
+void RasterizerOpenGL::ConfigureFramebuffers(bool using_depth_fb, bool preserve_contents) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
-    if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
-        LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
-        using_color_fb = false;
-    }
-
     const bool has_stencil = regs.stencil_enable;
     const bool write_color_fb =
         state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
@@ -314,41 +307,52 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
         (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
         (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask));
 
-    Surface color_surface;
     Surface depth_surface;
-    MathUtil::Rectangle<u32> surfaces_rect;
-    std::tie(color_surface, depth_surface, surfaces_rect) =
-        res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents);
+    if (using_depth_fb) {
+        depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
+    }
 
-    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
-    const MathUtil::Rectangle<u32> draw_rect{
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
-                                         surfaces_rect.left, surfaces_rect.right)), // Left
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
-                                         surfaces_rect.bottom, surfaces_rect.top)), // Top
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
-                                         surfaces_rect.left, surfaces_rect.right)), // Right
-        static_cast<u32>(
-            std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
-                            surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+    // TODO(bunnei): Figure out how the below register works. According to envytools, this should be
+    // used to enable multiple render targets. However, it is left unset on all games that I have
+    // tested.
+    ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
 
     // Bind the framebuffer surfaces
-    BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
-
-    SyncViewport(surfaces_rect);
-
-    // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
-    // scissor test to prevent drawing outside of the framebuffer region
-    state.scissor.enabled = true;
-    state.scissor.x = draw_rect.left;
-    state.scissor.y = draw_rect.bottom;
-    state.scissor.width = draw_rect.GetWidth();
-    state.scissor.height = draw_rect.GetHeight();
+    state.draw.draw_framebuffer = framebuffer.handle;
     state.Apply();
 
-    // Only return the surface to be marked as dirty if writing to it is enabled.
-    return std::make_pair(write_color_fb ? color_surface : nullptr,
-                          write_depth_fb ? depth_surface : nullptr);
+    std::array<GLenum, Maxwell::NumRenderTargets> buffers;
+    for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+        Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
+        buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
+                               GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
+                               color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
+    }
+
+    glDrawBuffers(regs.rt_control.count, buffers.data());
+
+    if (depth_surface) {
+        if (has_stencil) {
+            // Attach both depth and stencil
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   depth_surface->Texture().handle, 0);
+        } else {
+            // Attach depth
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                   depth_surface->Texture().handle, 0);
+            // Clear stencil attachment
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+        }
+    } else {
+        // Clear both depth and stencil attachment
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+    }
+
+    SyncViewport();
+
+    state.Apply();
 }
 
 void RasterizerOpenGL::Clear() {
@@ -407,8 +411,7 @@ void RasterizerOpenGL::Clear() {
 
     ScopeAcquireGLContext acquire_context{emu_window};
 
-    auto [dirty_color_surface, dirty_depth_surface] =
-        ConfigureFramebuffers(use_color_fb, use_depth_fb, false);
+    ConfigureFramebuffers(use_depth_fb, false);
 
     clear_state.Apply();
 
@@ -430,8 +433,7 @@ void RasterizerOpenGL::DrawArrays() {
 
     ScopeAcquireGLContext acquire_context{emu_window};
 
-    const auto [dirty_color_surface, dirty_depth_surface] =
-        ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true);
+    ConfigureFramebuffers(true, true);
 
     SyncDepthTestState();
     SyncStencilTestState();
@@ -729,38 +731,12 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
     return current_unit + static_cast<u32>(entries.size());
 }
 
-void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
-                                               const Surface& depth_surface, bool has_stencil) {
-    state.draw.draw_framebuffer = framebuffer.handle;
-    state.Apply();
-
-    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
-                           color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
-    if (depth_surface != nullptr) {
-        if (has_stencil) {
-            // attach both depth and stencil
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->Texture().handle, 0);
-        } else {
-            // attach depth
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->Texture().handle, 0);
-            // clear stencil attachment
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-        }
-    } else {
-        // clear both depth and stencil attachment
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-    }
-}
-
-void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
+void RasterizerOpenGL::SyncViewport() {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
     const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
 
-    state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
-    state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
+    state.viewport.x = viewport_rect.left;
+    state.viewport.y = viewport_rect.bottom;
     state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
     state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index c6bb1516bf..3d62cc196b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -97,14 +97,8 @@ private:
         GLvec4 border_color;
     };
 
-    /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth>
-    /// surfaces if writing was enabled.
-    std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
-                                                      bool preserve_contents);
-
-    /// Binds the framebuffer color and depth surface
-    void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
-                                 bool has_stencil);
+    /// Configures the color and depth framebuffer states
+    void ConfigureFramebuffers(bool using_depth_fb, bool preserve_contents);
 
     /*
      * Configures the current constbuffers to use for the draw command.
@@ -127,7 +121,7 @@ private:
                       u32 current_unit);
 
     /// Syncs the viewport to match the guest state
-    void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
+    void SyncViewport();
 
     /// Syncs the clip enabled status to match the guest state
     void SyncClipEnabled();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index fa730b9e61..20a8e1cdad 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -61,8 +61,8 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     return params;
 }
 
-/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
-    const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) {
+    const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
     SurfaceParams params{};
     params.addr = TryGetCpuAddr(config.Address());
     params.is_tiled = true;
@@ -708,62 +708,34 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
     return GetSurface(SurfaceParams::CreateForTexture(config));
 }
 
-SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb,
-                                                                       bool using_depth_fb,
-                                                                       bool preserve_contents) {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
-    // TODO(bunnei): This is hard corded to use just the first render buffer
-    LOG_TRACE(Render_OpenGL, "hard-coded for render target 0!");
-
-    // get color and depth surfaces
-    SurfaceParams color_params{};
-    SurfaceParams depth_params{};
-
-    if (using_color_fb) {
-        color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]);
+Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
+    const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
+    if (!regs.zeta.Address() || !regs.zeta_enable) {
+        return {};
     }
 
-    if (using_depth_fb) {
-        depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
-                                                           regs.zeta.Address(), regs.zeta.format);
+    SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
+        regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)};
+
+    return GetSurface(depth_params, preserve_contents);
+}
+
+Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) {
+    const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
+
+    ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
+
+    if (index >= regs.rt_control.count) {
+        return {};
     }
 
-    MathUtil::Rectangle<u32> color_rect{};
-    Surface color_surface;
-    if (using_color_fb) {
-        color_surface = GetSurface(color_params, preserve_contents);
-        if (color_surface) {
-            color_rect = color_surface->GetSurfaceParams().GetRect();
-        }
+    if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
+        return {};
     }
 
-    MathUtil::Rectangle<u32> depth_rect{};
-    Surface depth_surface;
-    if (using_depth_fb) {
-        depth_surface = GetSurface(depth_params, preserve_contents);
-        if (depth_surface) {
-            depth_rect = depth_surface->GetSurfaceParams().GetRect();
-        }
-    }
+    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
 
-    MathUtil::Rectangle<u32> fb_rect{};
-    if (color_surface && depth_surface) {
-        fb_rect = color_rect;
-        // Color and Depth surfaces must have the same dimensions and offsets
-        if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
-            color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
-            color_surface = GetSurface(color_params);
-            depth_surface = GetSurface(depth_params);
-            fb_rect = color_surface->GetSurfaceParams().GetRect();
-        }
-    } else if (color_surface) {
-        fb_rect = color_rect;
-    } else if (depth_surface) {
-        fb_rect = depth_rect;
-    }
-
-    return std::make_tuple(color_surface, depth_surface, fb_rect);
+    return GetSurface(color_params, preserve_contents);
 }
 
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 8312b2c7a5..e215f260f4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -669,8 +669,7 @@ struct SurfaceParams {
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
 
     /// Creates SurfaceParams from a framebuffer configuration
-    static SurfaceParams CreateForFramebuffer(
-        const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
+    static SurfaceParams CreateForFramebuffer(size_t index);
 
     /// Creates SurfaceParams for a depth buffer configuration
     static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -774,9 +773,11 @@ public:
     /// Get a surface based on the texture configuration
     Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
 
-    /// Get the color and depth surfaces based on the framebuffer configuration
-    SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
-                                                    bool preserve_contents);
+    /// Get the depth surface based on the framebuffer configuration
+    Surface GetDepthBufferSurface(bool preserve_contents);
+
+    /// Get the color surface based on the framebuffer configuration and the specified render target
+    Surface GetColorBufferSurface(size_t index, bool preserve_contents);
 
     /// Flushes the surface to Switch memory
     void FlushSurface(const Surface& surface);