From 5af82a8ed4e2e0b7abc9c7da9f7bb5fa1c83de29 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 26 Dec 2018 01:33:56 -0300
Subject: [PATCH] shader_decode: Implement TEXS.F16

---
 src/video_core/shader/decode/memory.cpp   | 38 +++++++++++++++--------
 src/video_core/shader/glsl_decompiler.cpp | 26 ++++++++++++++++
 src/video_core/shader/shader_ir.h         |  8 +++--
 3 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ce34455126..679e7f01ba 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -219,8 +219,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         if (instr.texs.fp32_flag) {
             WriteTexsInstructionFloat(bb, instr, texture);
         } else {
-            UNIMPLEMENTED();
-            // WriteTexsInstructionHalfFloat(bb, instr, texture);
+            WriteTexsInstructionHalfFloat(bb, instr, texture);
         }
         break;
     }
@@ -416,39 +415,52 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
     return *used_samplers.emplace(entry).first;
 }
 
-void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
-                                         Node texture) {
+void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node texture) {
     // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
     // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
 
     MetaComponents meta;
     std::array<Node, 4> dest;
-
-    std::size_t written_components = 0;
     for (u32 component = 0; component < 4; ++component) {
         if (!instr.texs.IsComponentEnabled(component)) {
             continue;
         }
-        meta.components_map[written_components] = static_cast<u32>(component);
+        meta.components_map[meta.count] = component; // destination slot -> source component
 
-        if (written_components < 2) {
+        if (meta.count < 2) {
             // Write the first two swizzle components to gpr0 and gpr0+1
-            dest[written_components] = GetRegister(instr.gpr0.Value() + written_components % 2);
+            dest[meta.count] = GetRegister(instr.gpr0.Value() + meta.count % 2);
         } else {
             ASSERT(instr.texs.HasTwoDestinations());
             // Write the rest of the swizzle components to gpr28 and gpr28+1
-            dest[written_components] = GetRegister(instr.gpr28.Value() + written_components % 2);
+            dest[meta.count] = GetRegister(instr.gpr28.Value() + meta.count % 2);
         }
-
-        ++written_components;
+        ++meta.count; // number of enabled components mapped so far
     }
 
-    std::generate(dest.begin() + written_components, dest.end(), [&]() { return GetRegister(RZ); });
+    std::generate(dest.begin() + meta.count, dest.end(), [&]() { return GetRegister(RZ); });
 
     bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2],
                            dest[3]));
 }
 
+void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, Node texture) {
+    // TEXS.F16 destination registers are packed in pairs into two registers (just like any half
+    // float instruction). Nothing is emitted when no component is enabled.
+    MetaComponents components;
+    for (u32 channel = 0; channel < 4; ++channel) {
+        if (instr.texs.IsComponentEnabled(channel)) {
+            components.components_map[components.count++] = channel;
+        }
+    }
+    if (components.count == 0) {
+        return;
+    }
+
+    bb.push_back(Operation(OperationCode::AssignCompositeHalf, components, texture,
+                           GetRegister(instr.gpr0), GetRegister(instr.gpr28)));
+}
+
 Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
diff --git a/src/video_core/shader/glsl_decompiler.cpp b/src/video_core/shader/glsl_decompiler.cpp
index d27d381789..5aa7966b93 100644
--- a/src/video_core/shader/glsl_decompiler.cpp
+++ b/src/video_core/shader/glsl_decompiler.cpp
@@ -785,6 +785,31 @@ private:
         return {};
     }
 
+    /// Packs the vec4 of operand 0 as half floats: components 0-1 go to the register of operand
+    /// 1 and, when more than two are enabled, components 2-3 go to the register of operand 2.
+    std::string AssignCompositeHalf(Operation operation) {
+        const auto& meta = std::get<MetaComponents>(operation.GetMeta());
+        const std::string composite = code.GenerateTemporal();
+        code.AddLine("vec4 " + composite + " = " + Visit(operation[0]) + ';');
+
+        const auto ReadComponent = [&](u32 component) {
+            if (component < meta.count) {
+                return composite + '[' + std::to_string(meta.GetSourceComponent(component)) + ']';
+            }
+            return std::string("0");
+        };
+        // The text passed to AddLine carries its own ';', as the vec4 declaration above does.
+        const auto dst1 = std::get<GprNode>(*operation[1]).GetIndex();
+        const std::string src1 = "vec2(" + ReadComponent(0) + ", " + ReadComponent(1) + ')';
+        code.AddLine(GetRegister(dst1) + " = utof(packHalf2x16(" + src1 + "));");
+        if (meta.count > 2) {
+            const auto dst2 = std::get<GprNode>(*operation[2]).GetIndex();
+            const std::string src2 = "vec2(" + ReadComponent(2) + ", " + ReadComponent(3) + ')';
+            code.AddLine(GetRegister(dst2) + " = utof(packHalf2x16(" + src2 + "));");
+        }
+        return {};
+    }
+
     std::string Composite(Operation operation) {
         std::string value = "vec4(";
         for (std::size_t i = 0; i < 4; ++i) {
@@ -1302,6 +1327,7 @@ private:
     static constexpr OperationDecompilersArray operation_decompilers = {
         &GLSLDecompiler::Assign,
         &GLSLDecompiler::AssignComposite,
+        &GLSLDecompiler::AssignCompositeHalf,
 
         &GLSLDecompiler::Composite,
         &GLSLDecompiler::Select,
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 5676d32a9e..7f11599bfa 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -44,8 +44,9 @@ constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
 constexpr u32 RZ = 0xff;
 
 enum class OperationCode {
-    Assign,          /// (float& dest, float src) -> void
-    AssignComposite, /// (MetaComponents, float4 src, float&[4] dst) -> void
+    Assign,              /// (float& dest, float src) -> void
+    AssignComposite,     /// (MetaComponents, float4 src, float&[4] dst) -> void
+    AssignCompositeHalf, /// (MetaComponents, float4 src, float&[2] dst) -> void
 
     Composite, /// (float[4] values) -> float4
     Select,    /// (MetaArithmetic, bool pred, float a, float b) -> float
@@ -279,6 +280,7 @@ struct MetaTexture {
 
 struct MetaComponents {
     std::array<u32, 4> components_map{};
+    u32 count{};
 
     u32 GetSourceComponent(u32 dest_index) const {
         return components_map[dest_index];
@@ -692,6 +694,8 @@ private:
                               Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
 
     void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, Node texture);
+    void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
+                                       Node texture);
 
     Node GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                     Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,