From 5d529698c94da9c13f8a685d817c9f660fd8abee Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 10 Apr 2018 00:02:12 -0400
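Subject: [PATCH 01/12] gl_shader_decompiler: Partially implement MUFU.

Only the Rcp sub op is handled; any other sub op logs an error and throws
DecompileFail. This patch also removes the DecompileFail throw from the
generic "Unhandled instruction" default case, which now only logs.
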
---
 .../renderer_opengl/gl_shader_decompiler.cpp        | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e11711533..a439da470 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -270,6 +270,17 @@ private:
                 SetDest(0, dest, op_a + " + " + op_b, 1, 1);
                 break;
             }
+            case OpCode::Id::MUFU: {
+                switch (instr.sub_op) {
+                case SubOp::Rcp:
+                    SetDest(0, dest, "1.0 / " + op_a, 1, 1);
+                    break;
+                default:
+                    LOG_ERROR(HW_GPU, "Unhandled sub op: 0x%02x", (int)instr.sub_op.Value());
+                    throw DecompileFail("Unhandled sub op");
+                }
+                break;
+            }
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
@@ -298,7 +309,6 @@ private:
                 SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
                 break;
             }
-
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x",
                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
@@ -347,7 +357,6 @@ private:
                 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
                 break;
             }
             }

From 5ba71369acca9c0d58e9cea4c915a921029ddab4 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 10 Apr 2018 00:04:49 -0400
Subject: [PATCH 02/12] gl_shader_decompiler: Use fragment output color for GPR
 0-3.

---
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a439da470..ba3aa7dd1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -187,6 +187,11 @@ private:
 
     /// Generates code representing a temporary (GPR) register.
     std::string GetRegister(const Register& reg) {
+        if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg.GetIndex() < 4) {
+            // GPRs 0-3 are output color for the fragment shader
+            return std::string{"color."} + "rgba"[reg.GetIndex()];
+        }
+
         return *declr_register.insert("register_" + std::to_string(reg)).first;
     }
 

From 8b4443c966c1f00ca468f41584b74fe22a4580af Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 10 Apr 2018 01:26:15 -0400
Subject: [PATCH 03/12] gl_shader_decompiler: Add support for TEXS instruction.

---
 src/video_core/engines/shader_bytecode.h      | 19 +++++++---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 36 +++++++++++++++----
 2 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index eff0c35a1..51cf4af9f 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -16,10 +16,6 @@ struct Register {
 
     constexpr Register(u64 value) : value(value) {}
 
-    constexpr u64 GetIndex() const {
-        return value;
-    }
-
     constexpr operator u64() const {
         return value;
     }
@@ -71,6 +67,19 @@ union Attribute {
     u64 value;
 };
 
+union Sampler {
+    Sampler() = default;
+
+    constexpr Sampler(u64 value) : value(value) {}
+
+    enum class Index : u64 {
+        Sampler_0 = 8,
+    };
+
+    BitField<36, 13, Index> index;
+    u64 value;
+};
+
 union Uniform {
     BitField<20, 14, u64> offset;
     BitField<34, 5, u64> index;
@@ -295,7 +304,6 @@ union Instruction {
     BitField<20, 8, Register> gpr20;
     BitField<20, 7, SubOp> sub_op;
     BitField<28, 8, Register> gpr28;
-    BitField<36, 13, u64> imm36;
     BitField<39, 8, Register> gpr39;
 
     union {
@@ -316,6 +324,7 @@ union Instruction {
 
     Attribute attribute;
     Uniform uniform;
+    Sampler sampler;
 
     u64 hex;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ba3aa7dd1..a8f1ac5b5 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
+using Tegra::Shader::Sampler;
 using Tegra::Shader::SubOp;
 using Tegra::Shader::Uniform;
 
@@ -186,13 +187,13 @@ private:
     }
 
     /// Generates code representing a temporary (GPR) register.
-    std::string GetRegister(const Register& reg) {
-        if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg.GetIndex() < 4) {
+    std::string GetRegister(const Register& reg, unsigned elem = 0) {
+        if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) {
             // GPRs 0-3 are output color for the fragment shader
-            return std::string{"color."} + "rgba"[reg.GetIndex()];
+            return std::string{"color."} + "rgba"[reg + elem];
         }
 
-        return *declr_register.insert("register_" + std::to_string(reg)).first;
+        return *declr_register.insert("register_" + std::to_string(reg + elem)).first;
     }
 
     /// Generates code representing a uniform (C buffer) register.
@@ -201,6 +202,15 @@ private:
         return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']';
     }
 
+    /// Generates code representing a texture sampler, as an element of the GLSL "tex" array.
+    std::string GetSampler(const Sampler& sampler) const {
+        // TODO(Subv): Support more than just texture sampler 0
+        ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
+        const unsigned index{static_cast<unsigned>(sampler.index.Value()) -  // Sampler_0 -> 0
+                             static_cast<unsigned>(Sampler::Index::Sampler_0)};
+        return "tex[" + std::to_string(index) + "]";  // "tex[0]" whenever the assert above holds
+    }
+
     /**
      * Adds code that calls a subroutine.
      * @param subroutine the subroutine to call.
@@ -245,7 +255,7 @@ private:
 
         switch (OpCode::GetInfo(instr.opcode).type) {
         case OpCode::Type::Arithmetic: {
-            ASSERT(!instr.alu.abs_d);
+            ASSERT_MSG(!instr.alu.abs_d, "unimplemented");
 
             std::string dest = GetRegister(instr.gpr0);
             std::string op_a = instr.alu.negate_a ? "-" : "";
@@ -330,15 +340,27 @@ private:
 
             switch (instr.opcode.EffectiveOpCode()) {
             case OpCode::Id::LD_A: {
-                ASSERT(instr.attribute.fmt20.size == 0);
+                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                 SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
                 break;
             }
             case OpCode::Id::ST_A: {
-                ASSERT(instr.attribute.fmt20.size == 0);
+                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                 SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1);
                 break;
             }
+            case OpCode::Id::TEXS: {
+                ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
+                const std::string op_a = GetRegister(instr.gpr8);
+                const std::string op_b = GetRegister(instr.gpr20);
+                const std::string sampler = GetSampler(instr.sampler);
+                const std::string coord = "vec2(" + op_a + ", " + op_b + ")";
+                const std::string texture = "texture(" + sampler + ", " + coord + ")";
+                for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
+                    SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4);
+                }
+                break;
+            }
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x",
                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()),

From 95144cc39c37d370d6911446736865ddca50312d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 10 Apr 2018 21:37:49 -0400
Subject: [PATCH 04/12] gl_shader_decompiler: Implement IPA instruction.

---
 .../renderer_opengl/gl_shader_decompiler.cpp          | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a8f1ac5b5..045ccdb0f 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -379,7 +379,18 @@ private:
                 offset = PROGRAM_END - 1;
                 break;
             }
+            case OpCode::Id::IPA: {
+                const auto& attribute = instr.attribute.fmt28;
 
+                if (attribute.index == Attribute::Index::Position) {
+                    LOG_CRITICAL(HW_GPU, "Unimplemented");
+                    break;
+                }
+
+                std::string dest = GetRegister(instr.gpr0);
+                SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4);
+                break;
+            }
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()),

From 8d4899d6ead9413a6bf8f237508a1941dc12493d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 15 Apr 2018 20:26:45 -0400
Subject: [PATCH 05/12] gl_shader_decompiler: Allow vertex position to be used
 in fragment shader.

---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 28 +++++++++----------
 .../renderer_opengl/gl_shader_gen.cpp         |  6 +++-
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 045ccdb0f..1d8057927 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -156,23 +156,27 @@ private:
 
     /// Generates code representing an input attribute register.
     std::string GetInputAttribute(Attribute::Index attribute) {
-        declr_input_attribute.insert(attribute);
+        switch (attribute) {
+        case Attribute::Index::Position:
+            return "position";
+        default:
+            const u32 index{static_cast<u32>(attribute) -
+                            static_cast<u32>(Attribute::Index::Attribute_0)};
+            if (attribute >= Attribute::Index::Attribute_0) {
+                declr_input_attribute.insert(attribute);
+                return "input_attribute_" + std::to_string(index);
+            }
 
-        const u32 index{static_cast<u32>(attribute) -
-                        static_cast<u32>(Attribute::Index::Attribute_0)};
-        if (attribute >= Attribute::Index::Attribute_0) {
-            return "input_attribute_" + std::to_string(index);
+            LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index);
+            UNREACHABLE();
         }
-
-        LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index);
-        UNREACHABLE();
     }
 
     /// Generates code representing an output attribute register.
     std::string GetOutputAttribute(Attribute::Index attribute) {
         switch (attribute) {
         case Attribute::Index::Position:
-            return "gl_Position";
+            return "position";
         default:
             const u32 index{static_cast<u32>(attribute) -
                             static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -381,12 +385,6 @@ private:
             }
             case OpCode::Id::IPA: {
                 const auto& attribute = instr.attribute.fmt28;
-
-                if (attribute.index == Attribute::Index::Position) {
-                    LOG_CRITICAL(HW_GPU, "Unimplemented");
-                    break;
-                }
-
                 std::string dest = GetRegister(instr.gpr0);
                 SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4);
                 break;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index aeea1c805..8b7f17601 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -27,10 +27,13 @@ out gl_PerVertex {
     vec4 gl_Position;
 };
 
+out vec4 position;
+
 void main() {
     exec_shader();
-}
 
+    gl_Position = position;
+}
 )";
     out += program.first;
     return {out, program.second};
@@ -46,6 +49,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
                                 .get_value_or({});
     out += R"(
 
+in vec4 position;
 out vec4 color;
 
 uniform sampler2D tex[32];

From 5a28dce9eb8db4571cc47352174c78f2c3cfd606 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 15 Apr 2018 20:45:56 -0400
Subject: [PATCH 06/12] gl_shader_decompiler: Implement FMUL/FADD/FFMA
 immediate instructions.

---
 src/video_core/engines/shader_bytecode.h      | 14 +++++
 .../renderer_opengl/gl_shader_decompiler.cpp  | 51 ++++++++++++++-----
 2 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 51cf4af9f..c368fa7fd 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <cstring>
 #include <map>
 #include <string>
 #include "common/bit_field.h"
@@ -289,6 +290,7 @@ enum class SubOp : u64 {
     Lg2 = 0x3,
     Rcp = 0x4,
     Rsq = 0x5,
+    Min = 0x8,
 };
 
 union Instruction {
@@ -307,11 +309,22 @@ union Instruction {
     BitField<39, 8, Register> gpr39;
 
     union {
+        BitField<20, 19, u64> imm20;
         BitField<45, 1, u64> negate_b;
         BitField<46, 1, u64> abs_a;
         BitField<48, 1, u64> negate_a;
         BitField<49, 1, u64> abs_b;
         BitField<50, 1, u64> abs_d;
+        BitField<56, 1, u64> negate_imm;
+
+        float GetImm20() const {  // Rebuilds a float from the imm20 field plus the negate_imm bit.
+            float result{};
+            u32 imm{static_cast<u32>(imm20)};  // raw 19-bit field value
+            imm <<= 12;  // field now occupies bits 12..30 of the 32-bit word; low 12 bits are 0
+            imm |= negate_imm ? 0x80000000 : 0;  // bit 31 is taken from negate_imm
+            std::memcpy(&result, &imm, sizeof(imm));  // copies the bit pattern; assumes 32-bit float
+            return result;
+        }
     } alu;
 
     union {
@@ -319,6 +332,7 @@ union Instruction {
         BitField<49, 1, u64> negate_c;
     } ffma;
 
+    BitField<61, 1, u64> is_b_imm;
     BitField<60, 1, u64> is_b_gpr;
     BitField<59, 1, u64> is_c_gpr;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1d8057927..aa435e5cc 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -190,6 +190,11 @@ private:
         }
     }
 
+    /// Generates code representing an immediate value (instr.alu.GetImm20() as decimal text)
+    static std::string GetImmediate(const Instruction& instr) {
+        // NOTE(review): std::to_string(float) prints 6 decimals, so tiny values become "0.000000"
+        return std::to_string(instr.alu.GetImm20());
+    }
+
     /// Generates code representing a temporary (GPR) register.
     std::string GetRegister(const Register& reg, unsigned elem = 0) {
         if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) {
@@ -269,24 +274,32 @@ private:
             }
 
             std::string op_b = instr.alu.negate_b ? "-" : "";
-            if (instr.is_b_gpr) {
-                op_b += GetRegister(instr.gpr20);
+
+            if (instr.is_b_imm) {
+                op_b += GetImmediate(instr);
             } else {
-                op_b += GetUniform(instr.uniform);
+                if (instr.is_b_gpr) {
+                    op_b += GetRegister(instr.gpr20);
+                } else {
+                    op_b += GetUniform(instr.uniform);
+                }
             }
+
             if (instr.alu.abs_b) {
                 op_b = "abs(" + op_b + ")";
             }
 
             switch (instr.opcode.EffectiveOpCode()) {
             case OpCode::Id::FMUL_C:
-            case OpCode::Id::FMUL_R: {
-                SetDest(0, dest, op_a + " * " + op_b, 1, 1);
+            case OpCode::Id::FMUL_R:
+            case OpCode::Id::FMUL_IMM: {
+                SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
                 break;
             }
             case OpCode::Id::FADD_C:
-            case OpCode::Id::FADD_R: {
-                SetDest(0, dest, op_a + " + " + op_b, 1, 1);
+            case OpCode::Id::FADD_R:
+            case OpCode::Id::FADD_IMM: {
+                SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
                 break;
             }
             case OpCode::Id::MUFU: {
@@ -316,16 +329,28 @@ private:
 
             std::string dest = GetRegister(instr.gpr0);
             std::string op_a = GetRegister(instr.gpr8);
-
             std::string op_b = instr.ffma.negate_b ? "-" : "";
-            op_b += GetUniform(instr.uniform);
-
             std::string op_c = instr.ffma.negate_c ? "-" : "";
-            op_c += GetRegister(instr.gpr39);
 
             switch (instr.opcode.EffectiveOpCode()) {
             case OpCode::Id::FFMA_CR: {
-                SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
+                op_b += GetUniform(instr.uniform);
+                op_c += GetRegister(instr.gpr39);
+                break;
+            }
+            case OpCode::Id::FFMA_RR: {
+                op_b += GetRegister(instr.gpr20);
+                op_c += GetRegister(instr.gpr39);
+                break;
+            }
+            case OpCode::Id::FFMA_RC: {
+                op_b += GetRegister(instr.gpr39);
+                op_c += GetUniform(instr.uniform);
+                break;
+            }
+            case OpCode::Id::FFMA_IMM: {
+                op_b += GetImmediate(instr);
+                op_c += GetRegister(instr.gpr39);
                 break;
             }
             default: {
@@ -336,6 +361,8 @@ private:
                 break;
             }
             }
+
+            SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
             break;
         }
         case OpCode::Type::Memory: {

From 59f4ff465904f59193102b024ac838e848c3272f Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 15 Apr 2018 20:47:41 -0400
Subject: [PATCH 07/12] gl_shader_decompiler: Fix swizzle in GetRegister.

---
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index aa435e5cc..1bb8174e4 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -199,7 +199,7 @@ private:
     std::string GetRegister(const Register& reg, unsigned elem = 0) {
         if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) {
             // GPRs 0-3 are output color for the fragment shader
-            return std::string{"color."} + "rgba"[reg + elem];
+            return std::string{"color."} + "rgba"[(reg + elem) & 3];
         }
 
         return *declr_register.insert("register_" + std::to_string(reg + elem)).first;

From ef2d5ab0c1b75feb848f9987723edc35afb11ce6 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 15 Apr 2018 20:59:37 -0400
Subject: [PATCH 08/12] gl_shader_decompiler: Implement several MUFU subops and
 abs_d.

---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 28 ++++++++++++++-----
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1bb8174e4..8912d4c5e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -241,12 +241,13 @@ private:
      * @param value the code representing the value to assign.
      */
     void SetDest(u64 elem, const std::string& reg, const std::string& value,
-                 u64 dest_num_components, u64 value_num_components) {
+                 u64 dest_num_components, u64 value_num_components, bool is_abs = false) {
         std::string swizzle = ".";
         swizzle += "xyzw"[elem];
 
         std::string dest = reg + (dest_num_components != 1 ? swizzle : "");
         std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : "");
+        src = is_abs ? "abs(" + src + ")" : src;
 
         shader.AddLine(dest + " = " + src + ";");
     }
@@ -264,8 +265,6 @@ private:
 
         switch (OpCode::GetInfo(instr.opcode).type) {
         case OpCode::Type::Arithmetic: {
-            ASSERT_MSG(!instr.alu.abs_d, "unimplemented");
-
             std::string dest = GetRegister(instr.gpr0);
             std::string op_a = instr.alu.negate_a ? "-" : "";
             op_a += GetRegister(instr.gpr8);
@@ -304,8 +303,26 @@ private:
             }
             case OpCode::Id::MUFU: {
                 switch (instr.sub_op) {
+                case SubOp::Cos:
+                    SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Sin:
+                    SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Ex2:
+                    SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Lg2:
+                    SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
                 case SubOp::Rcp:
-                    SetDest(0, dest, "1.0 / " + op_a, 1, 1);
+                    SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Rsq:
+                    SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Min:
+                    SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d);
                     break;
                 default:
                     LOG_ERROR(HW_GPU, "Unhandled sub op: 0x%02x", (int)instr.sub_op.Value());
@@ -324,9 +341,6 @@ private:
             break;
         }
         case OpCode::Type::Ffma: {
-            ASSERT_MSG(!instr.ffma.negate_b, "untested");
-            ASSERT_MSG(!instr.ffma.negate_c, "untested");
-
             std::string dest = GetRegister(instr.gpr0);
             std::string op_a = GetRegister(instr.gpr8);
             std::string op_b = instr.ffma.negate_b ? "-" : "";

From ed542a73098650fbba77db2b52d4423fda6d5c30 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 17 Apr 2018 16:28:47 -0400
Subject: [PATCH 09/12] gl_shader_decompiler: Clean up logging, updating to
 NGLOG_*.

---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 46 +++++++++----------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8912d4c5e..bc5b92dfb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -167,7 +167,7 @@ private:
                 return "input_attribute_" + std::to_string(index);
             }
 
-            LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index);
+            NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
             UNREACHABLE();
         }
     }
@@ -185,7 +185,7 @@ private:
                 return "output_attribute_" + std::to_string(index);
             }
 
-            LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index);
+            NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
             UNREACHABLE();
         }
     }
@@ -325,17 +325,17 @@ private:
                     SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d);
                     break;
                 default:
-                    LOG_ERROR(HW_GPU, "Unhandled sub op: 0x%02x", (int)instr.sub_op.Value());
-                    throw DecompileFail("Unhandled sub op");
+                    NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}",
+                                   static_cast<unsigned>(instr.sub_op.Value()));
+                    UNREACHABLE();
                 }
                 break;
             }
             default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
-                break;
+                NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
             }
             }
             break;
@@ -368,11 +368,10 @@ private:
                 break;
             }
             default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
-                break;
+                NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
             }
             }
 
@@ -407,11 +406,10 @@ private:
                 break;
             }
             default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
-                break;
+                NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
             }
             }
             break;
@@ -431,10 +429,10 @@ private:
                 break;
             }
             default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                break;
+                NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
             }
             }
 
@@ -600,7 +598,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
         GLSLGenerator generator(subroutines, program_code, main_offset, stage);
         return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
     } catch (const DecompileFail& exception) {
-        LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what());
+        NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
     }
     return boost::none;
 }

From 1f6fe062ca5891349465fc67b3874500a61b672f Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 17 Apr 2018 16:33:05 -0400
Subject: [PATCH 10/12] gl_shader_decompiler: Fix warnings with MarkAsUsed.

---
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index bc5b92dfb..6233ee358 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -207,7 +207,8 @@ private:
 
     /// Generates code representing a uniform (C buffer) register.
     std::string GetUniform(const Uniform& reg) {
-        declr_const_buffers[reg.index].MarkAsUsed(reg.index, reg.offset, stage);
+        declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index),
+                                                  static_cast<unsigned>(reg.offset), stage);
         return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']';
     }
 

From e59126809c9c092d0913e6c1446f6d0ecf20bca2 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 17 Apr 2018 18:00:18 -0400
Subject: [PATCH 11/12] bit_field: Remove is_pod check, add
 is_trivially_copyable_v.

---
 src/common/bit_field.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 5638bdbba..65e357dec 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -192,11 +192,6 @@ private:
     static_assert(position < 8 * sizeof(T), "Invalid position");
     static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
     static_assert(bits > 0, "Invalid number of bits");
-    static_assert(std::is_pod<T>::value, "Invalid base type");
+    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
 };
 #pragma pack()
-
-#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
-static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value,
-              "BitField must be trivially copyable");
-#endif

From 531c25386e62cf3349d56d3d79ecdfba26b8b530 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 17 Apr 2018 18:06:10 -0400
Subject: [PATCH 12/12] shader_bytecode: Make ctors constexpr and explicit.

---
 src/video_core/engines/shader_bytecode.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c368fa7fd..ed66d893a 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -13,7 +13,7 @@ namespace Tegra {
 namespace Shader {
 
 struct Register {
-    Register() = default;
+    constexpr Register() = default;
 
     constexpr Register(u64 value) : value(value) {}
 
@@ -40,13 +40,13 @@ struct Register {
     }
 
 private:
-    u64 value;
+    u64 value{};
 };
 
 union Attribute {
     Attribute() = default;
 
-    constexpr Attribute(u64 value) : value(value) {}
+    constexpr explicit Attribute(u64 value) : value(value) {}
 
     enum class Index : u64 {
         Position = 7,
@@ -65,20 +65,20 @@ union Attribute {
     } fmt28;
 
     BitField<39, 8, u64> reg;
-    u64 value;
+    u64 value{};
 };
 
 union Sampler {
     Sampler() = default;
 
-    constexpr Sampler(u64 value) : value(value) {}
+    constexpr explicit Sampler(u64 value) : value(value) {}
 
     enum class Index : u64 {
         Sampler_0 = 8,
     };
 
     BitField<36, 13, Index> index;
-    u64 value;
+    u64 value{};
 };
 
 union Uniform {
@@ -248,7 +248,7 @@ union OpCode {
     BitField<55, 9, Id> op3;
     BitField<52, 12, Id> op4;
     BitField<51, 13, Id> op5;
-    u64 value;
+    u64 value{};
 };
 static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");