From 5ff32062076cb810f4fb1d20e1a0afd176b14bbb Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Mon, 12 Dec 2016 21:44:15 -0800
Subject: [PATCH] shader_jit_x64: Use Reg32 for LOOP* registers, eliminating
 casts

---
 src/video_core/shader/shader_jit_x64.cpp | 32 ++++++++++++------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index a1f1f8d30..cfdeb8d6a 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -109,11 +109,11 @@ static const Reg64 SETUP = r9;
 static const Reg64 ADDROFFS_REG_0 = r10;
 static const Reg64 ADDROFFS_REG_1 = r11;
 /// VS loop count register (Multiplied by 16)
-static const Reg64 LOOPCOUNT_REG = r12;
+static const Reg32 LOOPCOUNT_REG = r12d;
 /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
-static const Reg64 LOOPCOUNT = rsi;
+static const Reg32 LOOPCOUNT = esi;
 /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
-static const Reg64 LOOPINC = rdi;
+static const Reg32 LOOPINC = edi;
 /// Result of the previous CMP instruction for the X-component comparison
 static const Reg64 COND0 = r13;
 /// Result of the previous CMP instruction for the Y-component comparison
@@ -734,24 +734,24 @@ void JitShader::Compile_LOOP(Instruction instr) {
     // 4 bits) to be used as an offset into the 16-byte vector registers later
     size_t offset =
         ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
-    mov(LOOPCOUNT.cvt32(), dword[SETUP + offset]);
-    mov(LOOPCOUNT_REG.cvt32(), LOOPCOUNT.cvt32());
-    shr(LOOPCOUNT_REG.cvt32(), 4);
-    and(LOOPCOUNT_REG.cvt32(), 0xFF0); // Y-component is the start
-    mov(LOOPINC.cvt32(), LOOPCOUNT.cvt32());
-    shr(LOOPINC.cvt32(), 12);
-    and(LOOPINC.cvt32(), 0xFF0);                // Z-component is the incrementer
-    movzx(LOOPCOUNT.cvt32(), LOOPCOUNT.cvt8()); // X-component is iteration count
-    add(LOOPCOUNT.cvt32(), 1);                  // Iteration count is X-component + 1
+    mov(LOOPCOUNT, dword[SETUP + offset]);
+    mov(LOOPCOUNT_REG, LOOPCOUNT);
+    shr(LOOPCOUNT_REG, 4);
+    and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
+    mov(LOOPINC, LOOPCOUNT);
+    shr(LOOPINC, 12);
+    and(LOOPINC, 0xFF0);                // Z-component is the incrementer
+    movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
+    add(LOOPCOUNT, 1);                  // Iteration count is X-component + 1
 
     Label l_loop_start;
     L(l_loop_start);
 
     Compile_Block(instr.flow_control.dest_offset + 1);
 
-    add(LOOPCOUNT_REG.cvt32(), LOOPINC.cvt32()); // Increment LOOPCOUNT_REG by Z-component
-    sub(LOOPCOUNT.cvt32(), 1);                   // Increment loop count by 1
-    jnz(l_loop_start);                           // Loop if not equal
+    add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
+    sub(LOOPCOUNT, 1);           // Decrement loop count by 1
+    jnz(l_loop_start);           // Loop if not equal
 
     looping = false;
 }
@@ -856,7 +856,7 @@ void JitShader::Compile() {
     // Zero address/loop  registers
     xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
     xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
-    xor(LOOPCOUNT_REG.cvt32(), LOOPCOUNT_REG.cvt32());
+    xor(LOOPCOUNT_REG, LOOPCOUNT_REG);
 
     // Used to set a register to one
     static const __m128 one = {1.f, 1.f, 1.f, 1.f};