From e1df7ae6219c0b19576646969d76f72567caccdd Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Wed, 24 Jan 2018 15:55:59 +0000
Subject: [PATCH] IR: Add IR instructions A64{Read,Write}Memory128

This implementation only works on macOS and Linux.
---
 include/dynarmic/A64/config.h            |  2 ++
 src/backend_x64/a64_emit_x64.cpp         | 41 ++++++++++++++++++++++++
 src/frontend/A64/ir_emitter.cpp          |  8 +++++
 src/frontend/A64/ir_emitter.h            |  2 ++
 src/frontend/A64/translate/impl/impl.cpp |  9 ++++--
 src/frontend/A64/translate/impl/impl.h   |  4 +--
 src/frontend/ir/opcodes.inc              |  2 ++
 src/frontend/ir/value.h                  |  1 +
 tests/A64/testenv.h                      |  7 ++++
 9 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/include/dynarmic/A64/config.h b/include/dynarmic/A64/config.h
index a2ab289b..6e30bf3d 100644
--- a/include/dynarmic/A64/config.h
+++ b/include/dynarmic/A64/config.h
@@ -42,12 +42,14 @@ struct UserCallbacks {
     virtual std::uint16_t MemoryRead16(VAddr vaddr) = 0;
     virtual std::uint32_t MemoryRead32(VAddr vaddr) = 0;
     virtual std::uint64_t MemoryRead64(VAddr vaddr) = 0;
+    virtual Vector MemoryRead128(VAddr vaddr) = 0;
 
     // Writes through these callbacks may not be aligned.
     virtual void MemoryWrite8(VAddr vaddr, std::uint8_t value) = 0;
     virtual void MemoryWrite16(VAddr vaddr, std::uint16_t value) = 0;
     virtual void MemoryWrite32(VAddr vaddr, std::uint32_t value) = 0;
     virtual void MemoryWrite64(VAddr vaddr, std::uint64_t value) = 0;
+    virtual void MemoryWrite128(VAddr vaddr, Vector value) = 0;
 
     // If this callback returns true, the JIT will assume MemoryRead* callbacks will always
     // return the same value at any point in time for this vaddr. The JIT may use this information
diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp
index 8c529c92..541c650f 100644
--- a/src/backend_x64/a64_emit_x64.cpp
+++ b/src/backend_x64/a64_emit_x64.cpp
@@ -319,6 +319,25 @@ void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { // Emits a call to the MemoryRead128 callback and defines inst's value from the two return registers.
+    DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
+        ASSERT(vaddr == code->ABI_PARAM2); // the address is expected in the second ABI parameter register
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        ctx.reg_alloc.HostCall(nullptr, {}, args[0]); // NOTE(review): first slot left empty, presumably reserved for the callback object pointer — confirm
+    });
+    Xbyak::Xmm result = xmm0; // fixed register, not requested from reg_alloc — NOTE(review): presumably free after HostCall; confirm
+    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code->movq(result, code->ABI_RETURN); // lower 64 bits
+        code->pinsrq(result, code->ABI_RETURN2, 1); // insert the second return register as lane 1 (upper 64 bits)
+    } else {
+        Xbyak::Xmm tmp = xmm1; // fallback path without SSE4.1: stage the upper half in a second XMM register
+        code->movq(result, code->ABI_RETURN);
+        code->movq(tmp, code->ABI_RETURN2);
+        code->punpcklqdq(result, tmp); // result = {low qword of result, low qword of tmp}
+    }
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
         ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
@@ -351,6 +370,28 @@ void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { // Emits a call to the MemoryWrite128 callback, passing the 128-bit value as two 64-bit halves in GPRs.
+    DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value0, Xbyak::Reg64 value1) {
+        ASSERT(vaddr == code->ABI_PARAM2 && value0 == code->ABI_PARAM3 && value1 == code->ABI_PARAM4); // address, low half, high half occupy parameter registers 2..4
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        ctx.reg_alloc.Use(args[0], ABI_PARAM2); // args[0] is the address operand
+        ctx.reg_alloc.ScratchGpr({ABI_PARAM3}); // reserve the registers that will receive the two value halves
+        ctx.reg_alloc.ScratchGpr({ABI_PARAM4});
+        if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+            Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]); // read-only use: pextrq does not modify the source
+            code->movq(code->ABI_PARAM3, xmm_value); // lower 64 bits
+            code->pextrq(code->ABI_PARAM4, xmm_value, 1); // lane 1 (upper 64 bits)
+        } else {
+            Xbyak::Xmm xmm_value = ctx.reg_alloc.UseScratchXmm(args[1]); // scratch use: punpckhqdq below overwrites this register
+            code->movq(code->ABI_PARAM3, xmm_value); // lower 64 bits
+            code->punpckhqdq(xmm_value, xmm_value); // copy the upper qword into the lower qword
+            code->movq(code->ABI_PARAM4, xmm_value); // former upper 64 bits
+        }
+        ctx.reg_alloc.EndOfAllocScope(); // NOTE(review): presumably releases the allocations above so HostCall can claim its registers — confirm
+        ctx.reg_alloc.HostCall(nullptr);
+    });
+}
+
 void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) {
     code->SwitchMxcsrOnExit();
     DEVIRT(conf.callbacks, &A64::UserCallbacks::InterpreterFallback).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) {
diff --git a/src/frontend/A64/ir_emitter.cpp b/src/frontend/A64/ir_emitter.cpp
index ee65cff0..e2bf70e8 100644
--- a/src/frontend/A64/ir_emitter.cpp
+++ b/src/frontend/A64/ir_emitter.cpp
@@ -58,6 +58,10 @@ IR::U64 IREmitter::ReadMemory64(const IR::U64& vaddr) {
     return Inst<IR::U64>(Opcode::A64ReadMemory64, vaddr);
 }
 
+IR::U128 IREmitter::ReadMemory128(const IR::U64& vaddr) { // Builds an A64ReadMemory128 IR instruction on vaddr and returns it typed as U128.
+    return Inst<IR::U128>(Opcode::A64ReadMemory128, vaddr);
+}
+
 void IREmitter::WriteMemory8(const IR::U64& vaddr, const IR::U8& value) {
     Inst(Opcode::A64WriteMemory8, vaddr, value);
 }
@@ -74,6 +78,10 @@ void IREmitter::WriteMemory64(const IR::U64& vaddr, const IR::U64& value) {
     Inst(Opcode::A64WriteMemory64, vaddr, value);
 }
 
+void IREmitter::WriteMemory128(const IR::U64& vaddr, const IR::U128& value) { // Builds an A64WriteMemory128 IR instruction with operands (vaddr, value); no result value.
+    Inst(Opcode::A64WriteMemory128, vaddr, value);
+}
+
 IR::U32 IREmitter::GetW(Reg reg) {
     if (reg == Reg::ZR)
         return Imm32(0);
diff --git a/src/frontend/A64/ir_emitter.h b/src/frontend/A64/ir_emitter.h
index 24eb7a57..4bf6c9b7 100644
--- a/src/frontend/A64/ir_emitter.h
+++ b/src/frontend/A64/ir_emitter.h
@@ -44,10 +44,12 @@ public:
     IR::U16 ReadMemory16(const IR::U64& vaddr);
     IR::U32 ReadMemory32(const IR::U64& vaddr);
     IR::U64 ReadMemory64(const IR::U64& vaddr);
+    IR::U128 ReadMemory128(const IR::U64& vaddr);
     void WriteMemory8(const IR::U64& vaddr, const IR::U8& value);
     void WriteMemory16(const IR::U64& vaddr, const IR::U16& value);
     void WriteMemory32(const IR::U64& vaddr, const IR::U32& value);
     void WriteMemory64(const IR::U64& vaddr, const IR::U64& value);
+    void WriteMemory128(const IR::U64& vaddr, const IR::U128& value);
 
     IR::U32 GetW(Reg source_reg);
     IR::U64 GetX(Reg source_reg);
diff --git a/src/frontend/A64/translate/impl/impl.cpp b/src/frontend/A64/translate/impl/impl.cpp
index 1fd5d8c2..1fd54a49 100644
--- a/src/frontend/A64/translate/impl/impl.cpp
+++ b/src/frontend/A64/translate/impl/impl.cpp
@@ -147,7 +147,7 @@ void TranslatorVisitor::V(size_t bitsize, Vec vec, IR::U128 value) {
     }
 }
 
-IR::UAny TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*/) {
+IR::UAnyU128 TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*/) {
     switch (bytesize) {
     case 1:
         return ir.ReadMemory8(address);
@@ -157,13 +157,15 @@ IR::UAny TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acct
         return ir.ReadMemory32(address);
     case 8:
         return ir.ReadMemory64(address);
+    case 16:
+        return ir.ReadMemory128(address);
     default:
         ASSERT_MSG(false, "Invalid bytesize parameter %zu", bytesize);
         return {};
     }
 }
 
-void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*/, IR::UAny value) {
+void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*/, IR::UAnyU128 value) {
     switch (bytesize) {
     case 1:
         ir.WriteMemory8(address, value);
@@ -177,6 +179,9 @@ void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*
     case 8:
         ir.WriteMemory64(address, value);
         return;
+    case 16:
+        ir.WriteMemory128(address, value);
+        return;
     default:
         ASSERT_MSG(false, "Invalid bytesize parameter %zu", bytesize);
         return;
diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h
index b838e423..7e3426f5 100644
--- a/src/frontend/A64/translate/impl/impl.h
+++ b/src/frontend/A64/translate/impl/impl.h
@@ -51,8 +51,8 @@ struct TranslatorVisitor final {
     IR::U128 V(size_t bitsize, Vec vec);
     void V(size_t bitsize, Vec vec, IR::U128 value);
 
-    IR::UAny Mem(IR::U64 address, size_t size, AccType acctype);
-    void Mem(IR::U64 address, size_t size, AccType acctype, IR::UAny value);
+    IR::UAnyU128 Mem(IR::U64 address, size_t size, AccType acctype);
+    void Mem(IR::U64 address, size_t size, AccType acctype, IR::UAnyU128 value);
 
     IR::U32U64 SignExtend(IR::UAny value, size_t to_size);
     IR::U32U64 ZeroExtend(IR::UAny value, size_t to_size);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index f08b8c4e..da922bfc 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -237,10 +237,12 @@ A64OPC(ReadMemory8,             T::U8,          T::U64
 A64OPC(ReadMemory16,            T::U16,         T::U64                                          )
 A64OPC(ReadMemory32,            T::U32,         T::U64                                          )
 A64OPC(ReadMemory64,            T::U64,         T::U64                                          )
+A64OPC(ReadMemory128,           T::U128,        T::U64                                          )
 A64OPC(WriteMemory8,            T::Void,        T::U64,         T::U8                           )
 A64OPC(WriteMemory16,           T::Void,        T::U64,         T::U16                          )
 A64OPC(WriteMemory32,           T::Void,        T::U64,         T::U32                          )
 A64OPC(WriteMemory64,           T::Void,        T::U64,         T::U64                          )
+A64OPC(WriteMemory128,          T::Void,        T::U64,         T::U128                         )
 
 // Coprocessor
 A32OPC(CoprocInternalOperation, T::Void,        T::CoprocInfo                                   )
diff --git a/src/frontend/ir/value.h b/src/frontend/ir/value.h
index 52809d57..f83a7fb3 100644
--- a/src/frontend/ir/value.h
+++ b/src/frontend/ir/value.h
@@ -100,6 +100,7 @@ using U64 = TypedValue<Type::U64>;
 using U128 = TypedValue<Type::U128>;
 using U32U64 = TypedValue<Type::U32 | Type::U64>;
 using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
+using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>;
 using NZCV = TypedValue<Type::NZCVFlags>;
 
 } // namespace IR
diff --git a/tests/A64/testenv.h b/tests/A64/testenv.h
index e5f67c82..7ff86e8d 100644
--- a/tests/A64/testenv.h
+++ b/tests/A64/testenv.h
@@ -50,6 +50,9 @@ public:
     std::uint64_t MemoryRead64(u64 vaddr) override {
         return u64(MemoryRead32(vaddr)) | u64(MemoryRead32(vaddr + 4)) << 32;
     }
+    Vector MemoryRead128(u64 vaddr) override { // Composes the result from two 64-bit reads.
+        return {MemoryRead64(vaddr), MemoryRead64(vaddr + 8)}; // element 0 from vaddr, element 1 from the following 8 bytes
+    }
 
     void MemoryWrite8(u64 vaddr, std::uint8_t value) override {
         if (vaddr < code_mem.size() * sizeof(u32)) {
@@ -69,6 +72,10 @@ public:
         MemoryWrite32(vaddr, static_cast<u32>(value));
         MemoryWrite32(vaddr + 4, static_cast<u32>(value >> 32));
     }
+    void MemoryWrite128(u64 vaddr, Vector value) override { // Stores value as two 64-bit writes: element 0 at vaddr, element 1 at vaddr + 8.
+        MemoryWrite64(vaddr, value[0]);
+        MemoryWrite64(vaddr + 8, value[1]); // 8-byte stride, mirroring MemoryRead128; an offset of 4 would overlap the first write
+    }
 
     void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(false, "InterpreterFallback(%" PRIx64 ", %zu)", pc, num_instructions); }