diff --git a/src/backend/x64/emit_x64.h b/src/backend/x64/emit_x64.h
index 427db234..23a0f957 100644
--- a/src/backend/x64/emit_x64.h
+++ b/src/backend/x64/emit_x64.h
@@ -40,6 +40,9 @@ using A64FullVectorWidth = std::integral_constant<size_t, 128>;
 template <typename T>
 using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
 
+template <typename T>
+using HalfVectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>() / 2>;
+
 struct EmitContext {
     EmitContext(RegAlloc& reg_alloc, IR::Block& block);
 
diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp
index 9b40e65b..f1892a89 100644
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@@ -4029,7 +4029,174 @@ void EmitX64::EmitVectorTable(EmitContext&, IR::Inst* inst) {
     ASSERT_MSG(inst->UseCount() == 1, "Table cannot be used multiple times");
 }
 
-void EmitX64::EmitVectorTableLookup(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
+    ASSERT(inst->GetArg(1).GetInst()->GetOpcode() == IR::Opcode::VectorTable);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());
+
+    const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); });
+    const bool is_defaults_zero = inst->GetArg(0).IsZero();
+
+    // TODO: AVX512VBMI implementation when available (VPERMB / VPERMI2B / VPERMT2B; +AVX512VL for the 128-bit forms)
+
+    const std::array<u64, 5> sat_const{
+        0,
+        0x7878787878787878,
+        0x7070707070707070,
+        0x6868686868686868,
+        0x6060606060606060,
+    };
+
+    if (code.HasSSSE3() && is_defaults_zero && table_size <= 2) {
+        const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]);
+        const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
+
+        if (table_size == 2) {
+            const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]);
+            code.punpcklqdq(xmm_table0, xmm_table0_upper);
+            ctx.reg_alloc.Release(xmm_table0_upper);
+        }
+
+        code.paddusb(indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+        code.pshufb(xmm_table0, indicies);
+
+        ctx.reg_alloc.DefineValue(inst, xmm_table0);
+        return;
+    }
+
+    if (code.HasSSE41() && table_size <= 2) {
+        const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
+        const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]);
+        const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
+
+        if (table_size == 2) {
+            const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]);
+            code.punpcklqdq(xmm_table0, xmm_table0_upper);
+            ctx.reg_alloc.Release(xmm_table0_upper);
+        }
+
+        if (code.HasAVX()) {
+            code.vpaddusb(xmm0, indicies, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
+        } else {
+            code.movaps(xmm0, indicies);
+            code.paddusb(xmm0, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
+        }
+        code.pshufb(xmm_table0, indicies);
+        code.pblendvb(xmm_table0, defaults);
+
+        ctx.reg_alloc.DefineValue(inst, xmm_table0);
+        return;
+    }
+
+    if (code.HasSSE41() && is_defaults_zero) {
+        const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]);
+        const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
+        const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[2]);
+
+        {
+            const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]);
+            code.punpcklqdq(xmm_table0, xmm_table0_upper);
+            ctx.reg_alloc.Release(xmm_table0_upper);
+        }
+        if (table_size == 4) {
+            const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(table[3]);
+            code.punpcklqdq(xmm_table1, xmm_table1_upper);
+            ctx.reg_alloc.Release(xmm_table1_upper);
+        }
+
+        if (code.HasAVX()) {
+            code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+        } else {
+            code.movaps(xmm0, indicies);
+            code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+        }
+        code.paddusb(indicies, code.MConst(xword, 0x6060606060606060, 0xFFFFFFFFFFFFFFFF));
+        code.pshufb(xmm_table0, xmm0);
+        code.pshufb(xmm_table1, indicies);
+        code.pblendvb(xmm_table0, xmm_table1);
+
+        ctx.reg_alloc.DefineValue(inst, xmm_table0);
+        return;
+    }
+
+    if (code.HasSSE41()) {
+        const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]);
+        const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]);
+        const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
+        const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[2]);
+
+        {
+            const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]);
+            code.punpcklqdq(xmm_table0, xmm_table0_upper);
+            ctx.reg_alloc.Release(xmm_table0_upper);
+        }
+        if (table_size == 4) {
+            const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(table[3]);
+            code.punpcklqdq(xmm_table1, xmm_table1_upper);
+            ctx.reg_alloc.Release(xmm_table1_upper);
+        }
+
+        if (code.HasAVX()) {
+            code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+        } else {
+            code.movaps(xmm0, indicies);
+            code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+        }
+        code.pshufb(xmm_table0, indicies);
+        code.pshufb(xmm_table1, indicies);
+        code.pblendvb(xmm_table0, xmm_table1);
+        if (code.HasAVX()) {
+            code.vpaddusb(xmm0, indicies, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
+        } else {
+            code.movaps(xmm0, indicies);
+            code.paddusb(xmm0, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
+        }
+        code.pblendvb(xmm_table0, defaults);
+
+        ctx.reg_alloc.DefineValue(inst, xmm_table0);
+        return;
+    }
+
+    const u32 stack_space = static_cast<u32>(6 * 8);
+    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
+    for (size_t i = 0; i < table_size; ++i) {
+        const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]);
+        code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value);
+        ctx.reg_alloc.Release(table_value);
+    }
+    const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]);
+    const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    ctx.reg_alloc.EndOfAllocScope();
+    ctx.reg_alloc.HostCall(nullptr);
+
+    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
+    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 4 * 8]);
+    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 5 * 8]);
+    code.mov(code.ABI_PARAM4.cvt32(), static_cast<u32>(table_size));
+    code.movq(qword[code.ABI_PARAM2], defaults);
+    code.movq(qword[code.ABI_PARAM3], indicies);
+
+    code.CallLambda(
+        [](const HalfVectorArray<u8>* table, HalfVectorArray<u8>& result, const HalfVectorArray<u8>& indicies, size_t table_size) {
+            for (size_t i = 0; i < result.size(); ++i) {
+                const size_t index = indicies[i] / table[0].size();
+                const size_t elem = indicies[i] % table[0].size();
+                if (index < table_size) {
+                    result[i] = table[index][elem];
+                }
+            }
+        }
+    );
+
+    code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]);
+    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
     ASSERT(inst->GetArg(1).GetInst()->GetOpcode() == IR::Opcode::VectorTable);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
diff --git a/src/frontend/A32/decoder/asimd.inc b/src/frontend/A32/decoder/asimd.inc
index 642a2a62..f4b0f939 100644
--- a/src/frontend/A32/decoder/asimd.inc
+++ b/src/frontend/A32/decoder/asimd.inc
@@ -112,7 +112,7 @@ INST(asimd_VRSQRTE,         "VRSQRTE",                  "111100111D11zz11dddd010
 // Miscellaneous
 INST(asimd_VEXT,            "VEXT",                     "111100101D11nnnnddddiiiiNQM0mmmm") // ASIMD
 INST(asimd_VTBL,            "VTBL",                     "111100111D11nnnndddd10zzN0M0mmmm") // ASIMD
-//INST(asimd_VTBX,            "VTBX",                     "111100111D11nnnndddd10zzN1M0mmmm") // ASIMD
+INST(asimd_VTBX,            "VTBX",                     "111100111D11nnnndddd10zzN1M0mmmm") // ASIMD
 //INST(asimd_VDUP_scalar,     "VDUP (scalar)",            "111100111D11iiiidddd11000QM0mmmm") // ASIMD
 
 // One register and modified immediate
diff --git a/src/frontend/A32/translate/impl/asimd_misc.cpp b/src/frontend/A32/translate/impl/asimd_misc.cpp
index 72371856..1518ce48 100644
--- a/src/frontend/A32/translate/impl/asimd_misc.cpp
+++ b/src/frontend/A32/translate/impl/asimd_misc.cpp
@@ -10,6 +10,31 @@
 
 namespace Dynarmic::A32 {
 
+static bool TableLookup(ArmTranslatorVisitor& v, bool is_vtbl, bool D, size_t Vn, size_t Vd, size_t len, bool N, bool M, size_t Vm) {
+    const size_t length = len + 1;
+    const auto d = ToVector(false, Vd, D);
+    const auto m = ToVector(false, Vm, M);
+    const auto n = ToVector(false, Vn, N);
+
+    if (RegNumber(n) + length > 32) {
+        return v.UnpredictableInstruction();
+    }
+
+    const IR::Table table = v.ir.VectorTable([&]{
+        std::vector<IR::U64> result;
+        for (size_t i = 0; i < length; ++i) {
+            result.emplace_back(v.ir.GetExtendedRegister(n + i));
+        }
+        return result;
+    }());
+    const IR::U64 indicies = v.ir.GetExtendedRegister(m);
+    const IR::U64 defaults = is_vtbl ? v.ir.Imm64(0) : IR::U64{v.ir.GetExtendedRegister(d)};
+    const IR::U64 result = v.ir.VectorTableLookup(defaults, table, indicies);
+
+    v.ir.SetExtendedRegister(d, result);
+    return true;
+}
+
 bool ArmTranslatorVisitor::asimd_VEXT(bool D, size_t Vn, size_t Vd, Imm<4> imm4, bool N, bool Q, bool M, size_t Vm) {
     if (Q && (Common::Bit<0>(Vd) || Common::Bit<0>(Vn) || Common::Bit<0>(Vm))) {
         return UndefinedInstruction();
@@ -33,28 +58,11 @@ bool ArmTranslatorVisitor::asimd_VEXT(bool D, size_t Vn, size_t Vd, Imm<4> imm4,
 }
 
 bool ArmTranslatorVisitor::asimd_VTBL(bool D, size_t Vn, size_t Vd, size_t len, bool N, bool M, size_t Vm) {
-    const size_t length = len + 1;
-    const auto d = ToVector(false, Vd, D);
-    const auto m = ToVector(false, Vm, M);
-    const auto n = ToVector(false, Vn, N);
+    return TableLookup(*this, true, D, Vn, Vd, len, N, M, Vm);
+}
 
-    if (RegNumber(n) + length > 32) {
-        return UnpredictableInstruction();
-    }
-
-    const IR::U64 table0 = ir.GetExtendedRegister(n);
-    const IR::U64 table1 = length >= 2 ? IR::U64{ir.GetExtendedRegister(n + 1)} : ir.Imm64(0);
-    const IR::U64 table2 = length >= 3 ? IR::U64{ir.GetExtendedRegister(n + 2)} : ir.Imm64(0);
-    const IR::U64 table3 = length == 4 ? IR::U64{ir.GetExtendedRegister(n + 3)} : ir.Imm64(0);
-
-    const IR::Table table = ir.VectorTable(length <= 2
-                                           ? std::vector<IR::U128>{ir.Pack2x64To1x128(table0, table1)}
-                                           : std::vector<IR::U128>{ir.Pack2x64To1x128(table0, table1), ir.Pack2x64To1x128(table2, table3)});
-    const IR::U128 indicies = ir.GetVector(m);
-    const IR::U128 result = ir.VectorTableLookup(ir.ZeroVector(), table, indicies);
-
-    ir.SetVector(d, result);
-    return true;
+bool ArmTranslatorVisitor::asimd_VTBX(bool D, size_t Vn, size_t Vd, size_t len, bool N, bool M, size_t Vm) {
+    return TableLookup(*this, false, D, Vn, Vd, len, N, M, Vm);
 }
 
 } // namespace Dynarmic::A32
diff --git a/src/frontend/A32/translate/impl/translate_arm.h b/src/frontend/A32/translate/impl/translate_arm.h
index 4735497b..216abcef 100644
--- a/src/frontend/A32/translate/impl/translate_arm.h
+++ b/src/frontend/A32/translate/impl/translate_arm.h
@@ -514,6 +514,7 @@ struct ArmTranslatorVisitor final {
     // Advanced SIMD miscellaneous
     bool asimd_VEXT(bool D, size_t Vn, size_t Vd, Imm<4> imm4, bool N, bool Q, bool M, size_t Vm);
     bool asimd_VTBL(bool D, size_t Vn, size_t Vd, size_t len, bool N, bool M, size_t Vm);
+    bool asimd_VTBX(bool D, size_t Vn, size_t Vd, size_t len, bool N, bool M, size_t Vm);
 
     // Advanced SIMD load/store structures
     bool v8_VST_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t sz, size_t align, Reg m);
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 69684973..16b9371e 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1739,14 +1739,26 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
     UNREACHABLE();
 }
 
+Table IREmitter::VectorTable(std::vector<U64> values) {
+    ASSERT(values.size() >= 1 && values.size() <= 4);
+    values.resize(4);
+    return Inst<Table>(Opcode::VectorTable, values[0], values[1], values[2], values[3]);
+}
+
 Table IREmitter::VectorTable(std::vector<U128> values) {
     ASSERT(values.size() >= 1 && values.size() <= 4);
     values.resize(4);
     return Inst<Table>(Opcode::VectorTable, values[0], values[1], values[2], values[3]);
 }
 
+U64 IREmitter::VectorTableLookup(const U64& defaults, const Table& table, const U64& indices) {
+    ASSERT(table.GetInst()->GetArg(0).GetType() == Type::U64);
+    return Inst<U64>(Opcode::VectorTableLookup64, defaults, table, indices);
+}
+
 U128 IREmitter::VectorTableLookup(const U128& defaults, const Table& table, const U128& indices) {
-    return Inst<U128>(Opcode::VectorTableLookup, defaults, table, indices);
+    ASSERT(table.GetInst()->GetArg(0).GetType() == Type::U128);
+    return Inst<U128>(Opcode::VectorTableLookup128, defaults, table, indices);
 }
 
 U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 0311d057..03607636 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -297,7 +297,9 @@ public:
     U128 VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b);
     U128 VectorSignedSaturatedShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorSub(size_t esize, const U128& a, const U128& b);
+    Table VectorTable(std::vector<U64> values);
     Table VectorTable(std::vector<U128> values);
+    U64 VectorTableLookup(const U64& defaults, const Table& table, const U64& indices);
     U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
     U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
     U128 VectorUnsignedRecipEstimate(const U128& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index d94fe7bd..1ede0e8b 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -470,8 +470,9 @@ OPCODE(VectorSub8,                                          U128,           U128
 OPCODE(VectorSub16,                                         U128,           U128,           U128                                            )
 OPCODE(VectorSub32,                                         U128,           U128,           U128                                            )
 OPCODE(VectorSub64,                                         U128,           U128,           U128                                            )
-OPCODE(VectorTable,                                         Table,          U128,           Opaque,         Opaque,         Opaque          )
-OPCODE(VectorTableLookup,                                   U128,           U128,           Table,          U128                            )
+OPCODE(VectorTable,                                         Table,          Opaque,         Opaque,         Opaque,         Opaque          )
+OPCODE(VectorTableLookup64,                                 U64,            U64,            Table,          U64                             )
+OPCODE(VectorTableLookup128,                                U128,           U128,           Table,          U128                            )
 OPCODE(VectorUnsignedAbsoluteDifference8,                   U128,           U128,           U128                                            )
 OPCODE(VectorUnsignedAbsoluteDifference16,                  U128,           U128,           U128                                            )
 OPCODE(VectorUnsignedAbsoluteDifference32,                  U128,           U128,           U128                                            )