// Patch overview (three files under src/backend_x64/):
//  * emit_x64.h: adds two includes (one of them "common/bit_util.h") and declares
//    the A64FullVectorWidth type alias plus the VectorArray alias template, a
//    std::array sized relative to a vector register per the alias's own comment.
//  * emit_x64_vector.cpp: the fallback lambdas passed to EmitOneArgumentFallback /
//    EmitTwoArgumentFallback now spell their parameters as VectorArray instead of
//    std::array; the lambda bodies shown as context are unchanged.
//  * emit_x64_vector_floating_point.cpp: HandleNaNs drops its local Elements /
//    RegArray aliases in favour of VectorArray, and the *IndexFunction32/64
//    helpers take VectorArray parameters.
// NOTE(review): in this copy the angle-bracket contents (template arguments and
// system include names) appear to be missing and hunk lines are run together --
// confirm against the original commit before applying.
diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h index 64f66967..6df8bc64 100644 --- a/src/backend_x64/emit_x64.h +++ b/src/backend_x64/emit_x64.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -16,6 +17,7 @@ #include "backend_x64/reg_alloc.h" #include "common/address_range.h" +#include "common/bit_util.h" #include "common/fp/rounding_mode.h" #include "frontend/ir/location_descriptor.h" #include "frontend/ir/terminal.h" @@ -29,6 +31,14 @@ namespace Dynarmic::BackendX64 { class BlockOfCode; +using A64FullVectorWidth = std::integral_constant; + +// Array alias that always sizes itself according to the given type T +// relative to the size of a vector register. e.g. T = u32 would result +// in a std::array. +template +using VectorArray = std::array()>; + struct EmitContext { EmitContext(RegAlloc& reg_alloc, IR::Block& block); diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index 56a14d8e..d5e5bd9e 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -752,7 +752,7 @@ void EmitX64::EmitVectorGreaterS64(EmitContext& ctx, IR::Inst* inst) { return; } - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { for (size_t i = 0; i < result.size(); ++i) { result[i] = (a[i] > b[i]) ? 
~u64(0) : 0; } @@ -1140,49 +1140,49 @@ static constexpr T LogicalVShift(T x, T y) { } void EmitX64::EmitVectorLogicalVShiftS8(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); }); } void EmitX64::EmitVectorLogicalVShiftS16(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); }); } void EmitX64::EmitVectorLogicalVShiftS32(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); }); } void EmitX64::EmitVectorLogicalVShiftS64(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); }); } void EmitX64::EmitVectorLogicalVShiftU8(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); }); 
} void EmitX64::EmitVectorLogicalVShiftU16(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); }); } void EmitX64::EmitVectorLogicalVShiftU32(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); }); } void EmitX64::EmitVectorLogicalVShiftU64(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); }); } @@ -1239,7 +1239,7 @@ void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) { return; } - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); }); } @@ -1297,7 +1297,7 @@ void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) { return; } - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return 
std::max(x, y); }); }); } @@ -1354,7 +1354,7 @@ void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) { return; } - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b){ std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); }); } @@ -1417,7 +1417,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) { return; } - EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){ + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b){ std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); }); } @@ -1878,7 +1878,7 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { return; } - EmitOneArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a){ + EmitOneArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a) { std::transform(a.begin(), a.end(), result.begin(), [](u8 val) { return static_cast(Common::BitCount(val)); }); @@ -2105,7 +2105,7 @@ void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { return; } - EmitOneArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a){ + EmitOneArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a) { for (size_t i = 0; i < result.size(); ++i) { result[i] = Common::SignExtend<32, u64>(a[i]); } @@ -2113,7 +2113,7 @@ void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { - EmitOneArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a){ + EmitOneArgumentFallback(code, ctx, inst, [](VectorArray& result, 
const VectorArray& a) { result[1] = (a[0] >> 63) ? ~u64(0) : 0; result[0] = a[0]; }); diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index 261bd6c8..16c71623 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -64,10 +64,8 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm code.movaps(xword[code.ABI_PARAM2], xmm_a); code.movaps(xword[code.ABI_PARAM3], xmm_b); - using Elements = std::integral_constant()>; - using RegArray = std::array; - code.CallFunction(static_cast( - [](RegArray& result, const RegArray& a, const RegArray& b) { + code.CallFunction(static_cast&, const VectorArray&, const VectorArray&)>( + [](VectorArray& result, const VectorArray& a, const VectorArray& b) { for (size_t i = 0; i < result.size(); ++i) { auto [first, second] = IndexFunction(i, a, b); if (auto r = FP::ProcessNaNs(first, second)) { @@ -87,26 +85,26 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm code.SwitchToNearCode(); } -static std::tuple DefaultIndexFunction32(size_t i, const std::array& a, const std::array& b) { +static std::tuple DefaultIndexFunction32(size_t i, const VectorArray& a, const VectorArray& b) { return std::make_tuple(a[i], b[i]); } -static std::tuple DefaultIndexFunction64(size_t i, const std::array& a, const std::array& b) { +static std::tuple DefaultIndexFunction64(size_t i, const VectorArray& a, const VectorArray& b) { return std::make_tuple(a[i], b[i]); } -static std::tuple PairedIndexFunction32(size_t i, const std::array& a, const std::array& b) { +static std::tuple PairedIndexFunction32(size_t i, const VectorArray& a, const VectorArray& b) { if (i < 2) { return std::make_tuple(a[2 * i], a[2 * i + 1]); } return std::make_tuple(b[2 * (i - 2)], b[2 * (i - 2) + 1]); } -static std::tuple PairedIndexFunction64(size_t i, const std::array& a, const std::array& b) 
{ +static std::tuple PairedIndexFunction64(size_t i, const VectorArray& a, const VectorArray& b) { return i == 0 ? std::make_tuple(a[0], a[1]) : std::make_tuple(b[0], b[1]); } -static std::tuple PairedLowerIndexFunction32(size_t i, const std::array& a, const std::array& b) { +static std::tuple PairedLowerIndexFunction32(size_t i, const VectorArray& a, const VectorArray& b) { switch (i) { case 0: return std::make_tuple(a[0], a[1]); @@ -117,7 +115,7 @@ static std::tuple PairedLowerIndexFunction32(size_t i, const std::arra } } -static std::tuple PairedLowerIndexFunction64(size_t i, const std::array& a, const std::array& b) { +static std::tuple PairedLowerIndexFunction64(size_t i, const VectorArray& a, const VectorArray& b) { return i == 0 ? std::make_tuple(a[0], b[0]) : std::make_tuple(u64(0), u64(0)); }