backend/A64/emit_a64_packed.cpp: Implement unsigned packed ADD and SUB (UADD8/UADD16/USUB8/USUB16), along with a few supporting instructions in the emitter
commit a698e35422 (parent 7b6cc4ec70)
@@ -362,7 +362,7 @@ elseif(ARCHITECTURE_Aarch64)
         # backend/A64/emit_a64_crc32.cpp
         backend/A64/emit_a64_data_processing.cpp
         backend/A64/emit_a64_floating_point.cpp
-        # backend/A64/emit_a64_packed.cpp
+        backend/A64/emit_a64_packed.cpp
         # backend/A64/emit_a64_saturation.cpp
         # backend/A64/emit_a64_sm4.cpp
         # backend/A64/emit_a64_vector.cpp
src/backend/A64/emit_a64_packed.cpp (new file, 99 lines)
@@ -0,0 +1,99 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */

#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"

namespace Dynarmic::BackendA64 {

void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));

    code.fp_emitter.ADD(B, sum, sum, b);

    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());

        code.fp_emitter.CMHI(B, ge, b, sum);

        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }

    ctx.reg_alloc.DefineValue(inst, sum);
}

void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));

    code.fp_emitter.ADD(H, sum, sum, b);

    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());

        code.fp_emitter.CMHI(H, ge, b, sum);

        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }

    ctx.reg_alloc.DefineValue(inst, sum);
}

void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));

    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());

        code.fp_emitter.CMHS(B, ge, a, b);

        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }

    code.fp_emitter.SUB(B, a, a, b);

    ctx.reg_alloc.DefineValue(inst, a);
}

void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));

    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());

        code.fp_emitter.CMHS(H, ge, a, b);

        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }

    code.fp_emitter.SUB(H, a, a, b);
    ctx.reg_alloc.DefineValue(inst, a);
}

} // namespace Dynarmic::BackendA64
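For reference, the GE handling in these handlers mirrors the architectural definition of UADD8/UADD16 and USUB8/USUB16: on an add, a lane's GE bit is set exactly when that lane produced an unsigned carry out, which after wraparound is the same as the second operand being unsigned-higher than the wrapped sum (hence CMHI ge, b, sum); on a subtract, GE is set when no borrow occurred, i.e. a >= b unsigned (hence CMHS ge, a, b). A scalar reference model of the byte case, illustrative only (PackedResult, UAdd8 and USub8 are hypothetical names, not part of this commit):

#include <array>
#include <cstdint>

// Illustrative scalar model of UADD8/USUB8 with per-lane GE flags.
struct PackedResult {
    uint32_t value;
    std::array<bool, 4> ge; // one GE bit per byte lane
};

inline PackedResult UAdd8(uint32_t a, uint32_t b) {
    PackedResult r{0, {}};
    for (int lane = 0; lane < 4; ++lane) {
        const uint32_t x = (a >> (lane * 8)) & 0xFF;
        const uint32_t y = (b >> (lane * 8)) & 0xFF;
        const uint32_t sum = x + y;           // up to 9 bits
        r.value |= (sum & 0xFF) << (lane * 8);
        r.ge[lane] = sum >= 0x100;            // carry out <=> y > (sum & 0xFF)
    }
    return r;
}

inline PackedResult USub8(uint32_t a, uint32_t b) {
    PackedResult r{0, {}};
    for (int lane = 0; lane < 4; ++lane) {
        const uint32_t x = (a >> (lane * 8)) & 0xFF;
        const uint32_t y = (b >> (lane * 8)) & 0xFF;
        r.value |= ((x - y) & 0xFF) << (lane * 8);
        r.ge[lane] = x >= y;                  // no borrow
    }
    return r;
}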
@@ -2837,12 +2837,32 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8) {
 }

 // Vector
+void ARM64FloatEmitter::ADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
+    ASSERT(!(IsDouble(Rd) && esize == D));
+    EmitThreeSame(0, static_cast<u32>(esize), 0b10000, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::SUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
+    ASSERT(!(IsDouble(Rd) && esize == D));
+    EmitThreeSame(1, static_cast<u32>(esize), 0b10000, Rd, Rn, Rm);
+}
 void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
     EmitThreeSame(0, 0, 3, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
     EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
 }
+void ARM64FloatEmitter::CMGT(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
+    ASSERT(!(IsDouble(Rd) && esize == D));
+    EmitThreeSame(0, static_cast<u32>(esize), 0b00110, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::CMHI(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
+    ASSERT(!(IsDouble(Rd) && esize == D));
+    EmitThreeSame(1, static_cast<u32>(esize), 0b00110, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::CMHS(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
+    ASSERT(!(IsDouble(Rd) && esize == D));
+    EmitThreeSame(1, static_cast<u32>(esize), 0b00111, Rd, Rn, Rm);
+}
 void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index) {
     u32 imm5 = 0;

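Each of the vector helpers added above routes through EmitThreeSame with a (U, size, opcode) triple: ADD is U=0, opcode=0b10000; SUB is U=1, opcode=0b10000; CMGT/CMHI/CMHS are opcode 0b00110, 0b00110, 0b00111 with U=0, 1, 1. These correspond to the A64 "Advanced SIMD three registers of the same type" instruction class. A standalone sketch of that field layout, assuming EmitThreeSame packs bits per the ARMv8 reference manual (ThreeSameWord is a hypothetical name, not the emitter's internal code):

#include <cstdint>

// AdvSIMD "three same" layout (illustration only):
//   31 30 29 28....24 23..22 21 20..16 15..11 10 9..5 4..0
//    0  Q  U  0 1 1 1 0  size  1   Rm   opcode  1  Rn   Rd
constexpr uint32_t ThreeSameWord(bool Q, bool U, uint32_t size, uint32_t opcode,
                                 uint32_t Rd, uint32_t Rn, uint32_t Rm) {
    return (static_cast<uint32_t>(Q) << 30) | (static_cast<uint32_t>(U) << 29) |
           (0b01110u << 24) | ((size & 3) << 22) | (1u << 21) | ((Rm & 31) << 16) |
           ((opcode & 31) << 11) | (1u << 10) | ((Rn & 31) << 5) | (Rd & 31);
}

// Example: ADD V0.8B, V1.8B, V2.8B (Q=0, U=0, size=00, opcode=0b10000).
static_assert(ThreeSameWord(false, false, 0b00, 0b10000, 0, 1, 2) == 0x0E228420,
              "matches the ARMv8 ADD (vector) encoding");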
@@ -2928,6 +2948,14 @@ void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
 void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
     Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn);
 }
+void ARM64FloatEmitter::SMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
+    ASSERT(!(IsDouble(Rd) && esize == D));
+    EmitThreeSame(0, static_cast<u32>(esize), 0b01101, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::UMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
+    ASSERT(!(IsDouble(Rd) && esize == D));
+    EmitThreeSame(1, static_cast<u32>(esize), 0b01101, Rd, Rn, Rm);
+}
 void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
     Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn);
 }
@@ -963,8 +963,13 @@ public:
     void FMOV(ARM64Reg Rd, uint8_t imm8);

     // Vector
+    void ADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+    void SUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
     void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
     void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+    void CMGT(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+    void CMHI(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+    void CMHS(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
     void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
     void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
     void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@@ -992,6 +997,8 @@ public:
     void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
     void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
     void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+    void SMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+    void UMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
     void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
     void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
     void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
@@ -1092,6 +1099,7 @@ private:
     void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
                            ARM64Reg Rm);
     void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+    void EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
     void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
     void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
     void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt,
@@ -180,13 +180,13 @@ OPCODE(CountLeadingZeros64, U64, U64
 //OPCODE(UnsignedSaturation, U32, U32, U8 )

 // Packed instructions
-//OPCODE(PackedAddU8, U32, U32, U32 )
+OPCODE(PackedAddU8, U32, U32, U32 )
 //OPCODE(PackedAddS8, U32, U32, U32 )
-//OPCODE(PackedSubU8, U32, U32, U32 )
+OPCODE(PackedSubU8, U32, U32, U32 )
 //OPCODE(PackedSubS8, U32, U32, U32 )
-//OPCODE(PackedAddU16, U32, U32, U32 )
+OPCODE(PackedAddU16, U32, U32, U32 )
 //OPCODE(PackedAddS16, U32, U32, U32 )
-//OPCODE(PackedSubU16, U32, U32, U32 )
+OPCODE(PackedSubU16, U32, U32, U32 )
 //OPCODE(PackedSubS16, U32, U32, U32 )
 //OPCODE(PackedAddSubU16, U32, U32, U32 )
 //OPCODE(PackedAddSubS16, U32, U32, U32 )
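Uncommenting these rows is what exposes PackedAddU8/PackedAddU16/PackedSubU8/PackedSubU16 to the IR layer: opcodes.inc is consumed as an X-macro, so every active OPCODE(name, return-type, argument-types...) row expands wherever the table is included. Roughly, the consumption pattern looks like this (a generic sketch, not necessarily dynarmic's exact table code):

// Define the row macro, include the table, undefine it again; each active
// OPCODE(...) row becomes one enumerator.
enum class Opcode {
#define OPCODE(name, ret_type, ...) name,
#include "opcodes.inc"
#undef OPCODE
    NumOpcodes,
};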
@@ -250,12 +250,12 @@ INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn
 //INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6
 //INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6
 //INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6
-//INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6
-//INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6
+INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6
+INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6
 //INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6
 //INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6
-//INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6
-//INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6
+INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6
+INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6

 // Parallel Add/Subtract (Saturating) instructions
 //INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6
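In the decoder table, each INST row pairs a handler with a 32-character bit pattern: '0'/'1' pin a bit, while letters (condition and register fields such as cccc, nnnn, dddd, mmmm) are wildcards that become operands. Enabling UADD8/UADD16/USUB8/USUB16 lets those patterns take part in matching. A minimal sketch of how such a pattern reduces to a mask/expect pair (illustrative only; the real decoder also extracts the lettered fields):

#include <cstdint>
#include <string_view>

struct BitPattern {
    uint32_t mask = 0;   // 1 where the pattern pins a bit
    uint32_t expect = 0; // required value of the pinned bits
};

constexpr BitPattern FromPattern(std::string_view pattern) {
    BitPattern p;
    for (char c : pattern) {
        p.mask <<= 1;
        p.expect <<= 1;
        if (c == '0' || c == '1') {
            p.mask |= 1;
            p.expect |= static_cast<uint32_t>(c == '1');
        }
    }
    return p;
}

// An instruction word `inst` matches a row when (inst & p.mask) == p.expect,
// e.g. FromPattern("cccc01100101nnnndddd11111001mmmm") matches any UADD8 encoding.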