backend/A64/a32_jitstate: Upstream changes from x64 backend

SachinVin 2020-01-19 18:04:34 +05:30
parent 968e8cddd3
commit c99ad2a4f3
9 changed files with 201 additions and 212 deletions

View File

@@ -264,12 +264,9 @@ void A32EmitA64::GenTerminalHandlers() {
// PC ends up in fast_dispatch_entry_reg, location_descriptor ends up in location_descriptor_reg.
const auto calculate_location_descriptor = [this, fast_dispatch_entry_reg, location_descriptor_reg] {
// This calculation has to match up with IREmitter::PushRSB
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
code.LDR(INDEX_UNSIGNED, DecodeReg(location_descriptor_reg), X28, offsetof(A32JitState, FPSCR_mode));
code.LDR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_et));
code.ORR(DecodeReg(location_descriptor_reg), DecodeReg(location_descriptor_reg), DecodeReg(code.ABI_SCRATCH1));
code.LDR(INDEX_UNSIGNED, DecodeReg(location_descriptor_reg), X28, offsetof(A32JitState, upper_location_descriptor));
code.LDR(INDEX_UNSIGNED, DecodeReg(fast_dispatch_entry_reg), X28, MJitStateReg(A32::Reg::PC));
code.ORR(location_descriptor_reg, location_descriptor_reg, fast_dispatch_entry_reg, ArithOption{fast_dispatch_entry_reg, ST_LSL, 32});
code.ORR(location_descriptor_reg, fast_dispatch_entry_reg, location_descriptor_reg, ArithOption{location_descriptor_reg, ST_LSL, 32});
};
FixupBranch fast_dispatch_cache_miss, rsb_cache_miss;
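A note on the flipped ORR operand order: the descriptor layout changed so that the PC now occupies the low word and upper_location_descriptor the high word, matching the new GetUniqueHash further down. A minimal scalar sketch of what the emitted LDR/LDR/ORR sequence computes (function name hypothetical):

    u64 MakeLocationDescriptor(u32 upper_location_descriptor, u32 pc) {
        // ORR Xd, Xpc, Xupper, LSL #32  computes  pc | (upper << 32)
        return static_cast<u64>(pc) | (static_cast<u64>(upper_location_descriptor) << 32);
    }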
@@ -430,7 +427,7 @@ void A32EmitA64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
ARM64Reg a = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
code.ANDI2R(a, a, 0xF0000000);
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_nzcv));
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, cpsr_nzcv));
}
void A32EmitA64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
@@ -445,17 +442,17 @@ void A32EmitA64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
ARM64Reg a = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.MOVI2R(a, u32(imm & 0xF0000000));
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_nzcv));
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, cpsr_nzcv));
code.MOVI2R(a, u8((imm & 0x08000000) != 0 ? 1 : 0));
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_q));
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, cpsr_q));
} else {
ARM64Reg a = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
ARM64Reg q = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.UBFX(q, a, 27, 1);
code.STR(INDEX_UNSIGNED, q, X28, offsetof(A32JitState, CPSR_q));
code.STR(INDEX_UNSIGNED, q, X28, offsetof(A32JitState, cpsr_q));
code.ANDI2R(a, a, 0xF0000000);
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_nzcv));
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, cpsr_nzcv));
}
// Since this is one of the only places where the ~sticky~
@@ -468,7 +465,7 @@ void A32EmitA64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitA64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_nzcv));
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, cpsr_nzcv));
code.UBFX(result, result, 31, 1);
ctx.reg_alloc.DefineValue(inst, result);
}
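Every flag getter in this file follows the pattern above: load the packed cpsr_nzcv word, then UBFX the wanted bit out. The scalar equivalent (helper name hypothetical):

    u32 GetCpsrFlag(u32 cpsr_nzcv, unsigned bit) {
        // UBFX result, nzcv, bit, 1; N is bit 31, Z bit 30, C bit 29, V bit 28
        return (cpsr_nzcv >> bit) & 1;
    }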
@@ -479,7 +476,7 @@ void A32EmitA64::EmitA32SetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv));
code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, cpsr_nzcv));
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code.ORRI2R(nzcv, nzcv, flag_mask);
@@ -491,12 +488,12 @@ void A32EmitA64::EmitA32SetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
code.BFI(nzcv, to_store, flag_bit, 1);
}
code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv));
code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, cpsr_nzcv));
}
void A32EmitA64::EmitA32GetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_nzcv));
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, cpsr_nzcv));
code.UBFX(result, result, 30, 1);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -507,7 +504,7 @@ void A32EmitA64::EmitA32SetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv));
code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, cpsr_nzcv));
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code.ORRI2R(nzcv, nzcv, flag_mask);
@@ -519,7 +516,7 @@ void A32EmitA64::EmitA32SetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
code.BFI(nzcv, to_store, flag_bit, 1);
}
code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv));
code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, cpsr_nzcv));
}
void A32EmitA64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
@@ -530,7 +527,7 @@ void A32EmitA64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitA64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_nzcv));
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, cpsr_nzcv));
code.UBFX(result, result, 29, 1);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -541,7 +538,7 @@ void A32EmitA64::EmitA32SetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv));
code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, cpsr_nzcv));
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code.ORRI2R(nzcv, nzcv, flag_mask);
@@ -552,12 +549,12 @@ void A32EmitA64::EmitA32SetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
Arm64Gen::ARM64Reg to_store = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
code.BFI(nzcv, to_store, flag_bit, 1);
}
code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv));
code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, cpsr_nzcv));
}
void A32EmitA64::EmitA32GetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
Arm64Gen::ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_nzcv));
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, cpsr_nzcv));
code.UBFX(result, result, 28, 1);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -568,7 +565,7 @@ void A32EmitA64::EmitA32SetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv));
code.LDR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, cpsr_nzcv));
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code.ORRI2R(nzcv, nzcv, flag_mask);
@@ -580,7 +577,7 @@ void A32EmitA64::EmitA32SetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
code.BFI(nzcv, to_store, flag_bit, 1);
}
code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, CPSR_nzcv));
code.STR(INDEX_UNSIGNED, nzcv, X28, offsetof(A32JitState, cpsr_nzcv));
}
void A32EmitA64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
@@ -588,21 +585,21 @@ void A32EmitA64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
ARM64Reg to_store = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
code.STR(INDEX_UNSIGNED, to_store, X28, offsetof(A32JitState, CPSR_q));
code.STR(INDEX_UNSIGNED, to_store, X28, offsetof(A32JitState, cpsr_q));
}
} else {
ARM64Reg to_store = ctx.reg_alloc.UseGpr(args[0]);
ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, scratch, X28, offsetof(A32JitState, CPSR_q));
code.LDR(INDEX_UNSIGNED, scratch, X28, offsetof(A32JitState, cpsr_q));
code.ORR(scratch, scratch, to_store);
code.STR(INDEX_UNSIGNED, scratch, X28, offsetof(A32JitState, CPSR_q));
code.STR(INDEX_UNSIGNED, scratch, X28, offsetof(A32JitState, cpsr_q));
}
}
void A32EmitA64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, CPSR_ge));
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, cpsr_ge));
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -615,7 +612,7 @@ void A32EmitA64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
} else {
to_store = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
}
code.STR(INDEX_UNSIGNED, to_store, X28, offsetof(A32JitState, CPSR_ge));
code.STR(INDEX_UNSIGNED, to_store, X28, offsetof(A32JitState, cpsr_ge));
}
void A32EmitA64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst) {
@@ -630,7 +627,7 @@ void A32EmitA64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
ge |= Common::Bit<16>(imm) ? 0x000000FF : 0;
code.MOVI2R(to_store, ge);
code.STR(INDEX_UNSIGNED, to_store, X28, offsetof(A32JitState, CPSR_ge));
code.STR(INDEX_UNSIGNED, to_store, X28, offsetof(A32JitState, cpsr_ge));
} else {
ARM64Reg a = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
@@ -642,7 +639,7 @@ void A32EmitA64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.ANDI2R(a, a, 0x01010101, scratch);
code.MOVI2R(scratch, 0xFF);
code.MUL(a, a, scratch);
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, CPSR_ge));
code.STR(INDEX_UNSIGNED, a, X28, offsetof(A32JitState, cpsr_ge));
}
}
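Both paths produce the byte-expanded GE representation: each of the four CPSR.GE bits becomes a whole 0xFF/0x00 byte of cpsr_ge, which is what the ANDI2R/MUL-by-0xFF sequence computes for the register case. A sketch mirroring the immediate path (helper name hypothetical):

    u32 ExpandGE(u32 compressed) {
        // GE[3:0] sit at bits 19-16 of the compressed (CPSR-layout) value.
        u32 ge = 0;
        ge |= Common::Bit<19>(compressed) ? 0xFF000000 : 0;
        ge |= Common::Bit<18>(compressed) ? 0x00FF0000 : 0;
        ge |= Common::Bit<17>(compressed) ? 0x0000FF00 : 0;
        ge |= Common::Bit<16>(compressed) ? 0x000000FF : 0;
        return ge;
    }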
@@ -650,6 +647,8 @@ void A32EmitA64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& arg = args[0];
const u32 upper_without_t = (ctx.Location().UniqueHash() >> 32) & 0xFFFFFFFE;
// Pseudocode:
// if (new_pc & 1) {
// new_pc &= 0xFFFFFFFE;
@@ -661,41 +660,28 @@ void A32EmitA64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
// We rely on the fact we disallow EFlag from changing within a block.
if (arg.IsImmediate()) {
ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
u32 new_pc = arg.GetImmediateU32();
u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC;
u32 et = 0;
et |= ctx.Location().EFlag() ? 2 : 0;
et |= Common::Bit<0>(new_pc) ? 1 : 0;
const u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC;
const u32 new_upper = upper_without_t | (Common::Bit<0>(new_pc) ? 1 : 0);
code.MOVI2R(scratch, new_pc & mask);
code.STR(INDEX_UNSIGNED, scratch, X28, MJitStateReg(A32::Reg::PC));
code.MOVI2R(scratch, et);
code.STR(INDEX_UNSIGNED, scratch, X28, offsetof(A32JitState, CPSR_et));
code.MOVI2R(scratch, new_upper);
code.STR(INDEX_UNSIGNED, scratch, X28, offsetof(A32JitState, upper_location_descriptor));
} else {
if (ctx.Location().EFlag()) {
ARM64Reg new_pc = DecodeReg(ctx.reg_alloc.UseScratchGpr(arg));
ARM64Reg mask = DecodeReg(ctx.reg_alloc.ScratchGpr());
ARM64Reg et = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg new_pc = DecodeReg(ctx.reg_alloc.UseScratchGpr(arg));
const ARM64Reg mask = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg new_upper = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.ANDI2R(mask, new_pc, 1);
code.ADDI2R(et, mask, 2);
code.STR(INDEX_UNSIGNED, et, X28, offsetof(A32JitState, CPSR_et));
code.LSL(mask, mask, 1);
code.SUB(mask, mask, 4); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code.AND(new_pc, new_pc, mask);
code.STR(INDEX_UNSIGNED, new_pc, X28, MJitStateReg(A32::Reg::PC));
} else {
ARM64Reg new_pc = DecodeReg(ctx.reg_alloc.UseScratchGpr(arg));
ARM64Reg mask = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.ANDI2R(mask, new_pc, 1);
code.STR(INDEX_UNSIGNED, mask, X28, offsetof(A32JitState, CPSR_et));
code.LSL(mask, mask, 1);
code.SUB(mask, mask, 4); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code.AND(new_pc, new_pc, mask);
code.STR(INDEX_UNSIGNED, new_pc, X28, MJitStateReg(A32::Reg::PC));
}
code.ANDI2R(mask, new_pc, 1);
code.MOVI2R(new_upper, upper_without_t);
code.ADD(new_upper, new_upper, mask);
code.STR(INDEX_UNSIGNED, new_upper, X28, offsetof(A32JitState, upper_location_descriptor));
code.LSL(mask, mask, 1);
code.SUBI2R(mask, mask, 4); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code.AND(new_pc, new_pc, mask);
code.STR(INDEX_UNSIGNED, new_pc, X28, MJitStateReg(A32::Reg::PC));
}
}
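The immediate and register paths now funnel into one masked PC store plus one upper-descriptor store. Restated as plain C++ (a sketch of the semantics the emitted code implements, not backend API):

    void BXWritePC(A32JitState& jit_state, u32 new_pc, u32 upper_without_t) {
        const u32 t = new_pc & 1;                      // bit 0 of the target selects Thumb
        const u32 mask = t ? 0xFFFFFFFE : 0xFFFFFFFC;  // Thumb clears bit 0, ARM clears bits 1:0
        jit_state.Reg[15] = new_pc & mask;             // the MJitStateReg(A32::Reg::PC) store
        jit_state.upper_location_descriptor = upper_without_t | t;
    }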
@@ -743,7 +729,7 @@ void A32EmitA64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
code.MOV(code.ABI_PARAM1, X28);
code.MRS(fpsr, FIELD_FPSR);
code.STR(INDEX_UNSIGNED, fpsr, X28, offsetof(A32JitState, guest_FPSR));
code.STR(INDEX_UNSIGNED, fpsr, X28, offsetof(A32JitState, guest_fpsr));
code.QuickCallFunction(&GetFpscrImpl);
}
@@ -761,13 +747,13 @@ void A32EmitA64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
code.QuickCallFunction(&SetFpscrImpl);
code.LDR(INDEX_UNSIGNED, fpsr, X28, offsetof(A32JitState, guest_FPSR));
code.LDR(INDEX_UNSIGNED, fpsr, X28, offsetof(A32JitState, guest_fpsr));
code._MSR(FIELD_FPSR, fpsr);
}
void A32EmitA64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, FPSCR_nzcv));
code.LDR(INDEX_UNSIGNED, result, X28, offsetof(A32JitState, fpsr_nzcv));
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -777,7 +763,7 @@ void A32EmitA64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
code.ANDI2R(value, value, 0xF0000000);
code.STR(INDEX_UNSIGNED, value, X28, offsetof(A32JitState, FPSCR_nzcv));
code.STR(INDEX_UNSIGNED, value, X28, offsetof(A32JitState, fpsr_nzcv));
}
void A32EmitA64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
@@ -1261,7 +1247,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc
code.MOVI2R(DecodeReg(code.ABI_PARAM2), A32::LocationDescriptor{terminal.next}.PC());
code.MOVI2R(DecodeReg(code.ABI_PARAM3), terminal.num_instructions);
code.STR(INDEX_UNSIGNED,DecodeReg(code.ABI_PARAM2), X28, MJitStateReg(A32::Reg::PC));
code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_PARAM2), X28, MJitStateReg(A32::Reg::PC));
code.SwitchFpscrOnExit();
Devirtualize<&A32::UserCallbacks::InterpreterFallback>(config.callbacks).EmitCall(code);
code.ReturnFromRunCode(true); // TODO: Check cycles
@@ -1271,19 +1257,25 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri
code.ReturnFromRunCode();
}
static u32 CalculateCpsr_et(const IR::LocationDescriptor& arg) {
const A32::LocationDescriptor desc{arg};
u32 et = 0;
et |= desc.EFlag() ? 2 : 0;
et |= desc.TFlag() ? 1 : 0;
return et;
void A32EmitA64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
auto get_upper = [](const IR::LocationDescriptor &desc) -> u32 {
return static_cast<u32>(desc.Value() >> 32);
};
const u32 old_upper = get_upper(old_location);
const u32 new_upper = [&] {
const u32 mask = ~u32(config.always_little_endian ? 0x2 : 0);
return get_upper(new_location) & mask;
}();
if (old_upper != new_upper) {
code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), new_upper);
code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, upper_location_descriptor));
}
}
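EmitSetUpperLocationDescriptor replaces the CalculateCpsr_et comparison above: instead of recomputing a two-bit E/T word, it diffs the full upper halves of the old and new location descriptors and emits the store only when they differ. A scalar sketch of the masking rule (assuming the raw 64-bit descriptor value is passed in):

    u32 NextUpperDescriptor(u64 new_location_value, bool always_little_endian) {
        u32 upper = static_cast<u32>(new_location_value >> 32);
        if (always_little_endian)
            upper &= ~u32(2);  // pin the E bit to 0 so E-only differences never force a store
        return upper;
    }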
void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), CalculateCpsr_et(terminal.next));
code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_et));
}
EmitSetUpperLocationDescriptor(terminal.next, initial_location);
code.LDR(INDEX_UNSIGNED, code.ABI_SCRATCH1, X28, offsetof(A32JitState, cycles_remaining));
code.CMP(code.ABI_SCRATCH1, ZR);
@@ -1311,10 +1303,7 @@ void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
}
void A32EmitA64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) {
if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
code.MOVI2R(DecodeReg(code.ABI_SCRATCH1), CalculateCpsr_et(terminal.next));
code.STR(INDEX_UNSIGNED, DecodeReg(code.ABI_SCRATCH1), X28, offsetof(A32JitState, CPSR_et));
}
EmitSetUpperLocationDescriptor(terminal.next, initial_location);
patch_information[terminal.next].jmp.emplace_back(code.GetCodePtr());
if (auto next_bb = GetBasicBlock(terminal.next)) {

View File

@@ -88,6 +88,7 @@ protected:
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Terminal instruction emitters
void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) override;

View File

@@ -9,6 +9,9 @@
#include <boost/icl/interval_set.hpp>
#include <fmt/format.h>
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/context.h>
#include "backend/A64/a32_emit_a64.h"
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
@@ -19,8 +22,6 @@
#include "common/common_types.h"
#include "common/llvm_disassemble.h"
#include "common/scope_exit.h"
#include "dynarmic/A32/a32.h"
#include "dynarmic/A32/context.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/location_descriptor.h"
@@ -40,8 +41,11 @@ static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*Lo
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig config)
: block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state}), emitter(block_of_code, config, jit),
config(config), jit_interface(jit) {}
: block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state})
, emitter(block_of_code, config, jit)
, config(std::move(config))
, jit_interface(jit)
{}
A32JitState jit_state;
BlockOfCode block_of_code;
@@ -118,7 +122,7 @@ private:
u32 pc = jit_state.Reg[15];
A32::PSR cpsr{jit_state.Cpsr()};
A32::FPSCR fpscr{jit_state.FPSCR_mode};
A32::FPSCR fpscr{jit_state.upper_location_descriptor};
A32::LocationDescriptor descriptor{pc, cpsr, fpscr};
return this_.GetBasicBlock(descriptor).entrypoint;
@@ -135,9 +139,7 @@ private:
PerformCacheInvalidation();
}
IR::Block ir_block =
A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); },
{config.define_unpredictable_behaviour, config.hook_hint_instructions});
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); }, {config.define_unpredictable_behaviour, config.hook_hint_instructions});
Optimization::A32GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32ConstantMemoryReads(ir_block, config.callbacks);
@@ -149,16 +151,14 @@ private:
}
};
Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, config)) {}
Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, std::move(config))) {}
Jit::~Jit() {}
Jit::~Jit() = default;
void Jit::Run() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT {
this->is_executing = false;
};
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = false;
@@ -228,9 +228,7 @@ struct Context::Impl {
size_t invalid_cache_generation;
};
Context::Context() : impl(std::make_unique<Context::Impl>()) {
impl->jit_state.ResetRSB();
}
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
Context::~Context() = default;
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
@@ -272,37 +270,14 @@ void Context::SetFpscr(std::uint32_t value) {
return impl->jit_state.SetFpscr(value);
}
void TransferJitState(A32JitState& dest, const A32JitState& src, bool reset_rsb) {
dest.CPSR_ge = src.CPSR_ge;
dest.CPSR_et = src.CPSR_et;
dest.CPSR_q = src.CPSR_q;
dest.CPSR_nzcv = src.CPSR_nzcv;
dest.CPSR_jaifm = src.CPSR_jaifm;
dest.Reg = src.Reg;
dest.ExtReg = src.ExtReg;
dest.guest_FPCR = src.guest_FPCR;
dest.guest_FPSR = src.guest_FPSR;
dest.FPSCR_IDC = src.FPSCR_IDC;
dest.FPSCR_UFC = src.FPSCR_UFC;
dest.FPSCR_mode = src.FPSCR_mode;
dest.FPSCR_nzcv = src.FPSCR_nzcv;
if (reset_rsb) {
dest.ResetRSB();
} else {
dest.rsb_ptr = src.rsb_ptr;
dest.rsb_location_descriptors = src.rsb_location_descriptors;
dest.rsb_codeptrs = src.rsb_codeptrs;
}
}
void Jit::SaveContext(Context& ctx) const {
TransferJitState(ctx.impl->jit_state, impl->jit_state, false);
ctx.impl->jit_state.TransferJitState(impl->jit_state, false);
ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
}
void Jit::LoadContext(const Context& ctx) {
bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
TransferJitState(impl->jit_state, ctx.impl->jit_state, reset_rsb);
impl->jit_state.TransferJitState(ctx.impl->jit_state, reset_rsb);
}
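With TransferJitState folded into A32JitState (see the header change below), SaveContext and LoadContext reduce to one call plus cache-generation bookkeeping. A minimal usage sketch (jit is assumed to be an existing Dynarmic::A32::Jit):

    Dynarmic::A32::Context ctx;
    jit.SaveContext(ctx);   // copies registers, flags and the RSB out of the live jit state
    // ... run other guest code, possibly invalidating the code cache ...
    jit.LoadContext(ctx);   // the RSB is reset if the cache generation changed in between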
std::string Jit::Disassemble(const IR::LocationDescriptor& descriptor) {

View File

@@ -19,19 +19,21 @@ namespace Dynarmic::BackendA64 {
*
* ARM CPSR flags
* --------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
* Q bit 27 Saturation flag
* J bit 24 Jazelle instruction set flag
* GE bits 16-19 Greater than or Equal flags
* E bit 9 Data Endianness flag
* A bit 8 Disable imprecise Aborts
* I bit 7 Disable IRQ interrupts
* F bit 6 Disable FIQ interrupts
* T bit 5 Thumb instruction set flag
* M bits 0-4 Processor Mode bits
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
* Q bit 27 Saturation flag
* IT[1:0] bits 25-26 If-Then execution state (lower 2 bits)
* J bit 24 Jazelle instruction set flag
* GE bits 16-19 Greater than or Equal flags
* IT[7:2] bits 10-15 If-Then execution state (upper 6 bits)
* E bit 9 Data Endianness flag
* A bit 8 Disable imprecise Aborts
* I bit 7 Disable IRQ interrupts
* F bit 6 Disable FIQ interrupts
* T bit 5 Thumb instruction set flag
* M bits 0-4 Processor Mode bits
*
* A64 flags
* -------------------
@@ -42,48 +44,55 @@ namespace Dynarmic::BackendA64 {
*/
u32 A32JitState::Cpsr() const {
ASSERT((CPSR_nzcv & ~0xF0000000) == 0);
ASSERT((CPSR_q & ~1) == 0);
ASSERT((CPSR_et & ~3) == 0);
ASSERT((CPSR_jaifm & ~0x010001DF) == 0);
DEBUG_ASSERT((cpsr_nzcv & ~0xF0000000) == 0);
DEBUG_ASSERT((cpsr_q & ~1) == 0);
DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);
u32 cpsr = 0;
// NZCV flags
cpsr |= CPSR_nzcv;
cpsr |= cpsr_nzcv;
// Q flag
cpsr |= CPSR_q ? 1 << 27 : 0;
cpsr |= cpsr_q ? 1 << 27 : 0;
// GE flags
cpsr |= Common::Bit<31>(CPSR_ge) ? 1 << 19 : 0;
cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0;
cpsr |= Common::Bit<15>(CPSR_ge) ? 1 << 17 : 0;
cpsr |= Common::Bit<7>(CPSR_ge) ? 1 << 16 : 0;
cpsr |= Common::Bit<31>(cpsr_ge) ? 1 << 19 : 0;
cpsr |= Common::Bit<23>(cpsr_ge) ? 1 << 18 : 0;
cpsr |= Common::Bit<15>(cpsr_ge) ? 1 << 17 : 0;
cpsr |= Common::Bit<7>(cpsr_ge) ? 1 << 16 : 0;
// E flag, T flag
cpsr |= Common::Bit<1>(CPSR_et) ? 1 << 9 : 0;
cpsr |= Common::Bit<0>(CPSR_et) ? 1 << 5 : 0;
cpsr |= Common::Bit<1>(upper_location_descriptor) ? 1 << 9 : 0;
cpsr |= Common::Bit<0>(upper_location_descriptor) ? 1 << 5 : 0;
// IT state
cpsr |= static_cast<u32>(upper_location_descriptor & 0b11111100'00000000);
cpsr |= static_cast<u32>(upper_location_descriptor & 0b00000011'00000000) << 17;
// Other flags
cpsr |= CPSR_jaifm;
cpsr |= cpsr_jaifm;
return cpsr;
}
void A32JitState::SetCpsr(u32 cpsr) {
// NZCV flags
CPSR_nzcv = cpsr & 0xF0000000;
cpsr_nzcv = cpsr & 0xF0000000;
// Q flag
CPSR_q = Common::Bit<27>(cpsr) ? 1 : 0;
cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;
// GE flags
CPSR_ge = 0;
CPSR_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
CPSR_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
CPSR_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
CPSR_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
cpsr_ge = 0;
cpsr_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
cpsr_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
cpsr_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
cpsr_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
upper_location_descriptor &= 0xFFFF0000;
// E flag, T flag
CPSR_et = 0;
CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0;
CPSR_et |= Common::Bit<5>(cpsr) ? 1 : 0;
upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0;
upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0;
// IT state
upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;
// Other flags
CPSR_jaifm = cpsr & 0x07F0FDDF;
cpsr_jaifm = cpsr & 0x010001DF;
}
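The IT-state lines are the subtle addition in this pair of functions: CPSR keeps IT[7:2] at bits 15-10 but IT[1:0] up at bits 26-25, while upper_location_descriptor stores all eight bits contiguously at bits 15-8. A round-trip sketch of just that remapping (helper names hypothetical):

    u32 ItStateToUpper(u32 cpsr) {
        u32 upper = 0;
        upper |= cpsr & 0b11111100'00000000;          // IT[7:2]: CPSR bits 15-10 stay in place
        upper |= (cpsr >> 17) & 0b00000011'00000000;  // IT[1:0]: CPSR bits 26-25 move to bits 9-8
        return upper;
    }

    u32 ItStateToCpsr(u32 upper) {
        u32 cpsr = 0;
        cpsr |= upper & 0b11111100'00000000;          // back to CPSR bits 15-10
        cpsr |= (upper & 0b00000011'00000000) << 17;  // back to CPSR bits 26-25
        return cpsr;
    }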
void A32JitState::ResetRSB() {
@@ -115,52 +124,49 @@ void A32JitState::ResetRSB() {
*
* VFP FPSCR mode bits
* -------------------
* DN bit 25 Default NaN
* FZ bit 24 Flush to Zero
* AHP bit 26 Alternate half-precision
* DN bit 25 Default NaN
* FZ bit 24 Flush to Zero
* RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
* Stride bits 20-21 Vector stride
* Len bits 16-18 Vector length
* Len bits 16-18 Vector length
*/
// NZCV; QC (ASMID only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
constexpr u32 FPSCR_MODE_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
u32 A32JitState::Fpscr() const {
ASSERT((FPSCR_mode & ~FPSCR_MODE_MASK) == 0);
ASSERT((FPSCR_nzcv & ~FPSCR_NZCV_MASK) == 0);
ASSERT((FPSCR_IDC & ~(1 << 7)) == 0);
ASSERT((FPSCR_UFC & ~(1 << 3)) == 0);
DEBUG_ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);
u32 FPSCR = FPSCR_mode | FPSCR_nzcv;
FPSCR |= (guest_FPSR & 0x1F);
FPSCR |= FPSCR_IDC;
FPSCR |= FPSCR_UFC;
const u32 fpcr_mode = static_cast<u32>(upper_location_descriptor) & FPSCR_MODE_MASK;
u32 FPSCR = fpcr_mode | fpsr_nzcv;
FPSCR |= (guest_fpsr & 0x1F);
FPSCR |= fpsr_exc;
return FPSCR;
}
void A32JitState::SetFpscr(u32 FPSCR) {
old_FPSCR = FPSCR;
FPSCR_mode = FPSCR & FPSCR_MODE_MASK;
FPSCR_nzcv = FPSCR & FPSCR_NZCV_MASK;
guest_FPCR = 0;
guest_FPSR = 0;
// Ensure that only upper half of upper_location_descriptor is used for FPSCR bits.
static_assert((FPSCR_MODE_MASK & 0xFFFF0000) == FPSCR_MODE_MASK);
upper_location_descriptor &= 0x0000FFFF;
upper_location_descriptor |= FPSCR & FPSCR_MODE_MASK;
fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK;
guest_fpcr = 0;
guest_fpsr = 0;
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
FPSCR_IDC = 0;
FPSCR_UFC = 0;
fpsr_exc = FPSCR & 0x9F;
// Mode Bits
guest_FPCR |= FPSCR & 0x07C09F00;
guest_fpcr |= FPSCR & 0x07C09F00;
// Exceptions
guest_FPSR |= FPSCR & 0x9F;
guest_fpsr |= FPSCR & 0x9F;
}
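After this change the guest FPSCR is reassembled from three homes: the mode bits cached in the upper half of upper_location_descriptor, the comparison flags in fpsr_nzcv, and exception flags split between the live guest_fpsr and the cumulative fpsr_exc. A composition sketch (assumes FPSCR_MODE_MASK covers only upper-half bits, as the static_assert above requires):

    u32 ComposeFpscr(const A32JitState& s) {
        u32 fpscr = s.upper_location_descriptor & FPSCR_MODE_MASK;  // AHP/DN/FZ/RMode/Stride/Len
        fpscr |= s.fpsr_nzcv;                            // FPSCR.NZCV comparison flags
        fpscr |= static_cast<u32>(s.guest_fpsr) & 0x1F;  // live IOC/DZC/OFC/UFC/IXC from host FPSR
        fpscr |= s.fpsr_exc;                             // cumulative exception flags (incl. IDC)
        return fpscr;
    }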
u64 A32JitState::GetUniqueHash() const {
return CPSR_et | FPSCR_mode | (static_cast<u64>(Reg[15]) << 32);
}
} // namespace Dynarmic::BackendX64
} // namespace Dynarmic::BackendA64

View File

@@ -27,12 +27,12 @@ struct A32JitState {
std::array<u32, 16> Reg{}; // Current register file.
// TODO: Mode-specific register sets unimplemented.
u32 CPSR_et = 0;
u32 CPSR_ge = 0;
u32 CPSR_q = 0;
u32 CPSR_nzcv = 0;
u32 CPSR_jaifm = 0;
u32 upper_location_descriptor = 0;
u32 cpsr_ge = 0;
u32 cpsr_q = 0;
u32 cpsr_nzcv = 0;
u32 cpsr_jaifm = 0;
u32 Cpsr() const;
void SetCpsr(u32 cpsr);
@@ -45,8 +45,8 @@
}
// For internal use (See: BlockOfCode::RunCode)
u64 guest_FPCR = 0;
u64 guest_FPSR = 0;
u64 guest_fpcr = 0;
u64 guest_fpsr = 0;
u64 save_host_FPCR = 0;
s64 cycles_to_run = 0;
s64 cycles_remaining = 0;
@@ -67,15 +67,39 @@ struct A32JitState {
u32 fpsr_exc = 0;
u32 fpsr_qc = 0; // Dummy value
u32 FPSCR_IDC = 0;
u32 FPSCR_UFC = 0;
u32 FPSCR_mode = 0;
u32 FPSCR_nzcv = 0;
u32 old_FPSCR = 0;
u32 fpsr_nzcv = 0;
u32 Fpscr() const;
void SetFpscr(u32 FPSCR);
u64 GetUniqueHash() const;
u64 GetUniqueHash() const noexcept {
return (static_cast<u64>(upper_location_descriptor) << 32) | (static_cast<u64>(Reg[15]));
}
void TransferJitState(const A32JitState& src, bool reset_rsb) {
Reg = src.Reg;
upper_location_descriptor = src.upper_location_descriptor;
cpsr_ge = src.cpsr_ge;
cpsr_q = src.cpsr_q;
cpsr_nzcv = src.cpsr_nzcv;
cpsr_jaifm = src.cpsr_jaifm;
ExtReg = src.ExtReg;
guest_fpcr = src.guest_fpcr;
guest_fpsr = src.guest_fpsr;
fpsr_exc = src.fpsr_exc;
fpsr_qc = src.fpsr_qc;
fpsr_nzcv = src.fpsr_nzcv;
exclusive_state = 0;
exclusive_address = 0;
if (reset_rsb) {
ResetRSB();
} else {
rsb_ptr = src.rsb_ptr;
rsb_location_descriptors = src.rsb_location_descriptors;
rsb_codeptrs = src.rsb_codeptrs;
}
}
};
#ifdef _MSC_VER

View File

@@ -228,17 +228,17 @@ void BlockOfCode::SwitchFpscrOnEntry() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPCR);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPSR);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
_MSR(Arm64Gen::FIELD_FPSR, ABI_SCRATCH1);
}
void BlockOfCode::SwitchFpscrOnExit() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPSR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_FPSR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);

View File

@@ -158,7 +158,7 @@ FixupBranch EmitA64::EmitCond(IR::Cond cond) {
FixupBranch label;
const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1;
code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_CPSR_nzcv);
code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_cpsr_nzcv);
code._MSR(FIELD_NZCV, cpsr);
switch (cond) {

View File

@@ -120,7 +120,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
then_ = bitsize == 64 ? then_ : DecodeReg(then_);
else_ = bitsize == 64 ? else_ : DecodeReg(else_);
code.LDR(INDEX_UNSIGNED, DecodeReg(nzcv), X28, code.GetJitStateInfo().offsetof_CPSR_nzcv);
code.LDR(INDEX_UNSIGNED, DecodeReg(nzcv), X28, code.GetJitStateInfo().offsetof_cpsr_nzcv);
// TODO: Flag optimization
code._MSR(FIELD_NZCV, nzcv);

View File

@@ -8,8 +8,6 @@
#include <cstddef>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
struct JitStateInfo {
@@ -18,15 +16,13 @@ struct JitStateInfo {
: offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
, offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
, offsetof_save_host_FPCR(offsetof(JitStateType, save_host_FPCR))
, offsetof_guest_FPCR(offsetof(JitStateType, guest_FPCR))
, offsetof_guest_FPSR(offsetof(JitStateType, guest_FPSR))
, offsetof_guest_fpcr(offsetof(JitStateType, guest_fpcr))
, offsetof_guest_fpsr(offsetof(JitStateType, guest_fpsr))
, offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
, rsb_ptr_mask(JitStateType::RSBPtrMask)
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
, offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
, offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
{}
@@ -34,15 +30,13 @@ struct JitStateInfo {
const size_t offsetof_cycles_remaining;
const size_t offsetof_cycles_to_run;
const size_t offsetof_save_host_FPCR;
const size_t offsetof_guest_FPCR;
const size_t offsetof_guest_FPSR;
const size_t offsetof_guest_fpcr;
const size_t offsetof_guest_fpsr;
const size_t offsetof_rsb_ptr;
const size_t rsb_ptr_mask;
const size_t offsetof_rsb_location_descriptors;
const size_t offsetof_rsb_codeptrs;
const size_t offsetof_CPSR_nzcv;
const size_t offsetof_FPSCR_IDC;
const size_t offsetof_FPSCR_UFC;
const size_t offsetof_cpsr_nzcv;
const size_t offsetof_fpsr_exc;
const size_t offsetof_fpsr_qc;
};
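JitStateInfo type-erases the concrete jit-state layout so the shared emitters can address fields by offset alone; the renames here simply track the new A32JitState member names. A sketch of the pattern (MyJitState is a hypothetical stand-in for A32JitState):

    struct MyJitState {
        static constexpr size_t RSBPtrMask = 0;
        s64 cycles_remaining = 0;
        s64 cycles_to_run = 0;
        u64 save_host_FPCR = 0;
        u64 guest_fpcr = 0, guest_fpsr = 0;
        u64 rsb_ptr = 0, rsb_location_descriptors = 0, rsb_codeptrs = 0;
        u32 cpsr_nzcv = 0, fpsr_exc = 0, fpsr_qc = 0;
    };

    const JitStateInfo jsi{MyJitState{}};
    // Emitters then address any such state uniformly, e.g.:
    //   code.LDR(INDEX_UNSIGNED, reg, X28, jsi.offsetof_cpsr_nzcv);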